# Pytorch Intro

- Finish 60 minute blitz from pytorch tutorials
- Implement character embedding rnn in pytorch, based on theano section of fastai lesson 6

### Tensor operations
- Basically all the numpy ndarray functions are supported
- Can be converted to and from numpy - the np array and the torch tensor are then linked (they share memory), so modifications to one affect the other

In [1]:
import torch

In [2]:
# create a 5x3 tensor of uniform random values in [0, 1), then show its contents and its shape
x = torch.rand(5, 3)
print(x)
print(x.size())


 0.6646  0.0988  0.7094
 0.9374  0.2605  0.5926
 0.8577  0.9507  0.8256
 0.5294  0.1175  0.5782
 0.1380  0.2708  0.8770
[torch.FloatTensor of size 5x3]

torch.Size([5, 3])


In [3]:
# expose the tensor as a numpy array; the array shares memory with x rather than copying it
b = x.numpy()
print(b)

[[ 0.66459256  0.09881883  0.70937097]
 [ 0.93740928  0.26048753  0.59258026]
 [ 0.85772985  0.95066363  0.82564968]
 [ 0.52941418  0.11749824  0.57819259]
 [ 0.13795471  0.27078828  0.87696105]]


In [4]:
# wrap the numpy array back into a torch tensor (again sharing the underlying memory)
x = torch.from_numpy(b)
print(x)


 0.6646  0.0988  0.7094
 0.9374  0.2605  0.5926
 0.8577  0.9507  0.8256
 0.5294  0.1175  0.5782
 0.1380  0.2708  0.8770
[torch.FloatTensor of size 5x3]



### Autograd - Variables and Functions
- Variable wraps a tensor, has `.data` for raw Tensor data, `.grad` for gradient (computed after calling `backward()` on an output Variable)

In [5]:
from torch.autograd import Variable
# requires_grad=True asks autograd to track operations on x so that backward() fills in x.grad
x = Variable(torch.ones(2, 2), requires_grad=True)
print(x)

Variable containing:
 1  1
 1  1
[torch.FloatTensor of size 2x2]



In [6]:
# y is the result of an operation on x, so it records the function that created it in grad_fn
y = x + 2
print(y.grad_fn)

<torch.autograd.function.AddConstantBackward object at 0x7f71327f17c8>


In [7]:
# z = 3 * y^2 elementwise; out reduces z to a single value (its mean) that backward() can start from
z = y * y * 3
out = z.mean()
print(z.grad_fn)

<torch.autograd.function.MulConstantBackward object at 0x7f71327f18b8>


In [8]:
# compute gradients: backpropagate from the scalar `out` so that x.grad holds d(out)/dx
# with out = mean(3 * (x + 2)^2) over 4 elements, d(out)/dx = 6 * (x + 2) / 4 = 4.5 when x is all ones
out.backward()
print(x.grad)

Variable containing:
 4.5000  4.5000
 4.5000  4.5000
[torch.FloatTensor of size 2x2]



### Neural Networks
- a neural network is a class (subclassing `nn.Module`) that defines a set of e.g. `nn.Linear` layers and a method `forward(input)` that takes a Variable input and outputs another Variable (`backward()` created by autograd)
- `net.parameters()` returns the learnable weights of the model

In [9]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small convolutional network mapping 1-channel 32x32 inputs to 10 outputs.

    Two conv -> ReLU -> 2x2 max-pool stages are followed by three fully
    connected layers; the last layer's outputs go through a softmax.
    """

    def __init__(self):
        super().__init__()
        # convolution stages: Conv2d(in_channels, out_channels, kernel_size)
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # fully connected head: 16 channels * 5 * 5 positions = 400 inputs
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the forward pass and return the softmax of the 10 output units."""
        # each stage: convolution, ReLU, then 2x2 max pooling
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2)
        # flatten everything except the batch dimension (view reshapes the tensor)
        flat = x.view(-1, self.num_flat_features(x))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return F.softmax(self.fc3(hidden))

    def num_flat_features(self, x):
        """Return the number of values per example, i.e. the product of all
        dimensions of `x` except the first (batch) one."""
        count = 1
        for axis in range(1, x.dim()):
            count *= x.size(axis)
        return count

# instantiate the network; printing a Module lists the layers registered on it
net = Net()
print(net)

Net (
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear (400 -> 120)
  (fc2): Linear (120 -> 84)
  (fc3): Linear (84 -> 10)
)


In [10]:
# parameters() yields the learnable tensors: a weight and a bias for each of the 5 layers (10 total)
params = list(net.parameters())
print(len(params))
print(params[0].size())  # conv1's .weight

10
torch.Size([6, 1, 5, 5])


In [11]:
import torch.optim as optim
# one random dummy example: 1 image, 1 channel, 32x32 pixels (note: `input` shadows the Python builtin)
input = Variable(torch.randn(1, 1, 32, 32))
# dummy target holding the values 1..10, one per network output
target = Variable(torch.arange(1, 11))
# plain stochastic gradient descent over all of the network's parameters
optimizer = optim.SGD(net.parameters(), lr=0.01)

# forward pass, then mean squared error between the 10 outputs and the target
# NOTE(review): Net.forward ends in a softmax, so every output lies in [0, 1] and can never
# reach targets of 1..10 - the loss has a high floor; this cell only demonstrates the API
out = net(input)
loss = nn.MSELoss()(out, target)
print(loss)

Variable containing:
 37.3953
[torch.FloatTensor of size 1]



In [19]:
# run 10 optimization steps on the single dummy example, then print the loss of the last step
for i in range(10):
    optimizer.zero_grad() # zero gradients so they can be recalculated
    out = net(input) # run forward pass
    loss = nn.MSELoss()(out, target) # calculate loss
    loss.backward() # get gradients with respect to loss
    optimizer.step() # step weights
print(loss)

Variable containing:
 36.6001
[torch.FloatTensor of size 1]



# Text Generation

### Preprocess Text

In [1]:
import numpy as np
import os

path = '/home/ubuntu/fastai-data/rvb/scripts.txt'

In [2]:
# load text and get character set
# the context manager closes the file handle as soon as the text has been read
with open(path) as text_file:
    text = text_file.read().lower()
print('corpus length:', len(text))

# sorted list of the distinct characters in the corpus; a character's position is its id
chars = sorted(set(text))
print('char list: '+''.join(chars))
vocab_size = len(chars)
print('total chars:', vocab_size)

# create character to index lookup table
char_indices = {character: index for index, character in enumerate(chars)}
# turn text into list of character indices
idx = [char_indices[c] for c in text]

corpus length: 1902635
char list: 
 !"#$%&'()*+,-./0123456789:;<>?[]abcdefghijklmnopqrstuvwxyz ¡¿àáäèéêíñóöú
total chars: 75


In [3]:
maxlen = 64 # predict next character from preceding n characters
# A window starting at i needs idx[i + maxlen] to exist so that its shifted-by-one target
# window is also maxlen long; the last valid start is therefore len(idx) - maxlen - 1.
# (Iterating one step further would produce a final target window that is one element short.)
num_windows = len(idx) - maxlen
# every sequence of length maxlen in the text ...
sequences = [idx[i:i + maxlen] for i in range(num_windows)]
# ... paired with the same window shifted one character to the right (the prediction targets)
next_chars = [idx[i + 1:i + maxlen + 1] for i in range(num_windows)]

In [4]:
def one_hot(sequence, num_classes=None):
    """One-hot encode a sequence of integer class ids.

    Args:
        sequence: sequence of integer ids (e.g. character indices).
        num_classes: width of the encoding; defaults to the module-level
            `vocab_size` when omitted.

    Returns:
        A float numpy array of shape (len(sequence), num_classes) in which
        row r is all zeros except for a 1 in column sequence[r].
    """
    if num_classes is None:
        num_classes = vocab_size
    ids = np.asarray(sequence, dtype=np.intp)
    categorical = np.zeros((len(ids), num_classes))
    # set one entry per row in a single vectorized assignment
    categorical[np.arange(len(ids)), ids] = 1
    return categorical

In [None]:
# convert to 2d numpy arrays: one_hot returns a (len(seq), vocab_size) matrix per window and
# np.concatenate joins them along axis 0, so each row is one character position and each
# column is one character of the vocabulary; the last two windows are dropped ([:-2])
# NOTE(review): with the ~1.9M-character corpus and maxlen = 64 each array would hold roughly
# 9e9 float64 values - presumably too large for memory; confirm, or encode one batch at a time
sequences = np.concatenate([one_hot(seq) for seq in sequences[:-2]])
next_chars = np.concatenate([one_hot(seq) for seq in next_chars[:-2]])

sequences.shape, next_chars.shape

### Create Pytorch Model

In [None]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

In [74]:
class CharRNN(nn.Module):
    """Single-step character-level recurrent network.

    Each call to `forward` consumes one (one-hot) character and the previous
    hidden state, and returns a probability distribution over the next
    character together with the new hidden state.

    Args:
        vocab_size: number of distinct characters (input and output width).
        hidden_size: width of the hidden state.
    """

    def __init__(self, vocab_size=vocab_size, hidden_size=256):
        super(CharRNN, self).__init__()

        self.hidden_size = hidden_size
        self.i2h = nn.Linear(vocab_size, hidden_size) # input to hidden layer
        # size the recurrent and output layers from hidden_size so that a
        # non-default hidden_size yields consistent layer shapes
        self.h2h = nn.Linear(hidden_size, hidden_size) # hidden to hidden layer
        self.h2o = nn.Linear(hidden_size, vocab_size) # hidden to output layer
        self.dropout = nn.Dropout(0.05)

    def forward(self, x, hidden):
        """Advance the network by one character.

        Returns:
            (output, hidden): `output` is the softmax over the vocabulary
            (probabilities, not log-probabilities); `hidden` is the updated
            hidden state to pass to the next call.
        """
        x = F.relu(self.i2h(x))
        # new state = projected input + (lightly dropped-out) transformed previous state
        hidden = x + self.dropout(F.relu(self.h2h(hidden)))
        output = F.softmax(self.h2o(hidden))
        return output, hidden

    def init_hidden(self):
        """Return an all-zero initial hidden state of shape (1, hidden_size)."""
        return Variable(torch.zeros(1, self.hidden_size))

# build the model with its default sizes; printing a Module lists the layers registered on it
rnn = CharRNN()
print(rnn)

CharRNN (
  (i2h): Linear (75 -> 256)
  (h2h): Linear (256 -> 256)
  (h2o): Linear (256 -> 75)
  (dropout): Dropout (p = 0.05)
)


In [None]:
# sampling function for model
def sample(rnn, start_text='\n', length=100):
    """Generate text from `rnn`, one character at a time.

    The last character of `start_text` seeds the model; each following
    character is drawn at random from the distribution the model outputs and
    is then fed back in as the next input.

    Args:
        rnn: model exposing `init_hidden()` and `rnn(x, hidden)` returning
            (probabilities over the vocabulary, new hidden state).
        start_text: non-empty seed string; its characters must be keys of
            `char_indices`.
        length: number of characters to generate.

    Returns:
        `start_text` followed by the `length` generated characters.

    Raises:
        ValueError: if `start_text` is empty.
    """
    if not start_text:
        raise ValueError('start_text must contain at least one character')

    # reverse lookup table: character id -> character
    index_to_char = {index: character for character, index in char_indices.items()}

    def encode(char_id):
        # one_hot yields a float64 numpy array; the model's layers hold float32 weights
        return Variable(torch.from_numpy(one_hot([char_id])).float())

    output_text = start_text
    prev_char = encode(char_indices[start_text[-1]])

    # switch dropout off while sampling, restoring the previous mode afterwards
    was_training = rnn.training
    rnn.eval()
    try:
        hidden = rnn.init_hidden()
        for _ in range(length):
            probabilities, hidden = rnn(prev_char, hidden)
            # draw the next character id from the predicted distribution
            char_id = int(torch.multinomial(probabilities.data.view(-1), 1)[0])
            output_text += index_to_char[char_id]
            prev_char = encode(char_id)
    finally:
        rnn.train(was_training)

    return output_text
        

In [None]:
# training function for one sequence
def train(rnn, input_sequence, target_sequence, loss_func=nn.NLLLoss(), lr=0.01):
    """Run one gradient-descent update of `rnn` on a single sequence.

    The sequence is fed to the model one step at a time, the per-step losses
    are summed, and the summed loss is backpropagated through all steps before
    the weights are updated. The average per-step loss and a generated sample
    are printed.

    Args:
        rnn: model exposing `init_hidden()` and `rnn(x, hidden)`.
        input_sequence: indexable sequence of per-step model inputs.
        target_sequence: indexable sequence of per-step targets, aligned with
            `input_sequence`.
        loss_func: callable (prediction, target) -> loss.
        lr: learning rate of the plain SGD update.

    Raises:
        ValueError: if `input_sequence` is empty.

    NOTE(review): nn.NLLLoss expects log-probabilities and class-index
    targets; confirm that the model output and `target_sequence` match that.
    """
    num_steps = len(input_sequence)
    if num_steps == 0:
        raise ValueError('input_sequence must contain at least one step')

    hidden = rnn.init_hidden() # initialize hidden state to 0s
    rnn.zero_grad() # reset gradients so they can be recalculated
    loss = 0

    for i in range(num_steps):
        # use the sequences passed in, not the notebook-level `input` / `target` globals
        output_char, hidden = rnn(input_sequence[i], hidden) # rnn predicts next character
        loss += loss_func(output_char, target_sequence[i]) # add loss for that character to total loss

    loss.backward() # recalculate gradients

    for p in rnn.parameters(): # update weights
        p.data.add_(-lr, p.grad.data)

    # loss.data[0] is kept in the Variable-era form used throughout this notebook
    print('loss: '+str(loss.data[0] / num_steps)) # print average per-character loss
    print('sample: '+sample(rnn)) # print sample sequence from the model being trained

In [None]:
num_epochs = 10

# wrap the one-hot numpy arrays as Variables (note: `input` shadows the Python builtin)
# NOTE(review): torch.from_numpy keeps the numpy dtype, and one_hot builds its arrays with
# np.zeros (float64), so these are presumably double tensors while nn.Linear weights default
# to float32 - confirm whether a .float() conversion is needed before feeding the model
input = Variable(torch.from_numpy(sequences))
target = Variable(torch.from_numpy(next_chars))