# In this notebook, I construct novel sentences using LSTM network


In [1]:
import numpy as np
from torch import nn
import torch.nn.functional as F
import torch

### loading the data:

In [2]:
with open('data/anna.txt') as file:
    text = file.read()
    
text[0:50]

'Chapter 1\n\n\nHappy families are all alike; every un'

### one-hot encoding of the text:

In [3]:
vocab = tuple(set(text))
int2char = dict(enumerate(vocab))
char2int = {char: i for i, char in int2char.items()}
encoded_text = np.array([char2int[char] for char in text])  # [95, 6, 13, 95, ...] corresponding to text

In [4]:
encoded_text[:50]

array([15, 14,  3, 30, 12, 18, 65, 38, 76, 57, 57, 57, 22,  3, 30, 30, 37,
       38, 58,  3, 44, 13, 41, 13, 18, 24, 38,  3, 65, 18, 38,  3, 41, 41,
       38,  3, 41, 13, 64, 18, 16, 38, 18,  0, 18, 65, 37, 38, 32, 66])

### one-hot encoding of the data:

In [5]:
def one_hot_encode(arr, n_labels):    #n_labels is the number of unique words (vocabulary), arr is the text
    
    one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)

    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.    #this creates the on-hot encoded vector
    
    one_hot = one_hot.reshape((*arr.shape, n_labels))

    return one_hot

### getting batches of the data:

In [6]:
def get_batches(encoded_text, n_seq, n_steps):
    batch_size = n_seq * n_steps
    n_batches = len(encoded_text) // batch_size
    encoded_text = encoded_text[:n_batches*batch_size]          # drop some data to get only full batches
    encoded_text = encoded_text.reshape((n_seq, -1))        
    
    for i in range(0, encoded_text.shape[1], n_steps):  # iterate on the columns to get the batches
        x = encoded_text[:, i:i+n_steps]
        y = np.zeros_like(x)
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], encoded_text[:, i+n_steps]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], encoded_text[:, 0]
        yield x, y                              # x, y are generators that you can use next() on

In [7]:
batches = get_batches(encoded_text, 10, 50)
x, y = next(batches)
print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])
print(x.shape, y.shape)

x
 [[15 14  3 30 12 18 65 38 76 57]
 [38  3 44 38 66 46 12 38 21 46]
 [ 0 13 66 60 57 57 42 25 18 24]
 [66 38 20 32 65 13 66 21 38 14]
 [38 13 12 38 13 24 51 38 24 13]
 [38 29 12 38 59  3 24 57 46 66]
 [14 18 66 38 68 46 44 18 38 58]
 [16 38  2 32 12 38 66 46 59 38]
 [12 38 13 24 66  9 12 60 38  4]
 [38 24  3 13 20 38 12 46 38 14]]

y
 [[14  3 30 12 18 65 38 76 57 57]
 [ 3 44 38 66 46 12 38 21 46 13]
 [13 66 60 57 57 42 25 18 24 51]
 [38 20 32 65 13 66 21 38 14 13]
 [13 12 38 13 24 51 38 24 13 65]
 [29 12 38 59  3 24 57 46 66 41]
 [18 66 38 68 46 44 18 38 58 46]
 [38  2 32 12 38 66 46 59 38 24]
 [38 13 24 66  9 12 60 38  4 14]
 [24  3 13 20 38 12 46 38 14 18]]
(10, 50) (10, 50)


## defining the network:

In [35]:
class CharRNN(nn.Module):
    
    def __init__(self, vocab, n_steps=100, n_hidden=256, n_layers=4, drop_prop=0.5, lr=0.001):
        # n_steps: number of elements in each sequence in each batch
        # n_hidden: number of output elements of the intermediate layers
        # n_layers: number of LSTM layers to use
        
        super().__init__()
        self.drop_prop = drop_prop
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_steps = n_steps
        self.lr = lr
        
        self.vocab = vocab
        self.int2char = dict(enumerate(self.vocab))
        self.char2int = {char: i for i, char in self.int2char.items()}
        
        self.lstm = nn.LSTM(len(self.vocab), n_hidden, n_layers, dropout=drop_prop, batch_first=True)
        self.dropout = nn.Dropout(drop_prop)
        self.fc = nn.Linear(n_hidden, len(self.vocab))
        
        self.init_weights()
        
        
        
    def forward(self, x, hc):
        x, (h, c) = self.lstm(x, hc)
        x = self.dropout(x)
        x = self.fc(x.view(x.shape[0]*x.shape[1], self.n_hidden))
        
        return x, (h, c)
    
    def predict(self, x, h=None, cuda=False):
        if cuda:
            self.cuda()
        else:
            self.cpu()
            
        x = np.array([[self.char2int[x]]])
        x = one_hot_encode(x, len(self.char2int))
        x = torch.from_numpy(x)
        
        if h == None:
            h = init_hidden(1)
            
        h = tuple([each.data for each in h])
        
        if cuda:
            x = x.cuda()
            
        out, h = self.forward(x, h)
        out = F.softmax(out).data
        
        out = out.numpy().squeeze()
        
        if cuda:
            out.cpu()
        
        return self.int2char[np.argmax(out)]
    
    def init_weights(self):
        self.fc.bias.data.fill_(0)
        self.fc.weight.data.uniform_(-1, 1)
        
        
    def init_hidden(self, n_seqs):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x n_seqs x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        return (weight.new(self.n_layers, n_seqs, self.n_hidden).zero_(),
                weight.new(self.n_layers, n_seqs, self.n_hidden).zero_())
        

In [14]:
if 'net' in locals():
    del net
    
net = CharRNN(vocab, n_hidden=512)
print(net)

CharRNN(
  (lstm): LSTM(83, 512, num_layers=4, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5)
  (fc): Linear(in_features=512, out_features=83, bias=True)
)


In [15]:
def train(net, encoded_text, epochs=10, n_seq=10, n_steps=50, lr=0.001, cuda=True, clip=5, print_every=10):
    
    net.train(True)
    opt = torch.optim.Adam(net.parameters(), lr)
    criterion = nn.CrossEntropyLoss()
    
    train_txt, val_txt = encoded_text[:-int(0.2*len(encoded_text))], encoded_text[-int(0.2*len(encoded_text)):]
    if cuda:
        net.cuda()
        
    counter = 0
    n_chars = len(net.vocab)
    for e in range(epochs):
        h = net.init_hidden(n_seq)
        for x, y in get_batches(encoded_text, n_seq, n_steps):
            counter +=1
            x = one_hot_encode(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            if cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
                
            h = tuple([each.data for each in h])
            
            net.zero_grad()
            output, h = net(inputs, h)
            loss = criterion(output, targets.view(n_seq*n_steps))
            loss.backward()
            
                   # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)

            opt.step()
            
            if counter % print_every == 0:
                
                # Get validation loss
                val_h = net.init_hidden(n_seq)
                val_losses = []
                for x, y in get_batches(val_txt, n_seq, n_steps):
                    # One-hot encode our data and make them Torch tensors
                    x = one_hot_encode(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)
                    
                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    val_h = tuple([each.data for each in val_h])
                    
                    inputs, targets = x, y
                    if cuda:
                        inputs, targets = inputs.cuda(), targets.cuda()

                    output, val_h = net.forward(inputs, val_h)
                    val_loss = criterion(output, targets.view(n_seq*n_steps))
                
                    val_losses.append(val_loss.item())
                
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

In [16]:
train(net, encoded_text, epochs=10, n_seq=128, n_steps=100)

Epoch: 1/10... Step: 10... Loss: 3.3681... Val Loss: 3.3758
Epoch: 1/10... Step: 20... Loss: 3.2802... Val Loss: 3.2862
Epoch: 1/10... Step: 30... Loss: 3.2550... Val Loss: 3.2658
Epoch: 1/10... Step: 40... Loss: 3.2312... Val Loss: 3.2467
Epoch: 1/10... Step: 50... Loss: 3.2093... Val Loss: 3.2275
Epoch: 1/10... Step: 60... Loss: 3.1494... Val Loss: 3.1705
Epoch: 1/10... Step: 70... Loss: 3.0485... Val Loss: 3.0506
Epoch: 1/10... Step: 80... Loss: 2.9688... Val Loss: 2.9597
Epoch: 1/10... Step: 90... Loss: 2.9170... Val Loss: 2.8981
Epoch: 1/10... Step: 100... Loss: 2.8296... Val Loss: 2.8334
Epoch: 1/10... Step: 110... Loss: 2.7464... Val Loss: 2.7424
Epoch: 1/10... Step: 120... Loss: 2.6562... Val Loss: 2.6452
Epoch: 1/10... Step: 130... Loss: 2.5755... Val Loss: 2.5752
Epoch: 1/10... Step: 140... Loss: 2.5268... Val Loss: 2.5232
Epoch: 1/10... Step: 150... Loss: 2.4682... Val Loss: 2.4816
Epoch: 2/10... Step: 160... Loss: 2.4284... Val Loss: 2.4525
Epoch: 2/10... Step: 170... Loss:

Epoch: 9/10... Step: 1350... Loss: 1.3277... Val Loss: 1.3211
Epoch: 9/10... Step: 1360... Loss: 1.3016... Val Loss: 1.3200
Epoch: 9/10... Step: 1370... Loss: 1.2869... Val Loss: 1.3185
Epoch: 9/10... Step: 1380... Loss: 1.3175... Val Loss: 1.3164
Epoch: 9/10... Step: 1390... Loss: 1.3051... Val Loss: 1.3168
Epoch: 10/10... Step: 1400... Loss: 1.2517... Val Loss: 1.3133
Epoch: 10/10... Step: 1410... Loss: 1.2434... Val Loss: 1.3107
Epoch: 10/10... Step: 1420... Loss: 1.2910... Val Loss: 1.3107
Epoch: 10/10... Step: 1430... Loss: 1.2985... Val Loss: 1.3112
Epoch: 10/10... Step: 1440... Loss: 1.2687... Val Loss: 1.3094
Epoch: 10/10... Step: 1450... Loss: 1.2806... Val Loss: 1.3056
Epoch: 10/10... Step: 1460... Loss: 1.2990... Val Loss: 1.3046
Epoch: 10/10... Step: 1470... Loss: 1.3028... Val Loss: 1.3032
Epoch: 10/10... Step: 1480... Loss: 1.2938... Val Loss: 1.3012
Epoch: 10/10... Step: 1490... Loss: 1.2919... Val Loss: 1.3023
Epoch: 10/10... Step: 1500... Loss: 1.2578... Val Loss: 1.29

### Save the model

In [17]:
model_name = '4_LSTMs.net'

checkpoint = {'n_hidden': net.n_hidden,
              'n_layers': net.n_layers,
              'state_dict': net.state_dict(),
              'tokens': net.vocab}

with open(model_name, 'wb') as f:
    torch.save(checkpoint, f)

### Inference / generating sentences

In [33]:
def generate(net, length, first_letters, cuda=True):
    
    if cuda:
        net.cuda()
    else:
        net.cpu()
        
    net.eval()
    
    chars = [char for char in first_letters]
    h = net.init_hidden(1)
    for i in range(length):
        char = net.predict(chars[-1], h=h, cuda=cuda)
        chars.append(char)
        
    return ''.join(chars)

In [34]:
generated = generate(net, 50, "I lov")
print(generated)



RuntimeError: Can't call numpy() on Variable that requires grad. Use var.detach().numpy() instead.