# Character-level LSTM in PyTorch

In [1]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

In [2]:
#Loading the data

with open('data/anna.txt') as f:
    text = f.read()
print(text[:100])

Chapter 1


Happy families are all alike; every unhappy family is unhappy in its own
way.

Everythin


In [3]:
text[:100]

'Chapter 1\n\n\nHappy families are all alike; every unhappy family is unhappy in its own\nway.\n\nEverythin'

### Tokenization

In [4]:
#Encoding the text - map each character to an integer and vice versa

chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {ch:ii for ii,ch in int2char.items()}

encoded = np.array([char2int[ch] for ch in text])

In [5]:
encoded[:100]

array([24, 47, 54, 11, 73, 41, 56, 35, 81, 63, 63, 63, 30, 54, 11, 11, 51,
       35,  0, 54, 68, 77, 69, 77, 41,  2, 35, 54, 56, 41, 35, 54, 69, 69,
       35, 54, 69, 77,  3, 41, 14, 35, 41, 79, 41, 56, 51, 35, 15, 10, 47,
       54, 11, 11, 51, 35,  0, 54, 68, 77, 69, 51, 35, 77,  2, 35, 15, 10,
       47, 54, 11, 11, 51, 35, 77, 10, 35, 77, 73,  2, 35, 46, 17, 10, 63,
       17, 54, 51, 36, 63, 63, 80, 79, 41, 56, 51, 73, 47, 77, 10])

### Pre-processing the data

In [2]:
#One-hot encoding
def one_hot_encode(arr,n_labels):
    
    #Initialize the encoded array
    one_hot = np.zeros((arr.size,n_labels),dtype=np.float32)
    
    #Fill the appropriate elements with ones
    one_hot[np.arange(one_hot.shape[0]),arr.flatten()] = 1
    
    #Reshape it to get to final one-hot encoded array
    one_hot = one_hot.reshape((*arr.shape,n_labels))
    
    return one_hot

In [11]:
#Testing the one-hot encoding function
test_seq = np.array([1,3,5,7])
test_one_hot = one_hot_encode(test_seq,8)
print(test_one_hot)

[[0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1.]]


### Making training mini-batches

In [3]:
def get_batches(arr, batch_size, seq_length):
    '''Create a generator that returns batches of size
       batch_size x seq_length from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       batch_size: Batch size, the number of sequences per batch
       seq_length: Number of encoded chars in a sequence
    '''
    
    #Get the number of batches we can make
    n_batches = arr.size // (batch_size*seq_length)
    
    #Total number of characters to keep from the array
    arr = arr[:batch_size*seq_length*n_batches]
    
    #Reshape into batch_size rows
    arr = arr.reshape(batch_size,-1)
    
    #Iterate over the batches using a window of size seq_length
    for n in range(0,arr.shape[1],seq_length):
        #The features
        x = arr[:,n:n+seq_length]
        
        #The targets
        y = np.zeros_like(x)
        try:
            y[:,:-1], y[:,-1] = x[:,1:], arr[:,n+seq_length]
        except IndexError:
            y[:,:-1], y[:,-1] = x[:,1:], arr[:,0]
        
        yield x,y

**Testing the implementation of above function**

In [9]:
batches = get_batches(encoded,8,50)
x, y = next(batches)

In [10]:
#Printing the first 10 items in a sequence
print('x\n',x[:10,:10])
print('y\n',y[:10,:10])

x
 [[24 47 54 11 73 41 56 35 81 63]
 [ 2 46 10 35 73 47 54 73 35 54]
 [41 10  8 35 46 56 35 54 35  0]
 [ 2 35 73 47 41 35 37 47 77 41]
 [35  2 54 17 35 47 41 56 35 73]
 [37 15  2  2 77 46 10 35 54 10]
 [35 52 10 10 54 35 47 54  8 35]
 [18 42 69 46 10  2  3 51 36 35]]
y
 [[47 54 11 73 41 56 35 81 63 63]
 [46 10 35 73 47 54 73 35 54 73]
 [10  8 35 46 56 35 54 35  0 46]
 [35 73 47 41 35 37 47 77 41  0]
 [ 2 54 17 35 47 41 56 35 73 41]
 [15  2  2 77 46 10 35 54 10  8]
 [52 10 10 54 35 47 54  8 35  2]
 [42 69 46 10  2  3 51 36 35 25]]


## Defining the model

In [4]:
#Checking if GPU is available
train_on_gpu = torch.cuda.is_available()
if train_on_gpu:
    print("Training on GPU...")
else:
    print("GPU not available..Training on CPU...")

Training on GPU...


In [5]:
class CharRNN(nn.Module):
    
    def __init__(self, tokens, n_hidden=256, n_layers=2, drop_prob=0.5, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        
        #Creating character dictionaries
        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch:ii for ii,ch in self.int2char.items()}
        
        #Defining the layers of the model
        self.lstm = nn.LSTM(input_size=len(self.chars), hidden_size=self.n_hidden, num_layers=self.n_layers,
                            batch_first=True, dropout=self.drop_prob)
        self.dropout = nn.Dropout(p=self.drop_prob)
        self.fc = nn.Linear(self.n_hidden,len(self.chars))
    
    def forward(self, x, hidden):
        '''
        Forward pass through the network.
        These inputs are x, and the hidden state/cell state `hidden`.
        '''
        
        #Get the outputs and new hidden state from the LSTM
        r_output, hidden = self.lstm(x,hidden)
        
        #Pass the output through dropout layer
        out = self.dropout(r_output)
        
        #Stack up LSTM outputs using view
        out = out.contiguous().view(-1,self.n_hidden)
        
        #Finally pass the output through the fully-connected layer
        out = self.fc(out)
        
        return out,hidden
    
    def init_hidden(self,batch_size):
        ''' Initializes the hidden state '''
        #Create two new tensors with sizes n_layers x batch_size x n_hidden 
        #initialized to zero, for hidden state and cell state for LSTM
        weight = next(self.parameters()).data
        
        if train_on_gpu:
            hidden = (weight.new(self.n_layers,batch_size,self.n_hidden).zero_().cuda(),
                      weight.new(self.n_layers,batch_size,self.n_hidden).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers,batch_size,self.n_hidden).zero_(),
                      weight.new(self.n_layers,batch_size,self.n_hidden).zero_())
            
        return hidden

## Defining the training function

In [17]:
def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001, clip=5, val_frac=0.1, print_every=10):
    '''
    Training a network
    Arguments
    ----------
    net: CharRNN Network
    data: text data to train our network
    epochs: Number of epochs to train
    batch_size: Number of mini-sequences per mini-batch, aka batch size
    seq_length: Number of character steps per mini-batch
    lr: learning rate
    clip: gradient clipping
    val_frac: Fraction of data to hold out for validation
    print_every: Number of steps for printing training and validation loss
    '''
    
    net.train()
    
    optimizer = torch.optim.Adam(net.parameters(),lr=lr)
    criterion = nn.CrossEntropyLoss()
    
    #Creating training and validation data
    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]
    
    if train_on_gpu:
        net.cuda()
    
    counter = 0
    n_chars = len(net.chars)
    for e in range(epochs):
        #Initialize the hidden state
        h = net.init_hidden(batch_size)
        
        for x,y in get_batches(data,batch_size,seq_length):
            counter += 1
            
            #One-hot encoding our data to feed into network
            x = one_hot_encode(x,n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            
            if train_on_gpu:
                inputs, targets = inputs.cuda(), targets.cuda()
            
            #Creating new variables for hidden state
            h = tuple([each.data for each in h])
            
            net.zero_grad()
            
            #Getting output from the model
            output, h = net(inputs,h)
            
            #Calculating the loss and performing backpropagation
            loss = criterion(output, targets.view(batch_size*seq_length).long())
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            optimizer.step()
            
            #Loss stats
            if counter%print_every == 0:
                #Get validation loss
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                
                for x,y in get_batches(val_data, batch_size, seq_length):
                    x = one_hot_encode(x,n_chars)
                    inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
                    
                    val_h = tuple([each.data for each in val_h])
                    
                    if train_on_gpu:
                        inputs, targets = inputs.cuda(), targets.cuda()
                    
                    output, val_h = net(inputs,val_h)
                    val_loss = criterion(output, targets.view(batch_size*seq_length).long())
                    val_losses.append(val_loss.item())
                
                net.train()
                
                print("Epoch: {}/{}...".format(e+1,epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

### Instantiating the model

In [18]:
#Setting the model hyperparameters
n_hidden = 512
n_layers = 2

net = CharRNN(chars,n_hidden,n_layers)
print(net)

CharRNN(
  (lstm): LSTM(83, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=83, bias=True)
)


In [19]:
#Setting the training hyperparameters
batch_size = 128
seq_length = 100
n_epochs = 20

train(net, encoded, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length)

Epoch: 1/20... Step: 10... Loss: 3.2670... Val Loss: 3.2083
Epoch: 1/20... Step: 20... Loss: 3.1565... Val Loss: 3.1376
Epoch: 1/20... Step: 30... Loss: 3.1504... Val Loss: 3.1250
Epoch: 1/20... Step: 40... Loss: 3.1168... Val Loss: 3.1195
Epoch: 1/20... Step: 50... Loss: 3.1422... Val Loss: 3.1180
Epoch: 1/20... Step: 60... Loss: 3.1212... Val Loss: 3.1161
Epoch: 1/20... Step: 70... Loss: 3.1088... Val Loss: 3.1156
Epoch: 1/20... Step: 80... Loss: 3.1233... Val Loss: 3.1132
Epoch: 1/20... Step: 90... Loss: 3.1252... Val Loss: 3.1088
Epoch: 1/20... Step: 100... Loss: 3.1095... Val Loss: 3.1000
Epoch: 1/20... Step: 110... Loss: 3.0934... Val Loss: 3.0786
Epoch: 1/20... Step: 120... Loss: 3.0337... Val Loss: 3.0314
Epoch: 1/20... Step: 130... Loss: 3.0193... Val Loss: 2.9601
Epoch: 2/20... Step: 140... Loss: 2.9199... Val Loss: 2.8837
Epoch: 2/20... Step: 150... Loss: 2.8279... Val Loss: 2.8028
Epoch: 2/20... Step: 160... Loss: 2.7573... Val Loss: 2.7042
Epoch: 2/20... Step: 170... Loss:

Epoch: 10/20... Step: 1350... Loss: 1.4321... Val Loss: 1.4530
Epoch: 10/20... Step: 1360... Loss: 1.4274... Val Loss: 1.4540
Epoch: 10/20... Step: 1370... Loss: 1.4284... Val Loss: 1.4506
Epoch: 10/20... Step: 1380... Loss: 1.4670... Val Loss: 1.4450
Epoch: 10/20... Step: 1390... Loss: 1.4625... Val Loss: 1.4472
Epoch: 11/20... Step: 1400... Loss: 1.4750... Val Loss: 1.4467
Epoch: 11/20... Step: 1410... Loss: 1.4805... Val Loss: 1.4431
Epoch: 11/20... Step: 1420... Loss: 1.4697... Val Loss: 1.4359
Epoch: 11/20... Step: 1430... Loss: 1.4367... Val Loss: 1.4379
Epoch: 11/20... Step: 1440... Loss: 1.4639... Val Loss: 1.4330
Epoch: 11/20... Step: 1450... Loss: 1.3875... Val Loss: 1.4320
Epoch: 11/20... Step: 1460... Loss: 1.4059... Val Loss: 1.4319
Epoch: 11/20... Step: 1470... Loss: 1.4028... Val Loss: 1.4290
Epoch: 11/20... Step: 1480... Loss: 1.4238... Val Loss: 1.4260
Epoch: 11/20... Step: 1490... Loss: 1.4180... Val Loss: 1.4273
Epoch: 11/20... Step: 1500... Loss: 1.4058... Val Loss:

Epoch: 20/20... Step: 2660... Loss: 1.2646... Val Loss: 1.3187
Epoch: 20/20... Step: 2670... Loss: 1.2773... Val Loss: 1.3161
Epoch: 20/20... Step: 2680... Loss: 1.2712... Val Loss: 1.3122
Epoch: 20/20... Step: 2690... Loss: 1.2551... Val Loss: 1.3127
Epoch: 20/20... Step: 2700... Loss: 1.2776... Val Loss: 1.3075
Epoch: 20/20... Step: 2710... Loss: 1.2353... Val Loss: 1.3109
Epoch: 20/20... Step: 2720... Loss: 1.2305... Val Loss: 1.3135
Epoch: 20/20... Step: 2730... Loss: 1.2332... Val Loss: 1.3097
Epoch: 20/20... Step: 2740... Loss: 1.2333... Val Loss: 1.3116
Epoch: 20/20... Step: 2750... Loss: 1.2425... Val Loss: 1.3108
Epoch: 20/20... Step: 2760... Loss: 1.2299... Val Loss: 1.3074
Epoch: 20/20... Step: 2770... Loss: 1.2702... Val Loss: 1.3058
Epoch: 20/20... Step: 2780... Loss: 1.2870... Val Loss: 1.3042


### Saving checkpoint

In [6]:
model_name = 'char_rnn_1.net'

checkpoint = {'n_hidden':net.n_hidden,
              'n_layers':net.n_layers,
              'state_dict':net.state_dict(),
              'tokens':net.chars}

with open(model_name,'wb') as f:
    torch.save(checkpoint,f)

NameError: name 'net' is not defined

## Making Predictions

In [7]:
def predict(net, char, h=None, top_k=None):
    '''
    Given a character, predict the next character
    Returns the predicted character and hidden state.
    '''
    
    #Tensor inputs
    x = np.array([[net.char2int[char]]])
    x = one_hot_encode(x,len(net.chars))
    inputs = torch.from_numpy(x)
    
    if train_on_gpu:
        inputs = inputs.cuda()
        
    #Detach hidden state from history
    h = tuple([each.data for each in h])
    
    #Get the output from the model
    out, h = net(inputs,h)
    
    #Get the character probabilities
    p = F.softmax(out, dim=1).data
    if train_on_gpu:
        p = p.cpu()
    
    #Get top characters
    if top_k is None:
        top_ch = np.arange(len(net.chars))
    else:
        p, top_ch = p.topk(top_k)
        top_ch = top_ch.numpy().squeeze()
        
    #Selecting the most likely character with some element of randomness
    p = p.numpy().squeeze()
    char = np.random.choice(top_ch,p=p/p.sum())
    
    return net.int2char[char], h

### Priming and generating text

In [8]:
def sample(net, size, prime="The", top_k=None):
    if train_on_gpu:
        net.cuda()
    else:
        net.cpu()
    
    net.eval()
    
    #Firstly run through the prime characters
    chars = [ch for ch in prime]
    h = net.init_hidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, top_k=top_k)
        
    chars.append(char)
    
    for ii in range(size):
        char,h = predict(net,chars[-1],h,top_k=top_k)
        chars.append(char)
    
    return ''.join(chars)

In [23]:
print(sample(net,1000,"Robin",top_k=5))

Robing, at the delicate stirr, and the dates was a church at the
subject, the man and the conversation with
hands with a change of weary at
the carry of his faily and song. A blindering were a man of hands of the
manship.

"I can, thank you to be in him? Tometime that I'm a considering that it's a man who
say one to be in the same arrish, with the soun, of we may tell you his
sense, that's too, we shall seem it?" he said, smiling, "but I was a condition, I didn't believe that you would not
speak
into the crubled friends in their pincers, they seem to start. But the children and woman
was new table, said in such a still. Well, and so, as though if you are such all the marshal. In horrible sister--her son's
chance with
the menthal of the
same, when you see her hat in
such thing,
but
the choose of them, we have been the plass, the fact with shame of hands."

Sveazhsky did not know what was in the softing, he showed them one of the condition of the money. And the
same time.



Chapter 12



### Loading a checkpoint

In [9]:
#Here we are loading a pre-trained model 'char_rnn_1.net' that trained on 20 epochs
with open('char_rnn_1.net','rb') as f:
    checkpoint = torch.load(f)

loaded_model = CharRNN(checkpoint['tokens'],checkpoint['n_hidden'],checkpoint['n_layers'])
loaded_model.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

In [10]:
print(sample(loaded_model,3000,top_k=5,prime="Sunil "))

Sunil and talk to her, as though he
sear his heed badd of all of the
chingles to her. And it was something, and
he had anyone to tell the mother's, took it.

Alexey Alexandrovitch had night as the same time to say to her, as had something hopely began to go
to be anything with husbands of him the same partical end
of hands and
carriage, having been
anger for the painted at the stands of her head. He wished, and
straight into her faility. The dress she saw that she was never to be the
subject of the servants of the same. Alexey Alexandrovitch answered the country of the party. Sergey
Ivanovitch, which was in her eyes oncome her second, taking her head, and showing her through harmly friend, and there was no chair
and saying a look on his began from that, always had been said, sitting on what was nothing and stood far and heard that she could not help his soul about that, had not taken him all. She went on.

"I am not ask her for the carriage," she said, smiling, "where so you wants to s

In [13]:
print(sample(loaded_model,5000,top_k=3,prime="Something"))

Something, too had to say thas it seemed, between, and the charming started oneserf, to the princess and her side, which should
be still to be said. At took him with a stand of hand,
and the same time they was something and talked to the station to the son was a care to talk
about him as they
could be a lattle overcome and her father. He was so much that he had thought that in his face as he saw all at once to
say about the cause, that he had not been a long while, and sating a carriage, and his state in a character of her heart.

"What, that you want to say it to tell me, and I will go over horror of that man that they were a present discassion, then they has so much a state on to see him."

"Yes, but I'll stand on anyone. We have been told you." And he went to stop things.

"What do you
thought they want to see her," said Levin. "I'm not
a children. And there was a committee."

And the sound of his study and still seeing the carriage,
and she would never have been a single smoother.


In [25]:
#Sample using loaded model
print(sample(loaded_model,2000,top_k=5,prime="He said "))

He said a moment, began walking out of that things, and straight in a smile. The strange there was a men in
his subject at the more and complexe of, though anyone, and taking off himself.
She were so saying that
it's so such as impossible to see them, brandy them to successful
into the crowd,
and there was to sudden the pasence, there are a look of
hatitures that he had as though shill and have been to difficult first the middle of a man with
her, and," she said to her,
said sometimes.

Stepan Arkadyevitch, and as he did not talk to herself, with which, he was said as she was going, but to be delighted it. He was so say that he had an helper on his horses and the man whether the courses of weaking of the
mother, he had nithtring what seemed in
a straight that had been a straight of
his chatter of his brother's
cases of three things of her country.
She was since
the corder that with simply carriage.

"I am the friend and son's so much more friend of all to her."

"Ah! why do you
mean? W

# END OF NOTEBOOK