In [2]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

In [3]:
with open('anna.txt', 'r') as f:
    text = f.read()

In [4]:
chars = tuple(set(text))

int2char = dict(enumerate(chars))

char2int = {ch: ii for ii, ch in int2char.items()}

encoded = np.array([char2int[ch] for ch in text])

In [5]:
def one_hot_encode(arr, n_labels):
    one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype = np.float32)
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    one_hot = one_hot.reshape((*arr.shape), n_labels)
    return one_hot

In [6]:
def get_batches(arr, n_seqs, n_steps):
    batch_size = n_seqs*n_steps
    n_batches = len(arr)//batch_size
    
    arr = arr[:batch_size*n_batches]
    arr = arr.reshape((n_seqs, -1))
    for n in range(0, arr.shape[1], n_steps):
        x = arr[:, n:n+n_steps]
        y = np.zeros_like(x)
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+n_steps]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y

In [7]:
batches = get_batches(encoded, 10, 50)
x, y = next(batches)

In [22]:
class CharRNN(nn.Module):
    def __init__(self, tokens, n_steps = 100, n_hidden = 256, n_layers = 2, drop_prob = 0.5, lr = 0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_steps = n_steps
        self.lr = lr
        
        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch: ii for ii, ch in self.int2char.items()}
        
        self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers, dropout = drop_prob, batch_first = True)
        
        self.dropout = nn.Dropout(drop_prob)
        
        self.fc = nn.Linear(n_hidden, len(self.chars))
        
        self.init_weights()
        
    def forward(self, x, hc):
        
        x, (h, c) = self.lstm(x, hc)
        
        x = self.dropout(x)
        
        x = x.view(x.size()[0]*x.size()[1], self.n_hidden)
        
        x = self.fc(x)
        
        return x, (h, c)
    
    def predict(self, char, h=None, cuda=False, top_k=None):
        if cuda:
            self.cuda()
        else:
            self.cpu()
        
        if h is None:
            h = self.init_hidden(1)
        
        x = np.array([[self.char2int[char]]])
        x = one_hot_encode(x, len(self.chars))
        
        inputs = torch.from_numpy(x)
        
        if cuda:
            inputs = inputs.cuda()
            
        h = tuple([each.data for each in h])
        out, h = self.forward(inputs, h)
        
        p = F.softmax(out, dim=1).data
        
        
        if cuda:
            p = p.cpu()
        
        if top_k is None:
            top_ch = np.arange(len(self.chars))
        else:
            p, top_ch = p.topk(top_k)
            top_ch = top_ch.numpy().squeeze()
        
        p = p.numpy().squeeze()

        char = np.random.choice(top_ch, p=p/p.sum())
            
        return self.int2char[char], h
    
    def init_weights(self):
        initrange = 0.1
        
        self.fc.bias.data.fill_(0)
        
        self.fc.weight.data.uniform_(-1, 1)
        
    def init_hidden(self, n_seqs):
        weight = next(self.parameters()).data
        return (weight.new(self.n_layers, n_seqs, self.n_hidden).zero_(),
                weight.new(self.n_layers, n_seqs, self.n_hidden).zero_())

In [9]:
def train(net, data, epochs = 10, n_seqs = 10, n_steps = 50, lr = 0.001, clip=5, val_frac=0.1, cuda = False, print_every = 10):
    
    net.train()
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    
    val_idx = int(len(data)*(1-val_frac))
    print(val_idx)
    data, val_data = data[:val_idx], data[val_idx:]
    
    if cuda:
        net.cuda()
        
    counter = 0
    
    n_chars = len(net.chars)
    
    for e in range(epochs):
        
        h = net.init_hidden(n_seqs)
        
        for x, y in get_batches(data, n_seqs, n_steps):
            
            counter += 1
            
            x = one_hot_encode(x, n_chars)
            
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            
            if cuda:
                inputs = inputs.cuda()
                targets = targets.cuda()
                
            h = tuple([each.data for each in h])
            
            net.zero_grad()
            
            output, h = net.forward(inputs, h)
            
            loss = criterion(output, targets.view(n_seqs*n_steps).type(torch.cuda.LongTensor))
            
            loss.backward()
            
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            
            opt.step()
            
            
            if counter%print_every == 0:
                val_h = net.init_hidden(n_seqs)
                val_losses = []
                
                for x, y in get_batches(val_data, n_seqs, n_steps):
                    
                    x = one_hot_encode(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)
                    
                    val_h = tuple([each.data for each in val_h])
                    
                    inputs, targets = x, y
                    
                    if cuda:
                        inputs, targets = inputs.cuda(), targets.cuda()
                    
                    output, val_h = net.forward(inputs, val_h)
                    
                    val_loss = criterion(output, targets.view(n_seqs*n_steps).type(torch.cuda.LongTensor))
                    
                    val_losses.append(val_loss.item())
                    
                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))
                    

In [25]:
if 'net' in locals():
    del net

In [26]:
net = CharRNN(chars, n_hidden=512, n_layers=2)

print(net)

CharRNN(
  (lstm): LSTM(83, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=83, bias=True)
)


In [27]:
n_seqs, n_steps = 100, 100

train(net, encoded, epochs=25, n_seqs=n_seqs, n_steps=n_steps, lr=0.001, cuda=True, print_every=10)

1786699
Epoch: 1/25... Step: 10... Loss: 3.3395... Val Loss: 3.3297
Epoch: 1/25... Step: 20... Loss: 3.2291... Val Loss: 3.2229
Epoch: 1/25... Step: 30... Loss: 3.1236... Val Loss: 3.0955
Epoch: 1/25... Step: 40... Loss: 2.9309... Val Loss: 2.9310
Epoch: 1/25... Step: 50... Loss: 2.7629... Val Loss: 2.7460
Epoch: 1/25... Step: 60... Loss: 2.6320... Val Loss: 2.6336
Epoch: 1/25... Step: 70... Loss: 2.5720... Val Loss: 2.5573
Epoch: 1/25... Step: 80... Loss: 2.4992... Val Loss: 2.5009
Epoch: 1/25... Step: 90... Loss: 2.4422... Val Loss: 2.4586
Epoch: 1/25... Step: 100... Loss: 2.4024... Val Loss: 2.4241
Epoch: 1/25... Step: 110... Loss: 2.3639... Val Loss: 2.3916
Epoch: 1/25... Step: 120... Loss: 2.3545... Val Loss: 2.3615
Epoch: 1/25... Step: 130... Loss: 2.3358... Val Loss: 2.3318
Epoch: 1/25... Step: 140... Loss: 2.2406... Val Loss: 2.3067
Epoch: 1/25... Step: 150... Loss: 2.2694... Val Loss: 2.2823
Epoch: 1/25... Step: 160... Loss: 2.2060... Val Loss: 2.2600
Epoch: 1/25... Step: 170.

Epoch: 8/25... Step: 1350... Loss: 1.3613... Val Loss: 1.5041
Epoch: 8/25... Step: 1360... Loss: 1.3957... Val Loss: 1.4984
Epoch: 8/25... Step: 1370... Loss: 1.3858... Val Loss: 1.4930
Epoch: 8/25... Step: 1380... Loss: 1.3741... Val Loss: 1.4989
Epoch: 8/25... Step: 1390... Loss: 1.3961... Val Loss: 1.5036
Epoch: 8/25... Step: 1400... Loss: 1.3761... Val Loss: 1.4988
Epoch: 8/25... Step: 1410... Loss: 1.3804... Val Loss: 1.4965
Epoch: 8/25... Step: 1420... Loss: 1.3876... Val Loss: 1.4954
Epoch: 9/25... Step: 1430... Loss: 1.4046... Val Loss: 1.4889
Epoch: 9/25... Step: 1440... Loss: 1.3457... Val Loss: 1.4873
Epoch: 9/25... Step: 1450... Loss: 1.3604... Val Loss: 1.4860
Epoch: 9/25... Step: 1460... Loss: 1.3820... Val Loss: 1.4802
Epoch: 9/25... Step: 1470... Loss: 1.3668... Val Loss: 1.4837
Epoch: 9/25... Step: 1480... Loss: 1.3557... Val Loss: 1.4812
Epoch: 9/25... Step: 1490... Loss: 1.3854... Val Loss: 1.4831
Epoch: 9/25... Step: 1500... Loss: 1.3990... Val Loss: 1.4723
Epoch: 9

Epoch: 15/25... Step: 2660... Loss: 1.2461... Val Loss: 1.3745
Epoch: 15/25... Step: 2670... Loss: 1.2626... Val Loss: 1.3778
Epoch: 16/25... Step: 2680... Loss: 1.2097... Val Loss: 1.3718
Epoch: 16/25... Step: 2690... Loss: 1.1998... Val Loss: 1.3763
Epoch: 16/25... Step: 2700... Loss: 1.2040... Val Loss: 1.3698
Epoch: 16/25... Step: 2710... Loss: 1.2180... Val Loss: 1.3725
Epoch: 16/25... Step: 2720... Loss: 1.2182... Val Loss: 1.3711
Epoch: 16/25... Step: 2730... Loss: 1.2211... Val Loss: 1.3716
Epoch: 16/25... Step: 2740... Loss: 1.2236... Val Loss: 1.3747
Epoch: 16/25... Step: 2750... Loss: 1.2327... Val Loss: 1.3766
Epoch: 16/25... Step: 2760... Loss: 1.2135... Val Loss: 1.3687
Epoch: 16/25... Step: 2770... Loss: 1.1591... Val Loss: 1.3756
Epoch: 16/25... Step: 2780... Loss: 1.2153... Val Loss: 1.3684
Epoch: 16/25... Step: 2790... Loss: 1.2006... Val Loss: 1.3708
Epoch: 16/25... Step: 2800... Loss: 1.2159... Val Loss: 1.3664
Epoch: 16/25... Step: 2810... Loss: 1.1844... Val Loss:

Epoch: 23/25... Step: 3970... Loss: 1.1109... Val Loss: 1.3426
Epoch: 23/25... Step: 3980... Loss: 1.1000... Val Loss: 1.3393
Epoch: 23/25... Step: 3990... Loss: 1.1773... Val Loss: 1.3373
Epoch: 23/25... Step: 4000... Loss: 1.1495... Val Loss: 1.3447
Epoch: 23/25... Step: 4010... Loss: 1.1545... Val Loss: 1.3402
Epoch: 23/25... Step: 4020... Loss: 1.1130... Val Loss: 1.3417
Epoch: 23/25... Step: 4030... Loss: 1.1317... Val Loss: 1.3400
Epoch: 23/25... Step: 4040... Loss: 1.1373... Val Loss: 1.3419
Epoch: 23/25... Step: 4050... Loss: 1.1283... Val Loss: 1.3434
Epoch: 23/25... Step: 4060... Loss: 1.1407... Val Loss: 1.3374
Epoch: 23/25... Step: 4070... Loss: 1.1412... Val Loss: 1.3511
Epoch: 23/25... Step: 4080... Loss: 1.1311... Val Loss: 1.3405
Epoch: 23/25... Step: 4090... Loss: 1.1337... Val Loss: 1.3411
Epoch: 24/25... Step: 4100... Loss: 1.1571... Val Loss: 1.3445
Epoch: 24/25... Step: 4110... Loss: 1.1055... Val Loss: 1.3329
Epoch: 24/25... Step: 4120... Loss: 1.1130... Val Loss:

In [28]:
model_name = 'lstm_epoch.net'

checkpoint = {'n_hidden': net.n_hidden,
              'n_layers': net.n_layers,
              'state_dict': net.state_dict(),
              'tokens': net.chars}

with open(model_name, 'wb') as f:
    torch.save(checkpoint, f)

In [29]:
def sample(net, size, prime='The', top_k=None, cuda=False):
        
    if cuda:
        net.cuda()
    else:
        net.cpu()

    net.eval()
    
    # First off, run through the prime characters
    chars = [ch for ch in prime]
    
    h = net.init_hidden(1)
    
    for ch in prime:
        char, h = net.predict(ch, h, cuda=cuda, top_k=top_k)

    chars.append(char)
    
    # Now pass in the previous character and get a new one
    for ii in range(size):
        
        char, h = net.predict(chars[-1], h, cuda=cuda, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

In [32]:
print(sample(net, 2000, prime='Patty', top_k=5, cuda=True))

Patty, and something
true, though simply he had the course, and he came into the figure
what his face was impossible to be carried.

And that he had been told his white the stopping seemed the cloth to go out
of the most since and her first thing. He could not help having become that
her husband had taken up to think of it, and was at a lady, and saw
that he had a little arrived, he could not see her, she had bound, and
had never seen and chestly it was that his highel peacor was at the first
time that her hands and that in his hostility with the carriages and finests
had tasted, she could not tell that show hardly of such the same thing that
it was time to be calm, and a man--and will to be the same somathing about
it, and he was not to blame from her state of the prince shook her forehead
and sort of theater. The satisfaction had come.

To him when she was to be said to a luduroth, and all that inseed one words
and a child, the crowd, and the searer had said in society.

The cried an