In [77]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from keras.utils import to_categorical

In [37]:
# Load the whole training corpus into memory as one string
# (text mode is open()'s default, so no explicit mode is passed).
with open("anna.txt") as f:
    text = f.read()

In [56]:
# Create the look-up dictionaries.
# The vocabulary is sorted so the character <-> integer mapping is the same
# on every run: iteration order of a set of strings can differ between
# interpreter sessions (string hash randomization), which would silently
# change the encoding after a kernel restart.
vocab = sorted(set(text))
int2char = dict(enumerate(vocab))
char2int = {ch: idx for idx, ch in int2char.items()}

# Encode the text as an array of integer character ids.
# int64 is requested explicitly so the ids can be used directly as class
# indices regardless of the platform's default integer width.
encoded = np.array([char2int[ch] for ch in text], dtype=np.int64)

In [60]:
def get_batches(arr, n_seqs, n_steps):
    '''Generate (inputs, targets) batches of shape n_seqs x n_steps from arr.

       Trailing elements of arr that cannot fill a complete batch are
       dropped. The targets are the inputs shifted one position to the
       left; the final target column of the very last batch wraps around
       to the first column of the reshaped array.

       Arguments
       ---------
       arr: Array you want to make batches from
       n_seqs: Batch size, the number of sequences per batch
       n_steps: Number of sequence steps per batch
    '''
    chars_per_batch = n_seqs * n_steps
    full_batches = len(arr) // chars_per_batch

    # Trim to a whole number of batches and lay the data out as n_seqs rows
    arr = arr[:full_batches * chars_per_batch].reshape((n_seqs, -1))
    row_len = arr.shape[1]

    for start in range(0, row_len, n_steps):
        stop = start + n_steps

        # The features
        x = arr[:, start:stop]

        # The targets: every column but the last comes from x itself
        y = np.zeros_like(x)
        y[:, :-1] = x[:, 1:]

        # The last target column is the column right after this window,
        # or column 0 once the window reaches the end of the rows
        next_col = stop if stop < row_len else 0
        y[:, -1] = arr[:, next_col]

        yield x, y

In [146]:
class CharLSTM(nn.Module):
    '''Character-level sequence model: stacked LSTM -> dropout -> linear.

       Arguments
       ---------
       hidden_dim: Number of features in the LSTM hidden state
       n_layers: Number of stacked LSTM layers
       input_dim: Number of features in each input vector
       output_dim: Number of output scores produced per time step
       drop_p: Dropout probability, applied between LSTM layers and to the
               LSTM output before the linear layer
    '''
    def __init__(self, hidden_dim, n_layers, input_dim, output_dim, drop_p):
        super(CharLSTM, self).__init__()

        self.n_layers = n_layers
        self.hidden_dim = hidden_dim

        # Honour the n_layers argument (it used to be hard-coded to 2) so the
        # LSTM depth always matches the state shapes built by init_hidden().
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim,
                            num_layers=n_layers, batch_first=True, dropout=drop_p)

        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

        self.dropout = nn.Dropout(p=drop_p)

    def forward(self, x):
        '''Compute per-time-step output scores for a batch of sequences.

           x is passed to a batch_first LSTM, i.e. it is laid out as
           (batch, seq_len, input_dim). No initial state is supplied to the
           LSTM and the final hidden/cell states are discarded.

           Returns a 2-D tensor of shape (batch * seq_len, output_dim).
        '''
        x, _ = self.lstm(x)

        x = self.dropout(x)

        # Merge the batch and time dimensions. reshape() is used instead of
        # view() because the batch_first LSTM output is not guaranteed to be
        # contiguous in memory, in which case view() raises.
        x = x.reshape(-1, self.hidden_dim)

        return self.fc(x)

    def init_hidden(self, batch_size):
        '''Return a zeroed (h_0, c_0) pair for the LSTM.

           Each tensor has shape (n_layers, batch_size, hidden_dim) and is
           created with the same dtype and device as the model parameters.
        '''
        weight = next(self.parameters())
        state_shape = (self.n_layers, batch_size, self.hidden_dim)
        return (weight.new_zeros(state_shape),
                weight.new_zeros(state_shape))

In [147]:
# Instantiate the model: the input width and the number of output scores
# are both taken from the size of the character look-up tables.
net = CharLSTM(
    hidden_dim=512,
    n_layers=2,
    input_dim=len(char2int),
    output_dim=len(int2char),
    drop_p=0.5,
)

In [149]:
# Training hyper-parameters, named once so the batch generator and the loss
# computation cannot drift apart.
N_EPOCHS = 3
N_SEQS = 128       # sequences per batch
N_STEPS = 100      # characters per sequence
PRINT_EVERY = 10   # report the loss every PRINT_EVERY batches

# Put the model in training mode (enables dropout)
net.train()

opt = optim.Adam(net.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

n_chars = len(char2int)

# Number of batches processed so far, across all epochs
counter = 0

for epoch in range(N_EPOCHS):

    for x, y in get_batches(encoded, n_seqs=N_SEQS, n_steps=N_STEPS):
        counter += 1

        # One-hot encode the integer character ids along a new last axis
        x = to_categorical(x, num_classes=n_chars)

        inputs, targets = torch.from_numpy(x), torch.from_numpy(y)

        opt.zero_grad()

        # net returns one row of scores per (sequence, step) pair
        output = net(inputs)

        # Flatten the targets to line up with those rows; -1 infers the
        # length instead of hard-coding the batch geometry.
        loss = criterion(output, targets.reshape(-1))

        loss.backward()

        opt.step()

        if counter % PRINT_EVERY == 0:
            print("Loss: ", loss.item())

Loss:  4.416091442108154
Loss:  4.376684665679932
Loss:  4.321014404296875
Loss:  4.181906700134277
Loss:  3.660640239715576
Loss:  3.5493686199188232
Loss:  3.3289577960968018
Loss:  3.272291660308838
Loss:  3.2953405380249023
Loss:  3.303837537765503
Loss:  3.2842986583709717
Loss:  3.261878252029419
Loss:  3.258011817932129


KeyboardInterrupt: 