In [1]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim

In [2]:
train_on_gpu = torch.cuda.is_available()
print(train_on_gpu)
with open('assets/jane-eyre.txt', 'r') as f:
    text = f.read()

text[:100]

True


'Jane Eyre\n\nthat parent of crime—an insult to piety, that regent of God\non earth. I would suggest to '

In [3]:
text = text.lower()
chars = tuple(set(text))
int_to_char = dict(enumerate(chars))
char_to_int = {ch: i for i, ch in int_to_char.items()}
print(chars, len(chars))
encoded = np.array([char_to_int[char] for char in text])
encoded[:100]

('8', 't', '0', 'x', ';', 'q', '5', 'p', '(', 'm', 'z', '!', ')', '7', ',', '.', ':', '’', '6', 'l', '&', 'd', 'w', 'u', 'e', '\n', 'a', '3', 'o', 'v', 'b', ' ', '\x0c', 'r', '‘', '—', 'f', 's', '-', '4', '1', 'j', 'h', '2', 'y', 'i', '?', '9', 'n', 'c', 'g', 'k') 52


array([41, 26, 48, 24, 31, 24, 44, 33, 24, 25, 25,  1, 42, 26,  1, 31,  7,
       26, 33, 24, 48,  1, 31, 28, 36, 31, 49, 33, 45,  9, 24, 35, 26, 48,
       31, 45, 48, 37, 23, 19,  1, 31,  1, 28, 31,  7, 45, 24,  1, 44, 14,
       31,  1, 42, 26,  1, 31, 33, 24, 50, 24, 48,  1, 31, 28, 36, 31, 50,
       28, 21, 25, 28, 48, 31, 24, 26, 33,  1, 42, 15, 31, 45, 31, 22, 28,
       23, 19, 21, 31, 37, 23, 50, 50, 24, 37,  1, 31,  1, 28, 31])

In [4]:
def one_hot_vector(arr, n_labels):
    one_hot = np.zeros((arr.size, n_labels), dtype=np.float32)
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1

    one_hot = one_hot.reshape((*arr.shape,n_labels))

    return one_hot

In [5]:
one_hot_vector(encoded[:3], len(chars))

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        1., 0., 0., 0.]], dtype=float32)

In [6]:
def get_batches(arr, seq_len, batch_size):
    batch_total = seq_len * batch_size
    total_batches = len(arr)//batch_total
    arr = arr[:batch_total*total_batches]
    arr = arr.reshape((batch_size, -1))
    for n in range(0, arr.shape[1], seq_len):
        x = arr[:, n:n+seq_len]
        y = np.zeros_like(x)
        try:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_len]
        except IndexError:
            y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
        yield x, y 

In [7]:
batches = get_batches(encoded, 10, 50)
x, y = next(batches)

print("X: ", x[:10])
print("Y: ", y[:10])

X:  [[41 26 48 24 31 24 44 33 24 25]
 [33 24  7 24 26  1 24 21 31  1]
 [31 28 30 19 45 50 24 21 31  1]
 [ 3  7 33 24 37 37 45 29 24 31]
 [51 14 25 22 42 45 49 42 31 42]
 [ 4 31 26 31 30 33 45 24 36 31]
 [14 31 26 48 21 31  1 28 31 22]
 [31 36 28 33 31  1 42 24 31  1]
 [31 49 33 28 22 21 24 21 31 37]
 [ 9 30 24 33 24 21 31 21 24 37]]
Y:  [[26 48 24 31 24 44 33 24 25 25]
 [24  7 24 26  1 24 21 31  1 42]
 [28 30 19 45 50 24 21 31  1 28]
 [ 7 33 24 37 37 45 29 24 31 37]
 [14 25 22 42 45 49 42 31 42 26]
 [31 26 31 30 33 45 24 36 31 24]
 [31 26 48 21 31  1 28 31 22 42]
 [36 28 33 31  1 42 24 31  1 33]
 [49 33 28 22 21 24 21 31 37 49]
 [30 24 33 24 21 31 21 24 37 49]]


In [8]:
class ModelLSTM(nn.Module):
    def __init__(self, unique_chars, n_hidden, n_layers, drop_prob = 0.5, lr = 0.005):
        super().__init__()

        self.n_hidden = n_hidden
        self.n_layers = n_layers
        self.drop_prob = drop_prob
        self.lr = lr

        self.chars = unique_chars
        self.int_to_char = dict(enumerate(self.chars))
        self.char_to_int = {ch:ii for ii, ch in int_to_char.items()}

        self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers, batch_first = True, dropout = drop_prob)

        self.dropout = nn.Dropout(p=self.drop_prob)
        self.fc = nn.Linear(n_hidden, len(self.chars))

    def forward(self, x, hidden):

        r_out, hidden = self.lstm(x, hidden)
        out = self.dropout(r_out)

        out = out.contiguous().view(-1, self.n_hidden)

        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data

        hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        
        return hidden

In [9]:
def train(model, data, checkpoint, epoches = 50, batch_size = 130, seq_len = 105, lr = 0.005, print_step = 50, clip = 5, data_frac = 0.2):
    model.train()

    optimizer = optim.Adam(model.parameters(), lr = lr)
    criterion = nn.CrossEntropyLoss()

    val_idx = int(len(data) * (1- data_frac))
    data, val_data = data[:val_idx], data[val_idx: ]

    model.cuda()
    n_chars = len(model.chars)
    counter = 0
    val_loss_min = np.Inf
    for epoch in range(epoches + 1):
        hidden = model.init_hidden(batch_size)

        for x, y in get_batches(data, seq_len, batch_size):
            counter += 1
            x = one_hot_vector(x, n_chars)
            x, y = torch.from_numpy(x).cuda(), torch.from_numpy(y).cuda()

            hidden = tuple([h.data for h in hidden])
            optimizer.zero_grad()
            output, hidden = model(x, hidden)
            loss = criterion(output, y.view(batch_size*seq_len).long())
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), clip)
            optimizer.step()

            if counter%print_step == 0:
                model.eval()
                val_hidden = model.init_hidden(batch_size)
                val_loss = []

                for x, y in get_batches(val_data, seq_len, batch_size):
                    x = one_hot_vector(x, n_chars)
                    x, y = torch.from_numpy(x).cuda(), torch.from_numpy(y).cuda()

                    hidden = tuple([h.data for h in hidden])
                    output, val_hidden = model(x, hidden)
                    v_loss = criterion(output, y.view(batch_size * seq_len).long())
                    val_loss.append(v_loss.item())
                val_loss_mean = np.mean(val_loss)
                if val_loss_min > val_loss_mean:
                    print("SAving the model")
                    checkpoint['state_dict'] = model.state_dict()
                    val_loss_min = val_loss_mean
                model.train()
                print("Epoch: {}/{}\tstep: {}\tloss: {:.6f}\tval_loss: {:.6f}".format(epoch, epoches, counter, loss, val_loss_mean))
    return checkpoint

In [10]:
n_hidden = 500
n_layers = 2

model = ModelLSTM(chars, n_hidden, n_layers)
print(model)

ModelLSTM(
  (lstm): LSTM(52, 500, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=500, out_features=52, bias=True)
)


In [11]:
batch_size = 130
seq_len = 105
epoches = 50
checkpoint = {}
checkpoint = train(model, encoded, checkpoint, epoches, batch_size, seq_len)

SAving the model
Epoch: 0/50	step: 50	loss: 3.142976	val_loss: 3.131405
SAving the model
Epoch: 1/50	step: 100	loss: 3.051948	val_loss: 3.045013
SAving the model
Epoch: 2/50	step: 150	loss: 2.847439	val_loss: 2.841233
SAving the model
Epoch: 3/50	step: 200	loss: 2.478062	val_loss: 2.426440
SAving the model
Epoch: 4/50	step: 250	loss: 2.299975	val_loss: 2.242040
SAving the model
Epoch: 4/50	step: 300	loss: 2.208694	val_loss: 2.116657
SAving the model
Epoch: 5/50	step: 350	loss: 2.109591	val_loss: 2.024257
SAving the model
Epoch: 6/50	step: 400	loss: 2.028883	val_loss: 1.951760
SAving the model
Epoch: 7/50	step: 450	loss: 1.957888	val_loss: 1.893900
SAving the model
Epoch: 8/50	step: 500	loss: 1.921939	val_loss: 1.845848
SAving the model
Epoch: 9/50	step: 550	loss: 1.882979	val_loss: 1.804886
SAving the model
Epoch: 9/50	step: 600	loss: 1.878345	val_loss: 1.763526
SAving the model
Epoch: 10/50	step: 650	loss: 1.813301	val_loss: 1.728702
SAving the model
Epoch: 11/50	step: 700	loss: 1.763

In [12]:
model_name = 'lstm_char.net'

checkpoint['n_hidden'] = model.n_hidden
checkpoint['n_layers'] = model.n_layers
checkpoint['unique_chars'] = model.chars
with open(model_name, 'wb') as f:
    torch.save(checkpoint, f)

In [13]:
import torch.nn.functional as F

In [14]:
def predict(model, char, hidden = None, top_k = None):
    x = np.array([[model.char_to_int[char]]])
    x = one_hot_vector(x, len(model.chars))
    inputs = torch.from_numpy(x).cuda()

    hidden = tuple([h for h in hidden])
    out, hidden = model(inputs, hidden)

    p= F.softmax(out, dim = 1).data
    p = p.cpu()

    if top_k is None:
        top_ch = np.arange(len(model.chars))
    else:
        p, top_ch = p.topk(top_k)
        top_ch = top_ch.numpy().squeeze()
    p = p.numpy().squeeze()
    char = np.random.choice(top_ch, p = p/p.sum())

    return model.int_to_char[char], hidden

In [15]:
def sample(model, size, prime="it was", top_k = None):
    model.cuda()
    model.eval()
    chars = [ch for ch in prime]
    hidden = model.init_hidden(1)
    for ch in prime:
        char, hidden = predict(model, ch, hidden, top_k=top_k)
    chars.append(char)

    for ii in range(size):
        char, hidden = predict(model, chars[-1], hidden, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

In [16]:
with open('lstm_char.net', 'rb') as f:
    checkpoint = torch.load(f)

loaded = ModelLSTM(checkpoint['unique_chars'], n_hidden=checkpoint['n_hidden'], n_layers=checkpoint['n_layers'])
loaded.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

In [18]:
print(sample(loaded, 2000, top_k=5, prime="that guy loves her"))

that guy loves here and second
his cheek and class; i wish to say i see that they were at all rochester, that half thought to assume to see, seemed to
him in silence: the character of the subject i was sure, as i had better. the
fine proprietor was to be conscientively on a servant. in
short that i was often by the pity-moon supposed. i saw a girl watched
the last to me, sometimes and i continued. then stopped with the
ploom was the first to be called over the casements: at this thing i had been ready to
strength to the fire and country, but she liked a lady in miss miller and mind, at least, the toil, such could be sure,
where the desire of the face of me. he had been too shrank as a strange trees with
that candle and serve to an expression of those way, that the closet
i had seen all was, and she went up to as its fierle. he could not see an and shape that this subject, i suppose,
was indifficult to be too like herself. i was to be
thinking and could see this thought of a face. i liv