In [1]:
import text_data
from CustomLSTM import CustomLSTM
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm

# Data preprocessing and model compiling

In [2]:
corpus = text_data.Corpus("data/wikitext")

In [3]:
n_tokens = len(corpus.dictionary)
input_sz = 200
hidden_sz = 128
seq_length = 40
epochs = 1

In [4]:
# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [5]:
model = nn.Sequential(
    nn.Embedding(n_tokens, input_sz),
    CustomLSTM(input_sz = input_sz, hidden_sz = hidden_sz, return_states = False, return_sequences = False),
    nn.Linear(hidden_sz, n_tokens)).float().to(device)

In [6]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)

In [7]:
def train(inputs, targets):
        """
        Train 1 time
        :param inputs: Tensor[batch, timestep, channels]
        :param targets: Torch tensor [batch, timestep, channels]
        :return: float loss
        """
        logits = model(inputs)

        loss = criterion(logits.view(-1, n_tokens),
                         targets.long().view(-1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        return loss.item()

In [8]:
train_data = text_data.TextDataset(corpus.train, receptive_fields = seq_length, sample_size = seq_length)

In [9]:
train_loader = torch.utils.data.DataLoader(train_data, batch_size = 256, shuffle = False)

# Training

In [10]:
model.train()
for e in range(epochs):
    for b in tqdm(train_loader):
        inp, out = b
        loss = train(inp, out)
        
    print(f'[{e + 1}/{epochs}] loss: {loss}')

100%|██████████████████████████████████████████████████████████████████████████████| 8159/8159 [12:11<00:00, 11.16it/s]

[1/1] loss: 6.314547061920166





# Evaluating

In [12]:
sentence = corpus.test[0:40].unsqueeze(0).cuda()
generated = sentence

In [13]:
decoder = corpus.dictionary.idx2word

In [14]:
print("Generating text with seed:")
' '.join([decoder[i] for i in generated.tolist()[0]])

Generating text with seed:


'<eos> = Robert Boulter = <eos> <eos> Robert Boulter is an English film , television and theatre actor . He had a guest @-@ starring role on the television series The Bill in 2000 . This was followed by a'

In [19]:
sample_size = 40
softmax = nn.Softmax(dim = -1)
for i in range(50): # Generating 10 consecutive words
    y_hats = model(sentence)
    preds = torch.argmax(softmax(y_hats), dim = -1).unsqueeze(0)
    generated = torch.cat((generated, preds), dim=1)
    sentence = generated[:,-sample_size:]

l_gen = generated.tolist()[0]
gen_text = ' '.join([decoder[i] for i in l_gen])
print(gen_text)

<eos> = Robert Boulter = <eos> <eos> Robert Boulter is an English film , television and theatre actor . He had a guest @-@ starring role on the television series The Bill in 2000 . This was followed by a number of the main sequence of the main sequence . <eos> <eos> = = = = = = = = = = = = = = = = = = = = = = = = = = = = = <eos> <eos> The first recorded in the first game , the most common starling is a star , and the other of the star 's works . <eos> <eos> = = = = = = = = = = = = = = = = = = = = = = = = = = = = = <eos>
