In [1]:
import text_data
import networks
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm

# Data preprocessing and model compiling

In [2]:
corpus = text_data.Corpus("data/wikitext")

In [3]:
n_tokens = len(corpus.dictionary)

In [4]:
model = networks.WaveNet(layer_size = 6, stack_size = 3, n_tokens = n_tokens, in_channels = 256, res_channels = 512)
if torch.cuda.is_available():
    model.cuda()

In [5]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)

In [6]:
def train(inputs, targets):
        """
        Train 1 time
        :param inputs: Tensor[batch, timestep, channels]
        :param targets: Torch tensor [batch, timestep, channels]
        :return: float loss
        """
        preds, logits = model(inputs)

        loss = criterion(logits.view(-1, n_tokens),
                         targets.long().view(-1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        return loss.item()

In [7]:
model.receptive_fields

190

In [8]:
train_data = text_data.TextDataset(corpus.train, model.receptive_fields, sample_size = 280)

In [9]:
train_loader = torch.utils.data.DataLoader(train_data, batch_size = 32, shuffle = False)

# Training

In [10]:
epochs = 1

In [11]:
torch.cuda.empty_cache()

In [12]:
model.train()
for e in range(epochs):
    for b in tqdm(train_loader):
        inp, out = b
        loss = train(inp, out)
        
    print(f'[{e + 1}/{epochs}] loss: {loss}')

100%|████████████████████████████████████████████████████████████████████████████████| 718/718 [18:18<00:00,  1.53s/it]

[1/1] loss: 6.704344749450684





# Evaluating

In [13]:
sentence = corpus.test[0:190].unsqueeze(0).cuda()
generated = sentence

In [14]:
decoder = corpus.dictionary.idx2word

In [15]:
print("Generating text with seed:")
' '.join([decoder[i] for i in generated.tolist()[0]])

Generating text with seed:


'<eos> = Robert Boulter = <eos> <eos> Robert Boulter is an English film , television and theatre actor . He had a guest @-@ starring role on the television series The Bill in 2000 . This was followed by a starring role in the play Herons written by Simon Stephens , which was performed in 2001 at the Royal Court Theatre . He had a guest role in the television series Judge John Deed in 2002 . In 2004 Boulter landed a role as " Craig " in the episode " Teddy \'s Story " of the television series The Long Firm ; he starred alongside actors Mark Strong and Derek Jacobi . He was cast in the 2005 theatre productions of the Philip Ridley play Mercury Fur , which was performed at the Drum Theatre in Plymouth and the Menier Chocolate Factory in London . He was directed by John Tiffany and starred alongside Ben Whishaw , Shane Zaza , Harry Kent , Fraser Ayres , Sophie Stanton and Dominic Hall . <eos> In 2006 , Boulter starred alongside Whishaw in the play Citizenship written by Mark Ravenhill .'

In [18]:
sample_size = 190
softmax = nn.Softmax(dim = -1)
for i in range(50): # Generating 10 consecutive words
    y_hats, _ = model(sentence)
    preds = torch.argmax(softmax(y_hats), dim = -1)
    generated = torch.cat((generated, preds), dim=1)
    sentence = generated[:,-sample_size:]

l_gen = generated.tolist()[0]
gen_text = ' '.join([decoder[i] for i in l_gen])
print(gen_text)

<eos> = Robert Boulter = <eos> <eos> Robert Boulter is an English film , television and theatre actor . He had a guest @-@ starring role on the television series The Bill in 2000 . This was followed by a starring role in the play Herons written by Simon Stephens , which was performed in 2001 at the Royal Court Theatre . He had a guest role in the television series Judge John Deed in 2002 . In 2004 Boulter landed a role as " Craig " in the episode " Teddy 's Story " of the television series The Long Firm ; he starred alongside actors Mark Strong and Derek Jacobi . He was cast in the 2005 theatre productions of the Philip Ridley play Mercury Fur , which was performed at the Drum Theatre in Plymouth and the Menier Chocolate Factory in London . He was directed by John Tiffany and starred alongside Ben Whishaw , Shane Zaza , Harry Kent , Fraser Ayres , Sophie Stanton and Dominic Hall . <eos> In 2006 , Boulter starred alongside Whishaw in the play Citizenship written by Mark Ravenhill . rest