In [1]:
import text_data
import wikitext_data
import networks
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm

# Data preprocessing and model compiling

In [3]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
# Build the WikiText corpus through the project-local `wikitext_data` module,
# handing it the selected device. The captured output below shows that this
# step fetches the wikitext-2 archive into a local `.data` directory.
corpus = wikitext_data.Corpus(device)

C:\Users\kajud\Documents\GitHub\Onlab1\Wavenet\.data\wikitext-2-v1.zip: 100%|██████| 4.48M/4.48M [00:09<00:00, 469kB/s]


In [5]:
# Vocabulary size: the number of entries in the vocab's `stoi` mapping
# (presumably string -> index; confirm in wikitext_data). It sizes the model
# and the flattened logits inside `train`.
n_tokens = len(corpus.vocab.stoi)

In [18]:
# WaveNet network from the project-local `networks` module, sized to the
# corpus vocabulary and moved to the selected device.
model = networks.WaveNet(
    layer_size=6,
    stack_size=3,
    n_tokens=n_tokens,
    in_channels=256,
    res_channels=512,
).to(device)

In [19]:
# Adam over every model parameter, paired with a cross-entropy objective.
optimizer = torch.optim.Adam(model.parameters(), lr=2e-3)
criterion = nn.CrossEntropyLoss()

In [20]:
def train(inputs, targets):
    """
    Run a single optimisation step on one batch.

    Relies on the notebook-level `model`, `criterion`, `optimizer` and
    `n_tokens` objects.

    :param inputs: batch handed unchanged to `model`
        (documented by the original author as Tensor[batch, timestep, channels];
        not verifiable from this notebook alone)
    :param targets: tensor of target class indices; it is cast to long and
        flattened to 1-D so that it lines up with the flattened logits
    :return: float loss of this batch
    """
    # The model returns a pair; only the second element (logits) feeds the loss.
    _, logits = model(inputs)

    # One row of `n_tokens` scores per position, one class index per position.
    flat_logits = logits.view(-1, n_tokens)
    flat_targets = targets.long().view(-1)
    loss = criterion(flat_logits, flat_targets)

    # Clear stale gradients, backpropagate, then update the parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return loss.item()

In [21]:
# Display the model's receptive field; the dataset cell below reuses it as
# `input_size` and to derive the stride.
model.receptive_fields

190

In [27]:
# Training dataset built from the corpus' train split. The input window equals
# the model's receptive field, and the stride is the sequence length minus
# that receptive field.
train_data = wikitext_data.TextDataset(
    corpus.train,
    in_out_overlap=True,
    input_size=model.receptive_fields,
    seq_len=280,
    stride=280 - model.receptive_fields,
)

In [28]:
# Batches of 32 samples, served in dataset order (no shuffling).
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=32,
    shuffle=False,
)

# Training

In [29]:
# Number of full passes over `train_loader` performed by the training loop.
epochs = 1

In [30]:
# Ask PyTorch to release cached, currently unused GPU memory before training.
torch.cuda.empty_cache()

In [31]:
# Put the model in training mode, then take one optimisation step per batch.
# The value printed after each epoch is the loss of that epoch's final batch.
model.train()
for epoch_no in range(1, epochs + 1):
    for inp, out in tqdm(train_loader):
        loss = train(inp, out)

    print(f'[{epoch_no}/{epochs}] loss: {loss}')

100%|████████████████████████████████████████████████████████████████████████████████| 712/712 [15:19<00:00,  1.29s/it]

[1/1] loss: 5.917958736419678





# Evaluating

In [32]:
# Seed for text generation: the first `model.receptive_fields` tokens of the
# test split, shaped as a batch of one. The tensor is moved with `.to(device)`
# rather than `.cuda()` so the cell also runs when `device` fell back to the CPU.
sentence = corpus.test[0:model.receptive_fields].unsqueeze(0).to(device)
# `generated` starts out as the seed and is extended by the generation cell.
generated = sentence

In [34]:
# Show the seed as text by mapping each token id back through the vocab's
# `itos` table.
print("Generating text with seed:")
' '.join(corpus.vocab.itos[token_id] for token_id in generated[0].tolist())

Generating text with seed:


"= robert <unk> = robert <unk> is an english film , television and theatre actor . he had a guest @-@ starring role on the television series the bill in 2000 . this was followed by a starring role in the play herons written by simon stephens , which was performed in 2001 at the royal court theatre . he had a guest role in the television series judge john <unk> in 2002 . in 2004 <unk> landed a role as craig in the episode teddy ' s story of the television series the long firm he starred alongside actors mark strong and derek jacobi . he was cast in the 2005 theatre productions of the philip ridley play mercury fur , which was performed at the drum theatre in plymouth and the <unk> <unk> factory in london . he was directed by john <unk> and starred alongside ben <unk> , shane <unk> , harry kent , fraser <unk> , sophie stanton and dominic hall . in 2006 , <unk> starred alongside <unk> in the play <unk> written by mark <unk> . he appeared on a 2006 episode of the"

In [37]:
# Greedy autoregressive generation starting from the seed held in `sentence`.
n_steps = 50  # number of forward passes; each appends the model's prediction(s)
sample_size = model.receptive_fields  # length of the context window fed back in
softmax = nn.Softmax(dim = -1)

# Use separate names for the sliding context and the growing output, so the
# seed in `sentence` is never overwritten and re-running this cell restarts
# from the same seed instead of continuing from leftover state.
context = sentence
generated = sentence

model.eval()  # the training cell left the model in train mode
with torch.no_grad():  # no autograd graph is needed while generating
    for step in range(n_steps):
        y_hats, _logits = model(context)
        # Greedy choice: the highest-probability token at each predicted position.
        preds = torch.argmax(softmax(y_hats), dim = -1)
        generated = torch.cat((generated, preds), dim=1)
        # Keep only the most recent `sample_size` tokens as the next input.
        context = generated[:, -sample_size:]

# Map the token ids of the single batch row back to text and print it.
l_gen = generated.tolist()[0]
gen_text = ' '.join([corpus.vocab.itos[i] for i in l_gen])
print(gen_text)

= robert <unk> = robert <unk> is an english film , television and theatre actor . he had a guest @-@ starring role on the television series the bill in 2000 . this was followed by a starring role in the play herons written by simon stephens , which was performed in 2001 at the royal court theatre . he had a guest role in the television series judge john <unk> in 2002 . in 2004 <unk> landed a role as craig in the episode teddy ' s story of the television series the long firm he starred alongside actors mark strong and derek jacobi . he was cast in the 2005 theatre productions of the philip ridley play mercury fur , which was performed at the drum theatre in plymouth and the <unk> <unk> factory in london . he was directed by john <unk> and starred alongside ben <unk> , shane <unk> , harry kent , fraser <unk> , sophie stanton and dominic hall . in 2006 , <unk> starred alongside <unk> in the play <unk> written by mark <unk> . he appeared on a 2006 episode of the will national will national