In [1]:
%load_ext autoreload
%autoreload 2

## Load wikitext-2 dataset

In [2]:
import os

data_dir = os.path.join(os.getcwd(), '..', 'data', 'wikitext-2-raw')
train_path = os.path.join(data_dir, 'wiki.test.raw')

In [3]:
text = open(train_path, encoding='utf8').read()

In [4]:
text[:500]

' \n = Robert Boulter = \n \n Robert Boulter is an English film , television and theatre actor . He had a guest @-@ starring role on the television series The Bill in 2000 . This was followed by a starring role in the play Herons written by Simon Stephens , which was performed in 2001 at the Royal Court Theatre . He had a guest role in the television series Judge John Deed in 2002 . In 2004 Boulter landed a role as " Craig " in the episode " Teddy \'s Story " of the television series The Long Firm ; '

In [5]:
chars = list(sorted(set(text)))
# for padding
chars.insert(0, "\0")
''.join(chars)

'\x00\n !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^abcdefghijklmnopqrstuvwxyz£¥©°½ÁÆÉ×ßàáãäåæçèéêëíîñóôöúüćčěīłŌōŠūžǐǔǜəɛɪʊˈː̍͘ΠΩέαβδειλμνοπςστυωόПвдикнאבילרשתاتدسكلوڠगनरलषुेो्ảẩ‑–—’“”†‡…⁄₩₱→−♯のアイクグジダッドナブラル中为伊傳八利前勢史型士大学宝开律成戦春智望杜東民王甫田甲秘聖艦處衛解詩贈邵都鉄集魯'

In [6]:
vocab_size = len(chars)
print(f'Total characters: {vocab_size}')

Total characters: 260


In [7]:
# Maps
char_idx = {char: idx for idx, char in enumerate(chars)}
idx_char = {idx: char for idx, char in enumerate(chars)}

In [8]:
idx_text = [char_idx[char] for char in text]

In [9]:
print(idx_text[:100])
print([idx_char[idx] for idx in idx_text[:100]])

[2, 1, 2, 31, 2, 52, 78, 65, 68, 81, 83, 2, 36, 78, 84, 75, 83, 68, 81, 2, 31, 2, 1, 2, 1, 2, 52, 78, 65, 68, 81, 83, 2, 36, 78, 84, 75, 83, 68, 81, 2, 72, 82, 2, 64, 77, 2, 39, 77, 70, 75, 72, 82, 71, 2, 69, 72, 75, 76, 2, 14, 2, 83, 68, 75, 68, 85, 72, 82, 72, 78, 77, 2, 64, 77, 67, 2, 83, 71, 68, 64, 83, 81, 68, 2, 64, 66, 83, 78, 81, 2, 16, 2, 42, 68, 2, 71, 64, 67, 2]
[' ', '\n', ' ', '=', ' ', 'R', 'o', 'b', 'e', 'r', 't', ' ', 'B', 'o', 'u', 'l', 't', 'e', 'r', ' ', '=', ' ', '\n', ' ', '\n', ' ', 'R', 'o', 'b', 'e', 'r', 't', ' ', 'B', 'o', 'u', 'l', 't', 'e', 'r', ' ', 'i', 's', ' ', 'a', 'n', ' ', 'E', 'n', 'g', 'l', 'i', 's', 'h', ' ', 'f', 'i', 'l', 'm', ' ', ',', ' ', 't', 'e', 'l', 'e', 'v', 'i', 's', 'i', 'o', 'n', ' ', 'a', 'n', 'd', ' ', 't', 'h', 'e', 'a', 't', 'r', 'e', ' ', 'a', 'c', 't', 'o', 'r', ' ', '.', ' ', 'H', 'e', ' ', 'h', 'a', 'd', ' ']


In [11]:
import torch
from torch.utils import data
import numpy as np

class Dataset(data.Dataset):
    def __init__(self, text):
        
        chars = list(sorted(set(text)))
        chars.insert(0, "\0")
        
        vocab_size = len(chars)
        # Maps
        char_idx = {char: idx for idx, char in enumerate(chars)}
        idx_char = {idx: char for idx, char in enumerate(chars)}
        
        idx_text = [char_idx[char] for char in text]
        
        cs=3
        c1_dat = [idx_text[i]   for i in range(0, len(idx_text)-cs, cs)]
        c2_dat = [idx_text[i+1] for i in range(0, len(idx_text)-cs, cs)]
        c3_dat = [idx_text[i+2] for i in range(0, len(idx_text)-cs, cs)]
        c4_dat = [idx_text[i+3] for i in range(0, len(idx_text)-cs, cs)]
        
        x = np.vstack((np.stack(c1_dat), np.stack(c2_dat), np.stack(c3_dat))).T
        y = np.stack(c4_dat)
        
        self.x = torch.from_numpy(x)
        self.y = torch.from_numpy(y)
        
    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, index):
        return self.x[index], self.y[index]

In [12]:
dataset = Dataset(text)
data_generator = data.DataLoader(dataset, batch_size=12, shuffle=True)

In [16]:
n_hidden = 256
n_fac = 42
from torch import nn
import torch.functional as F

class Char3Model(nn.Module):
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)

        # The 'green arrow' from our diagram - the layer operation from input to hidden
        self.l_in = nn.Linear(n_fac, n_hidden)

        # The 'orange arrow' from our diagram - the layer operation from hidden to hidden
        self.l_hidden = nn.Linear(n_hidden, n_hidden)
        
        # The 'blue arrow' from our diagram - the layer operation from hidden to output
        self.l_out = nn.Linear(n_hidden, vocab_size)
        
    def forward(self, c1, c2, c3):
        in1 = F.relu(self.l_in(self.e(c1)))
        in2 = F.relu(self.l_in(self.e(c2)))
        in3 = F.relu(self.l_in(self.e(c3)))
        
        h = V(torch.zeros(in1.size()).cuda())
        h = F.tanh(self.l_hidden(h+in1))
        h = F.tanh(self.l_hidden(h+in2))
        h = F.tanh(self.l_hidden(h+in3))
        
        return F.log_softmax(self.l_out(h))

In [18]:
model = Char3Model(vocab_size, n_fac).cuda()
loss_fn = F.nll_loss()
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

AttributeError: module 'torch.functional' has no attribute 'nll_loss'

In [17]:
max_epochs = 10
for epoch in range(max_epochs):
    for x, y in data_generator:
        # Forward pass: compute predicted y by passing x to the model.
        y_pred = model(x)

        # Compute and print loss.
        loss = loss_fn(y_pred, y)
        print(t, loss.item())

        optimizer.zero_grad()

        # Backward pass: compute gradient of the loss with respect to model
        # parameters
        loss.backward()

        # Calling the step function on an Optimizer makes an update to its
        # parameters
        optimizer.step()

AttributeError: module 'torch.functional' has no attribute 'nll_loss'