In [14]:
# Turn off Jedi-based tab completion in IPython's completer.
%config Completer.use_jedi = False

# Problem 7: Building a GRU from Scratch

In [1]:
from fastai import *
from fastai.text.all import *
from fastai.text import *

### Data preparation

In [2]:
# Fetch the HUMAN_NUMBERS dataset through fastai's untar_data and keep the returned location in `path`.
path = untar_data(URLs.HUMAN_NUMBERS)

In [3]:
# Show what the dataset location contains.
path.ls()

(#2) [Path('/Users/lberelidze/.fastai/data/human_numbers/train.txt'),Path('/Users/lberelidze/.fastai/data/human_numbers/valid.txt')]

In [4]:
# Collect every line of the training file, then of the validation file, into one list.
lines = L()
for fname in ('train.txt', 'valid.txt'):
    with open(path/fname) as fh:
        lines += L(*fh.readlines())
lines

(#9998) ['one \n','two \n','three \n','four \n','five \n','six \n','seven \n','eight \n','nine \n','ten \n'...]

In [5]:
# Strip surrounding whitespace from each line and glue the lines together,
# using ' . ' as the boundary marker between consecutive entries.
text = ' . '.join(line.strip() for line in lines)
text[:100]

'one . two . three . four . five . six . seven . eight . nine . ten . eleven . twelve . thirteen . fo'

In [6]:
# Tokenise by splitting on single spaces; the ' . ' separators therefore show up as standalone '.' tokens.
tokens = text.split(' ')
tokens[:10]

['one', '.', 'two', '.', 'three', '.', 'four', '.', 'five', '.']

In [7]:
# Vocabulary: the distinct tokens, as returned by L.unique().
vocab = L(*tokens).unique()
vocab

(#30) ['one','.','two','three','four','five','six','seven','eight','nine'...]

In [8]:
# Lookup table from each vocabulary word to its position in `vocab`.
word2idx = {word: idx for idx, word in enumerate(vocab)}
# The full token stream, encoded as vocabulary indices.
nums = L(word2idx[tok] for tok in tokens)
nums

(#63095) [0,1,2,1,3,1,4,1,5,1...]

In [9]:
# Batch size. Read later by the group_chunks calls, by DataLoaders.from_dsets,
# and by the models when they allocate their initial hidden state.
bs = 64

In [11]:
def group_chunks(ds, bs):
    """Interleave `ds` so that it can be read in batches of `bs` that follow on from each other.

    With ``m = len(ds) // bs``, the first ``m * bs`` items are treated as `bs`
    contiguous runs of length `m`. The result lists item 0 of every run, then
    item 1 of every run, and so on. Any items beyond ``m * bs`` are left out.

    Args:
        ds: an indexable collection with a length.
        bs: number of runs (the batch size).

    Returns:
        An `L` holding the reordered items.
    """
    m = len(ds) // bs
    return L(ds[step + m*run] for step in range(m) for run in range(bs))

In [13]:
# Sequence length: number of tokens in each input window.
sl = 16
# Every window of `sl` token ids is paired with the same window shifted one
# position ahead, which serves as the target for that window.
starts = range(0, len(nums)-sl-1, sl)
seqs = L((tensor(nums[s:s+sl]), tensor(nums[s+1:s+sl+1])) for s in starts)
# First 80% of the windows for training, the remainder for validation.
cut = int(len(seqs) * 0.8)
train_seqs, valid_seqs = seqs[:cut], seqs[cut:]
# Order is kept (shuffle=False) and incomplete batches are dropped so every
# batch has exactly `bs` rows.
dls = DataLoaders.from_dsets(
    group_chunks(train_seqs, bs),
    group_chunks(valid_seqs, bs),
    bs=bs,
    drop_last=True,
    shuffle=False,
)

### LSTM 

In [26]:
class LMModel7(Module):
    """LSTM language model with dropout and tied input/output embeddings.

    The hidden and cell state are kept on the instance between calls (detached
    from the graph), and are allocated with the notebook-level `bs`, so every
    input batch must contain exactly `bs` rows.

    Args:
        vocab_sz: number of distinct tokens (embedding rows and output logits).
        n_hidden: embedding width, which is also the LSTM hidden size.
        n_layers: number of stacked LSTM layers.
        p: dropout probability applied to the LSTM outputs.
    """
    def __init__(self, vocab_sz, n_hidden, n_layers, p):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)
        self.rnn = nn.LSTM(n_hidden, n_hidden, n_layers, batch_first=True)
        self.drop = nn.Dropout(p)
        self.h_o = nn.Linear(n_hidden, vocab_sz)
        # Weight tying: the output projection shares the embedding matrix
        # (both are vocab_sz x n_hidden).
        self.h_o.weight = self.i_h.weight
        # Initial (hidden, cell) state. These are plain tensors, not
        # parameters or buffers, so moving the model to a device does not
        # move them.
        self.h = [torch.zeros(n_layers, bs, n_hidden) for _ in range(2)]

    def forward(self, x):
        """Run one batch and return `(logits, raw, out)`.

        `raw` is the LSTM output before dropout and `out` the output after
        dropout; `logits` is `out` passed through the tied output layer.
        """
        # Put the carried state on the input's device. This is a no-op when
        # it is already there, and avoids a device mismatch on the first
        # batch when the model runs on an accelerator.
        state = tuple(h_.to(x.device) for h_ in self.h)
        raw,h = self.rnn(self.i_h(x), state)
        out = self.drop(raw)
        # Keep the state values for the next batch but cut the graph so
        # backpropagation does not reach into earlier batches.
        self.h = [h_.detach() for h_ in h]
        return self.h_o(out),raw,out

    def reset(self):
        """Zero the stored hidden and cell state in place."""
        for h in self.h: h.zero_()

In [28]:
# Learner for the LSTM model: 64 hidden units, 2 layers, dropout 0.4,
# trained with flattened cross-entropy and reporting accuracy.
learn = TextLearner(dls, LMModel7(len(vocab), 64, 2, 0.4),
    loss_func=CrossEntropyLossFlat(), metrics=accuracy)

In [29]:
# One-cycle training: 15 epochs requested, learning rate 1e-2, weight decay 0.1.
# NOTE(review): the recorded table below lists only epochs 0-9 — confirm the output belongs to this call.
learn.fit_one_cycle(15, 1e-2, wd=0.1)

epoch,train_loss,valid_loss,accuracy,time
0,2.509391,1.730979,0.494303,00:02
1,1.531583,1.176885,0.680501,00:02
2,0.817933,0.691484,0.835042,00:02
3,0.401794,0.560343,0.864258,00:02
4,0.201361,0.530603,0.880778,00:02
5,0.107395,0.530075,0.879069,00:02
6,0.065757,0.48618,0.894531,00:02
7,0.041751,0.569346,0.880208,00:02
8,0.031767,0.520494,0.890951,00:02
9,0.025729,0.500363,0.890381,00:02


### GRU

In [157]:
class GRUModel(Module):
    """GRU language model with dropout and tied input/output embeddings.

    The hidden state is kept on the instance between calls (detached from the
    graph), and is allocated with the notebook-level `bs`, so every input
    batch must contain exactly `bs` rows.

    Args:
        vocab_sz: number of distinct tokens (embedding rows and output logits).
        n_hidden: embedding width, which is also the GRU hidden size.
        n_layers: number of stacked GRU layers.
        p: dropout probability applied to the GRU outputs.
    """
    def __init__(self, vocab_sz, n_hidden, n_layers, p):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)
        self.rnn = nn.GRU(n_hidden, n_hidden, n_layers, batch_first=True)
        self.drop = nn.Dropout(p)
        self.h_o = nn.Linear(n_hidden, vocab_sz)
        # Weight tying: the output projection shares the embedding matrix
        # (both are vocab_sz x n_hidden).
        self.h_o.weight = self.i_h.weight
        # Initial hidden state: a single tensor (a GRU has no cell state).
        # It is a plain tensor, not a parameter or buffer, so moving the
        # model to a device does not move it.
        self.h = torch.zeros(n_layers, bs, n_hidden)

    def forward(self, x):
        """Run one batch and return `(logits, raw, out)`.

        `raw` is the GRU output before dropout and `out` the output after
        dropout; `logits` is `out` passed through the tied output layer.
        """
        # Put the carried state on the input's device. This is a no-op when
        # it is already there, and avoids a device mismatch on the first
        # batch when the model runs on an accelerator.
        raw,h = self.rnn(self.i_h(x), self.h.to(x.device))
        out = self.drop(raw)
        # Keep the state values for the next batch but cut the graph so
        # backpropagation does not reach into earlier batches.
        self.h = h.detach()
        return self.h_o(out),raw,out

    def reset(self):
        """Zero the stored hidden state in place."""
        # self.h is one tensor here, so zero it directly rather than
        # looping over its per-layer slices.
        self.h.zero_()

In [158]:
# Learner for the GRU model, with the same sizes as the LSTM run: 64 hidden units, 2 layers, dropout 0.4.
# This rebinds `learn`, replacing the LSTM learner created earlier.
learn = TextLearner(dls, GRUModel(len(vocab), 64, 2, 0.4),
    loss_func=CrossEntropyLossFlat(), metrics=accuracy)

In [159]:
# One-cycle training: 10 epochs, learning rate 1e-2.
# NOTE(review): no `wd` is passed here, whereas the LSTM run used wd=0.1 — confirm this is intended before comparing the two.
learn.fit_one_cycle(10, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,2.401323,1.859045,0.474202,00:02
1,1.449953,1.304858,0.677327,00:02
2,0.715166,0.875463,0.798421,00:02
3,0.355609,0.814546,0.817057,00:02
4,0.189877,0.845866,0.824463,00:02
5,0.110245,0.805919,0.828939,00:02
6,0.070439,0.758953,0.840739,00:02
7,0.04741,0.78868,0.826253,00:02
8,0.035616,0.754902,0.83138,00:02
9,0.029286,0.783605,0.823161,00:02
