# Problem 7: Building a GRU from Scratch

In [1]:
from fastai import *
from fastai.text.all import *
from fastai.text import *

### Data preparation

In [2]:
# Fetch the Human Numbers dataset through fastai's `untar_data`; `path` is the
# local dataset folder (the listing in the next cell shows train.txt / valid.txt).
path = untar_data(URLs.HUMAN_NUMBERS)

In [3]:
# Show the files that make up the dataset.
path.ls()

(#2) [Path('/Users/lberelidze/.fastai/data/human_numbers/train.txt'),Path('/Users/lberelidze/.fastai/data/human_numbers/valid.txt')]

In [4]:
# Gather the raw lines of both splits into one list: train first, then valid.
# Each entry keeps its trailing newline; stripping happens in the next cell.
lines = L()
for split_file in ('train.txt', 'valid.txt'):
    with open(path/split_file) as f:
        lines += L(*f.readlines())
lines

(#9998) ['one \n','two \n','three \n','four \n','five \n','six \n','seven \n','eight \n','nine \n','ten \n'...]

In [5]:
# Flatten the corpus into one string, using ' . ' as the separator between
# consecutive numbers so that '.' later becomes its own token.
text = ' . '.join(line.strip() for line in lines)
text[:100]

'one . two . three . four . five . six . seven . eight . nine . ten . eleven . twelve . thirteen . fo'

In [6]:
# Split on single spaces so every word and every '.' separator is its own token.
tokens = text.split(' ')
tokens[:10]

['one', '.', 'two', '.', 'three', '.', 'four', '.', 'five', '.']

In [7]:
# Vocabulary = the distinct tokens; the output below lists them starting with
# 'one', '.', 'two', ... i.e. in the order they first occur in the corpus.
vocab = L(*tokens).unique()
vocab

(#30) ['one','.','two','three','four','five','six','seven','eight','nine'...]

In [8]:
# Map each vocabulary word to its position in `vocab`, then numericalise the
# whole token stream with that lookup.
word2idx = {word: idx for idx, word in enumerate(vocab)}
nums = L(word2idx[tok] for tok in tokens)
nums

(#63095) [0,1,2,1,3,1,4,1,5,1...]

In [9]:
# Batch size. Used by `group_chunks`/`DataLoaders` below and also read as a
# global by the model classes when they allocate their initial hidden state.
bs = 64

In [10]:
def group_chunks(ds, bs):
    """Reorder `ds` so that consecutive batches of size `bs` continue each other.

    The dataset is viewed as `bs` contiguous chunks of `m = len(ds) // bs` items.
    The result lists, for each offset `i` in a chunk, item `i` of every chunk in
    turn, so position `j` of batch `i+1` follows position `j` of batch `i` in the
    original order. Items beyond `m * bs` are dropped.

    Returns an `L` of `m * bs` items (empty when `len(ds) < bs`).
    """
    chunk_len = len(ds) // bs
    return L(ds[offset + chunk_len*chunk]
             for offset in range(chunk_len)
             for chunk in range(bs))

In [11]:
sl = 16
# Non-overlapping windows of `sl` tokens; the target of each window is the same
# window shifted one token to the right (next-token prediction at every step).
window_starts = range(0, len(nums)-sl-1, sl)
seqs = L((tensor(nums[s:s+sl]), tensor(nums[s+1:s+sl+1])) for s in window_starts)

# First 80% of the windows train, the remainder validates.
cut = int(len(seqs) * 0.8)
train_ds = group_chunks(seqs[:cut], bs)
valid_ds = group_chunks(seqs[cut:], bs)

# No shuffling and no partial batches: the models carry hidden state from one
# batch to the next, which relies on the ordering produced by `group_chunks`.
dls = DataLoaders.from_dsets(train_ds, valid_ds, bs=bs, drop_last=True, shuffle=False)

### LSTM 

In [12]:
class LMModel7(Module):
    """Weight-tied, multi-layer LSTM language model that keeps its state across batches.

    Args:
        vocab_sz: number of tokens (embedding rows and output classes).
        n_hidden: size of the embedding and of every LSTM layer.
        n_layers: number of stacked LSTM layers.
        p: dropout probability applied to the LSTM output.

    `forward(x)` returns `(logits, raw, out)`, where `raw` is the LSTM output
    before dropout and `out` is the same activations after dropout.
    (Presumably the extra two are for fastai's RNN regularisation; confirm.)
    """
    def __init__(self, vocab_sz, n_hidden, n_layers, p):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)
        self.rnn = nn.LSTM(n_hidden, n_hidden, n_layers, batch_first=True)
        self.drop = nn.Dropout(p)
        self.h_o = nn.Linear(n_hidden, vocab_sz)
        # Weight tying: the output projection shares the embedding matrix.
        self.h_o.weight = self.i_h.weight
        # (h, c) pair, initially sized from the notebook-level batch size `bs`.
        self.h = [torch.zeros(n_layers, bs, n_hidden) for _ in range(2)]

    def _state_for(self, x):
        "Return the stored (h, c) on `x`'s device, restarted from zeros if `x`'s batch size differs."
        # `.to` is a no-op when the state already lives on the right device.
        state = [h_.to(x.device) for h_ in self.h]
        if state[0].shape[1] != x.shape[0]:
            # The carried state belongs to a different batch layout; it cannot be
            # reused, so start again from zeros with the incoming batch size.
            state = [h_.new_zeros(h_.shape[0], x.shape[0], h_.shape[2]) for h_ in state]
        return state

    def forward(self, x):
        raw,h = self.rnn(self.i_h(x), self._state_for(x))
        out = self.drop(raw)
        # Keep the values but cut the graph so backprop stops at the batch boundary.
        self.h = [h_.detach() for h_ in h]
        return self.h_o(out),raw,out

    def reset(self):
        "Zero both state tensors in place."
        for h in self.h: h.zero_()

In [13]:
# LSTM baseline: 2 layers, 64 hidden units, dropout 0.4, trained with flattened
# cross-entropy and reporting accuracy.
learn = TextLearner(dls, LMModel7(len(vocab), 64, 2, 0.4),
    loss_func=CrossEntropyLossFlat(), metrics=accuracy)

In [14]:
# 25 epochs, 1e-2 learning-rate argument, weight decay 0.1.
# NOTE(review): the recorded output below lists only epochs 0-9 although 25 are
# requested here; re-run the cell to confirm the table matches this call.
learn.fit_one_cycle(25, 1e-2, wd=0.1)

epoch,train_loss,valid_loss,accuracy,time
0,2.696619,2.188272,0.43514,00:02
1,1.847599,1.431532,0.576335,00:02
2,1.150079,0.868134,0.744385,00:02
3,0.625532,0.698771,0.82373,00:02
4,0.340576,0.556864,0.842122,00:02
5,0.19561,0.447286,0.872884,00:02
6,0.117412,0.467264,0.872233,00:02
7,0.078261,0.369614,0.891113,00:02
8,0.061542,0.507591,0.871663,00:02
9,0.048282,0.382078,0.891764,00:02


### GRU

In [19]:
class GRUModel(Module):
    """Weight-tied, multi-layer GRU language model that keeps its state across batches.

    Args:
        vocab_sz: number of tokens (embedding rows and output classes).
        n_hidden: size of the embedding and of every GRU layer.
        n_layers: number of stacked GRU layers.
        p: dropout probability applied to the GRU output.

    `forward(x)` returns `(logits, raw, out)`, where `raw` is the GRU output
    before dropout and `out` is the same activations after dropout.
    (Presumably the extra two are for fastai's RNN regularisation; confirm.)
    """
    def __init__(self, vocab_sz, n_hidden, n_layers, p):
        self.i_h = nn.Embedding(vocab_sz, n_hidden)
        self.rnn = nn.GRU(n_hidden, n_hidden, n_layers, batch_first=True)
        self.drop = nn.Dropout(p)
        self.h_o = nn.Linear(n_hidden, vocab_sz)
        # Weight tying: the output projection shares the embedding matrix.
        self.h_o.weight = self.i_h.weight
        # A GRU has a single state tensor (no cell state), initially sized from
        # the notebook-level batch size `bs`.
        self.h = torch.zeros(n_layers, bs, n_hidden)

    def _state_for(self, x):
        "Return the stored state on `x`'s device, restarted from zeros if `x`'s batch size differs."
        # `.to` is a no-op when the state already lives on the right device.
        state = self.h.to(x.device)
        if state.shape[1] != x.shape[0]:
            # The carried state belongs to a different batch layout; it cannot be
            # reused, so start again from zeros with the incoming batch size.
            state = state.new_zeros(state.shape[0], x.shape[0], state.shape[2])
        return state

    def forward(self, x):
        raw,h = self.rnn(self.i_h(x), self._state_for(x))
        out = self.drop(raw)
        # Keep the values but cut the graph so backprop stops at the batch boundary.
        self.h = h.detach()
        return self.h_o(out),raw,out

    def reset(self):
        "Zero the state tensor in place."
        # `self.h` is one tensor here, not an (h, c) list as in the LSTM model.
        self.h.zero_()

In [20]:
# GRU counterpart of the LSTM learner above: same sizes (2 layers, 64 hidden
# units, dropout 0.4), same loss and metric.
learn = TextLearner(dls, GRUModel(len(vocab), 64, 2, 0.4),
    loss_func=CrossEntropyLossFlat(), metrics=accuracy)

In [21]:
# 32 epochs, 1e-2 learning-rate argument.
# NOTE(review): the LSTM run used 25 epochs with wd=0.1, whereas no `wd` is
# passed here, so the two result tables are not a like-for-like comparison.
# The recorded output below also lists only epochs 0-9; re-run to confirm.
learn.fit_one_cycle(32, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,2.722024,2.197563,0.445068,00:02
1,1.961179,1.612515,0.547282,00:02
2,1.408881,1.195055,0.701497,00:02
3,0.856161,0.652798,0.821859,00:02
4,0.477122,0.513192,0.867513,00:02
5,0.277914,0.669675,0.850586,00:02
6,0.195327,0.529508,0.868815,00:02
7,0.126527,0.468893,0.885742,00:02
8,0.104583,0.611902,0.862467,00:02
9,0.081242,0.418926,0.900391,00:02
