In [1]:
from fastbook import *
from fastai.text.all import *

In [2]:
# Fetch the HUMAN_NUMBERS dataset via fastai; `path` is the local dataset
# folder (the listing below shows it contains train.txt and valid.txt).
path = untar_data(URLs.HUMAN_NUMBERS)

In [3]:
path.ls()

(#2) [Path('/root/.fastai/data/human_numbers/valid.txt'),Path('/root/.fastai/data/human_numbers/train.txt')]

In [4]:
# Gather the raw lines of both splits into one list: train first, then valid.
# Each entry still carries its trailing ' \n' at this point.
lines = L()
for fname in ('train.txt', 'valid.txt'):
    with open(path/fname) as f:
        lines += L(*f.readlines())

lines

(#9998) ['one \n','two \n','three \n','four \n','five \n','six \n','seven \n','eight \n','nine \n','ten \n'...]

In [12]:
# Strip the whitespace around every line and glue them together with ' . ',
# so a period token separates consecutive numbers in one long string.
text = ' . '.join(map(str.strip, lines))
text[:100]

'one . two . three . four . five . six . seven . eight . nine . ten . eleven . twelve . thirteen . fo'

In [13]:
# Split on single spaces: each word and each '.' separator becomes one token.
tokens = text.split(' ')
tokens[:10]

['one', '.', 'two', '.', 'three', '.', 'four', '.', 'five', '.']

In [14]:
# Vocabulary = the distinct tokens (30 of them; the output below lists them
# in order of first appearance).
vocab = L(*tokens).unique()
vocab

(#30) ['one','.','two','three','four','five','six','seven','eight','nine'...]

In [16]:
# Map every vocabulary entry to its position in `vocab`, then numericalise the
# whole token stream through that lookup table.
word2idx = dict(zip(vocab, range(len(vocab))))
nums = L(word2idx[tok] for tok in tokens)
nums

(#63095) [0,1,2,1,3,1,4,1,5,1...]

In [29]:
# Build (input, target) samples: three consecutive token ids as the input and
# the id that follows them as the target. The window advances by 3 each step,
# so inputs never overlap.
seqs = L(
    (tensor(nums[start:start+3]), nums[start+3])
    for start in range(0, len(nums) - 4, 3)
)

In [30]:
seqs

(#21031) [(tensor([0, 1, 2]), 1),(tensor([1, 3, 1]), 4),(tensor([4, 1, 5]), 1),(tensor([1, 6, 1]), 7),(tensor([7, 1, 8]), 1),(tensor([1, 9, 1]), 10),(tensor([10,  1, 11]), 1),(tensor([ 1, 12,  1]), 13),(tensor([13,  1, 14]), 1),(tensor([ 1, 15,  1]), 16)...]

In [31]:
# Batch size for both the training and the validation loader.
bs = 64
# Positional 80/20 split: the first 80% of the sequences are used for training,
# the remainder for validation. shuffle=False keeps the samples in file order.
cut = int(len(seqs)*0.8)
# Pass `bs` through instead of repeating the literal, so changing the batch
# size above actually takes effect.
dls = DataLoaders.from_dsets(seqs[:cut], seqs[cut:], bs=bs, shuffle=False)

In [32]:
class LMModel1(Module):
    """Predict the next word from the three preceding words.

    The same embedding (`i_h`) and the same hidden-to-hidden layer (`h_h`) are
    applied at each of the three word positions; `h_o` maps the final hidden
    activation to one score per vocabulary entry.
    """
    def __init__(self, vocab_size, n_hidden):
        self.i_h = nn.Embedding(vocab_size, n_hidden)   # word id -> hidden vector
        self.h_h = nn.Linear(n_hidden, n_hidden)        # hidden -> hidden
        self.h_o = nn.Linear(n_hidden, vocab_size)      # hidden -> vocabulary scores

    def forward(self, x):
        # x holds token ids with one row per sample and one column per word
        # position (the batch inspected in this notebook is 64 x 3).
        first, second, third = x[:,0], x[:,1], x[:,2]

        # Word 1: there is no previous state, so only its embedding is used.
        h = F.relu(self.h_h(self.i_h(first)))
        # Words 2 and 3: add the embedding to the running state, then transform.
        h = F.relu(self.h_h(h + self.i_h(second)))
        h = F.relu(self.h_h(h + self.i_h(third)))

        return self.h_o(h)
        

In [33]:
# Standalone instance (60 hidden units) used in the cells shown here only for
# the output-shape check; the Learner further down builds its own model.
model1 = LMModel1(len(vocab), 60)

In [35]:
# Pull a single batch to inspect the shapes of the inputs and the targets.
x,y = dls.one_batch()
x.shape, y.shape

(torch.Size([64, 3]), torch.Size([64]))

In [38]:
model1(x).shape

torch.Size([64, 30])

In [40]:
# Train a fresh LMModel1 with 70 hidden units for 4 epochs (learning rate 1e-3),
# reporting cross-entropy loss and accuracy.
learn = Learner(
    dls,
    LMModel1(len(vocab), 70),
    loss_func=F.cross_entropy,
    metrics=accuracy,
)
learn.fit_one_cycle(4, 1e-3)

epoch,train_loss,valid_loss,accuracy,time
0,1.825336,1.859318,0.466366,00:03
1,1.405354,1.690724,0.470406,00:02
2,1.404521,1.597311,0.493939,00:02
3,1.3687,1.601832,0.495603,00:02


In [42]:
# Take the first validation batch and check how many targets it contains.
x,y = first(dls.valid)
y.shape[0]

64

In [52]:
# Baseline: how accurate would a model be if it always predicted the most
# frequent target token of the validation set?
n = 0                               # number of validation targets seen so far
counts = torch.zeros(len(vocab))    # occurrences of each vocabulary id as a target
for x,y in dls.valid:
    n += len(y)
    for i in range(len(vocab)):
        counts[i] += (y==i).sum()
# Most frequent target id, the corresponding token, and its share of all targets.
idx = counts.argmax()
idx, vocab[idx.item()], counts[idx].item()/n

(tensor(29), 'thousand', 0.15165200855716662)

In [53]:
class LMModel2(Module):
    """Loop-based version of the three-word next-token model.

    Each of the three input positions goes through the same two steps: add the
    word's embedding to the running hidden state, then apply the shared
    hidden-to-hidden layer followed by ReLU.
    """
    def __init__(self, vocab_size, n_hidden):
        self.i_h = nn.Embedding(vocab_size, n_hidden)   # word id -> hidden vector
        self.h_h = nn.Linear(n_hidden, n_hidden)        # hidden -> hidden
        self.h_o = nn.Linear(n_hidden, vocab_size)      # hidden -> vocabulary scores

    def forward(self, x):
        # Start from the scalar 0; it broadcasts against the first embedding.
        hidden = 0
        for pos in range(3):
            hidden = F.relu(self.h_h(hidden + self.i_h(x[:,pos])))

        return self.h_o(hidden)

In [56]:
# Train a fresh LMModel2 with 70 hidden units for 4 epochs (learning rate 1e-3),
# reporting cross-entropy loss and accuracy.
learn = Learner(
    dls,
    LMModel2(len(vocab), 70),
    loss_func=F.cross_entropy,
    metrics=accuracy,
)
learn.fit_one_cycle(4, 1e-3)

epoch,train_loss,valid_loss,accuracy,time
0,1.743556,2.062414,0.462562,00:02
1,1.380203,1.803409,0.467554,00:03
2,1.413023,1.64471,0.490373,00:03
3,1.380128,1.656836,0.490849,00:03


In [None]:
class LMModel3(Module):
    """Three-word next-token model whose hidden state persists between calls.

    Unlike a model that starts from zero on every call, the hidden activation
    is stored on the instance (`self.h`) and carried over from one `forward`
    call to the next. After each call the stored state is detached, so
    gradients only flow through the current batch.
    """
    def __init__(self, vocab_size, n_hidden):
        self.i_h = nn.Embedding(vocab_size, n_hidden)   # word id -> hidden vector
        self.h_h = nn.Linear(n_hidden, n_hidden)        # hidden -> hidden
        self.h_o = nn.Linear(n_hidden, vocab_size)      # hidden -> vocabulary scores
        # Hidden state kept across calls; the scalar 0 broadcasts on first use.
        self.h = 0

    def forward(self, x):
        for i in range(3):
            # Token ids at position i, one per sample in the batch.
            w = x[:,i]
            # Add the word embedding to the running state: (batch, n_hidden).
            self.h = self.h + self.i_h(w)
            # Shared hidden-to-hidden transform: still (batch, n_hidden).
            self.h = F.relu(
                self.h_h(self.h)
            )
        out = self.h_o(self.h)
        # Keep the values for the next call but drop the autograd history, so
        # backpropagation does not reach into earlier batches.
        self.h = self.h.detach()
        return out