[View in Colaboratory](https://colab.research.google.com/github/PranavMahajan25/Deep-Learning-Implementations/blob/master/RNN_pytorch(fast_ai).ipynb)

In [0]:
!pip3 install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl 
!pip3 install torchvision
!pip3 install fastai
!pip3 install spacy && python -m spacy download en

In [0]:
%matplotlib inline

In [0]:
from fastai.io import *
from fastai.conv_learner import *

from fastai.column_data import *

In [15]:
PATH='data/nietzsche/'
# get_data comes from fastai.io; it is expected to fetch the URL into the given path.
get_data("https://s3.amazonaws.com/text-datasets/nietzsche.txt", f'{PATH}nietzsche.txt')
# Read through a context manager so the file handle is closed as soon as the text is loaded.
with open(f'{PATH}nietzsche.txt') as corpus_file:
    text = corpus_file.read()
print('corpus length:', len(text))

corpus length: 600893


In [5]:
text[:500]

'PREFACE\n\n\nSUPPOSING that Truth is a woman--what then? Is there not ground\nfor suspecting that all philosophers, in so far as they have been\ndogmatists, have failed to understand women--that the terrible\nseriousness and clumsy importunity with which they have usually paid\ntheir addresses to Truth, have been unskilled and unseemly methods for\nwinning a woman? Certainly she has never allowed herself to be won; and\nat present every kind of dogma stands with sad and discouraged mien--IF,\nindeed, it s'

In [5]:
# Vocabulary: every distinct character of the corpus in sorted order, preceded by a
# NUL character that occupies index 0.
chars = ["\0"] + sorted(set(text))
vocab_size = len(chars)
print('total chars:', vocab_size)


total chars: 85


In [7]:
print(chars[0:-6])

['\x00', '\n', ' ', '!', '"', "'", '(', ')', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '=', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y']


In [6]:
char_indices = {c: i for i, c in enumerate(chars)}
idx = [char_indices[c] for c in text]
idx[:10]

[40, 42, 29, 30, 25, 27, 29, 1, 1, 1]

In [7]:
# Reverse lookup: vocabulary index -> character.
indices_char = {i: c for i, c in enumerate(chars)}
# Decode the first ten encoded positions straight from idx instead of a hand-copied
# list of numbers, so the round-trip check stays valid if the corpus or vocabulary changes.
charx = [indices_char[i] for i in idx[:10]]
charx[:10]

['P', 'R', 'E', 'F', 'A', 'C', 'E', '\n', '\n', '\n']

# 3-character model

In [0]:
# Number of input characters per training example.
cs=3
# Walk the encoded corpus in non-overlapping steps of cs characters. For each step the
# characters at offsets 0, 1 and 2 become the three inputs and the character at
# offset 3 becomes the prediction target.
c1_dat = [idx[i]   for i in range(0, len(idx)-cs, cs)]
c2_dat = [idx[i+1] for i in range(0, len(idx)-cs, cs)]
c3_dat = [idx[i+2] for i in range(0, len(idx)-cs, cs)]
c4_dat = [idx[i+3] for i in range(0, len(idx)-cs, cs)]

In [0]:
x1 = np.stack(c1_dat)
x2 = np.stack(c2_dat)
x3 = np.stack(c3_dat)
y = np.stack(c4_dat)

In [12]:
x1[:4], x2[:4], x3[:4]

(array([40, 30, 29,  1]), array([42, 25,  1, 43]), array([29, 27,  1, 45]))

In [13]:

y[:4]

array([30, 29,  1, 40])

In [14]:
x1.shape,y.shape

((200297,), (200297,))

In [0]:
n_hidden = 256
n_fac=42 #embeddings size

In [0]:
class Char3Model(nn.Module):
    """Predict the next character from three input characters with a hand-unrolled RNN.

    The embedding and the three linear layers are created once and reused for every
    character position. The hidden width is read from the module-level `n_hidden`.

    Args:
        vocab_size: number of distinct character indices (rows of the embedding and
            width of the output layer).
        n_fac: size of each character embedding.
    """
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)

        # The 'green arrow' from our diagram - the layer operation from input to hidden
        self.l_in = nn.Linear(n_fac, n_hidden)

        # The 'orange arrow' from our diagram - the layer operation from hidden to hidden
        self.l_hidden = nn.Linear(n_hidden, n_hidden)
        
        # The 'blue arrow' from our diagram - the layer operation from hidden to output
        self.l_out = nn.Linear(n_hidden, vocab_size)
        
    def forward(self, c1, c2, c3):
        """Return log-probabilities over the vocabulary for the character after c1, c2, c3."""
        in1 = F.relu(self.l_in(self.e(c1)))
        in2 = F.relu(self.l_in(self.e(c2)))
        in3 = F.relu(self.l_in(self.e(c3)))
        
        # Start from an all-zero hidden state with the same size as one projected input.
        h = V(torch.zeros(in1.size()).cuda())
        h = F.tanh(self.l_hidden(h+in1))
        h = F.tanh(self.l_hidden(h+in2))
        h = F.tanh(self.l_hidden(h+in3))
        
        # Normalise over the vocabulary axis explicitly; relying on the implicit
        # dimension is deprecated and every other model in this notebook passes dim=-1.
        return F.log_softmax(self.l_out(h), dim=-1)

In [0]:
md = ColumnarModelData.from_arrays('.', [-1], np.stack([x1,x2,x3], axis=1), y, bs=512)

m =  Char3Model(vocab_size, n_fac).cuda()

In [0]:
it = iter(md.trn_dl)
*xs,yt = next(it)
t = m(*V(xs))

In [19]:
# Only the last expression of a cell is displayed, so show the model output for the
# first training batch directly (the former `md,` ... `yt,` lines were no-op tuples).
t

Variable containing:
-4.6761 -4.4326 -4.2251  ...  -4.6642 -4.8194 -4.4240
-4.5821 -4.2972 -4.3980  ...  -4.6519 -4.6038 -4.4318
-4.6093 -4.2848 -4.3186  ...  -4.3018 -4.4857 -4.6288
          ...             ⋱             ...          
-4.2546 -4.5138 -4.4584  ...  -4.6267 -4.4684 -4.4786
-4.5246 -4.5423 -4.5485  ...  -4.8612 -4.6206 -4.3814
-4.5089 -4.4401 -4.4121  ...  -4.7843 -4.7301 -4.2392
[torch.cuda.FloatTensor of size 512x85 (GPU 0)]

In [20]:
opt = optim.Adam(m.parameters(), 1e-2)
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss   
    0      2.116363   1.166479  



[array([1.16648])]

In [21]:
set_lrs(opt, 0.001)
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.85875    0.879778  



[array([0.87978])]

In [0]:
def get_next(inp):
    """Return the character that the global model `m` scores highest after `inp`.

    Each character of `inp` is mapped through `char_indices`, and the resulting
    indices are passed to the model as separate positional arguments.
    """
    char_ids = np.array([char_indices[ch] for ch in inp])
    model_out = m(*VV(T(char_ids)))
    best = np.argmax(to_np(model_out))
    return chars[best]

In [23]:
get_next('tru')

't'

# RNN using concatenation

In [0]:
cs =8 #size of unrolled rnn

In [0]:
# One overlapping window of cs consecutive character indices per start position.
c_in_dat = [idx[start:start+cs] for start in range(len(idx)-cs)]

In [0]:
c_out_dat = [idx[j+cs] for j in range(len(idx)-cs)]

In [27]:
xs = np.stack(c_in_dat, axis=0)
xs.shape

(600885, 8)

In [0]:
y = np.stack(c_out_dat)

In [29]:
xs[:20,:cs]

array([[40, 42, 29, 30, 25, 27, 29,  1],
       [42, 29, 30, 25, 27, 29,  1,  1],
       [29, 30, 25, 27, 29,  1,  1,  1],
       [30, 25, 27, 29,  1,  1,  1, 43],
       [25, 27, 29,  1,  1,  1, 43, 45],
       [27, 29,  1,  1,  1, 43, 45, 40],
       [29,  1,  1,  1, 43, 45, 40, 40],
       [ 1,  1,  1, 43, 45, 40, 40, 39],
       [ 1,  1, 43, 45, 40, 40, 39, 43],
       [ 1, 43, 45, 40, 40, 39, 43, 33],
       [43, 45, 40, 40, 39, 43, 33, 38],
       [45, 40, 40, 39, 43, 33, 38, 31],
       [40, 40, 39, 43, 33, 38, 31,  2],
       [40, 39, 43, 33, 38, 31,  2, 73],
       [39, 43, 33, 38, 31,  2, 73, 61],
       [43, 33, 38, 31,  2, 73, 61, 54],
       [33, 38, 31,  2, 73, 61, 54, 73],
       [38, 31,  2, 73, 61, 54, 73,  2],
       [31,  2, 73, 61, 54, 73,  2, 44],
       [ 2, 73, 61, 54, 73,  2, 44, 71]])

In [30]:

y[:20]


array([ 1,  1, 43, 45, 40, 40, 39, 43, 33, 38, 31,  2, 73, 61, 54, 73,  2, 44, 71, 74])

In [0]:
val_idx = get_cv_idxs(len(idx)-cs-1)
md = ColumnarModelData.from_arrays('.', val_idx, xs, y, bs=512)

In [0]:
class CharLoopConcatModel(nn.Module):
    """Character RNN written as an explicit loop over the input positions.

    At every step the current hidden state is concatenated with the embedding of the
    next character before the input layer is applied. The hidden width is taken from
    the module-level `n_hidden`.
    """
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        # The input layer sees the hidden state and the embedding side by side.
        self.l_in = nn.Linear(n_fac+n_hidden, n_hidden)
        self.l_hidden = nn.Linear(n_hidden, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        
    def forward(self, *cs):
        """Consume one index tensor per position and return log-probabilities for the next character."""
        batch_size = cs[0].size(0)
        hidden = V(torch.zeros(batch_size, n_hidden).cuda())
        for char_batch in cs:
            joined = torch.cat((hidden, self.e(char_batch)), 1)
            activated = F.relu(self.l_in(joined))
            hidden = F.tanh(self.l_hidden(activated))
        
        logits = self.l_out(hidden)
        return F.log_softmax(logits, dim=-1)

In [33]:
m = CharLoopConcatModel(vocab_size, n_fac).cuda()
opt = optim.Adam(m.parameters(), 1e-3)
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.813198   1.796775  



[array([1.79678])]

In [34]:
set_lrs(opt, 1e-4)
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.716738   1.721608  



[array([1.72161])]

In [35]:
get_next('for thos')

'e'

# RNN using PyTorch's built-in `nn.RNN`
Compare with `CharLoopConcatModel`.

In [0]:
class CharRnn(nn.Module):
    """Same task as the hand-written loop model, but the recurrence is delegated to nn.RNN.

    Only the output of the final time step is used for the prediction. The hidden
    width is taken from the module-level `n_hidden`.
    """
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNN(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        
    def forward(self, *cs):
        """Take one index tensor per position; return log-probabilities for the next character."""
        batch_size = cs[0].size(0)
        h0 = V(torch.zeros(1, batch_size, n_hidden))
        # torch.stack joins the per-position index tensors along a new leading
        # dimension, so the sequence axis comes first as nn.RNN expects by default.
        embedded = self.e(torch.stack(cs))
        outputs, _ = self.rnn(embedded, h0)
        
        last_step = outputs[-1]
        return F.log_softmax(self.l_out(last_step), dim=-1)

In [38]:
m = CharRnn(vocab_size, n_fac).cuda()
opt = optim.Adam(m.parameters(), 1e-3)
it = iter(md.trn_dl)
*xs,yt = next(it)
fit(m, md, 4, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.867699   1.846299  
 22%|██▏       | 211/939 [00:06<00:21, 34.43it/s, loss=1.8] 

    1      1.671898   1.672194  


    2      1.595611   1.596666  
    3      1.533915   1.55066   



[array([1.55066])]

In [46]:
set_lrs(opt, 1e-4)

fit(m, md, 2, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=2), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.472107   1.512657  
 22%|██▏       | 211/939 [00:06<00:20, 34.91it/s, loss=1.45]

    1      1.466541   1.507175  



[array([1.50718])]

In [0]:
def get_next_n(inp, n):
    """Extend the seed string `inp` by `n` characters produced one at a time by get_next.

    After every prediction the context handed to get_next drops its first character
    and gains the new one, so its length stays constant.
    """
    context = inp
    pieces = [inp]
    for _ in range(n):
        nxt = get_next(context)
        pieces.append(nxt)
        context = context[1:] + nxt
    return ''.join(pieces)
get_next_n('strong love',100)

In [50]:
get_next_n('have been',100)

'have been strong the same to the same to the same to the same to the same to the same to the same to the sam'

In [52]:
get_next_n('much like',100)

'much like the same to the same to the same to the same to the same to the same to the same to the same to the'

In [54]:
get_next_n('I miss my days',100)

'I miss my days and the same to the same to the same to the same to the same to the same to the same to the same to'

In [55]:
get_next_n('which they have',100)

'which they have a strong the same to the same to the same to the same to the same to the same to the same to the sa'

In [56]:
get_next_n('understanding women',100)

'understanding women and the same to the same to the same to the same to the same to the same to the same to the same to'

In [0]:
# Non-overlapping windows of cs characters; each target window is its input window
# shifted one character to the right.
c_in_dat = [idx[start:start+cs] for start in range(0, len(idx)-cs-1, cs)]
c_out_dat = [idx[start:start+cs] for start in range(1, len(idx)-cs, cs)]

In [59]:
xs = np.stack(c_in_dat)
ys = np.stack(c_out_dat)
xs.shape,ys.shape

((75111, 8), (75111, 8))

In [61]:
xs[:20,:cs]

array([[40, 42, 29, 30, 25, 27, 29,  1],
       [ 1,  1, 43, 45, 40, 40, 39, 43],
       [33, 38, 31,  2, 73, 61, 54, 73],
       [ 2, 44, 71, 74, 73, 61,  2, 62],
       [72,  2, 54,  2, 76, 68, 66, 54],
       [67,  9,  9, 76, 61, 54, 73,  2],
       [73, 61, 58, 67, 24,  2, 33, 72],
       [ 2, 73, 61, 58, 71, 58,  2, 67],
       [68, 73,  2, 60, 71, 68, 74, 67],
       [57,  1, 59, 68, 71,  2, 72, 74],
       [72, 69, 58, 56, 73, 62, 67, 60],
       [ 2, 73, 61, 54, 73,  2, 54, 65],
       [65,  2, 69, 61, 62, 65, 68, 72],
       [68, 69, 61, 58, 71, 72,  8,  2],
       [62, 67,  2, 72, 68,  2, 59, 54],
       [71,  2, 54, 72,  2, 73, 61, 58],
       [78,  2, 61, 54, 75, 58,  2, 55],
       [58, 58, 67,  1, 57, 68, 60, 66],
       [54, 73, 62, 72, 73, 72,  8,  2],
       [61, 54, 75, 58,  2, 59, 54, 62]])

In [64]:
ys[:20,:cs]

array([[42, 29, 30, 25, 27, 29,  1,  1],
       [ 1, 43, 45, 40, 40, 39, 43, 33],
       [38, 31,  2, 73, 61, 54, 73,  2],
       [44, 71, 74, 73, 61,  2, 62, 72],
       [ 2, 54,  2, 76, 68, 66, 54, 67],
       [ 9,  9, 76, 61, 54, 73,  2, 73],
       [61, 58, 67, 24,  2, 33, 72,  2],
       [73, 61, 58, 71, 58,  2, 67, 68],
       [73,  2, 60, 71, 68, 74, 67, 57],
       [ 1, 59, 68, 71,  2, 72, 74, 72],
       [69, 58, 56, 73, 62, 67, 60,  2],
       [73, 61, 54, 73,  2, 54, 65, 65],
       [ 2, 69, 61, 62, 65, 68, 72, 68],
       [69, 61, 58, 71, 72,  8,  2, 62],
       [67,  2, 72, 68,  2, 59, 54, 71],
       [ 2, 54, 72,  2, 73, 61, 58, 78],
       [ 2, 61, 54, 75, 58,  2, 55, 58],
       [58, 67,  1, 57, 68, 60, 66, 54],
       [73, 62, 72, 73, 72,  8,  2, 61],
       [54, 75, 58,  2, 59, 54, 62, 65]])

In [0]:
# Pick the validation row indices with fastai's get_cv_idxs and wrap the arrays
# in a ColumnarModelData with batches of 512 rows.
# NOTE(review): xs already holds one row per sequence, so subtracting cs+1 from the
# row count looks like a leftover from the per-character dataset above; it only
# keeps the last few rows out of the validation candidates - confirm intent.
val_idx = get_cv_idxs(len(xs)-cs-1)
md = ColumnarModelData.from_arrays('.', val_idx, xs, ys, bs=512)
class CharSeqRnn(nn.Module):
    """nn.RNN character model that emits a prediction after every input position.

    Unlike a last-step-only model, the output layer is applied to the RNN output of
    all time steps. The hidden width is taken from the module-level `n_hidden`.
    """
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNN(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        
    def forward(self, *cs):
        """Take one index tensor per position; return log-probabilities for every position."""
        batch_size = cs[0].size(0)
        h0 = V(torch.zeros(1, batch_size, n_hidden))
        embedded = self.e(torch.stack(cs))
        outputs, _ = self.rnn(embedded, h0)
        logits = self.l_out(outputs)
        return F.log_softmax(logits, dim=-1)
m = CharSeqRnn(vocab_size, n_fac).cuda()
opt = optim.Adam(m.parameters(), 1e-3)
it = iter(md.trn_dl)
*xst,yt = next(it)


In [0]:
def nll_loss_seq(inp, targ):
    """Negative log-likelihood for per-position predictions.

    `inp` must be 3-D; its last dimension holds the per-class log-probabilities.
    `targ` has its first two axes swapped so that, once flattened, its entries line
    up with the flattened rows of `inp`.
    """
    _, _, n_classes = inp.size()
    flat_targ = targ.transpose(0,1).contiguous().view(-1)
    flat_inp = inp.view(-1, n_classes)
    return F.nll_loss(flat_inp, flat_targ)

In [69]:
m.rnn.weight_hh_l0.data.copy_(torch.eye(n_hidden))  ##Hinton's 2015 paper


    1     0     0  ...      0     0     0
    0     1     0  ...      0     0     0
    0     0     1  ...      0     0     0
       ...          ⋱          ...       
    0     0     0  ...      1     0     0
    0     0     0  ...      0     1     0
    0     0     0  ...      0     0     1
[torch.cuda.FloatTensor of size 256x256 (GPU 0)]

In [70]:
fit(m, md, 4, opt, nll_loss_seq)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss   
    0      2.367242   2.308005  
    1      2.269234   2.238829  
    2      2.213178   2.19687   
    3      2.179532   2.166616  



[array([2.16662])]

In [71]:
set_lrs(opt, 1e-4)
fit(m, md, 1, opt, nll_loss_seq)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss   
    0      2.149212   2.143031  



[array([2.14303])]

# Stateful model

In [16]:
!cat data/nietzsche/nietzsche.txt | wc -l

9934


In [0]:
!mkdir data/nietzsche/trn
!mkdir data/nietzsche/val

In [0]:
!head -n 7000 data/nietzsche/nietzsche.txt | tee data/nietzsche/trn/trn.txt
!tail -n 2934 data/nietzsche/nietzsche.txt | tee data/nietzsche/val/val.txt

In [25]:
!cat data/nietzsche/trn/trn.txt | wc -l

7000


In [26]:
!cat data/nietzsche/val/val.txt | wc -l

2933


In [27]:
from torchtext import vocab, data

from fastai.nlp import *
from fastai.lm_rnn import *

# Root folder of the dataset; the train/validation sub-folders are relative to it.
PATH='data/nietzsche/'

TRN_PATH = 'trn/'
VAL_PATH = 'val/'
TRN = f'{PATH}{TRN_PATH}'
VAL = f'{PATH}{VAL_PATH}'

# Note: the dataset must be prepared before proceeding (see the shell cells above):
# - trn/trn.txt (first 7000 lines of nietzsche.txt, roughly 70%)
# - val/val.txt (the remaining lines of nietzsche.txt, roughly 30%)

%ls {PATH}

nietzsche.txt  [0m[01;34mtrn[0m/  [01;34mval[0m/


In [28]:
%ls {PATH}trn

trn.txt


In [29]:
TEXT = data.Field(lower=True, tokenize=list)
bs=64; bptt=8; n_fac=42; n_hidden=256

FILES = dict(train=TRN_PATH, validation=VAL_PATH, test=VAL_PATH)
md = LanguageModelData.from_text_files(PATH, TEXT, **FILES, bs=bs, bptt=bptt, min_freq=3)

len(md.trn_dl), md.nt, len(md.trn_ds), len(md.trn_ds[0].text)

(837, 54, 1, 429135)

In [0]:
# From the pytorch source

def RNNCell(input, hidden, w_ih, w_hh, b_ih, b_hh):
    """Single tanh RNN step: tanh of the summed input and hidden linear projections."""
    from_input = F.linear(input, w_ih, b_ih)
    from_hidden = F.linear(hidden, w_hh, b_hh)
    return F.tanh(from_input + from_hidden)

In [0]:
class CharSeqStatefulRnn2(nn.Module):
    """Stateful character model that steps an nn.RNNCell manually over the sequence.

    The hidden state is stored on the module in `self.h` and carried over from one
    forward call to the next; it is reset to zeros whenever the batch size changes.
    The hidden width is taken from the module-level `n_hidden`.

    Args:
        vocab_size: number of distinct tokens.
        n_fac: size of each token embedding.
        bs: batch size used to allocate the initial hidden state.
    """
    def __init__(self, vocab_size, n_fac, bs):
        super().__init__()
        self.vocab_size = vocab_size
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNNCell(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        self.init_hidden(bs)
        
    def forward(self, cs):
        """Run the cell over every position of `cs`; return flattened log-probabilities
        with `vocab_size` columns."""
        bs = cs[0].size(0)
        # The cell state is 2-D (bs, n_hidden), so the batch dimension is axis 0.
        if self.h.size(0) != bs: self.init_hidden(bs)
        outp = []
        o = self.h
        for c in cs: 
            o = self.rnn(self.e(c), o)
            outp.append(o)
        outp = self.l_out(torch.stack(outp))
        # repackage_var is a fastai helper; presumably it detaches the state from the
        # previous batch's graph so backprop stops at the batch boundary - confirm.
        self.h = repackage_var(o)
        return F.log_softmax(outp, dim=-1).view(-1, self.vocab_size)
    
    def init_hidden(self, bs):
        """Reset the hidden state to zeros for a batch of `bs` sequences."""
        # nn.RNNCell takes a (batch, hidden) state. The former (1, bs, n_hidden)
        # layout belongs to nn.RNN and only worked here through broadcasting.
        self.h = V(torch.zeros(bs, n_hidden))

In [0]:
m = CharSeqStatefulRnn2(md.nt, n_fac, 512).cuda()
opt = optim.Adam(m.parameters(), 1e-3)

In [33]:
fit(m, md, 4, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.896546   1.913566  
 36%|███▌      | 298/837 [00:05<00:10, 51.37it/s, loss=1.82]

    1      1.706597   1.742194  


    2      1.616072   1.669026  
    3      1.560965   1.627798  



[array([1.6278])]

# GRU and LSTM cells give better performance than plain RNN cells

In [0]:
class CharSeqStatefulGRU(nn.Module):
    """Stateful character model built on nn.GRU.

    The hidden state lives in `self.h` between forward calls and is re-created with
    zeros when the incoming batch size differs from the stored one. The hidden width
    is taken from the module-level `n_hidden`.
    """
    def __init__(self, vocab_size, n_fac, bs):
        super().__init__()
        self.vocab_size = vocab_size
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.GRU(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        self.init_hidden(bs)
        
    def forward(self, cs):
        """Return log-probabilities for every position, flattened to `vocab_size` columns."""
        batch_size = cs[0].size(0)
        if self.h.size(1) != batch_size:
            self.init_hidden(batch_size)
        outputs, new_h = self.rnn(self.e(cs), self.h)
        self.h = repackage_var(new_h)
        log_probs = F.log_softmax(self.l_out(outputs), dim=-1)
        return log_probs.view(-1, self.vocab_size)
    
    def init_hidden(self, bs):
        """Reset the stored hidden state to zeros for `bs` sequences."""
        zeros = torch.zeros(1, bs, n_hidden)
        self.h = V(zeros)
      
# From the pytorch source code - for reference

def GRUCell(input, hidden, w_ih, w_hh, b_ih, b_hh):
    """Single GRU step written out with functional ops.

    The input and hidden projections are each split into three equal chunks along
    dimension 1, feeding the reset gate, the input gate and the candidate state.
    """
    in_r, in_i, in_n = F.linear(input, w_ih, b_ih).chunk(3, 1)
    hid_r, hid_i, hid_n = F.linear(hidden, w_hh, b_hh).chunk(3, 1)

    reset = F.sigmoid(in_r + hid_r)
    gate = F.sigmoid(in_i + hid_i)
    # The reset gate scales only the hidden contribution to the candidate state.
    candidate = F.tanh(in_n + reset * hid_n)
    # Blend between the candidate (gate = 0) and the previous hidden state (gate = 1).
    return candidate + gate * (hidden - candidate)

In [35]:
m = CharSeqStatefulGRU(md.nt, n_fac, 512).cuda()

opt = optim.Adam(m.parameters(), 1e-3)

fit(m, md, 6, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=6), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.770318   1.77696   
 39%|███▉      | 329/837 [00:04<00:06, 80.94it/s, loss=1.67]

    1      1.578865   1.62333   


    2      1.486993   1.564848  
    3      1.429499   1.526052  
 14%|█▍        | 118/837 [00:01<00:09, 78.28it/s, loss=1.43]

    4      1.383741   1.508247  
    5      1.353114   1.494747  



[array([1.49475])]

In [36]:
set_lrs(opt, 1e-4)

fit(m, md, 3, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=3), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.258307   1.462969  
 39%|███▉      | 330/837 [00:04<00:06, 79.77it/s, loss=1.31]

    1      1.269495   1.459366  
    2      1.26746    1.456618  



[array([1.45662])]

## New `get_next()` and `get_next_n()` for `TEXT.vocab`

In [0]:
def get_next(inp):
    """Sample the next character after `inp` from the global model `m`.

    The string is numericalized with the torchtext field `TEXT`; the next token is
    drawn with torch.multinomial from the exponentiated last row of the model output
    rather than taken as the arg-max.
    """
    token_ids = TEXT.numericalize(inp)
    log_probs = m(VV(token_ids.transpose(0,1)))
    sampled = torch.multinomial(log_probs[-1].exp(), 1)
    return TEXT.vocab.itos[to_np(sampled)[0]]

In [0]:
def get_next_n(inp, n):
    """Generate `n` further characters after the seed `inp` by calling get_next repeatedly.

    The context passed to get_next slides forward by one character per step: its
    first character is dropped and the newly produced one is appended.
    """
    context = inp
    pieces = [inp]
    for _ in range(n):
        nxt = get_next(context)
        pieces.append(nxt)
        context = context[1:] + nxt
    return ''.join(pieces)

In [43]:
get_next_n('have been', 400)

'have been:" and like in a perhaps their more edisting--i headts, delicable; and ofy sympaths, of the french a soul, how man is their daven at a kind" all their extraordinarys?106. after visively to doing consequents,a cluminty and have requires good man as fear is adox, seld missepularity in the world of every his age along with the ear tastes ofmaniter "modern an actificatism, owingstrould madator! and in'

In [44]:
get_next_n('much like',400)

"much like probowing more will ambition.         a. a unemotively;it were commands will out for the very no longer cational exaking wellcaustranted thanthat is thusing--aswowlerngs, it was really this point of the insurely. them with sighed on earthandinfour into tastes putting--or whom experience of   day laughto: he with such merely knowadays, go one's ofentermerrow-previrtue, who to taking owing to helle"

In [45]:
get_next_n('I miss my days',400)

'I miss my days in more magnish what is that among occasion of austaptics with sative scientifictest then someon specism has also one a actuals which he belief! they hard, bound to recognizul of the metaphysical taught in everythingnere, they are honemelands, thepossibility, that it was free spirit in the doubity difficult" is actificated induce forms of arrum, and old into allow to comporiousness?).--but we kno'

In [46]:
get_next_n('understanding women',400)

'understanding women took find all especially richard except occurredincertance." the speek tooly, is not he worth in its estimatest is requirement nay! but "age (as in some laitsynsiouss and requirestant,than also about him thinking isteeps or is milled into bot up toagnerating which ordinary more costors of distrust;ay the belier, and is crebeianity, fact we are the amble, and love of the maturity; one dis vicionar'

In [0]:
from fastai import sgdr

n_hidden=512

class CharSeqStatefulLSTM(nn.Module):
    """Stateful character model built on a multi-layer nn.LSTM with dropout 0.5.

    The state is a pair of tensors stored in `self.h` between forward calls; it is
    re-created with zeros when the incoming batch size changes. The hidden width is
    taken from the module-level `n_hidden`.

    Args:
        vocab_size: number of distinct tokens.
        n_fac: size of each token embedding.
        bs: batch size used to allocate the initial state.
        nl: number of stacked LSTM layers.
    """
    def __init__(self, vocab_size, n_fac, bs, nl):
        super().__init__()
        self.vocab_size = vocab_size
        self.nl = nl
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.LSTM(n_fac, n_hidden, nl, dropout=0.5)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        self.init_hidden(bs)
        
    def forward(self, cs):
        """Return log-probabilities for every position, flattened to `vocab_size` columns."""
        batch_size = cs[0].size(0)
        if self.h[0].size(1) != batch_size:
            self.init_hidden(batch_size)
        outputs, new_state = self.rnn(self.e(cs), self.h)
        self.h = repackage_var(new_state)
        log_probs = F.log_softmax(self.l_out(outputs), dim=-1)
        return log_probs.view(-1, self.vocab_size)
    
    def init_hidden(self, bs):
        """Reset both state tensors to zeros for `bs` sequences."""
        first = V(torch.zeros(self.nl, bs, n_hidden))
        second = V(torch.zeros(self.nl, bs, n_hidden))
        self.h = (first, second)

In [48]:
m = CharSeqStatefulLSTM(md.nt, n_fac, 512, 2).cuda()
lo = LayerOptimizer(optim.Adam, m, 1e-2, 1e-5)
os.makedirs(f'{PATH}models', exist_ok=True)
fit(m, md, 2, lo.opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=2), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.928408   1.852095  
 34%|███▍      | 288/837 [00:06<00:11, 46.80it/s, loss=1.84]

    1      1.725508   1.671039  



[array([1.67104])]

In [49]:
on_end = lambda sched, cycle: save_model(m, f'{PATH}models/cyc_{cycle}')
cb = [CosAnneal(lo, len(md.trn_dl), cycle_mult=2, on_cycle_end=on_end)]
fit(m, md, 2**4-1, lo.opt, F.nll_loss, callbacks=cb)

HBox(children=(IntProgress(value=0, description='Epoch', max=15), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.529105   1.522802  
 33%|███▎      | 278/837 [00:06<00:12, 46.17it/s, loss=1.65]

    1      1.575525   1.538003  


    2      1.427903   1.448071  
    3      1.592513   1.581464  
  8%|▊         | 64/837 [00:01<00:17, 44.97it/s, loss=1.59]

    4      1.491149   1.492414  


    5      1.398366   1.4258    
    6      1.326546   1.393989  
 11%|█         | 88/837 [00:01<00:16, 45.07it/s, loss=1.33]

    7      1.578389   1.546977  


    8      1.528047   1.523015  
    9      1.479904   1.487113  
  9%|▉         | 77/837 [00:01<00:16, 46.87it/s, loss=1.48]

    10     1.434823   1.46273   


    11     1.379255   1.415903  
    12     1.335107   1.388118  
 11%|█         | 89/837 [00:01<00:16, 46.14it/s, loss=1.34]

    13     1.281454   1.367052  
    14     1.244399   1.356175  



[array([1.35618])]

In [50]:
on_end = lambda sched, cycle: save_model(m, f'{PATH}models/cyc_{cycle}')
cb = [CosAnneal(lo, len(md.trn_dl), cycle_mult=2, on_cycle_end=on_end)]
fit(m, md, 2**6-1, lo.opt, F.nll_loss, callbacks=cb)

HBox(children=(IntProgress(value=0, description='Epoch', max=63), HTML(value='')))

epoch      trn_loss   val_loss   
    0      1.24186    1.353589  
 34%|███▍      | 284/837 [00:06<00:11, 46.81it/s, loss=1.26]

    1      1.243437   1.352513  


    2      1.237277   1.351641  
    3      1.24712    1.351353  
  8%|▊         | 71/837 [00:01<00:17, 43.86it/s, loss=1.24]

    4      1.236016   1.349186  


    5      1.228558   1.347315  
    6      1.224525   1.346814  
 10%|▉         | 83/837 [00:01<00:16, 46.03it/s, loss=1.23]

    7      1.228065   1.348113  


    8      1.224632   1.346076  
    9      1.221996   1.343949  
  9%|▉         | 79/837 [00:01<00:16, 44.75it/s, loss=1.23]

    10     1.2122     1.342346  


    11     1.210542   1.341784  
    12     1.20362    1.340894  
  9%|▉         | 74/837 [00:01<00:16, 47.16it/s, loss=1.21]

    13     1.199315   1.340639  


    14     1.203851   1.340834  
    15     1.204548   1.342453  
  9%|▊         | 72/837 [00:01<00:15, 48.24it/s, loss=1.22]

    16     1.19942    1.341447  


    17     1.193645   1.341271  
    18     1.193654   1.340717  
  8%|▊         | 71/837 [00:01<00:17, 43.78it/s, loss=1.2]

    19     1.181273   1.340194  


    20     1.180488   1.340232  
    21     1.175049   1.340909  
  8%|▊         | 67/837 [00:01<00:17, 45.09it/s, loss=1.19]

    22     1.168731   1.340517  


    23     1.155431   1.340724  
    24     1.16212    1.340466  
  8%|▊         | 71/837 [00:01<00:16, 45.73it/s, loss=1.17]

    25     1.157973   1.341496  


    26     1.153812   1.341135  
    27     1.148098   1.340845  
  7%|▋         | 62/837 [00:01<00:17, 45.01it/s, loss=1.16]

    28     1.147169   1.341002  


    29     1.146775   1.341168  
    30     1.148749   1.341432  
  9%|▉         | 79/837 [00:01<00:16, 46.52it/s, loss=1.16]

    31     1.14709    1.341313  


    32     1.154382   1.342461  
    33     1.149926   1.342664  
  9%|▉         | 76/837 [00:01<00:16, 45.14it/s, loss=1.16]

    34     1.148443   1.343756  


    35     1.146194   1.344841  
    36     1.135897   1.347349  
  7%|▋         | 59/837 [00:01<00:17, 43.75it/s, loss=1.15]

    37     1.13267    1.348163  


    38     1.122121   1.349945  
    39     1.115372   1.35079   
  7%|▋         | 55/837 [00:01<00:16, 46.23it/s, loss=1.13]

    40     1.110733   1.35158   


    41     1.108059   1.35342   
    42     1.106465   1.35494   
 10%|▉         | 83/837 [00:01<00:16, 46.75it/s, loss=1.11]

    43     1.094476   1.356639  


    44     1.09381    1.358493  
    45     1.081021   1.359306  
 10%|█         | 86/837 [00:01<00:15, 47.85it/s, loss=1.09]

    46     1.077507   1.361001  


    47     1.084257   1.36307   
    48     1.07713    1.364331  
 10%|█         | 84/837 [00:01<00:15, 48.35it/s, loss=1.09]

    49     1.067957   1.364325  


    50     1.068184   1.3669    
    51     1.064108   1.367267  
 10%|▉         | 80/837 [00:01<00:16, 44.68it/s, loss=1.07]

    52     1.063409   1.368971  


    53     1.05448    1.369563  
    54     1.045984   1.369652  
  8%|▊         | 70/837 [00:01<00:16, 46.61it/s, loss=1.06]

    55     1.05042    1.370015  


    56     1.043335   1.37144   
    57     1.046835   1.371699  
  8%|▊         | 64/837 [00:01<00:16, 47.57it/s, loss=1.05]

    58     1.037568   1.372844  


    59     1.047029   1.373012  
    60     1.049744   1.37371   
  9%|▉         | 78/837 [00:01<00:17, 44.58it/s, loss=1.05]

    61     1.047995   1.373636  
    62     1.042044   1.373721  



[array([1.37372])]