In [1]:
%reload_ext autoreload

%autoreload 2
%matplotlib inline

In [2]:
from fastai.imports import *
from fastai.io import *
from fastai.conv_learner import *
from fastai.column_data import *

In [3]:
PATH = '/content/clouderizer/hackerearth-dl-3/data/nietzsche/'

In [None]:
get_data('https://s3.amazonaws.com/text-datasets/nietzsche.txt', f'{PATH}nietzsche.txt')

In [4]:
!ls {PATH}

nietzsche.txt


In [5]:
text = open(f'{PATH}nietzsche.txt').read()
len(text)

600893

In [6]:
text[:400]

'PREFACE\n\n\nSUPPOSING that Truth is a woman--what then? Is there not ground\nfor suspecting that all philosophers, in so far as they have been\ndogmatists, have failed to understand women--that the terrible\nseriousness and clumsy importunity with which they have usually paid\ntheir addresses to Truth, have been unskilled and unseemly methods for\nwinning a woman? Certainly she has never allowed herself '

In [7]:
chars = sorted(list(set(text)))
vocab_size = len(chars)+1
vocab_size

85

In [8]:
chars.insert(0, '\0')
''.join(chars[1:-6])

'\n !"\'(),-.0123456789:;=?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxy'

In [9]:
char_indices = {c: i for i, c in enumerate(chars)}

In [10]:
indices_char = {i: c for i, c in enumerate(chars)}

In [11]:
idx = [char_indices[c] for c in text]
idx[:10]

[40, 42, 29, 30, 25, 27, 29, 1, 1, 1]

In [12]:
''.join(indices_char[i] for i in idx[:70])

'PREFACE\n\n\nSUPPOSING that Truth is a woman--what then? Is there not gro'

In [13]:
cs = 3
c1_dat = [idx[i] for i in range(0, len(idx)-cs, cs)]
c2_dat = [idx[i+1] for i in range(0, len(idx)-cs, cs)]
c3_dat = [idx[i+2] for i in range(0, len(idx)-cs, cs)]
c4_dat = [idx[i+3] for i in range(0, len(idx)-cs, cs)]

In [14]:
x1 = np.stack(c1_dat)
x2 = np.stack(c2_dat)
x3 = np.stack(c3_dat)

In [15]:
y = np.stack(c4_dat)

In [16]:
x1[:4], x2[:4], x3[:4]

(array([40, 30, 29,  1]), array([42, 25,  1, 43]), array([29, 27,  1, 45]))

In [17]:
y[:4]

array([30, 29,  1, 40])

In [18]:
x1.shape, y.shape

((200297,), (200297,))

In [19]:
n_hidden = 256

In [20]:
n_fac = 42

In [21]:
class Char3Model(nn.Module):
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.l_in = nn.Linear(n_fac, n_hidden)
        self.l_hidden = nn.Linear(n_hidden, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
    def forward(self, c1, c2, c3):
        in1 = F.relu(self.l_in(self.e(c1)))
        in2 = F.relu(self.l_in(self.e(c2)))
        in3 = F.relu(self.l_in(self.e(c3)))
        h = V(torch.zeros(in1.size()).cuda())
        h = F.tanh(self.l_hidden(h+in1))
        h = F.tanh(self.l_hidden(h+in2))
        h = F.tanh(self.l_hidden(h+in3))
        return F.log_softmax(self.l_out(h))

In [22]:
md = ColumnarModelData.from_arrays('.', [-1], np.stack([x1, x2, x3], axis=1), y, bs=512)

In [23]:
m = Char3Model(vocab_size, n_fac).cuda()

In [24]:
it = iter(md.trn_dl)
*xs, yt = next(it)
t = m(*V(xs))

In [25]:
opt = optim.Adam(m.parameters(), 1e-2)

In [26]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      2.08337    0.999472  



[array([0.99947])]

In [27]:
set_lrs(opt, 0.001)

In [28]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.83289    0.450849  



[array([0.45085])]

In [29]:
def get_next(inp):
    idxs = T(np.array([char_indices[c] for c in inp]))
    p = m(*VV(idxs))
    i = np.argmax(to_np(p))
    return chars[i]

In [30]:
get_next('y. ')

'T'

In [31]:
get_next('hi ')

's'

In [32]:
get_next('of ')

't'

In [33]:
cs = 8

In [34]:
c_in_dat = [[idx[i+j] for i in range(cs)] for j in range(len(idx)-cs)]

In [35]:
c_out_dat = [idx[j+cs] for j in range(len(idx)-cs)]

In [36]:
xs = np.stack(c_in_dat, axis=0)

In [37]:
xs.shape

(600885, 8)

In [38]:
y = np.stack(c_out_dat)

In [39]:
xs[:cs, :cs]

array([[40, 42, 29, 30, 25, 27, 29,  1],
       [42, 29, 30, 25, 27, 29,  1,  1],
       [29, 30, 25, 27, 29,  1,  1,  1],
       [30, 25, 27, 29,  1,  1,  1, 43],
       [25, 27, 29,  1,  1,  1, 43, 45],
       [27, 29,  1,  1,  1, 43, 45, 40],
       [29,  1,  1,  1, 43, 45, 40, 40],
       [ 1,  1,  1, 43, 45, 40, 40, 39]])

In [40]:
y[:cs]

array([ 1,  1, 43, 45, 40, 40, 39, 43])

In [41]:
val_idx = get_cv_idxs(len(idx)-cs-1)

In [42]:
md = ColumnarModelData.from_arrays('.', val_idx, xs, y, bs=512)

In [46]:
class CharLoopModel(nn.Module):
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.l_in = nn.Linear(n_fac, n_hidden)
        self.l_hidden = nn.Linear(n_hidden, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
    
    def forward(self, *cs):
        bs = cs[0].size(0)
        h = V(torch.zeros(bs, n_hidden).cuda())
        for c in cs:
            inp = F.relu(self.l_in(self.e(c)))
            h = F.tanh(self.l_hidden(h+inp))
        return F.log_softmax(self.l_out(h), dim=-1)

In [47]:
m = CharLoopModel(vocab_size, n_fac).cuda()
opt = optim.Adam(m.parameters(), 1e-2)

In [48]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      2.00046    1.991496  



[array([1.9915])]

In [49]:
set_lrs(opt, 0.01)

In [50]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.984883   1.98573   



[array([1.98573])]

In [55]:
class CharLoopConcatModel(nn.Module):
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.l_in = nn.Linear(n_fac+n_hidden, n_hidden)
        self.l_hidden = nn.Linear(n_hidden, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
    def forward(self, *cs):
        bs = cs[0].size(0)
        h = V(torch.zeros(bs, n_hidden).cuda())
        for c in cs:
            inp = torch.cat((h, self.e(c)), 1)
            inp = F.relu(self.l_in(inp))
            h = F.tanh(self.l_hidden(inp))
        return F.log_softmax(self.l_out(h), dim=-1)

In [56]:
m = CharLoopConcatModel(vocab_size, n_fac).cuda()
opt = optim.Adam(m.parameters(), 1e-3)

In [57]:
it = iter(md.trn_dl)
*xs, yt = next(it)
t = m(*V(xs))

In [58]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.782801   1.761369  



[array([1.76137])]

In [59]:
set_lrs(opt, 1e-4)

In [60]:
fit(m, md, 1, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.678801   1.674716  



[array([1.67472])]

In [61]:
def get_next(inp):
    idxs = T(np.array([char_indices[c] for c in inp]))
    p = m(*VV(idxs))
    i = np.argmax(to_np(p))
    return chars[i]

In [64]:
get_next('for thos')

'e'

In [66]:
class CharRnn(nn.Module):
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNN(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        
    def forward(self, *cs):
        bs = cs[0].size(0)
        h = V(torch.zeros(1, bs, n_hidden))
        inp = self.e(torch.stack(cs))
        outp, h = self.rnn(inp, h)
        
        return F.log_softmax(self.l_out(outp[-1]), dim=-1)

In [67]:
m = CharRnn(vocab_size, n_fac).cuda()
opt = optim.Adam(m.parameters(), 1e-3)

In [68]:
fit(m, md, 2, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=2), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.86151    1.845161  
    1      1.674372   1.670037                              



[array([1.67004])]

In [69]:
fit(m, md, 2, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=2), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.592739   1.59457   
    1      1.535788   1.554798                              



[array([1.5548])]

In [70]:
set_lrs(opt, 1e-4)

In [71]:
fit(m, md, 2, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=2), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.465841   1.511639  
    1      1.462244   1.506338                              



[array([1.50634])]

In [72]:
def get_next(inp):
    idxs = T(np.array([char_indices[c] for c in inp]))
    p = m(*VV(idxs))
    i = np.argmax(to_np(p))
    return chars[i]

In [73]:
c_in_dat = [[idx[i+j] for i in range(cs)] for j in range(0, len(idx)-cs-1, cs)]

In [74]:
c_out_dat = [[idx[i+j] for i in range(cs)] for j in range(1, len(idx)-cs, cs)]

In [75]:
xs = np.stack(c_in_dat)
xs.shape

(75111, 8)

In [78]:
ys = np.stack(c_out_dat)

In [79]:
val_idx = get_cv_idxs(len(xs)-cs-1)

In [80]:
md = ColumnarModelData.from_arrays('.', val_idx, xs, ys, bs=512)

In [87]:
class CharSeqRnn(nn.Module):
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNN(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
    
    def forward(self, *cs):
        bs = cs[0].size(0)
        h = V(torch.zeros(1, bs, n_hidden))
        inp = self.e(torch.stack(cs))
        outp, h = self.rnn(inp, h)        
        return F.log_softmax(self.l_out(outp), dim=-1)

In [88]:
m = CharSeqRnn(vocab_size, n_fac).cuda()
opt = optim.Adam(m.parameters(), 1e-3)

In [89]:
def nll_loss_seq(inp, targ):
    sl,bs,nh = inp.size()
    targ = targ.transpose(0,1).contiguous().view(-1)
    return F.nll_loss(inp.view(-1,nh), targ)

In [90]:
fit(m, md, 4, opt, nll_loss_seq)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      2.619071   2.42235   
    1      2.297652   2.205144                              
    2      2.139584   2.085699                              
    3      2.044393   2.009962                              



[array([2.00996])]

In [91]:
set_lrs(opt, 1e-4)

In [92]:
fit(m, md, 1, opt, nll_loss_seq)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.994531   1.996021  



[array([1.99602])]

In [93]:
m = CharSeqRnn(vocab_size, n_fac).cuda()
opt = optim.Adam(m.parameters(), 1e-2)

In [94]:
m.rnn.weight_hh_l0.data.copy_(torch.eye(n_hidden))


    1     0     0  ...      0     0     0
    0     1     0  ...      0     0     0
    0     0     1  ...      0     0     0
       ...          ⋱          ...       
    0     0     0  ...      1     0     0
    0     0     0  ...      0     1     0
    0     0     0  ...      0     0     1
[torch.cuda.FloatTensor of size 256x256 (GPU 0)]

In [95]:
fit(m, md, 4, opt, nll_loss_seq)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      2.332499   2.169721  
    1      2.071053   1.998278                              
    2      1.966394   1.941441                              
    3      1.911764   1.912154                              



[array([1.91215])]

In [97]:
set_lrs(opt, 1e-3)

In [98]:
fit(m, md, 4, opt, nll_loss_seq)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.815455   1.838117  
    1      1.805387   1.8284                                
    2      1.797763   1.823395                             
    3      1.79058    1.817898                              



[array([1.8179])]

In [100]:
!pip install spacy

Collecting spacy
[?25l  Downloading https://files.pythonhosted.org/packages/3c/31/e60f88751e48851b002f78a35221d12300783d5a43d4ef12fbf10cca96c3/spacy-2.0.11.tar.gz (17.6MB)
[K    100% |████████████████████████████████| 17.6MB 2.1MB/s eta 0:00:01   28% |█████████                       | 4.9MB 19.2MB/s eta 0:00:01
Collecting murmurhash<0.29,>=0.28 (from spacy)
  Downloading https://files.pythonhosted.org/packages/5e/31/c8c1ecafa44db30579c8c457ac7a0f819e8b1dbc3e58308394fff5ff9ba7/murmurhash-0.28.0.tar.gz
Collecting cymem<1.32,>=1.30 (from spacy)
  Downloading https://files.pythonhosted.org/packages/f8/9e/273fbea507de99166c11cd0cb3fde1ac01b5bc724d9a407a2f927ede91a1/cymem-1.31.2.tar.gz
Collecting preshed<2.0.0,>=1.0.0 (from spacy)
[?25l  Downloading https://files.pythonhosted.org/packages/1b/ac/7c17b1fd54b60972785b646d37da2826311cca70842c011c4ff84fbe95e0/preshed-1.0.0.tar.gz (89kB)
[K    100% |████████████████████████████████| 92kB 20.4MB/s ta 0:00:01
[?25hCollecting thinc<6.11.0,>=6.10

In [101]:
!python -m spacy download en

Collecting https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.0.0/en_core_web_sm-2.0.0.tar.gz
[?25l  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.0.0/en_core_web_sm-2.0.0.tar.gz (37.4MB)
[K    100% |████████████████████████████████| 37.4MB 52.0MB/s ta 0:00:01 1% |▍                               | 512kB 5.7MB/s eta 0:00:07    77% |█████████████████████████       | 29.1MB 45.3MB/s eta 0:00:01
[?25hInstalling collected packages: en-core-web-sm
  Running setup.py install for en-core-web-sm ... [?25ldone
[?25hSuccessfully installed en-core-web-sm-2.0.0

[93m    Linking successful[0m
    /usr/local/lib/python3.6/dist-packages/en_core_web_sm -->
    /usr/local/lib/python3.6/dist-packages/spacy/data/en

    You can now load the model via spacy.load('en')



In [106]:
from torchtext import vocab, data
from fastai.nlp import *
from fastai.lm_rnn import *

PATH = '/content/clouderizer/hackerearth-dl-3/data/nietzsche/'
TRN_PATH = 'trn/'
VAL_PATH = 'val/'
TRN = f'{PATH}{TRN_PATH}'
VAL = f'{PATH}{VAL_PATH}'

%ls {PATH}

nietzsche.txt  [0m[01;34mtrn[0m/  [01;34mval[0m/


In [107]:
%ls {PATH}trn

nietzsche1.txt


In [105]:
os.makedirs(TRN, exist_ok=True)
os.makedirs(VAL, exist_ok=True)
train_perc = .8
with open(f'{PATH}nietzsche.txt', 'r') as f:
    lines = f.readlines()
    text_len = len(lines)
    part_train = open(f'{TRN}nietzsche1.txt', 'w')
    part_val = open(f'{VAL}nietzsche2.txt', 'w')
    for ix, l in enumerate(lines):
        if ix/text_len<train_perc:
            part_train.write(l)
        else:
            part_val.write(l)
    part_train.close()
    part_val.close()

In [109]:
TEXT = data.Field(lower=True, tokenize=list)
bs = 64; bptt = 8; n_fac = 42; n_hidden = 256

FILES = dict(train=TRN_PATH, validation=VAL_PATH, test=VAL_PATH)
md = LanguageModelData.from_text_files(PATH, TEXT, **FILES, bs=bs, bptt=bptt, min_freq=3)
len(md.trn_dl), md.nt, len(md.trn_ds), len(md.trn_ds[0].text)

(942, 55, 1, 482972)

In [110]:
class CharSeqStatefulRnn(nn.Module):
    def __init__(self, vocab_size, n_fac, bs):
        self.vocab_size = vocab_size
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNN(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        self.init_hidden(bs)
        
    def forward(self, cs):
        bs = cs[0].size(0)
        if self.h.size(1)!=bs: self.init_hidden(bs)
        outp, h = self.rnn(self.e(cs), self.h)
        self.h = repackage_var(h)
        return F.log_softmax(self.l_out(outp), dim=-1).view(-1, self.vocab_size)
        
    def init_hidden(self, bs): self.h = V(torch.zeros(1, bs, n_hidden))

In [111]:
m = CharSeqStatefulRnn(vocab_size, n_fac, 512).cuda()
opt = optim.Adam(m.parameters(), 1e-3)

In [112]:
fit(m, md, 4, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.897278   1.866042  
    1      1.715858   1.704338                              
    2      1.632773   1.648564                              
    3      1.574342   1.601633                              



[array([1.60163])]

In [113]:
set_lrs(opt, 1e-4)

In [114]:
fit(m, md, 4, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.501385   1.560133  
    1      1.500488   1.553796                              
    2      1.494497   1.549145                              
    3      1.488648   1.545314                              



[array([1.54531])]

In [121]:
class CharSeqStatefulRnn2(nn.Module):
    def __init__(self, vocab_size, n_fac, bs):
        super().__init__()
        self.vocab_size = vocab_size
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNNCell(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        self.init_hidden(bs)
    
    def forward(self, cs):
        bs = cs[0].size(0)
        if self.h.size(1)!=bs: self.init_hidden(bs)
        outp = []
        o = self.h
        for c in cs:
            o = self.rnn(self.e(c), o)
            outp.append(o)
        outp = self.l_out(torch.stack(outp))
        self.h = repackage_var(o)
        return F.log_softmax(outp, dim=-1).view(-1, self.vocab_size)
    
    def init_hidden(self, bs): self.h = V(torch.zeros(1, bs, n_hidden))

In [119]:
m = CharSeqStatefulRnn2(md.nt, n_fac, 512).cuda()
opt = optim.Adam(m.parameters(), 1e-3)

In [120]:
fit(m, md, 4, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.880992   1.849197  
    1      1.708801   1.699495                              
    2      1.616695   1.633536                              
    3      1.569881   1.590788                              



[array([1.59079])]

In [122]:
class CharSeqStatefulGRU(nn.Module):
    def __init__(self, vocab_size, n_fac, bs):
        super().__init__()
        self.vocab_size = vocab_size
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.GRU(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        self.init_hidden(bs)
    
    def forward(self, cs):
        bs = cs[0].size(0)
        if self.h.size(1)!=bs: self.init_hidden(bs)
        outp, h = self.rnn(self.e(cs), self.h)
        self.h = repackage_var(h)
        return F.log_softmax(self.l_out(outp), dim=-1).view(-1, self.vocab_size)
    
    def init_hidden(self, bs): self.h = V(torch.zeros(1, bs, n_hidden))

In [123]:
m = CharSeqStatefulGRU(md.nt, n_fac, 512).cuda()
opt = optim.Adam(m.parameters(), 1e-3)

In [124]:
fit(m, md, 6, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=6), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.761529   1.741525  
    1      1.583943   1.583122                              
    2      1.49457    1.51488                               
    3      1.445536   1.493942                              
    4      1.400485   1.468125                              
    5      1.377548   1.462811                              



[array([1.46281])]

In [125]:
set_lrs(opt, 1e-4)
fit(m, md, 3, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=3), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.289591   1.426389  
    1      1.290276   1.421453                              
    2      1.293634   1.420237                              



[array([1.42024])]

In [126]:
from fastai import sgdr
n_hidden = 512

In [144]:
class CharSeqStatefulLSTM(nn.Module):
    def __init__(self, vocab_size, n_fac, bs, nl):
        super().__init__()
        self.vocab_size, self.nl = vocab_size, nl
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.LSTM(n_fac, n_hidden, nl, dropout=0.5)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        self.init_hidden(bs)
        
    def forward(self, cs):
        bs = cs[0].size(0)
        if self.h[0].size(1)!=bs: self.init_hidden(bs)
        outp, h = self.rnn(self.e(cs), self.h)
        self.h = repackage_var(h)
        return F.log_softmax(self.l_out(outp), dim=-1).view(-1, self.vocab_size)
        
    def init_hidden(self, bs): self.h = (V(torch.zeros(self.nl, bs, n_hidden)), 
                                         V(torch.zeros(self.nl, bs, n_hidden)))

In [145]:
m = CharSeqStatefulLSTM(md.nt, n_fac, 512, 2).cuda()
lo = LayerOptimizer(optim.Adam, m, 1e-2, 1e-5)

In [146]:
os.makedirs(f'{PATH}models', exist_ok=True)

In [147]:
fit(m, md, 2, lo.opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=2), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.849575   1.752537  
    1      1.729232   1.656549                              


[array([1.65655])]

In [None]:
on_end = lambda sched, cycle: save_model(m, f'{PATH}models/cyc_{cycle}')
cb = [CosAnneal(lo, len(md.trn_dl), cycle_mult=2, on_cycle_end=on_end)]
fit(m, md, 2**4-1, lo.opt, F.nll_loss, callbacks=cb)

HBox(children=(IntProgress(value=0, description='Epoch', max=15), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.564539   1.49946   
    1      1.608771   1.535484                              
    2      1.480864   1.441178                              
    3      1.61779    1.550369                              
    4      1.550641   1.490907                              
    5      1.465093   1.423116                              
    6      1.401744   1.387773                              
    7      1.59794    1.524135                              
    8      1.568328   1.513191                              
    9      1.525157   1.48704                               
    10     1.488673   1.447542                              
    11     1.439729   1.414841                              
 93%|█████████▎| 876/942 [00:18<00:01, 48.63it/s, loss=1.4] 

In [138]:
def get_next(inp):
    idxs = TEXT.numericalize(inp)
    p = m(VV(idxs.transpose(0,1)))
    r = torch.multinomial(p[-1].exp(), 1)
    return TEXT.vocab.itos[to_np(r)[0]]


In [139]:
get_next('for thos')

'e'

In [140]:
def get_next_n(inp, n):
    res = inp
    for i in range(n):
        c = get_next(inp)
        res+=c
        inp = inp[1:]+c
    return res

In [142]:
print(get_next_n('for thos', 400))

for those taken," the pahe to this plebeians. in meditary to me! she is perfaction is ofman." insiltly responsibility--the child,howeqe, it cannowards to another; referred) of matters where call to the'hitdelly anadected: the delairs and blood" andfalse", a greater toknows of "rous" friendlines under other hand in its circumstance fromhome-soul-destrayed, and if so richard: when then thoughts to age were 


In [143]:
print(get_next_n('hi my n', 400))

hi my noreternal tender and seguasization: anndeers there--comprating and customs, moral, calledly, "forom ussh of the educate and disguish, with,belief in our "modernthis of lofty entourer,withouting is theporter ofszile ofa will to live moresymentor steep sensual; but has still nouths itself? has so a burden out, (to say of all more", any time to bad onthat "exhy, ""sympathy--then attituation of evenin
