In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

from fastai.io import *
from fastai.conv_learner import *

from fastai.column_data import *

# Any results you write to the current directory are saved as output.

  from numpy.core.umath_tests import inner1d


In [2]:
PATH = 'data/nietzsche/'


In [4]:
# Read the whole corpus into memory. The context manager closes the file
# handle as soon as the read finishes instead of leaving it to the GC.
with open(f'{PATH}nietzsche.txt') as corpus_file:
    text = corpus_file.read()
print('corpus length:', len(text))

corpus length: 600893


In [5]:
text[:400]

'PREFACE\n\n\nSUPPOSING that Truth is a woman--what then? Is there not ground\nfor suspecting that all philosophers, in so far as they have been\ndogmatists, have failed to understand women--that the terrible\nseriousness and clumsy importunity with which they have usually paid\ntheir addresses to Truth, have been unskilled and unseemly methods for\nwinning a woman? Certainly she has never allowed herself '

In [6]:
# Distinct characters of the corpus, in sorted order. vocab_size counts one
# extra entry on top of the distinct characters.
chars = sorted(set(text))
vocab_size = 1 + len(chars)
print('total chars:', vocab_size)

total chars: 85


In [7]:
# Reserve index 0 for padding by putting "\0" at the front of `chars`.
# The guard makes this cell safe to re-run: without it every re-execution
# would insert another "\0" and shift all character indices by one.
if not chars or chars[0] != "\0":
    chars.insert(0, "\0")
''.join(chars[1:-6])

'\n !"\'(),-.0123456789:;=?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxy'

In [8]:
# Lookup tables over `chars`: position -> character and character -> position.
indices_char = dict(enumerate(chars))
char_indices = {ch: pos for pos, ch in indices_char.items()}

In [9]:
# Encode the whole corpus: idx[k] is the integer index of text[k].
idx = list(map(char_indices.__getitem__, text))
idx[:10]

[40, 42, 29, 30, 25, 27, 29, 1, 1, 1]

In [10]:
 ''.join(indices_char[i] for i in idx[:70])

'PREFACE\n\n\nSUPPOSING that Truth is a woman--what then? Is there not gro'

# Three char model

**create inputs**

In [None]:
# Walk the text in steps of cs (=3). For each step start s, c1/c2/c3 hold the
# characters at s, s+1, s+2 and c4 holds the character at s+3.
cs=3
group_starts = range(0, len(idx)-cs, cs)
c1_dat = [idx[s] for s in group_starts]
c2_dat = [idx[s+1] for s in group_starts]
c3_dat = [idx[s+2] for s in group_starts]
c4_dat = [idx[s+3] for s in group_starts]

In [None]:
#our inputs
x1 = np.stack(c1_dat)
x2 = np.stack(c2_dat)
x3 = np.stack(c3_dat)

In [None]:
#our output (the label for each x1, x2, x3 triple)
y = np.stack(c4_dat)

In [None]:
#1st 4 inputs & outputs
x1[:4], x2[:4], x3[:4]

In [None]:
y[:4]

In [None]:
x1.shape, y.shape

**create and train model**

In [23]:
#size of hidden state
n_hidden = 256

In [24]:
#size of embedding matrix
n_fac = 42


In [None]:
class Char3Model(nn.Module):
    """Next-character model that consumes exactly three input characters.

    Each character index is embedded, passed through the shared input layer
    `l_lin` with a ReLU, and added to the running hidden activation, which is
    then passed through the shared `l_hidden` layer with a tanh. The final
    hidden activation is mapped by `l_out` to `vocab_size` log-probabilities.

    The hidden size is read from the module-level `n_hidden`.
    """
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)

        # layer operation from input to hidden
        self.l_lin = nn.Linear(n_fac,n_hidden)

        # layer operation from hidden to hidden
        self.l_hidden = nn.Linear(n_hidden, n_hidden)

        # layer operation from hidden to output
        self.l_out = nn.Linear(n_hidden, vocab_size)

    def forward(self, c1, c2,c3):
        """Return log-probabilities of the next character given c1, c2, c3."""
        in1 = F.relu(self.l_lin(self.e(c1)))
        in2 = F.relu(self.l_lin(self.e(c2)))
        in3 = F.relu(self.l_lin(self.e(c3)))

        # Start from an all-zero hidden activation with the same shape as in1.
        h = V(torch.zeros(in1.size()).cuda())
        h = F.tanh(self.l_hidden(h+in1))
        h = F.tanh(self.l_hidden(h+in2))
        h = F.tanh(self.l_hidden(h+in3))

        # Normalise over the last (vocabulary) axis explicitly; relying on the
        # implicit dim is deprecated and the other models here all pass dim=-1.
        return F.log_softmax(self.l_out(h), dim=-1)

In [None]:
md = ColumnarModelData.from_arrays('.',[-1], np.stack([x1,x2,x3],axis=1),y,bs=512)

In [None]:
m = Char3Model(vocab_size,n_fac).cuda()

In [None]:
it = iter(md.trn_dl)
*xs,yt = next(it)
t = m(*V(xs))

In [None]:
opt = optim.Adam(m.parameters(), 1e-2)

In [None]:
fit(m,md,1,opt,F.nll_loss)

In [None]:
set_lrs(opt, 0.001)

In [None]:
fit(m,md,1,opt,F.nll_loss)

## Testing

In [None]:
def get_next(inp):
    """Return the character the global model `m` predicts after the string `inp`.

    Every character of `inp` is looked up in `char_indices`; the resulting
    index array is wrapped with `T` and `VV` and unpacked into `m`. The
    argmax of the model output (as a numpy array) is used to index `chars`.
    """
    char_ids = np.array([char_indices[ch] for ch in inp])
    preds = m(*VV(T(char_ids)))
    best = np.argmax(to_np(preds))
    return chars[best]

In [None]:
get_next('car')

In [None]:
get_next('see')

In [None]:
get_next('pla')

# Our first RNN

Let's create the inputs:


In [11]:
#size of our unrolled RNN
cs=8

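For every starting position in the text, take the 8 consecutive characters that begin there (overlapping windows, as the array below shows). Column k of the result, for k from 0 to 7, is the k-th of the 8 inputs to our model.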

In [12]:
c_in_dat = [[idx[i+j] for i in range(cs)] for j in range(len(idx) -cs)]

Then create a list of the next character after each of these sequences, which will be our labels:

In [13]:
c_out_dat = [idx[j+cs] for j in range(len(idx)-cs)]

In [14]:
xs = np.stack(c_in_dat, axis=0)

In [15]:
xs.shape

(600885, 8)

In [16]:
y = np.stack(c_out_dat)

In [17]:
xs[:cs,:cs]

array([[40, 42, 29, 30, 25, 27, 29,  1],
       [42, 29, 30, 25, 27, 29,  1,  1],
       [29, 30, 25, 27, 29,  1,  1,  1],
       [30, 25, 27, 29,  1,  1,  1, 43],
       [25, 27, 29,  1,  1,  1, 43, 45],
       [27, 29,  1,  1,  1, 43, 45, 40],
       [29,  1,  1,  1, 43, 45, 40, 40],
       [ 1,  1,  1, 43, 45, 40, 40, 39]])

This is the next character after each sequence. 

In [18]:
y[:cs]

array([ 1,  1, 43, 45, 40, 40, 39, 43])

# Create and train model

In [19]:
val_idx = get_cv_idxs(len(idx)-cs-1)

In [20]:
md=ColumnarModelData.from_arrays('.',val_idx,xs,y,bs=512)

In [21]:
class CharLoopModel(nn.Module):
    """A hand-rolled RNN: the same layers are applied to each input character in a loop.

    The hidden size is read from the module-level `n_hidden`.
    """
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.l_in = nn.Linear(n_fac, n_hidden)
        self.l_hidden = nn.Linear(n_hidden, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)

    def forward(self, *cs):
        """Return log-probabilities of the next character given the characters in `cs`."""
        batch_size = cs[0].size(0)
        # Hidden activation starts at zero and is updated once per character.
        state = V(torch.zeros(batch_size, n_hidden).cuda())
        for char_ids in cs:
            embedded = self.e(char_ids)
            projected = F.relu(self.l_in(embedded))
            state = F.tanh(self.l_hidden(state + projected))

        logits = self.l_out(state)
        return F.log_softmax(logits, dim=-1)

In [25]:
m = CharLoopModel(vocab_size,n_fac).cuda()
opt = optim.Adam(m.parameters(),1e-2)

In [26]:
fit(m, md, 1, opt, F.nll_loss)


HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      2.030953   2.040313  



[array([2.04031])]

In [27]:
set_lrs(opt,0.001)

In [28]:
fit(m,md,1,opt,F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.736717   1.739696  



[array([1.7397])]

In [35]:
class CharLoopConcatModel(nn.Module):
    """Looping character model that concatenates the hidden activation with each embedding.

    Unlike the additive variant, `l_in` takes `n_fac + n_hidden` features: the
    current hidden activation and the character embedding joined along dim 1.
    The hidden size is read from the module-level `n_hidden`.
    """
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.l_in = nn.Linear(n_fac + n_hidden, n_hidden)
        self.l_hidden = nn.Linear(n_hidden, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)

    def forward(self, *cs):
        """Return log-probabilities of the next character given the characters in `cs`."""
        batch_size = cs[0].size(0)
        state = V(torch.zeros(batch_size, n_hidden).cuda())
        for char_ids in cs:
            # Hidden activation first, embedding second, joined along dim 1.
            joined = torch.cat((state, self.e(char_ids)), 1)
            projected = F.relu(self.l_in(joined))
            state = F.tanh(self.l_hidden(projected))

        logits = self.l_out(state)
        return F.log_softmax(logits, dim=-1)


In [36]:
m = CharLoopConcatModel(vocab_size,n_fac).cuda()
opt = optim.Adam(m.parameters(), 1e-3)

In [37]:
it = iter(md.trn_dl)
*xs,yt = next(it)
t = m(*V(xs))

In [38]:
fit(m,md,1,opt,F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.821145   1.799464  



[array([1.79946])]

In [39]:
set_lrs(opt, 1e-4)

In [40]:
fit(m, md,1,opt,F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.722957   1.722552  



[array([1.72255])]

# Let's test the model

In [41]:
def get_next(inp):
    """Return the character the global model `m` predicts after the string `inp`.

    Every character of `inp` is looked up in `char_indices`; the resulting
    index array is wrapped with `T` and `VV` and unpacked into `m`. The
    argmax of the model output (as a numpy array) is used to index `chars`.
    """
    char_ids = np.array([char_indices[ch] for ch in inp])
    preds = m(*VV(T(char_ids)))
    best = np.argmax(to_np(preds))
    return chars[best]

In [42]:
get_next('for thos')

'e'

In [44]:
get_next('part of ')

't'

In [45]:
get_next('shi')

'n'

In [46]:
get_next('tr')

'o'

# RNN with pytorch

In [47]:
class CharRNN(nn.Module):
    """Character model built on `nn.RNN`; predicts one character from the last time step.

    The hidden size is read from the module-level `n_hidden`.
    """
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNN(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)

    def forward(self, *cs):
        """Return log-probabilities of the next character given the characters in `cs`."""
        batch_size = cs[0].size(0)
        h0 = V(torch.zeros(1, batch_size, n_hidden))
        # Stack the per-position index tensors along a new leading axis, then embed.
        embedded = self.e(torch.stack(cs))
        outputs, _ = self.rnn(embedded, h0)

        # Only the output at the final position feeds the prediction.
        last_step = outputs[-1]
        return F.log_softmax(self.l_out(last_step), dim=-1)

In [49]:
m = CharRNN(vocab_size, n_fac).cuda()
opt = optim.Adam(m.parameters(),1e-3)

In [50]:
it = iter(md.trn_dl)
*xs,yt = next(it)

In [51]:
t = m.e(V(torch.stack(xs)))
t.size()

torch.Size([8, 512, 42])

In [52]:
ht = V(torch.zeros(1,512,n_hidden))
outp,hn = m.rnn(t,ht)
outp.size(),hn.size()

(torch.Size([8, 512, 256]), torch.Size([1, 512, 256]))

In [53]:
t = m(*V(xs)); t.size()

torch.Size([512, 85])

In [54]:
fit(m, md, 4, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.849165   1.844802  
    1      1.66784    1.669055                              
    2      1.584553   1.59724                               
    3      1.533005   1.553613                              



[array([1.55361])]

In [55]:
set_lrs(opt,1e-4)

In [56]:
fit(m, md, 2, opt, F.nll_loss)

HBox(children=(IntProgress(value=0, description='Epoch', max=2), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.458044   1.510942  
    1      1.464209   1.506052                              



[array([1.50605])]

### Let's test the model

In [75]:
def get_next(inp):
    """Return the character the global model `m` predicts after the string `inp`.

    Every character of `inp` is looked up in `char_indices`; the resulting
    index array is wrapped with `T` and `VV` and unpacked into `m`. The
    argmax of the model output (as a numpy array) is used to index `chars`.
    """
    char_ids = np.array([char_indices[ch] for ch in inp])
    preds = m(*VV(T(char_ids)))
    best = np.argmax(to_np(preds))
    return chars[best]

In [76]:
get_next('I am very ')

'p'

In [77]:
def get_next_n(inp, n):
    """Extend `inp` by `n` predicted characters and return the full string.

    Each step calls `get_next` on the current window, appends the predicted
    character to the result, and slides the window forward by dropping its
    first character and adding the new one at the end.
    """
    window = inp
    pieces = [inp]
    for _ in range(n):
        nxt = get_next(window)
        pieces.append(nxt)
        window = window[1:] + nxt
    return ''.join(pieces)

In [78]:
get_next_n('for thos', 40)

'for those of the sense of the sense of the sense'

# Multi-output model

### setup

Let's take non-overlapping sets of characters:

In [79]:
c_in_dat = [[idx[i+j] for i in range(cs)] for j in range(0, len(idx)-cs-1,cs)]

For the labels we build exactly the same thing, offset by one character:

In [80]:
c_out_dat = [[idx[i+j] for i in range(cs)] for j in range(1, len(idx)-cs,cs)]

In [81]:
xs = np.stack(c_in_dat)
xs.shape

(75111, 8)

In [82]:
ys = np.stack(c_out_dat)
ys.shape

(75111, 8)

In [83]:
xs[:cs,:cs]

array([[40, 42, 29, 30, 25, 27, 29,  1],
       [ 1,  1, 43, 45, 40, 40, 39, 43],
       [33, 38, 31,  2, 73, 61, 54, 73],
       [ 2, 44, 71, 74, 73, 61,  2, 62],
       [72,  2, 54,  2, 76, 68, 66, 54],
       [67,  9,  9, 76, 61, 54, 73,  2],
       [73, 61, 58, 67, 24,  2, 33, 72],
       [ 2, 73, 61, 58, 71, 58,  2, 67]])

In [84]:
ys[:cs,:cs]

array([[42, 29, 30, 25, 27, 29,  1,  1],
       [ 1, 43, 45, 40, 40, 39, 43, 33],
       [38, 31,  2, 73, 61, 54, 73,  2],
       [44, 71, 74, 73, 61,  2, 62, 72],
       [ 2, 54,  2, 76, 68, 66, 54, 67],
       [ 9,  9, 76, 61, 54, 73,  2, 73],
       [61, 58, 67, 24,  2, 33, 72,  2],
       [73, 61, 58, 71, 58,  2, 67, 68]])

### Create and train model

In [85]:
val_idx = get_cv_idxs(len(xs)-cs-1)

In [86]:
md = ColumnarModelData.from_arrays('.', val_idx,xs,ys,bs=512)

In [87]:
class CharSeqRnn(nn.Module):
    """Character model built on `nn.RNN` that emits a prediction at every time step.

    The hidden size is read from the module-level `n_hidden`.
    """
    def __init__(self, vocab_size, n_fac):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNN(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)

    def forward(self, *cs):
        """Return log-probabilities over the vocabulary for every position in `cs`."""
        batch_size = cs[0].size(0)
        h0 = V(torch.zeros(1, batch_size, n_hidden))
        embedded = self.e(torch.stack(cs))
        outputs, _ = self.rnn(embedded, h0)
        # The whole output sequence is projected, not just the last step.
        logits = self.l_out(outputs)
        return F.log_softmax(logits, dim=-1)

In [89]:
m = CharSeqRnn(vocab_size,n_fac).cuda()
opt = optim.Adam(m.parameters(),1e-3)

In [90]:
it = iter(md.trn_dl)
*xst,yt = next(it)

In [91]:
def nll_loss_seq(inp, targ):
    """Negative log-likelihood loss for per-time-step predictions.

    `inp` must be 3-D; its last axis is kept as the class axis and the first
    two axes are flattened together. `targ` has its first two axes swapped
    before flattening so that its element order matches the flattened `inp`.
    """
    seq_len, batch_size, n_out = inp.size()
    flat_targ = targ.transpose(0, 1).contiguous().view(-1)
    flat_inp = inp.view(-1, n_out)
    return F.nll_loss(flat_inp, flat_targ)

In [92]:
fit(m, md,4,opt,nll_loss_seq)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      2.635968   2.43601   
    1      2.305967   2.208475                              
    2      2.144119   2.091926                              
    3      2.046234   2.015903                              



[array([2.0159])]

In [93]:
set_lrs(opt, 1e-4)

In [94]:
fit(m,md,1,opt,nll_loss_seq)

HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      1.993502   1.998528  



[array([1.99853])]

### Identity init!

In [95]:
m = CharSeqRnn(vocab_size,n_fac).cuda()
opt = optim.Adam(m.parameters(), 1e-2)

In [96]:
m.rnn.weight_hh_l0.data.copy_(torch.eye(n_hidden))


    1     0     0  ...      0     0     0
    0     1     0  ...      0     0     0
    0     0     1  ...      0     0     0
       ...          ⋱          ...       
    0     0     0  ...      1     0     0
    0     0     0  ...      0     1     0
    0     0     0  ...      0     0     1
[torch.cuda.FloatTensor of size 256x256 (GPU 0)]

In [97]:
fit(m,md,4,opt,nll_loss_seq)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss                              
    0      2.329707   2.145268  
    1      2.054588   1.99214                               
    2      1.953355   1.931806                              
    3      1.896553   1.895064                             



[array([1.89506])]

In [98]:
set_lrs(opt,1e-3)

In [99]:
fit(m,md,4,opt,nll_loss_seq)

HBox(children=(IntProgress(value=0, description='Epoch', max=4), HTML(value='')))

epoch      trn_loss   val_loss                             
    0      1.801179   1.822663  
    1      1.787866   1.815379                              
    2      1.780794   1.811947                              
    3      1.772717   1.805266                              



[array([1.80527])]

# Stateful model

### Setup

In [100]:

from torchtext import vocab, data

from fastai.nlp import *
from fastai.lm_rnn import *

PATH='data/nietzsche/'

TRN_PATH = 'trn/'
VAL_PATH = 'val/'
TRN = f'{PATH}{TRN_PATH}'
VAL = f'{PATH}{VAL_PATH}'

%ls {PATH}

nietzsche.txt


In [101]:
%ls {PATH}trn

ls: cannot access 'data/nietzsche/trn': No such file or directory


In [None]:
import torch
print(torch.__version__)