# RNN
---

# Import Libraries

In [1]:
import torch
import torch.utils.data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np

In [2]:
lstm = nn.LSTM(3,3)

In [30]:
x = [torch.randn(1,3) for _ in range(5)]
torch.cat(x)


tensor([[ 1.9959, -0.0094, -0.8107],
        [-1.9418, -1.5982, -0.5297],
        [ 0.2225,  0.9908,  0.1086],
        [ 0.5519,  0.6413,  0.2606],
        [ 0.2096, -1.8712, -0.5589]])

In [31]:
torch.cat(x).shape

torch.Size([5, 3])

In [35]:
torch.cat(x).view(5, 1, 3)

tensor([[[ 1.9959, -0.0094, -0.8107]],

        [[-1.9418, -1.5982, -0.5297]],

        [[ 0.2225,  0.9908,  0.1086]],

        [[ 0.5519,  0.6413,  0.2606]],

        [[ 0.2096, -1.8712, -0.5589]]])

In [42]:
out, hid = lstm(torch.cat(x).view(5, 1, 3))

In [43]:
out

tensor([[[ 0.0213, -0.0101,  0.1333]],

        [[ 0.1576, -0.3163,  0.0579]],

        [[ 0.0163, -0.0553,  0.1200]],

        [[ 0.0055,  0.0230,  0.1197]],

        [[ 0.1879, -0.0805,  0.1540]]])

In [44]:
for i in x:
    out, hid = lstm(i.view(1,1,3))
    print(out)

tensor([[[ 0.0213, -0.0101,  0.1333]]])
tensor([[[ 0.1610, -0.2864, -0.0352]]])
tensor(1.00000e-02 *
       [[[-2.9729,  4.6452,  8.1661]]])
tensor(1.00000e-02 *
       [[[ 0.1586,  5.2490,  8.2591]]])
tensor([[[ 0.1929, -0.0836,  0.0769]]])


In [45]:
out.shape

torch.Size([1, 1, 3])

# Hyperparameters

In [2]:
dev = torch.device("cuda:0" if torch.cuda.is_available else "cpu")

In [3]:
epochs = 5
bs = 10
lr = 0.001
emb_sz = 42
hid_sz = 256

# Load Data

In [4]:
PATH = '../data/nietzsche/'

In [5]:
text = open(f'{PATH}/nietzsche.txt').read()

In [6]:
print(text[:400])

PREFACE


SUPPOSING that Truth is a woman--what then? Is there not ground
for suspecting that all philosophers, in so far as they have been
dogmatists, have failed to understand women--that the terrible
seriousness and clumsy importunity with which they have usually paid
their addresses to Truth, have been unskilled and unseemly methods for
winning a woman? Certainly she has never allowed herself 


In [7]:
len(text)

600893

In [8]:
text = text[:len(text)//10]
len(text)

60089

# Preprocess Data

## Vocab

In [9]:
vocab = list(set(text))
len(vocab)

77

In [10]:
vocab.insert(0, '\0')

In [11]:
idx2char = {i:c for i,c in enumerate(vocab)}
char2idx = {c:i for i,c in enumerate(vocab)} 

In [12]:
print(idx2char)
print(char2idx)

{0: '\x00', 1: 's', 2: 'b', 3: '!', 4: '5', 5: '0', 6: 'k', 7: '-', 8: 'z', 9: 'Y', 10: 'P', 11: 'e', 12: 'V', 13: '2', 14: 'c', 15: '7', 16: 'f', 17: '"', 18: 'w', 19: 'L', 20: 'K', 21: 'm', 22: ']', 23: '(', 24: "'", 25: 'E', 26: 'i', 27: 'l', 28: ':', 29: '4', 30: 'H', 31: 'U', 32: 'C', 33: 'T', 34: 'q', 35: 'a', 36: 'B', 37: ',', 38: 'S', 39: 'A', 40: 'o', 41: '?', 42: 'W', 43: '8', 44: '_', 45: 'M', 46: 'h', 47: 'J', 48: '.', 49: '3', 50: '9', 51: 'X', 52: 'y', 53: 'j', 54: 'I', 55: 'v', 56: 'n', 57: 'd', 58: '1', 59: '\n', 60: ')', 61: 'F', 62: 'R', 63: 'u', 64: 'O', 65: 't', 66: '6', 67: 'x', 68: 'D', 69: 'p', 70: 'g', 71: 'Q', 72: '[', 73: 'N', 74: ';', 75: ' ', 76: 'r', 77: 'G'}
{'\x00': 0, 's': 1, 'b': 2, '!': 3, '5': 4, '0': 5, 'k': 6, '-': 7, 'z': 8, 'Y': 9, 'P': 10, 'e': 11, 'V': 12, '2': 13, 'c': 14, '7': 15, 'f': 16, '"': 17, 'w': 18, 'L': 19, 'K': 20, 'm': 21, ']': 22, '(': 23, "'": 24, 'E': 25, 'i': 26, 'l': 27, ':': 28, '4': 29, 'H': 30, 'U': 31, 'C': 32, 'T': 33, 'q'

## Indexifying Characters

In [13]:
data = [char2idx[i] for i in text]


**Sanity Check**

In [14]:
print(data[:400])
print(''.join(idx2char[i] for i in data[:400]))

[10, 62, 25, 61, 39, 32, 25, 59, 59, 59, 38, 31, 10, 10, 64, 38, 54, 73, 77, 75, 65, 46, 35, 65, 75, 33, 76, 63, 65, 46, 75, 26, 1, 75, 35, 75, 18, 40, 21, 35, 56, 7, 7, 18, 46, 35, 65, 75, 65, 46, 11, 56, 41, 75, 54, 1, 75, 65, 46, 11, 76, 11, 75, 56, 40, 65, 75, 70, 76, 40, 63, 56, 57, 59, 16, 40, 76, 75, 1, 63, 1, 69, 11, 14, 65, 26, 56, 70, 75, 65, 46, 35, 65, 75, 35, 27, 27, 75, 69, 46, 26, 27, 40, 1, 40, 69, 46, 11, 76, 1, 37, 75, 26, 56, 75, 1, 40, 75, 16, 35, 76, 75, 35, 1, 75, 65, 46, 11, 52, 75, 46, 35, 55, 11, 75, 2, 11, 11, 56, 59, 57, 40, 70, 21, 35, 65, 26, 1, 65, 1, 37, 75, 46, 35, 55, 11, 75, 16, 35, 26, 27, 11, 57, 75, 65, 40, 75, 63, 56, 57, 11, 76, 1, 65, 35, 56, 57, 75, 18, 40, 21, 11, 56, 7, 7, 65, 46, 35, 65, 75, 65, 46, 11, 75, 65, 11, 76, 76, 26, 2, 27, 11, 59, 1, 11, 76, 26, 40, 63, 1, 56, 11, 1, 1, 75, 35, 56, 57, 75, 14, 27, 63, 21, 1, 52, 75, 26, 21, 69, 40, 76, 65, 63, 56, 26, 65, 52, 75, 18, 26, 65, 46, 75, 18, 46, 26, 14, 46, 75, 65, 46, 11, 52, 75, 46, 3

## 1. Char3 Data

In [15]:
seq=3
a = np.arange(10)

In [23]:
seq = 3
x1 = [data[i] for i in range(0, len(data)-seq, seq)]
x2 = [data[i+1] for i in range(0, len(data)-seq, seq)]
x3 = [data[i+2] for i in range(0, len(data)-seq, seq)]
x4 = [data[i+3] for i in range(0, len(data)-seq, seq)]

In [44]:
len(x1), len(x2), len(x3), len(x4)

(2002, 2002, 2002, 2002)

In [18]:
x1 = np.stack(x1)
x2 = np.stack(x2)
x3 = np.stack(x3)
x4 = np.stack(x4)

In [19]:
print(data[:10])
print(x1[:10])
print(x2[:10])
print(x3[:10])
print(x4[:10])

[59, 24, 82, 15, 69, 47, 82, 68, 68, 68]
[59 15 82 68 59  1 64 75 25 51]
[24 69 68  1 59  8 25 45 32 62]
[82 47 68 35 80 52 62 62 22 75]
[15 82 68 59  1 64 75 25 51 25]


In [20]:
x = torch.tensor(np.stack([x1, x2, x3], axis=1), dtype=torch.long)
y = torch.tensor(x4, dtype=torch.long)

In [21]:
x.shape

torch.Size([200297, 3])

In [22]:
x[:5]

tensor([[ 59,  24,  82],
        [ 15,  69,  47],
        [ 82,  68,  68],
        [ 68,   1,  35],
        [ 59,  59,  80]])

In [23]:
y[:5]

tensor([ 15,  82,  68,  59,   1])

In [24]:
trn_set = torch.utils.data.TensorDataset(x, y)
trn_ldr = torch.utils.data.DataLoader(dataset=trn_set, batch_size=bs, shuffle=False)

In [25]:
del x,y

In [26]:
import gc
gc.collect()

0

**Sanity Check**

In [27]:
it = iter(trn_ldr)
x1, y1 = it.next()

In [28]:
x1.shape, y1.shape

(torch.Size([512, 3]), torch.Size([512]))

In [29]:
x1[0], y1[0]

(tensor([ 59,  24,  82]), tensor(15))

In [30]:
x1_1, x1_2, x1_3 = x1[:,0], x1[:,1], x1[:,2] 

In [31]:
x1_1.shape, x1_2.shape, x1_3.shape

(torch.Size([512]), torch.Size([512]), torch.Size([512]))

In [32]:
x1_1[0], x1_2[0], x1_3[0]

(tensor(59), tensor(24), tensor(82))

## 2. RNN Data

In [16]:
seq=8

In [17]:
x = [[data[j+i] for i in range(seq)] for j in range(len(data)-seq)]
x = np.stack(x, 0)

In [18]:
x.shape, x[0]

((60081, 8), array([10, 62, 25, 61, 39, 32, 25, 59]))

In [19]:
y = [data[i+seq] for i in range(len(data)-seq)]
y = np.stack(y)

In [20]:
y.shape, y[0]

((60081,), 59)

In [21]:
print(x[:5])
print(y[:5])


[[10 62 25 61 39 32 25 59]
 [62 25 61 39 32 25 59 59]
 [25 61 39 32 25 59 59 59]
 [61 39 32 25 59 59 59 38]
 [39 32 25 59 59 59 38 31]]
[59 59 38 31 10]


In [22]:
x = torch.tensor(x, dtype=torch.long)
y = torch.tensor(y, dtype=torch.long)
x.shape, y.shape

(torch.Size([60081, 8]), torch.Size([60081]))

In [23]:
x.permute(1,0).shape

torch.Size([8, 60081])

In [24]:
trn_set = torch.utils.data.TensorDataset(x, y)
trn_ldr = torch.utils.data.DataLoader(dataset=trn_set, batch_size=bs, shuffle=False)

In [25]:
del x,y

In [26]:
import gc
gc.collect()

0

**Sanity Check**

In [27]:
it = iter(trn_ldr)
x1, y1 = it.next()

In [28]:
x1.permute(1,0).shape, y1.shape

(torch.Size([8, 10]), torch.Size([10]))

In [29]:
x1[0], y1[0]

(tensor([ 10,  62,  25,  61,  39,  32,  25,  59]), tensor(59))

# Build Models

## 1. Basic Char3 Model

In [38]:
class Char3(nn.Module):
    
    def __init__(self, emb_sz, hid_sz, vocab_sz):
        super(Char3, self).__init__()
        self.emb = nn.Embedding(vocab_sz, emb_sz)
        self.lin = nn.Linear(emb_sz, hid_sz)
        self.hid = nn.Linear(hid_sz, hid_sz)
        self.lout = nn.Linear(hid_sz, vocab_sz)
    
    def forward(self, x1, x2, x3):
        in1 = F.relu(self.lin(self.emb(x1)))
        in2 = F.relu(self.lin(self.emb(x2)))
        in3 = F.relu(self.lin(self.emb(x3)))
        
        h = torch.zeros(in1.size(), dtype=torch.float).to(dev)
        h = F.tanh(self.hid(h+in1))
        h = F.tanh(self.hid(h+in2))
        h = F.tanh(self.hid(h+in3))
        
        y = F.log_softmax(self.lout(h), dim=-1)
        
        return y

## 2. RNN Model

In [30]:
class RNN(nn.Module):
    
    def __init__(self, emb_sz, hid_sz, vocab_sz):
        super(RNN, self).__init__()
        self.emb = nn.Embedding(vocab_sz, emb_sz)
        self.lin = nn.Linear(emb_sz, hid_sz)
        self.hid = nn.Linear(hid_sz, hid_sz)
        self.lout = nn.Linear(hid_sz, vocab_sz)
    
    def forward(self, *xs):
        bs = xs[0].size(0)
        h = torch.zeros(bs, hid_sz, dtype=torch.float).to(dev)
        for x in xs:
            inp = F.relu(self.lin(self.emb(x)))        
            h = F.tanh(self.hid(h+inp))
        
        y = F.log_softmax(self.lout(h), dim=-1)
        return y


## 3. Pytorch RNN Model

## 4. Identity Initialization

## 5. Multi-output RNN Model

# Training

## 1. Char Model

In [34]:
char3md = Char3(emb_sz, hid_sz, len(vocab)).to(dev)

In [35]:
char3md

Char3(
  (emb): Embedding(85, 42)
  (lin): Linear(in_features=42, out_features=256, bias=True)
  (hid): Linear(in_features=256, out_features=256, bias=True)
  (lout): Linear(in_features=256, out_features=85, bias=True)
)

In [36]:
y1_pred = char3md(x1_1.to(dev), x1_2.to(dev), x1_3.to(dev))

In [37]:
y1_pred.shape

torch.Size([512, 85])

In [38]:
y1_pred = np.argmax(y1_pred.detach().cpu().numpy(), 1)
''.join(idx2char[i] for i in y1_pred)

'"; 1\nPf!V_Spa[gpÆkHÆH5ä]gp\nD]5!?q?7ÆK]7 E5! Hwi[tk2w G5æG-p"]-!_qg?)=\nTDnæH[q]vwp=pÆ_w\n?n[I]EqNHpæf HwæexWp]G!y7Hq=[n);kq5\n__n=pxw_))7R"ppBY9pwn]u\x00[2]H-TnptDMvj["XGm55pDfc75qDHDRH7E[]_pkT GWMfoUpw"5HCi]nqWM55DXHT_!gppCnVK\n5qDHC_K(z_c(MfoU5tnp3T_.M=p?j)!wDP\nw]VldBHpTk[mE_wRn?D7Gæ=TDng[P]rU?5H[)D_pgnp?UÆG]QwæG-i)HTDD\nætdz5HkHpDXÆ]Dn2æH3T_DQVÆD5HU55_w=Æ"..éq!2G7?p!)5])[.\nn[qD\x005HKTy-t[g_65Hq"T,ED("DqthWT7Hk;d"_j_!wq!2G?nh)iWpÆtppÆDppM)np(és\nwqcB])wnP=.nV!)[nVgElx7_EVKpÆ_"!npÆ";Sf5S7w7K)n?DY6Hq_\np"qVDG3PD)RKë!]5'

In [72]:
def train(md, trn_ldr, lr=0.001, epochs=1, name='rnn'):
    md = md.to(dev)
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(md.parameters(), lr)
    
    for epoch in range(epochs):
        for i, (x, y) in enumerate(trn_ldr,0):
            #  preformat input
            x1, x2, x3 = x[:,0].to(dev), x[:,1].to(dev), x[:,2].to(dev)
            y = y.to(dev)
            
            # forward
            out = md(x1,x2,x3)
            loss = criterion(out, y)
            
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # print status
            if (i%50) == 49:
                print("Epoch:{} Iterations:{} Loss:{}".format(epoch+1, i+1, loss.item()))
    # Save
    torch.save(md.state_dict(), "../models/{}.ckpt".format(name))

In [73]:
train(char3md, trn_ldr, 0.001, epochs, name='rnn0')

Epoch:1 Iterations:50 Loss:4.76631498336792
Epoch:1 Iterations:100 Loss:4.85749626159668
Epoch:1 Iterations:150 Loss:4.816066741943359
Epoch:1 Iterations:200 Loss:4.974685192108154
Epoch:1 Iterations:250 Loss:4.972196102142334
Epoch:1 Iterations:300 Loss:4.90360164642334
Epoch:1 Iterations:350 Loss:4.88721227645874
Epoch:2 Iterations:50 Loss:4.740199089050293
Epoch:2 Iterations:100 Loss:4.835621356964111
Epoch:2 Iterations:150 Loss:4.78948450088501
Epoch:2 Iterations:200 Loss:4.947761535644531
Epoch:2 Iterations:250 Loss:4.953901290893555
Epoch:2 Iterations:300 Loss:4.8849287033081055
Epoch:2 Iterations:350 Loss:4.870678901672363
Epoch:3 Iterations:50 Loss:4.716228008270264
Epoch:3 Iterations:100 Loss:4.816891193389893
Epoch:3 Iterations:150 Loss:4.7695231437683105
Epoch:3 Iterations:200 Loss:4.924459934234619
Epoch:3 Iterations:250 Loss:4.939205646514893
Epoch:3 Iterations:300 Loss:4.870082378387451
Epoch:3 Iterations:350 Loss:4.858486652374268
Epoch:4 Iterations:50 Loss:4.69789600372

## 2. RNN

In [32]:
rnn = RNN(emb_sz, hid_sz, len(vocab))

In [33]:
rnn

RNN(
  (emb): Embedding(78, 42)
  (lin): Linear(in_features=42, out_features=256, bias=True)
  (hid): Linear(in_features=256, out_features=256, bias=True)
  (lout): Linear(in_features=256, out_features=78, bias=True)
)

In [None]:
rnn

In [None]:
y1_pred = rnn(x1.to(dev))

In [145]:
y1_pred.shape

torch.Size([4, 85])

In [146]:
y1_pred = np.argmax(y1_pred.detach().cpu().numpy(), 1)
''.join(idx2char[i] for i in y1_pred)

'""FZ'

In [58]:
t = iter(trn_ldr)

In [60]:
(*x, y) = next(t)

In [63]:
len(x), len(x[0]), len(y)

(1, 512, 512)

In [147]:
def train(md, trn_ldr, lr=0.001, epochs=1, name='rnn'):
    md = md.to(dev)
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(md.parameters(), lr)
    
    for epoch in range(epochs):
        for i, (x, y) in enumerate(trn_ldr,0):
            #  preformat input
            x = x.permute(1,0)
            x, y = x.to(dev), y.to(dev)
            
            # forward
            out = md(x1,x2,x3)
            loss = criterion(out, y)
            
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # print status
            if (i%50) == 49:
                print("Epoch:{} Iterations:{} Loss:{}".format(epoch+1, i+1, loss.item()))
    # Save
    torch.save(md.state_dict(), "../models/{}.ckpt".format(name))

In [148]:
train(char3md, trn_ldr, 0.001, epochs, name='rnn1')

Epoch:1 Iterations:50 Loss:6.045565605163574
Epoch:1 Iterations:100 Loss:5.852102279663086
Epoch:1 Iterations:150 Loss:5.7982869148254395
Epoch:1 Iterations:200 Loss:5.889673709869385
Epoch:1 Iterations:250 Loss:5.715638160705566
Epoch:1 Iterations:300 Loss:5.8514404296875
Epoch:1 Iterations:350 Loss:5.752878665924072
Epoch:1 Iterations:400 Loss:5.748596668243408
Epoch:1 Iterations:450 Loss:5.922341346740723
Epoch:1 Iterations:500 Loss:5.782570838928223
Epoch:1 Iterations:550 Loss:5.582164764404297
Epoch:1 Iterations:600 Loss:5.74899959564209
Epoch:1 Iterations:650 Loss:5.5965576171875
Epoch:1 Iterations:700 Loss:5.500996112823486
Epoch:1 Iterations:750 Loss:5.859821796417236
Epoch:1 Iterations:800 Loss:5.688898086547852
Epoch:1 Iterations:850 Loss:5.668003082275391
Epoch:1 Iterations:900 Loss:5.746268272399902
Epoch:1 Iterations:950 Loss:5.67934513092041
Epoch:1 Iterations:1000 Loss:5.557328224182129
Epoch:1 Iterations:1050 Loss:5.622857570648193
Epoch:1 Iterations:1100 Loss:5.7341337

In [149]:
train(char3md, trn_ldr, 0.01, 2, 'rnn2')

Epoch:1 Iterations:50 Loss:5.958637237548828
Epoch:1 Iterations:100 Loss:5.6186299324035645
Epoch:1 Iterations:150 Loss:5.667288780212402
Epoch:1 Iterations:200 Loss:5.905817031860352
Epoch:1 Iterations:250 Loss:5.658107280731201
Epoch:1 Iterations:300 Loss:5.804039001464844
Epoch:1 Iterations:350 Loss:5.753924369812012
Epoch:1 Iterations:400 Loss:5.762933731079102
Epoch:1 Iterations:450 Loss:5.930180549621582
Epoch:1 Iterations:500 Loss:5.850101947784424
Epoch:1 Iterations:550 Loss:5.631256103515625
Epoch:1 Iterations:600 Loss:5.76055383682251
Epoch:1 Iterations:650 Loss:5.601725101470947
Epoch:1 Iterations:700 Loss:5.539243221282959
Epoch:1 Iterations:750 Loss:5.924337863922119
Epoch:1 Iterations:800 Loss:5.716416358947754
Epoch:1 Iterations:850 Loss:5.7156453132629395
Epoch:1 Iterations:900 Loss:5.855612277984619
Epoch:1 Iterations:950 Loss:5.806646823883057
Epoch:1 Iterations:1000 Loss:5.676392555236816
Epoch:1 Iterations:1050 Loss:5.733517169952393
Epoch:1 Iterations:1100 Loss:5.8

# Testing

## 1. CharModel

In [176]:
def predict(inp):
    x = torch.tensor(np.array([char2idx[c] for c in inp]))
    p = char3md(x[0].to(dev), x[1].to(dev), x[2].to(dev))
    print(p)
    i = np.argmax(p.detach().cpu().numpy(),0)
    return idx2char[i]

In [172]:
predict('and')

'='

In [173]:
predict(' an')

'='

In [61]:
predict(' is')

' '

In [63]:
predict(' yo')

'u'

In [70]:
predict('The')

'y'

## 2. RNN

In [192]:
y1 = rnn(x1.to(dev))

In [185]:
y1 = np.argmax(y1.detach().cpu().numpy(), 1)

In [189]:
''.join(idx2char[i] for i in x1)

KeyError: tensor([ 31,  33,  75,   1])

In [187]:
''.join(idx2char[i] for i in y1)

'""FZ'