In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import SGD 
import numpy as np
import random

## Dataset

In [4]:
cities = ['moscow','saint-petersburg','san-francisco','pekin','kongo']

In [5]:
class Encoder_class():
    def __init__(self):
        self.table = {}
    
    def make_table(self, input_list):
        self.table = {}
        for target in input_list:
            for t in target:
                if t not in self.table:
                    self.table[t] = 1
                else:
                    self.table[t] += 1
        self.table = {letter: i for i,letter in enumerate(self.table)}
        print(self.table)
        
    def encode(self, input_list):
        output_list = []
        for target in input_list:
            tmp = []
            for t in target:
                if t in self.table:
                    tmp.append(self.table[t])
            output_list.append(tmp)
        return output_list

In [6]:
Encoder = Encoder_class()
Encoder.make_table(cities)
Cities_encoded = Encoder.encode(cities)
print(Cities_encoded)

{'m': 0, 'o': 1, 's': 2, 'c': 3, 'w': 4, 'a': 5, 'i': 6, 'n': 7, 't': 8, '-': 9, 'p': 10, 'e': 11, 'r': 12, 'b': 13, 'u': 14, 'g': 15, 'f': 16, 'k': 17}
[[0, 1, 2, 3, 1, 4], [2, 5, 6, 7, 8, 9, 10, 11, 8, 11, 12, 2, 13, 14, 12, 15], [2, 5, 7, 9, 16, 12, 5, 7, 3, 6, 2, 3, 1], [10, 11, 17, 6, 7], [17, 1, 7, 15, 1]]


In [7]:
class Decoder():
    def __init__(self, table_encode):
        self.seqmap = {table_encode[letter]: letter for  letter in table_encode}
    def decode(self, sequence):
        word =''
        for t in sequence:
            word += str(self.seqmap[t])
        return word

In [8]:
Decode = Decoder(Encoder.table)
Decode.decode(Cities_encoded[1])

'saint-petersburg'

## Реализация базовой RNN
<br/>
Скрытый элемент
$$ h_t= tanh⁡ (W_{ℎℎ} h_{t−1}+W_{xh} x_t) $$

In [9]:
class Base_RNN(nn.Module):
    
    def __init__(self, in_size=1, hidden_size=64):
        super(Base_RNN, self).__init__()   
        self.hidden_size = hidden_size
        
        self.Wxh = nn.Linear(in_features=in_size, out_features=hidden_size)
        self.Whh = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        
        self.activation   = nn.Tanh()
        
    def init_weights(self):
        return torch.ones(self.hidden_size)
        
    def forward(self, x, prev_hidden):
        return self.activation(self.Whh(prev_hidden) + self.Wxh(x))

In [10]:
class Clasification_Net(nn.Module):
    
    def __init__(self, RNN, All_out=5):
        super(Clasification_Net, self).__init__()
        
        self.RNN = RNN
        self.fc = nn.Linear(self.RNN.hidden_size, All_out)
        
    def forward(self, x):
        h = self.RNN.init_weights()
        for i in x:
            h = self.RNN(torch.tensor(i, dtype=torch.float).unsqueeze(0), h)
        out = self.fc(h)
        out = F.softmax(out)
        return out

In [12]:
RNN = Base_RNN(1,128)
Net = Clasification_Net(RNN, All_out=5)
print(Net(Cities_encoded[1]))
criterion = nn.MSELoss()
e_cnt     = 1000
optim     = SGD(Net.parameters(), lr = 0.1, momentum=0.9)

tensor([0.1611, 0.2157, 0.2801, 0.1160, 0.2272], grad_fn=<SoftmaxBackward>)


  


In [13]:
CLIP_GRAD = True
Net.train()

for epoch in range(1000):
    loss = 0
    optim.zero_grad()
    num = random.randint(0,4)
    y = Net(Cities_encoded[num])
#     print(y)
    target = torch.zeros(5)
    target[num] = 1
#     print(target)
    loss += criterion(y, target)
     

    loss.backward()
    
    if epoch % 10 == 0:
        print (loss.data.item())
        if CLIP_GRAD: torch.nn.utils.clip_grad_norm_(Net.parameters(), max_norm=5)
    else: 
        if CLIP_GRAD: torch.nn.utils.clip_grad_norm_(Net.parameters(), max_norm=1)
    
    optim.step()

  


0.19067659974098206
0.2815345823764801
0.3986036479473114
0.31188297271728516
0.0029315019492059946
0.30459532141685486
0.36912161111831665
0.318460077047348
1.468395839765435e-05
6.690429290756583e-05
0.00020305762882344425
6.171270439114807e-11
7.893047586549073e-05
0.3978227972984314
1.008259999935035e-07
6.64466351736337e-05
0.3036174774169922
0.377157986164093
3.8892883935659484e-07
4.5087895222950536e-11
3.0954953877948554e-11
0.3024592101573944
0.29865577816963196
0.3937094211578369
1.0668237337085884e-05
2.104532359226141e-05
1.2376398517144338e-11
5.345850498383875e-11
2.47680098652836e-10
0.3044185936450958
2.0734363559427038e-08
0.00010091102012665942
9.37825461733155e-05
2.5127303016070357e-10
5.6296412367373705e-05
0.3695404529571533
0.00022960676869843155
3.441971430095947e-10
3.4879432675438693e-10
9.470592797537975e-16
1.6191626173167606e-07
3.5380190444023185e-10
0.366015762090683
1.6619323162103683e-07
5.083353737944662e-10
2.8351843095464346e-10
0.29105183482170105
5

In [14]:
Error_list = ['moooscow', 'mocow', 'mosco', 'ongo', 'ogo', 'sant-pitirburg', 'saaant-pitiiirburg', 'facico', 'in']
Net.eval()
Error = Encoder.encode(Error_list)
print(Encoder.encode(Error_list))
for city in Error:
    y = Net(city)
    print(cities[y.argmax().item()])

[[0, 1, 1, 1, 2, 3, 1, 4], [0, 1, 3, 1, 4], [0, 1, 2, 3, 1], [1, 7, 15, 1], [1, 15, 1], [2, 5, 7, 8, 9, 10, 6, 8, 6, 12, 13, 14, 12, 15], [2, 5, 5, 5, 7, 8, 9, 10, 6, 8, 6, 6, 6, 12, 13, 14, 12, 15], [16, 5, 3, 6, 3, 1], [6, 7]]
moscow
moscow
kongo
kongo
kongo
saint-petersburg
saint-petersburg
san-francisco
saint-petersburg


  


## Реализовать LSTM

In [15]:
class LSTM(nn.Module):
    
    def __init__(self, in_size=1, hidden_size=64):
        super(LSTM, self).__init__()        
        
        self.hidden_size = hidden_size
        self.Wxc          = nn.Linear(in_features=in_size, out_features=hidden_size)
        self.Whc          = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        
        self.Wxi          = nn.Linear(in_features=in_size, out_features=hidden_size)
        self.Whi          = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        
        self.Wxf          = nn.Linear(in_features=in_size, out_features=hidden_size)
        self.Whf          = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        
        self.Wxo          = nn.Linear(in_features=in_size, out_features=hidden_size)
        self.Who          = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        
        self.activation   = nn.Tanh()
    
    def init_weights(self):
        return torch.ones(self.hidden_size), torch.ones(self.hidden_size)
    
    def forward(self, x, prev_hidden, candidate_state):
        candidate_new = self.activation(self.Wxc(x) + self.Whc(prev_hidden))
        
        input_gate =  F.sigmoid(self.Wxi(x) + self.Whi(prev_hidden))
        forget_gate = F.sigmoid(self.Wxf(x) + self.Whf(prev_hidden))
        output_gate = F.sigmoid(self.Wxf(x) + self.Who(prev_hidden))
        
        cell_state= forget_gate * candidate_state + input_gate * candidate_new
        
        hidden = output_gate * self.activation(cell_state)

        return hidden, cell_state

In [16]:
class Clasification_Net_LSTM(nn.Module):
    
    def __init__(self, RNN, All_out=5):
        super(Clasification_Net_LSTM, self).__init__()
        
        self.RNN = RNN
        self.fc = nn.Linear(self.RNN.hidden_size, All_out)
        
    def forward(self, x):
        h , c = self.RNN.init_weights()
        for i in x:
            h, c = self.RNN(torch.tensor(i, dtype=torch.float).unsqueeze(0), h, c)
        out = self.fc(h)
        out = F.softmax(out)
        return out

In [17]:
RNN = LSTM()
Net = Clasification_Net_LSTM(RNN, All_out=5)
h , c = RNN.init_weights()
print(Net(Cities_encoded[1]))
criterion = nn.MSELoss()
e_cnt     = 1000
optim     = SGD(Net.parameters(), lr = 0.1, momentum=0.9)
print(Net)

tensor([0.2010, 0.1915, 0.1209, 0.1786, 0.3080], grad_fn=<SoftmaxBackward>)
Clasification_Net_LSTM(
  (RNN): LSTM(
    (Wxc): Linear(in_features=1, out_features=64, bias=True)
    (Whc): Linear(in_features=64, out_features=64, bias=True)
    (Wxi): Linear(in_features=1, out_features=64, bias=True)
    (Whi): Linear(in_features=64, out_features=64, bias=True)
    (Wxf): Linear(in_features=1, out_features=64, bias=True)
    (Whf): Linear(in_features=64, out_features=64, bias=True)
    (Wxo): Linear(in_features=1, out_features=64, bias=True)
    (Who): Linear(in_features=64, out_features=64, bias=True)
    (activation): Tanh()
  )
  (fc): Linear(in_features=64, out_features=5, bias=True)
)


  


In [19]:
CLIP_GRAD = True
Net.train()

for epoch in range(100):
    loss = 0
    optim.zero_grad()
    num = random.randint(0,4)
    y = Net(Cities_encoded[num])
#     print(y)
    target = torch.zeros(5)
    target[num] = 1
#     print(target)
    loss = criterion(y, target)
    loss.backward()
    print(loss.grad_fn)
    if epoch % 10 == 0:
        print (loss.data.item())
        if CLIP_GRAD: torch.nn.utils.clip_grad_norm_(Net.parameters(), max_norm=5)
    else: 
        if CLIP_GRAD: torch.nn.utils.clip_grad_norm_(Net.parameters(), max_norm=1)
    
    optim.step()

  


<MseLossBackward object at 0x7ff613bbafd0>
0.15501278638839722
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
0.05172501131892204
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
<MseLossBackward object at 0x7ff613bbafd0>
0.05838508903980255
<MseLossBackward object at 0x7ff613bb

In [20]:
Error_list = ['moooscow', 'mocow', 'mosco', 'ongo', 'ogo', 'sant-pitirburg', 'saaant-pitiiirburg', 'sn-facico', 'in']
Net.eval()
Error = Encoder.encode(Error_list)
print(Encoder.encode(Error_list))
for city in Error:
    y = Net(city)
    print(cities[y.argmax().item()])

[[0, 1, 1, 1, 2, 3, 1, 4], [0, 1, 3, 1, 4], [0, 1, 2, 3, 1], [1, 7, 15, 1], [1, 15, 1], [2, 5, 7, 8, 9, 10, 6, 8, 6, 12, 13, 14, 12, 15], [2, 5, 5, 5, 7, 8, 9, 10, 6, 8, 6, 6, 6, 12, 13, 14, 12, 15], [2, 7, 9, 16, 5, 3, 6, 3, 1], [6, 7]]
moscow
moscow
moscow
kongo
kongo
saint-petersburg
saint-petersburg
kongo
pekin


  



# NumPy implementation

In [1]:
class Fully_Connected:
    def __init__(self, in_size, out_size):
        self.W = np.random.normal(scale=1, size=(out_size, in_size))*np.sqrt(2/(out_size + in_size))
#         Example
#         self.W = np.array([[0.1,0.5],[-0.3,0.8]])
        self.b = np.zeros(out_size)
        
    def forward(self, x):
        self.x = x
#         print('Fully_Connected',x.shape)
        return np.dot(self.W, x) + self.b
    
    def backward(self, dz, lr = 0.01):
        dx = np.matmul(dz, self.W)
        db = dz
        dW = np.outer(dz, self.x) #Загуглить
        
        self.W = self.W - dW * lr
        self.b = self.b - db * lr
        return dx  

In [8]:
#Example
lay = Fully_Connected(1,3)
print('W=',lay.W)
x = np.array([0.2])
print('x=',x)
print('z=',lay.forward(x))
print('dx=',lay.backward(np.array([0.44,0.52, 0.3])))

W= [[0.20593141]
 [0.38077013]
 [0.19228071]]
x= [0.2]
z= [0.04118628 0.07615403 0.03845614]
dx= [0.3462945]


In [9]:
class MSEloss:
    def __init__(self):
        pass
    
    def forward(self, x, y):
        self.x = x
        self.y = y
        return sum((x-y)**2)/len(x)
    
    def backward(self, dz = 1, lr = 0.1):
        return 2./len(self.x)*(self.x-self.y)

In [10]:
criterion = MSEloss()

In [17]:
x = np.array([1,3,7,4])
y = np.array([1,2,4,4])
criterion.forward(x,y)
criterion.backward()

array([0. , 0.5, 1.5, 0. ])

In [18]:
class SoftMax:
    def __init__(self):
        pass
    
    def forward(self, x):
        self.x = x
        exps = np.exp(x)
        return exps/sum(exps)
    
    def backward(self, dz, lr = 0.1):
        sm = self.forward(self.x)
        lp = (np.eye(sm.shape[0], sm.shape[0]) - sm).T
        lp2 = sm * lp
        return np.dot(dz, lp2)

In [19]:
Soft = SoftMax()
Soft.forward(x)

array([0.00231533, 0.01710812, 0.93407185, 0.0465047 ])

In [20]:
class Tanh:
    
    def forward(self, x):
        self.x = x
        return np.tanh(x)

    def backward(self, dz, lr=0.1):
        proizv = (np.cosh(self.x)**2 - np.sinh(self.x)**2)/np.cosh(self.x)**2
        return dz * proizv

In [21]:
tan = Tanh()
print(tan.forward(x))
print(tan.backward(x))

[0.76159416 0.99505475 0.99999834 0.9993293 ]
[4.19974342e-01 2.95981115e-02 2.32827654e-05 5.36380273e-03]


In [22]:
class Sigmoid:
    
    def forward(self, x):
        self.x = x
        one = np.ones(len(self.x))
        return one / (one + np.exp(-x))
    
    def backward(self, dz, lr=0.1):
        one = np.ones(len(self.x))
        sig = one / (one + np.exp(-self.x))
        sigma = sig * (one - sig)
        return dz * sigma

In [23]:
Sigm = Sigmoid()
x = np.array([1,2,3])
print(Sigm.forward(x))
print(Sigm.backward(x))

[0.73105858 0.88079708 0.95257413]
[0.19661193 0.20998717 0.13552998]


# RNN

In [24]:
class RNN_cell():
    def __init__(self, in_size=1, hidden_size=64): 
        
        self.hidden_size = hidden_size
        
        self.Wxh = Fully_Connected(in_size, hidden_size)
        self.Whh = Fully_Connected(hidden_size, hidden_size)
        
        self.activation   = Tanh()
        
    def init_weights(self):
        return np.ones(self.hidden_size)
        
    def forward(self, x, prev_hidden):
#         print(x)
#         print(prev_hidden)
#         print(self.Whh.forward(prev_hidden))
#         print(self.Wxh.forward(x))
#         print(self.Whh.forward(prev_hidden) + self.Wxh.forward(x))
#         print(self.activation.forward(self.Whh.forward(prev_hidden) + self.Wxh.forward(x)))
#         print('===========')
        return self.activation.forward(self.Whh.forward(prev_hidden) + self.Wxh.forward(x))
    
    def backward(self, dz, lr =0.1):
        dz = self.activation.backward(dz)
        dWxh = self.Wxh.backward(dz)
        dz = self.Whh.backward(dz)
        return dz

In [25]:
class Clasification_Net():
    
    def __init__(self, RNN, All_out=5):
        
        self.RNN = RNN
        self.fc = Fully_Connected(self.RNN.hidden_size, All_out)
        self.softmax = SoftMax()
        
    def forward(self, x):
        self.x = x
        h = self.RNN.init_weights()
        for i in x:
            h = self.RNN.forward(np.array([i]), h)
#         print(h.shape)
        out = self.fc.forward(h)
        out = self.softmax.forward(out)
        return out
    
    def backward(self, dz, lr = 0.1):
        dz = self.softmax.backward(dz)
        dz = self.fc.backward(dz)
        for i in x:
            dz = self.RNN.backward(dz)
        return dz

In [26]:
RNN = RNN_cell(1,256)
Net = Clasification_Net(RNN, All_out=5)
print(Net.forward(Cities_encoded[1]))
criterion = MSEloss()

[0.06718428 0.06429808 0.0378575  0.65143928 0.17922086]


In [27]:
for epoch in range(1000):
    Loss = 0
    num = random.randint(0,4)
    y = Net.forward(Cities_encoded[num])
    target = np.zeros(5)
    target[num] = 1
    loss = criterion.forward(y , target)
    Loss += loss
    if epoch % 100:
        print('Epoch',Loss)
        Loss = 0
    dz = criterion.backward()
    Net.backward(dz)

Epoch 0.10499751259276371
Epoch 0.14231786159770024
Epoch 0.2609627650357568
Epoch 0.12281929757257855
Epoch 0.10799005221368023
Epoch 0.12681884821275974
Epoch 0.24950477705709737
Epoch 0.18501964733554707
Epoch 0.11945075569302195
Epoch 0.1831435058073955
Epoch 0.22120168877740923
Epoch 0.19900230773310407
Epoch 0.10397447604466656
Epoch 0.1379556726013248
Epoch 0.11126119641957403
Epoch 0.1785803932803271
Epoch 0.1306667989268407
Epoch 0.20158204780348318
Epoch 0.09292290045542402
Epoch 0.0730831030409742
Epoch 0.05684900645389494
Epoch 0.12351372275433017
Epoch 0.10220075603827623
Epoch 0.13558562697281612
Epoch 0.057599496457227894
Epoch 0.10747475514668955
Epoch 0.18322967000505208
Epoch 0.1780103686701147
Epoch 0.1722613927424677
Epoch 0.16594551331927582
Epoch 0.197538387305937
Epoch 0.0492767690201908
Epoch 0.11802216451872119
Epoch 0.1674523217583233
Epoch 0.09879270086561745
Epoch 0.13633225072785937
Epoch 0.11005755446413323
Epoch 0.05241439990147849
Epoch 0.167357944976543

Epoch 0.0025895260851471515
Epoch 0.010884187307551982
Epoch 0.010273876697931474
Epoch 0.0026858321566317614
Epoch 0.013874229328896485
Epoch 0.0030018107204379837
Epoch 0.010256520984763548
Epoch 0.012692243094220956
Epoch 0.010183221385326546
Epoch 0.00963261757923914
Epoch 0.003196462380763086
Epoch 0.009154944120911609
Epoch 0.0029859216242561857
Epoch 0.00872283281405541
Epoch 0.003036911282716906
Epoch 0.008358189431442802
Epoch 0.00286976084862755
Epoch 0.002804611332017953
Epoch 0.0029730661398293776
Epoch 0.0026217416557329735
Epoch 0.014798853348063476
Epoch 0.002905510696496537
Epoch 0.002805167338359906
Epoch 0.008564505153065487
Epoch 0.0081630204802156
Epoch 0.007795372323531213
Epoch 0.002725740752186745
Epoch 0.014271828830769792
Epoch 0.0031889687074395387
Epoch 0.0029624630402362434
Epoch 0.002749409718213954
Epoch 0.0026589497907196567
Epoch 0.0027476594107153944
Epoch 0.0031549405235727877
Epoch 0.002569111453001159
Epoch 0.0024216559355728392
Epoch 0.0140574747788

In [28]:
Error_list = ['moooscow', 'mocow', 'mosco', 'ongo', 'ogo', 'sant-pitirburg', 'saaant-pitiiirburg', 'sn-facico', 'in']
Error = Encoder.encode(Error_list)
print(Encoder.encode(Error_list))
for city in Error:
    y = Net.forward(city)
    print(cities[y.argmax().item()])

[[0, 1, 1, 1, 2, 3, 1, 4], [0, 1, 3, 1, 4], [0, 1, 2, 3, 1], [1, 7, 15, 1], [1, 15, 1], [2, 5, 7, 8, 9, 10, 6, 8, 6, 12, 13, 14, 12, 15], [2, 5, 5, 5, 7, 8, 9, 10, 6, 8, 6, 6, 6, 12, 13, 14, 12, 15], [2, 7, 9, 16, 5, 3, 6, 3, 1], [6, 7]]
moscow
pekin
kongo
kongo
kongo
saint-petersburg
saint-petersburg
san-francisco
saint-petersburg


# LSTM

In [30]:
class LSTM_Cell():
    
    def __init__(self, in_size=1, hidden_size=64):     
        
        self.hidden_size  = hidden_size
        self.Wxc          = Fully_Connected(in_size, hidden_size)
        self.Whc          = Fully_Connected(hidden_size, hidden_size)
        
        self.Wxi          = Fully_Connected(in_size, hidden_size)
        self.Whi          = Fully_Connected(hidden_size, hidden_size)
        
        self.Wxf          = Fully_Connected(in_size, hidden_size)
        self.Whf          = Fully_Connected(hidden_size, hidden_size)
        
        self.Wxo          = Fully_Connected(in_size, hidden_size)
        self.Who          = Fully_Connected(hidden_size, hidden_size)
        
        self.activation   = Tanh()
        self.sigmoid      = Sigmoid()
        
        
    def init_weights(self):
        return np.ones(self.hidden_size), np.ones(self.hidden_size)
    
    def forward(self, x, prev_hidden, candidate_state):
        
        self.candidate_state = candidate_state
        
        self.candidate_new = self.activation.forward(self.Wxc.forward(x) + self.Whc.forward(prev_hidden))
#         print(x.shape)
        self.input_gate =  self.sigmoid.forward(self.Wxi.forward(x) + self.Whi.forward(prev_hidden))
        self.forget_gate = self.sigmoid.forward(self.Wxf.forward(x) + self.Whf.forward(prev_hidden))
        self.output_gate = self.sigmoid.forward(self.Wxo.forward(x) + self.Who.forward(prev_hidden))
        
        self.cell_state_1 = self.forget_gate * self.candidate_state
        
        self.cell_state_2 = self.input_gate * self.candidate_new
        
        self.cell_state= self.cell_state_1 + self.cell_state_2
        
        self.cell_state_act = self.activation.forward(self.cell_state)
        
        hidden = self.output_gate * self.cell_state_act

        return hidden, self.cell_state
    
    def backward(self, dz_h, dz_c = None, lr = 0.1):
        
        d_o = dz_h * self.cell_state_act
        d_o = self.sigmoid.backward(d_o)
        d_ho = self.Who.backward(d_o)
        d_xo = self.Wxo.backward(d_o)
        
        d_cell_state_act = dz_h * self.output_gate
        d_cell_state_act = self.activation.backward(d_cell_state_act)
        
        if dz_c is not None:
            d_c = d_cell_state_act + dz_c
        else: 
            d_c = d_cell_state_act
        
        d_c_prev = d_c * self.output_gate
            
        d_f = d_c * self.candidate_state
        d_f = self.sigmoid.backward(d_f)
        d_hf = self.Whf.backward(d_f)
        d_xf = self.Wxf.backward(d_f)
        
        d_c_cell = d_c * self.input_gate
        d_c_cell = self.activation.backward(d_c_cell)
        d_hc = self.Whc.backward(d_c_cell)
        d_xc = self.Wxc.backward(d_c_cell)
        
        d_i = d_c * self.candidate_new
        d_i = self.sigmoid.backward(d_i)
        d_hi = self.Whi.backward(d_i)
        d_xi = self.Wxi.backward(d_i)
        
        dh = d_ho + d_hf +d_hc + d_hi
        return dh, d_c_prev

In [31]:
class Clasification_Net_LSTM():
    
    def __init__(self, RNN, All_out=5):
        
        self.RNN = RNN
        self.fc = Fully_Connected(self.RNN.hidden_size, All_out)
        self.softmax = SoftMax()
        
    def forward(self, x):
        self.x = x
        h,c = self.RNN.init_weights()
        for i in x:
            h,c = self.RNN.forward(np.array([i]), h , c)
#         print(h.shape)
        out = self.fc.forward(h)
        out = self.softmax.forward(out)
        return out
    
    def backward(self, dz, lr = 0.1):
        dz = self.softmax.backward(dz)
        dz = self.fc.backward(dz)
        dz_h = dz
        dz_c = None
        for i in x:
            dz_h, dz_c = self.RNN.backward(dz_h, dz_c)
        return dz_h, dz_c

In [32]:
RNN = LSTM_Cell(1,256)
Net = Clasification_Net_LSTM(RNN, All_out=5)
print(Net.forward(Cities_encoded[1]))
criterion = MSEloss()

[0.12353056 0.18362896 0.14804153 0.14445545 0.40034351]


In [33]:
for epoch in range(1000):
    Loss = 0
    num = random.randint(0,4)
    y = Net.forward(Cities_encoded[num])
    target = np.zeros(5)
    target[num] = 1
    loss = criterion.forward(y , target)
    Loss += loss
    if epoch % 100:
        print('Epoch',Loss)
        Loss = 0
    dz = criterion.backward()
    Net.backward(dz)

Epoch 0.17249777515553197
Epoch 0.16802417377270054
Epoch 0.1901885435436448
Epoch 0.15217358561165487
Epoch 0.16320528820786373
Epoch 0.18296279302498175
Epoch 0.15883364773826064
Epoch 0.18248281220388138
Epoch 0.18811971413980114
Epoch 0.13413400239504708
Epoch 0.13312767900778416
Epoch 0.15606228428394076
Epoch 0.13346701851347748
Epoch 0.15244192475791346
Epoch 0.18783913444471392
Epoch 0.13435187956653089
Epoch 0.18235867237975661
Epoch 0.148834840021398
Epoch 0.14428770310932879
Epoch 0.1868409372259586
Epoch 0.18538020038334385
Epoch 0.1370786701833077
Epoch 0.18428847836635825
Epoch 0.1535204020709869
Epoch 0.13689412429031306
Epoch 0.1532655896913573
Epoch 0.14183810499878596
Epoch 0.1821741411642271
Epoch 0.18346919578362872
Epoch 0.15286870585412768
Epoch 0.13831038153559277
Epoch 0.1817521963202717
Epoch 0.1386977687478353
Epoch 0.13862195261912197
Epoch 0.1830218682865163
Epoch 0.1814037990810392
Epoch 0.13821238918298623
Epoch 0.1535088723479912
Epoch 0.18189229614125788

Epoch 0.15535637411826125
Epoch 0.03195451513495639
Epoch 0.15474275662457576
Epoch 0.14275082387129273
Epoch 0.03070129985497801
Epoch 0.15420880238118165
Epoch 0.14657784445358774
Epoch 0.1425282732425187
Epoch 0.029725843787659877
Epoch 0.14193117280344633
Epoch 0.028464259614314757
Epoch 0.14133397365370254
Epoch 0.14069360545114415
Epoch 0.02732315109558937
Epoch 0.14711554176815564
Epoch 0.026386448998632694
Epoch 0.14642308108971847
Epoch 0.025517465092498005
Epoch 0.024527559373176168
Epoch 0.14597043376380417
Epoch 0.023781923613011187
Epoch 0.14090277864576328
Epoch 0.140257706300459
Epoch 0.14566440532684827
Epoch 0.16623092458827743
Epoch 0.14490377225018375
Epoch 0.14009069996108986
Epoch 0.15562558535436552
Epoch 0.1442476015380896
Epoch 0.1398384103626482
Epoch 0.15503428767280783
Epoch 0.13936539118662028
Epoch 0.02496780902291345
Epoch 0.13875720426462618
Epoch 0.024046158203183277
Epoch 0.15471619502255513
Epoch 0.023316840874543658
Epoch 0.15407658004354793
Epoch 0.1

In [34]:
Error_list = ['moooscow', 'mocow', 'mosco', 'ongo', 'ogo', 'sant-pitirburg', 'saaant-pitiiirburg', 'sn-facico', 'in']
Error = Encoder.encode(Error_list)
print(Encoder.encode(Error_list))
for city in Error:
    y = Net.forward(city)
    print(cities[y.argmax().item()])

[[0, 1, 1, 1, 2, 3, 1, 4], [0, 1, 3, 1, 4], [0, 1, 2, 3, 1], [1, 7, 15, 1], [1, 15, 1], [2, 5, 7, 8, 9, 10, 6, 8, 6, 12, 13, 14, 12, 15], [2, 5, 5, 5, 7, 8, 9, 10, 6, 8, 6, 6, 6, 12, 13, 14, 12, 15], [2, 7, 9, 16, 5, 3, 6, 3, 1], [6, 7]]
moscow
moscow
moscow
kongo
kongo
saint-petersburg
saint-petersburg
san-francisco
kongo
