- reference: https://github.com/ma2rten/seq2seq

# layers

In [38]:
import numpy as np
from numpy.random import rand

def initialize(dim, init_range):
    """Return a random array of shape ``dim`` scaled by ``init_range``.

    ``dim`` is an iterable of axis lengths that is unpacked into
    ``numpy.random.rand``; the uniform samples it returns are multiplied
    element-wise by ``init_range``.
    """
    uniform_draw = rand(*dim)
    scaled = uniform_draw * init_range
    return scaled

class Embedding:
    """Lookup-table embedding layer with a hand-written backward pass.

    ``forward`` records each input index under an increasing time step and
    returns the matching row(s) of ``W``; ``backward`` walks those time
    steps in reverse and accumulates the incoming deltas into ``dW``.
    """

    def __init__(self, vocab_size, embed_size, init_range=1):
        """Create the weight table and its gradient buffer.

        Args:
            vocab_size: number of rows in the embedding table.
            embed_size: number of columns (length of each embedding).
            init_range: scale factor passed to ``initialize`` for ``W``.
        """
        self.vocab_size = vocab_size
        self.embed_size = embed_size
        # Honour the caller's init_range instead of a hard-coded 1.
        self.W = initialize((vocab_size, embed_size), init_range)
        # The gradient buffer must exist before it is listed in params and
        # before initSequence() zeroes it in place.
        self.dW = np.zeros((vocab_size, embed_size))
        # name, param, grad
        self.params = [
            ('W', self.W, self.dW)
        ]
        # Start ready for use, so forward() works on a freshly built layer.
        self.initSequence()

    def initSequence(self):
        """Reset the time step, the stored inputs and the gradient buffer."""
        self.t = 0
        self.x = {}
        self.dW[:] = 0  # reset in place so the reference held in params stays valid

    def forward(self, x):
        """Remember index ``x`` for the current time step and return ``W[x]``."""
        self.x[self.t] = x
        self.t += 1
        return self.W[x]

    def backward(self, delta):
        """Accumulate ``delta`` into the gradient rows used at the latest step.

        Steps are consumed in reverse order of the ``forward`` calls.
        """
        self.t -= 1
        x = self.x[self.t]
        # np.add.at accumulates correctly even when x is an index array that
        # repeats a row; a plain ``dW[x] += delta`` would apply only one of
        # the repeated updates.
        np.add.at(self.dW, x, delta)

# LSTM
- img link: https://sergioskar.github.io/Bitcon_prediction_LSTM/

![](https://sergioskar.github.io/assets/img/posts/lstm_equations.jpg)

In [55]:
def zeros(*dim):
    """Return a zero-filled NumPy array whose shape is the positional arguments.

    ``zeros(3, 4)`` is shorthand for ``np.zeros((3, 4))``.
    """
    shape = dim
    return np.zeros(shape)

class LSTM:
    
    def __init__(self, input_size, hidden_size, init_range=1, previous=None):
        """Allocate weights, biases and gradient buffers for one LSTM layer.

        Args:
            input_size: length of each input vector (columns of ``W_x*``).
            hidden_size: length of the hidden and cell state vectors.
            init_range: scale factor passed to ``initialize`` for the
                weight matrices.
            previous: optional LSTM to chain after. When given, it is stored
                as ``self.previous`` and this layer is registered as its
                ``next``.
        """
        self.input_size, self.hidden_size = input_size, hidden_size

        # Link both directions so initSequence() can pull state from the
        # predecessor and gradients from the successor.
        if previous is not None:
            self.previous = previous
            previous.next = self

        def init(x, y):
            return initialize((x, y), init_range)

        h, n = hidden_size, input_size

        # Suffixes: i = input gate, f = forget gate, o = output gate,
        # j = cell update.
        self.W_hi, self.W_hf, self.W_ho, self.W_hj =\
            init(h, h), init(h, h), init(h, h), init(h, h)
        self.W_xi, self.W_xf, self.W_xo, self.W_xj =\
            init(h, n), init(h, n), init(h, n), init(h, n)
        # Forget-gate bias starts at 3 (presumably to keep the gate open
        # early in training). np.ones is used directly because no ``ones``
        # helper is defined next to ``zeros``.
        self.b_i, self.b_f, self.b_o, self.b_j =\
            zeros(h), np.ones(h) * 3, zeros(h), zeros(h)

        # initialize gradients
        # These must be the dW_* names: assigning to W_ho / W_hj here would
        # wipe the freshly initialised weights and leave dW_ho / dW_hj
        # undefined for the params list below.
        self.dW_hi, self.dW_hf, self.dW_ho, self.dW_hj =\
            zeros(h, h), zeros(h, h), zeros(h, h), zeros(h, h)
        self.dW_xi, self.dW_xf, self.dW_xo, self.dW_xj =\
            zeros(h, n), zeros(h, n), zeros(h, n), zeros(h, n)
        self.db_i, self.db_f, self.db_o, self.db_j =\
            zeros(h), zeros(h), zeros(h), zeros(h)

        # name, param, grad
        self.params = [
            ('W_hi', self.W_hi, self.dW_hi),
            ('W_hf', self.W_hf, self.dW_hf),
            ('W_ho', self.W_ho, self.dW_ho),
            ('W_hj', self.W_hj, self.dW_hj),

            ('W_xi', self.W_xi, self.dW_xi),
            ('W_xf', self.W_xf, self.dW_xf),
            ('W_xo', self.W_xo, self.dW_xo),
            ('W_xj', self.W_xj, self.dW_xj),

            ('b_i', self.b_i, self.db_i),
            ('b_f', self.b_f, self.db_f),
            ('b_o', self.b_o, self.db_o),
            ('b_j', self.b_j, self.db_j),
        ]
        self.initSequence()
        
    def initSequence(self):
        self.t = 0
        self.x = {}
        self.h = {}
        self.c = {}
        self.ct = {}
        
        self.input_gate = {}
        self.forget_gate = {}
        self.output_gate = {}
        self.cell_update = {}
        
        if has_attr(self, 'previous') :
            self.h[0] = self.previous.h[self.previous.t]
            self.c[0] = self.previous.c[self.previous.t]
        else:
            self.h[0] = zeros(self.hidden_size)
            self.c[0] = zeros(self.hidden_size)
        
        if has_attr(self, 'next'):
            self.dh_prev = self.next.dh_prev
            self.dc_prev = self.next.dc_prev
        else:
            self.dh_prev = zeros(self.hidden_size)
            self.dc_prev = zeros(self.hidden_size)
        
        for name, param, grad in self.params:
            grad[:] = 0
    
    def forward(self, x_t):
        self.t += 1
        
        t = self.t
        h = self.h[t-1]
        
        self.forget_gate[t] = sigmoid(np.dot(self.W_hf, h) + np.dot(self.W_xf, x_t) + self.b_f)
        self.cell_update[t] = tanh(np.dot(self.W_hj, h) + np.dot(self.W_xj, x_t) + self.b_j)
        self.input_gate[t] = sigmoid(np.dot(self.W_hi, h) + np.dot(self.W_xi, x_t) + self.b_i)
        self.output_gate[t] = sigmoid(np.dot(self.W_ho, h) + np.dot(self.W_xo, x_t) + self.b_o)
        
        self.c[t] = self.forget_gate[t] * self.c[t-1] + self.input_gate[t] * self.cell_update[t]