In [1]:
from abc import ABC, abstractmethod
import numpy as np

c:\users\temil\onedrive\documents\codes_and_scripts\python_envs\pytorch-tf2-env\lib\site-packages\numpy\.libs\libopenblas.NOIJJG62EMASZI6NYURL6JBKM4EVBGM7.gfortran-win_amd64.dll
c:\users\temil\onedrive\documents\codes_and_scripts\python_envs\pytorch-tf2-env\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll
  stacklevel=1)


In [36]:
class RNN(ABC):
    """
    Base RNN class

    Abstract single-timestep recurrent cell. Subclasses supply
    `random_initializer`, `forward` and `backward`, and set the string
    attributes below, which are resolved to methods by name via `getattr`.

    properties:
        prev_hidden_state - value stored in `_a_prev`
        hidden_state - value stored in `_a_next`
        cell_state - value stored in `_c_next`
        output - value stored in `_y_output`
        named_parameters - value stored in `_named_parameters`
    """
    # Parameter container; subclasses fill it from `initialize_parameters()`.
    _named_parameters = None
    # Name (str) of the method used as hidden-state activation, e.g. "tanh".
    _hidden_state_activation = None
    # Name (str) of the method used as output activation, e.g. "softmax".
    _output_activation = None
    # Name (str) of the initializer method, e.g. "random_initializer".
    _initalizer = None
    # State recorded by the most recent `forward` call of a subclass.
    _a_prev = None
    _a_next = None
    _y_output = None
    _c_next = None
    
    def __init__(self):
        pass    
    
    
    @property
    def hidden_state(self):
        """Hidden state stored by the last forward step (`_a_next`)."""
        return self._a_next
    
    @property
    def prev_hidden_state(self):
        """Hidden state that was fed into the last forward step (`_a_prev`)."""
        return self._a_prev
    
    @property
    def cell_state(self):
        """Cell state stored by the last forward step (`_c_next`)."""
        return self._c_next
    
    @property
    def output(self):
        """Output stored by the last forward step (`_y_output`)."""
        return self._y_output
    
    @property
    def named_parameters(self):
        """Parameter container stored in `_named_parameters`."""
        return self._named_parameters
    
    
    @abstractmethod
    def random_initializer(self):
        """Return freshly initialized parameters for the cell."""
        pass
    
    @abstractmethod
    def forward(self, x, a_prev, c_prev=None):
        """Run one timestep given input `x` and the previous state(s)."""
        pass
    
    @abstractmethod
    def backward(self):
        """Backward pass for one timestep."""
        pass

    
    @classmethod
    def tanh(cls, x):
        """Element-wise hyperbolic tangent."""
        return np.tanh(x)
    
    @classmethod
    def softmax(cls, x):
        """
        Softmax normalized along axis 0.

        The maximum along axis 0 is subtracted before exponentiating.
        Softmax is invariant to that shift, and it keeps `np.exp` from
        overflowing to inf (which would turn the result into nan) for
        large inputs.
        """
        x = np.asarray(x)
        if x.size:
            # Guarded because np.max rejects zero-size reductions.
            x = x - np.max(x, axis=0, keepdims=True)
        exps = np.exp(x)
        return exps/np.sum(exps, axis=0, keepdims=True)
    
    @classmethod
    def sigmoid(cls, x):
        """Element-wise logistic sigmoid."""
        return 1/(1 + np.exp(-x))
    
    def initialize_parameters(self):
        """Call the method named by `_initalizer` and return its result."""
        initializer_fn = getattr(self, self._initalizer)
        return initializer_fn()
    
    def hidden_state_activation(self, x):
        """Apply the method named by `_hidden_state_activation` to `x`."""
        hidden_state_activation_fn = getattr(self, self._hidden_state_activation)
        return hidden_state_activation_fn(x)
    
    def output_activation(self, x):
        """Apply the method named by `_output_activation` to `x`."""
        output_activation_fn = getattr(self, self._output_activation)
        return output_activation_fn(x)
    
    def gate_activation(self, x):
        """
        Apply the method named by `_gate_activation` to `x`.

        `_gate_activation` has no default on this base class; it must be
        set by the subclass before this method is called.
        """
        gate_activation_fn = getattr(self, self._gate_activation)
        return gate_activation_fn(x)
    
    

In [37]:
class VanillaRNN(RNN):
    """
    Vanilla RNN class

    Single-timestep cell computing
        a_next = hidden_state_activation(Waa.a_prev + Wax.x + ba)
        y      = output_activation(Wya.a_next + by)

    Args:
         input_dims (int) - input dimension
         output_dims (int) - output dimension
         hidden_state_dims (int) - size of hidden state
         initalizer (str) - name of the initializer method
         hidden_state_activation (str) - name of the hidden state activation method
         output_activation (str) - name of the output activation method
         
    properties:
        prev_hidden_state (ndarray) - previous hidden state tensor
        hidden_state (ndarray) - hidden state tensor
        output (ndarray) - output tensor
        named_parameters (dict) - timestep parameters
    
    """
    def __init__(self, input_dims,
                       output_dims,
                       hidden_state_dims,
                       initalizer="random_initializer",
                       hidden_state_activation="tanh",
                       output_activation="softmax"
                ):
        self._input_dims = input_dims
        self._hidden_state_dims = hidden_state_dims
        self._output_dims = output_dims
        self._initalizer = initalizer
        # The dims above must be set first: the initializer reads them.
        self._named_parameters = self.initialize_parameters()
        self._hidden_state_activation = hidden_state_activation
        self._output_activation = output_activation
    
    
    def __call__(self, x, a_prev, c_prev=None):
        """Alias for `forward`; `c_prev` is accepted and ignored."""
        return self.forward(x, a_prev, c_prev)
        
        
    @property
    def cell_state(self):
        raise NotImplementedError("Vanilla RNNs have no cell state")
        
    def forward(self, x, a_prev, c_prev=None):
        """
        Run one timestep.

        Args:
            x - input for this timestep; stacked below `a_prev` along axis 0
            a_prev - previous hidden state
            c_prev - ignored; present so the signature matches the abstract
                     `RNN.forward` and cells can be called interchangeably

        Returns:
            (a_next, c_next) where c_next is the stored `_c_next` value,
            which this class never assigns.
        """
        self._a_prev = a_prev
        # [Waa | Wax] . [a_prev ; x] equals Waa.a_prev + Wax.x in a single product.
        weights = np.hstack([self.named_parameters["Waa"], self.named_parameters["Wax"]])
        inputs = np.vstack([a_prev, x])
        Wy = self.named_parameters["Wya"]
        ba = self.named_parameters["ba"]
        by = self.named_parameters["by"]
        self._a_next = self.hidden_state_activation(weights.dot(inputs) + ba)
        self._y_output = self.output_activation(Wy.dot(self._a_next) + by)
        return self._a_next, self._c_next
    
    def backward(self):
        """Not implemented yet."""
        pass
    
    def random_initializer(self):
        """Return a parameter dict with entries drawn from `np.random.rand`."""
        return {
            "Wax": np.random.rand(self._hidden_state_dims, self._input_dims),
            "Waa": np.random.rand(self._hidden_state_dims, self._hidden_state_dims),
            "Wya": np.random.rand(self._output_dims, self._hidden_state_dims),
            "ba" : np.random.rand(self._hidden_state_dims, 1),
            "by" : np.random.rand(self._output_dims, 1)
        }
    

In [38]:
class GRU(RNN):
    """
    GRU RNN class (placeholder)

    Defines no members of its own yet, so the abstract methods declared
    on RNN (random_initializer, forward, backward) remain unimplemented.
    """
    pass
    

In [39]:
class LSTM(RNN):
    """
    LSTM RNN class

    Single-timestep cell. With concat = [a_prev ; x]:
        forget_gate = gate_activation(Wf.concat + bf)
        update_gate = gate_activation(Wi.concat + bi)
        output_gate = gate_activation(Wo.concat + bo)
        candidate   = hidden_state_activation(Wc.concat + bc)
        c_next      = forget_gate*c_prev + update_gate*candidate
        a_next      = output_gate*hidden_state_activation(c_next)
        y           = output_activation(Wy.a_next + by)

    Args:
         input_dims (int) - input dimension
         output_dims (int) - output dimension
         hidden_state_dims (int) - size of hidden state
         initalizer (str) - name of the initializer method
         gate_activation (str) - name of the gate activation method
         hidden_state_activation (str) - name of the hidden state activation method
         output_activation (str) - name of the output activation method
         
    properties:
        prev_hidden_state (ndarray) - previous hidden state tensor
        hidden_state (ndarray) - hidden state tensor
        cell_state (ndarray) - cell state tensor
        output (ndarray) - output tensor
        named_parameters (dict) - timestep parameters
    
    """
    def __init__(self, input_dims,
                       output_dims,
                       hidden_state_dims,
                       initalizer="random_initializer",
                       gate_activation="sigmoid",
                       hidden_state_activation="tanh",
                       output_activation="softmax"):
        
        self._hidden_state_dims = hidden_state_dims
        self._input_dims = input_dims
        self._output_dims = output_dims
        self._initalizer = initalizer
        # The dims above must be set first: the initializer reads them.
        self._named_parameters = self.initialize_parameters()
        self._gate_activation = gate_activation
        self._hidden_state_activation = hidden_state_activation
        self._output_activation = output_activation
    
    
    def __call__(self, x, a_prev, c_prev):
        """Alias for `forward`."""
        return self.forward(x, a_prev, c_prev)

        
    def forward(self, x, a_prev, c_prev):
        """
        Run one timestep.

        Args:
            x - input for this timestep; concatenated below `a_prev` along axis 0
            a_prev - previous hidden state
            c_prev - previous cell state

        Returns:
            (a_next, c_next) - new hidden state and new cell state
        """
        self._a_prev = a_prev
        self._c_prev = c_prev
        concat = np.concatenate([a_prev, x], axis=0)
        
        Wf = self.named_parameters["Wf"]
        bf = self.named_parameters["bf"]
        Wi = self.named_parameters["Wi"]
        bi = self.named_parameters["bi"]
        Wc = self.named_parameters["Wc"]
        bc = self.named_parameters["bc"]
        Wo = self.named_parameters["Wo"]
        bo = self.named_parameters["bo"]
        Wy = self.named_parameters["Wy"]
        by = self.named_parameters["by"]
        
        forget_gate = self.gate_activation(Wf.dot(concat) + bf)
        update_gate = self.gate_activation(Wi.dot(concat) + bi)
        output_gate = self.gate_activation(Wo.dot(concat) + bo)
        candidate = self.hidden_state_activation(Wc.dot(concat) + bc)
        self._c_next = forget_gate*c_prev + update_gate*candidate
        self._a_next = output_gate * self.hidden_state_activation(self._c_next)
        # The prediction reads the gated hidden state, not the raw cell state,
        # matching how VanillaRNN derives its output from a_next.
        self._y_output = self.output_activation(Wy.dot(self._a_next) + by)
        return self._a_next, self._c_next
    
    def backward(self):
        """Not implemented yet."""
        pass
    
    def random_initializer(self):
        """Return a parameter dict with entries drawn from `np.random.rand`."""
        return {
            "Wf": np.random.rand(self._hidden_state_dims, self._hidden_state_dims + self._input_dims),
            "Wi": np.random.rand(self._hidden_state_dims, self._hidden_state_dims + self._input_dims),
            "Wc": np.random.rand(self._hidden_state_dims, self._hidden_state_dims + self._input_dims),
            "Wo": np.random.rand(self._hidden_state_dims, self._hidden_state_dims + self._input_dims),
            "Wy": np.random.rand(self._output_dims, self._hidden_state_dims),
            "bf" : np.random.rand(self._hidden_state_dims, 1),
            "bi" : np.random.rand(self._hidden_state_dims, 1),
            "bc" : np.random.rand(self._hidden_state_dims, 1),
            "bo" : np.random.rand(self._hidden_state_dims, 1),
            "by" : np.random.rand(self._output_dims, 1),
        }
    

In [40]:
class SequenceModel:
    """
    Sequence Model

    Chains `num_time_steps` cells, feeding each cell's returned states
    into the next one. A separate cell instance is constructed for every
    timestep and stored as attribute `timestep_<i>`, so each timestep
    holds its own parameters.

    Args:
         num_time_steps - number of time steps
         rnn_model_class - class of RNN model; instantiated once per timestep
         kwargs - dict of keyword arguments for `rnn_model_class`
                  (optional; defaults to no arguments)
    """
    def __init__(self, num_time_steps, rnn_model_class, kwargs=None):
        self.num_time_steps = num_time_steps
        cell_kwargs = {} if kwargs is None else kwargs
        for i in range(num_time_steps):
            setattr(self, f"timestep_{i}", rnn_model_class(**cell_kwargs))
            
    def __call__(self, X, a0, c0=None):
        """
        Run the whole sequence.

        Args:
            X - inputs indexed by timestep on the last axis (`X[..., i]`)
            a0 - initial hidden state
            c0 - initial cell state; optional, and simply passed through to
                 the first cell, so cells that use a cell state still need
                 a real value here

        Returns:
            (a_next, c_next) as returned by the final timestep, or
            (a0, c0) when there are no timesteps.
        """
        a_next, c_next = a0, c0
        for i in range(self.num_time_steps):
            timestep = getattr(self, f"timestep_{i}")
            a_next, c_next = timestep(X[..., i], a_next, c_next)
        return a_next, c_next

In [41]:
# Seed NumPy's global RNG so the random inputs and weights drawn below are repeatable.
np.random.seed(1)
num_timesteps = 3
input_dims = 3
hidden_state_dims = 5
output_dims = 2
num_examples = 4

# Inputs are laid out as (input_dims, num_examples, num_timesteps);
# SequenceModel slices one timestep off the last axis per step.
X = np.random.rand(input_dims, num_examples, num_timesteps)
# Initial hidden and cell states: all zeros, one column per example.
a0 = np.zeros((hidden_state_dims, num_examples))
c0 = np.zeros((hidden_state_dims, num_examples))
# Constructor keyword arguments shared by every per-timestep cell.
model_kwargs = {"input_dims":input_dims, "hidden_state_dims":hidden_state_dims, "output_dims":output_dims}
model = SequenceModel(num_timesteps, VanillaRNN, model_kwargs)
# Displays the (hidden state, cell state) pair returned by the final timestep.
model(X, a0, c0)

(array([[0.99195588, 0.99866091, 0.99926061, 0.99839471],
        [0.99784975, 0.99941239, 0.99957269, 0.99913954],
        [0.9960653 , 0.99951336, 0.99970848, 0.99909163],
        [0.98854368, 0.9956519 , 0.99779915, 0.99780434],
        [0.99812025, 0.99933286, 0.99969128, 0.99974024]]),
 None)

In [42]:
# Show the parameter dict of the cell at timestep index 2 (the last of the 3 timesteps).
model.timestep_2.named_parameters

{'Wax': array([[0.07197428, 0.96727633, 0.56810046],
        [0.20329323, 0.25232574, 0.74382585],
        [0.19542948, 0.58135893, 0.97001999],
        [0.8468288 , 0.23984776, 0.49376971],
        [0.61995572, 0.8289809 , 0.15679139]]),
 'Waa': array([[0.0185762 , 0.07002214, 0.48634511, 0.60632946, 0.56885144],
        [0.31736241, 0.98861615, 0.57974522, 0.38014117, 0.55094822],
        [0.74533443, 0.66923289, 0.26491956, 0.06633483, 0.3700842 ],
        [0.62971751, 0.21017401, 0.75275555, 0.06653648, 0.2603151 ],
        [0.80475456, 0.19343428, 0.63946088, 0.52467031, 0.92480797]]),
 'Wya': array([[0.26329677, 0.06596109, 0.73506596, 0.77217803, 0.90781585],
        [0.93197207, 0.01395157, 0.23436209, 0.61677836, 0.94901632]]),
 'ba': array([[0.95017612],
        [0.55665319],
        [0.91560635],
        [0.64156621],
        [0.39000771]]),
 'by': array([[0.48599067],
        [0.60431048]])}

In [43]:
# Rebind `model` to a sequence of LSTM cells, reusing the same kwargs and inputs.
model = SequenceModel(num_timesteps, LSTM, model_kwargs)
# Displays the (hidden state, cell state) pair returned by the final timestep.
model(X, a0, c0)

(array([[0.92524232, 0.93645167, 0.93283155, 0.96388035],
        [0.91963357, 0.93599172, 0.92470351, 0.95918422],
        [0.90109836, 0.91461579, 0.90811989, 0.94940092],
        [0.91501721, 0.90004912, 0.89997088, 0.95234815],
        [0.94168865, 0.94655854, 0.94236709, 0.96846044]]),
 array([[2.48596498, 2.22155935, 2.0135528 , 2.57291445],
        [2.33392141, 2.13967363, 1.89781327, 2.5391089 ],
        [2.27439984, 2.04190123, 1.83601401, 2.40743337],
        [2.0905266 , 1.90905341, 1.82754505, 2.27356226],
        [2.41063501, 2.18279401, 2.06094839, 2.56765447]]))

In [44]:
# Show the parameter dict of the cell at timestep index 2 (the last of the 3 timesteps).
model.timestep_2.named_parameters

{'Wf': array([[0.73324507, 0.74447315, 0.2213967 , 0.21411214, 0.19894792,
         0.14251834, 0.3770826 , 0.02662788],
        [0.11092037, 0.67456402, 0.79977654, 0.08052953, 0.23170231,
         0.20762566, 0.91733356, 0.71131452],
        [0.55388461, 0.30451799, 0.83485405, 0.43530596, 0.92345622,
         0.7060518 , 0.47803131, 0.1262101 ],
        [0.97604355, 0.15983365, 0.20260213, 0.43118176, 0.40420191,
         0.14675148, 0.72931892, 0.18874507],
        [0.64389564, 0.75430595, 0.21073239, 0.60095425, 0.74892838,
         0.63821871, 0.5971273 , 0.29548229]]),
 'Wi': array([[0.73160647, 0.94530844, 0.42556139, 0.78218182, 0.05614104,
         0.8352716 , 0.19225002, 0.39509687],
        [0.30008105, 0.08010364, 0.904631  , 0.37015418, 0.53069744,
         0.49411627, 0.13216114, 0.20645406],
        [0.07618881, 0.5079217 , 0.26154955, 0.35706161, 0.10806533,
         0.78755184, 0.10658388, 0.98570882],
        [0.17716116, 0.57240511, 0.04484533, 0.78711629, 0.1896059