In [1]:
from abc import ABC, abstractmethod
import numpy as np

c:\users\temil\onedrive\documents\codes_and_scripts\python_envs\pytorch-tf2-env\lib\site-packages\numpy\.libs\libopenblas.NOIJJG62EMASZI6NYURL6JBKM4EVBGM7.gfortran-win_amd64.dll
c:\users\temil\onedrive\documents\codes_and_scripts\python_envs\pytorch-tf2-env\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll
  stacklevel=1)


In [25]:
class RNN(ABC):
    """
    Abstract base class for a recurrent cell.

    Subclasses keep their configuration and their most recent activations in
    the underscore-prefixed attributes declared below, and must implement
    ``random_initializer``, ``forward`` and ``backward``.

    The initializer and both activations are stored as *method names*
    (strings) and are looked up on the instance with ``getattr`` when they are
    used, so any method defined on the class or a subclass can be selected.
    """
    # Configuration (set by subclasses).
    _vocab_size = None
    _cell_state_dims = None
    _hidden_state_dims = None
    _output_dims = None
    # Parameters returned by the selected initializer.
    _named_parameters = None
    # Names of the methods used as activations / initializer.
    _hidden_state_activation = None
    _output_activation = None
    _initalizer = None
    # Values recorded by the most recent forward pass.
    _a_prev = None
    _a_next = None
    _y_output = None
    _c_state = None

    def __init__(self):
        pass

    @property
    def vocab_size(self):
        """Configured vocabulary size."""
        return self._vocab_size

    @property
    def cell_state_dims(self):
        """Configured cell state size."""
        return self._cell_state_dims

    @property
    def hidden_state_dims(self):
        """Configured hidden state size."""
        return self._hidden_state_dims

    @property
    def output_dims(self):
        """Configured output size."""
        return self._output_dims

    @property
    def hidden_state(self):
        """Hidden state stored in ``_a_next``."""
        return self._a_next

    @property
    def prev_hidden_state(self):
        """Previous hidden state stored in ``_a_prev``."""
        return self._a_prev

    @property
    def cell_state(self):
        """Cell state stored in ``_c_state``."""
        return self._c_state

    @property
    def output(self):
        """Output stored in ``_y_output``."""
        return self._y_output

    @property
    def named_parameters(self):
        """Parameters stored in ``_named_parameters``."""
        return self._named_parameters

    @abstractmethod
    def random_initializer(self):
        """Create and return the cell's parameters."""

    @abstractmethod
    def forward(self, x, a_prev, c_prev=None):
        """Run the cell forward for input ``x`` and previous state(s)."""

    @abstractmethod
    def backward(self):
        """Run the backward pass."""

    @classmethod
    def tanh(cls, x):
        """Element-wise hyperbolic tangent."""
        return np.tanh(x)

    @classmethod
    def softmax(cls, x):
        """
        Softmax along axis 0, so the entries along axis 0 sum to 1.

        The maximum along axis 0 is subtracted before exponentiating. This
        does not change the mathematical result, but it stops ``np.exp`` from
        overflowing to ``inf`` (which would turn the ratio into ``nan``) when
        the inputs are large.
        """
        x = np.asarray(x)
        # np.max rejects a zero-length reduction axis, so skip the shift for
        # empty inputs; there is nothing that could overflow in that case.
        shift = np.max(x, axis=0, keepdims=True) if x.size else 0
        exps = np.exp(x - shift)
        return exps / np.sum(exps, axis=0, keepdims=True)

    def initialize_parameters(self):
        """Call the method named by ``_initalizer`` and return its result."""
        initializer_fn = getattr(self, self._initalizer)
        return initializer_fn()

    def hidden_state_activation(self, x):
        """Apply the method named by ``_hidden_state_activation`` to ``x``."""
        hidden_state_activation_fn = getattr(self, self._hidden_state_activation)
        return hidden_state_activation_fn(x)

    def output_activation(self, x):
        """Apply the method named by ``_output_activation`` to ``x``."""
        output_activation_fn = getattr(self, self._output_activation)
        return output_activation_fn(x)

In [26]:
class VanillaRNN(RNN):
    """
    Vanilla RNN class

    Args:
         vocab_size (int) - size of vocabulary (number of rows of the input)
         hidden_state_dims (int) - size of hidden state
         output_dims (int) - output dimension
         initalizer (str) - name of the method that creates the parameters
         hidden_state_activation (str) - name of the hidden state activation method
         output_activation (str) - name of the output activation method

    properties:
        prev_hidden_state (ndarray) - previous hidden state tensor
        hidden_state (ndarray) - hidden state tensor
        output (ndarray) - output tensor
        named_parameters (dict) - timestep parameters

    """
    def __init__(self, vocab_size,
                       hidden_state_dims,
                       output_dims,
                       initalizer="random_initializer",
                       hidden_state_activation="tanh",
                       output_activation="softmax"
                ):
        self._vocab_size = vocab_size
        self._hidden_state_dims = hidden_state_dims
        self._output_dims = output_dims
        self._initalizer = initalizer
        # The initializer reads the three sizes above, so they must be set first.
        self._named_parameters = self.initialize_parameters()
        self._hidden_state_activation = hidden_state_activation
        self._output_activation = output_activation

    def __call__(self, x, a_prev):
        """Shorthand for ``forward(x, a_prev)``."""
        return self.forward(x, a_prev)

    @property
    def cell_state_dims(self):
        raise NotImplementedError("Vanilla RNNs have no cell state")

    @property
    def cell_state(self):
        raise NotImplementedError("Vanilla RNNs have no cell state")

    def forward(self, x, a_prev, c_prev=None):
        """
        Compute the next hidden state and the output for one time step.

        Args:
            x (ndarray) - input of shape (vocab_size, num_examples)
            a_prev (ndarray) - previous hidden state of shape
                (hidden_state_dims, num_examples)
            c_prev - ignored; accepted only so the signature matches the
                abstract ``RNN.forward(x, a_prev, c_prev=None)``

        Returns:
            tuple ``(a_next, y_output)``; both are also stored on the instance
            and exposed through ``hidden_state`` and ``output``.

        Raises:
            ValueError - if ``x`` is not 2-D or the shapes of ``x`` / ``a_prev``
                do not match the configured sizes.
        """
        if x.ndim != 2:
            raise ValueError(
                f"x must be 2-D (vocab_size, num_examples); got shape {x.shape}"
            )
        num_rows, num_examples = x.shape
        if num_rows != self._vocab_size:
            raise ValueError(
                f"x has {num_rows} rows but vocab_size is {self._vocab_size}"
            )
        # Checked explicitly because the stacked product below only sees the
        # *sum* of the row counts, so a wrong split could otherwise go unnoticed.
        expected_shape = (self._hidden_state_dims, num_examples)
        if np.atleast_2d(a_prev).shape != expected_shape:
            raise ValueError(
                f"a_prev must have shape {expected_shape}; got {np.shape(a_prev)}"
            )

        self._a_prev = a_prev
        params = self.named_parameters
        # [Waa | Wax] times [a_prev ; x] equals Waa.a_prev + Wax.x in one product.
        weights = np.hstack([params["Waa"], params["Wax"]])
        inputs = np.vstack([a_prev, x])
        Wya = params["Wya"]
        ba = params["ba"]
        by = params["by"]
        self._a_next = self.hidden_state_activation(weights.dot(inputs) + ba)
        self._y_output = self.output_activation(Wya.dot(self._a_next) + by)
        return self._a_next, self._y_output

    def backward(self):
        """Not implemented yet; does nothing."""
        pass

    def random_initializer(self):
        """
        Create the parameter dict with ``np.random.rand`` (uniform on [0, 1)).

        The arrays are drawn in the order Wax, Waa, Wya, ba, by; keep this
        order, since results seeded through ``np.random.seed`` depend on it.
        """
        return {
            "Wax": np.random.rand(self._hidden_state_dims, self._vocab_size),
            "Waa": np.random.rand(self._hidden_state_dims, self._hidden_state_dims),
            "Wya": np.random.rand(self._output_dims, self._hidden_state_dims),
            "ba" : np.random.rand(self._hidden_state_dims, 1),
            "by" : np.random.rand(self._output_dims, 1)
        }
    

In [54]:
class GRU(RNN):
    """Placeholder subclass of RNN for a GRU cell; defines no members of its own yet."""
    

In [55]:
class LSTM(RNN):
    """Placeholder subclass of RNN for an LSTM cell; defines no members of its own yet."""

In [42]:
class SequenceModel:
    """
    Sequence Model

    Creates one cell per time step and feeds the hidden state returned by each
    cell into the next one.

    Args:
         num_time_steps - number of time steps
         rnn_model_class - class of RNN model; instantiated once per time step
         kwargs - dict of keyword arguments passed to ``rnn_model_class``
                  (``None`` means no keyword arguments)

    The cells are stored as attributes ``timestep_0`` ... ``timestep_{n-1}``.

    NOTE(review): every time step gets its own separately constructed cell, so
    parameters are not shared across time steps - confirm this is intended.
    """
    def __init__(self, num_time_steps, rnn_model_class, kwargs=None):
        self.num_time_steps = num_time_steps
        cell_kwargs = {} if kwargs is None else kwargs
        for i in range(num_time_steps):
            setattr(self, f"timestep_{i}", rnn_model_class(**cell_kwargs))

    def __call__(self, X, a0):
        """
        Run all time steps in order.

        Args:
            X - input whose last axis indexes the time step (``X[..., i]`` is
                passed to cell ``i``)
            a0 - initial hidden state passed to the first cell

        Returns:
            tuple ``(a_next, y_output)`` from the last cell. With zero time
            steps nothing is computed and ``(a0, None)`` is returned.
        """
        # y_output starts as None so a model with zero time steps returns
        # cleanly instead of hitting an unbound local variable.
        a_next, y_output = a0, None
        for i in range(self.num_time_steps):
            timestep = getattr(self, f"timestep_{i}")
            a_next, y_output = timestep(X[..., i], a_next)
        return a_next, y_output

In [43]:
# Seed NumPy's global generator so the random inputs and the randomly
# initialised parameters are reproducible.
np.random.seed(1)
num_timesteps = 3
vocab_size = 3
hidden_state_dims = 5
output_dims = 2
num_examples = 4

# X is drawn before the model is built; the seeded results depend on this order.
X = np.random.rand(vocab_size, num_examples, num_timesteps)
a0 = np.zeros((hidden_state_dims, num_examples))
model_kwargs = {"vocab_size":vocab_size, "hidden_state_dims":hidden_state_dims, "output_dims":output_dims}
# Tie the number of cells to num_timesteps so it cannot drift from X's last axis.
model = SequenceModel(num_timesteps, VanillaRNN, model_kwargs)
model(X, a0)

(array([[0.99195588, 0.99866091, 0.99926061, 0.99839471],
        [0.99784975, 0.99941239, 0.99957269, 0.99913954],
        [0.9960653 , 0.99951336, 0.99970848, 0.99909163],
        [0.98854368, 0.9956519 , 0.99779915, 0.99780434],
        [0.99812025, 0.99933286, 0.99969128, 0.99974024]]),
 array([[0.4704125 , 0.47000867, 0.47001463, 0.47007601],
        [0.5295875 , 0.52999133, 0.52998537, 0.52992399]]))

In [51]:
# Show the parameter dict held by the cell stored as timestep_2.
model.timestep_2.named_parameters

{'Wax': array([[0.07197428, 0.96727633, 0.56810046],
        [0.20329323, 0.25232574, 0.74382585],
        [0.19542948, 0.58135893, 0.97001999],
        [0.8468288 , 0.23984776, 0.49376971],
        [0.61995572, 0.8289809 , 0.15679139]]),
 'Waa': array([[0.0185762 , 0.07002214, 0.48634511, 0.60632946, 0.56885144],
        [0.31736241, 0.98861615, 0.57974522, 0.38014117, 0.55094822],
        [0.74533443, 0.66923289, 0.26491956, 0.06633483, 0.3700842 ],
        [0.62971751, 0.21017401, 0.75275555, 0.06653648, 0.2603151 ],
        [0.80475456, 0.19343428, 0.63946088, 0.52467031, 0.92480797]]),
 'Wya': array([[0.26329677, 0.06596109, 0.73506596, 0.77217803, 0.90781585],
        [0.93197207, 0.01395157, 0.23436209, 0.61677836, 0.94901632]]),
 'ba': array([[0.95017612],
        [0.55665319],
        [0.91560635],
        [0.64156621],
        [0.39000771]]),
 'by': array([[0.48599067],
        [0.60431048]])}

In [52]:
# Show the output recorded by the timestep_2 cell on its most recent forward call.
model.timestep_2.output

array([[0.4704125 , 0.47000867, 0.47001463, 0.47007601],
       [0.5295875 , 0.52999133, 0.52998537, 0.52992399]])

In [53]:
# Show the hidden state recorded by the timestep_2 cell on its most recent forward call.
model.timestep_2.hidden_state

array([[0.99195588, 0.99866091, 0.99926061, 0.99839471],
       [0.99784975, 0.99941239, 0.99957269, 0.99913954],
       [0.9960653 , 0.99951336, 0.99970848, 0.99909163],
       [0.98854368, 0.9956519 , 0.99779915, 0.99780434],
       [0.99812025, 0.99933286, 0.99969128, 0.99974024]])