In [79]:
import numpy as np


class RNN:
    """Minimal many-to-many Elman RNN (forward pass only) implemented with NumPy.

    For every time step ``t`` the network computes::

        a_t     = tanh(Waa @ a_{t-1} + Wax @ x_t + ba)
        y_hat_t = softmax(Way @ a_t + by)

    starting from an all-zero hidden state ``a0``.
    """

    def __init__(self, in_features, hidden_size, n_classes, activation='tanh'):
        """Create the parameter matrices.

        Args:
            in_features: Length of each input vector ``x_t``.
            hidden_size: Length of the hidden state ``a_t``.
            n_classes: Number of output classes per time step.
            activation: Stored on the instance for reference only; ``forward``
                always applies ``tanh`` regardless of this value.
        """
        self.in_features = in_features
        self.hidden_size = hidden_size
        self.n_classes = n_classes
        self.activation = activation

        self.Waa = self.init_weight_matrix(size=(self.hidden_size, self.hidden_size))
        self.Wax = self.init_weight_matrix(size=(self.hidden_size, self.in_features))
        self.Way = self.init_weight_matrix(size=(self.n_classes, self.hidden_size))
        self.ba = self.init_weight_matrix(size=(self.hidden_size, 1))
        self.by = self.init_weight_matrix(size=(self.n_classes, 1))
        self.a0 = np.zeros(hidden_size)

    def init_weight_matrix(self, size):
        """Return a ``size``-shaped array of uniform [0, 1) samples drawn with seed 1.

        A private ``RandomState`` is used instead of ``np.random.seed`` so that
        constructing the network does not reset the caller's global NumPy RNG.
        The produced values are identical to seeding the global generator.

        Every call restarts from seed 1, so all parameter matrices share the
        same leading values; this is kept so the numeric results stay unchanged.
        """
        rng = np.random.RandomState(1)
        return rng.uniform(size=size)

    def tanh(self, z):
        """Element-wise hyperbolic tangent.

        ``np.tanh`` is used because the explicit exp-based formula evaluates
        to ``inf / inf = nan`` once ``|z|`` is large.
        """
        return np.tanh(z)

    def softmax(self, z):
        """Row-wise softmax of a 2-D array (normalises along ``axis=1``).

        The row maximum is subtracted before exponentiating; this leaves the
        result mathematically unchanged but prevents overflow for large logits.
        """
        z = np.asarray(z, dtype=float)
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def forward(self, x):
        """Run the network over a whole sequence.

        Args:
            x: Array of shape ``(in_features, seq_len)``; column ``t`` is the
                input vector for time step ``t``.

        Returns:
            Array of shape ``(n_classes, seq_len)``; column ``t`` holds the
            class probabilities for time step ``t``. An empty sequence yields
            an array of shape ``(n_classes, 0)``.
        """
        seq_len = x.shape[1]
        if seq_len == 0:
            return np.empty((self.n_classes, 0))

        # Keep the hidden state as a column vector throughout the recurrence.
        a_t = self.a0.reshape(-1, 1)
        hidden_states = []
        for t in range(seq_len):
            x_t = x[:, t].reshape(self.in_features, 1)
            z_t = np.dot(self.Waa, a_t) + np.dot(self.Wax, x_t) + self.ba
            a_t = self.tanh(z_t)
            hidden_states.append(a_t)

        # (hidden_size, seq_len): one column per time step.
        a = np.hstack(hidden_states)

        # Project every hidden state at once; `by` broadcasts across columns.
        logits = np.dot(self.Way, a) + self.by
        # softmax normalises along axis=1, so transpose to put classes there.
        return self.softmax(logits.T).T
    

def read_matrix(n_rows, dtype=float):
    """Read ``n_rows`` lines from standard input and return them as an array.

    Each line is split on whitespace and every token is converted with
    ``dtype`` before the rows are stacked with ``np.array``.
    """
    parsed_rows = []
    for _ in range(n_rows):
        tokens = input().split()
        parsed_rows.append([dtype(token) for token in tokens])
    return np.array(parsed_rows)

def print_matrix(matrix):
    """Print each row of ``matrix`` on its own line, entries separated by one space."""
    for entries in matrix:
        # print() applies str() to every argument and joins them with a space.
        print(*entries)

def solution():
    """Build a small RNN, run it on a fixed 4-step sequence and print the result.

    The printed matrix has one row per class and one column per time step,
    rounded to three decimals.
    """
    n_inputs, n_hidden, n_outputs = 3, 2, 3

    # One row per input feature, one column per time step.
    sequence = np.array([
        [0.0, -1.0, 2.0, 3.0],
        [-3.0, 0.0, 1.0, 4.0],
        # The first entry was originally written `--2` (double negation), i.e. +2.
        [2.0, 1.0, 2.0, 3.0],
    ])

    model = RNN(n_inputs, n_hidden, n_outputs)
    probabilities = model.forward(sequence).round(3)
    print_matrix(probabilities)

# Run the demo: prints the rounded RNN output matrix, one row per line.
solution()

0.301 0.388 0.531 0.538
0.504 0.429 0.321 0.317
0.195 0.183 0.148 0.145


In [68]:
import torch
import torch.nn as nn
# Seed PyTorch so the randomly initialised nn.RNN weights (and therefore the
# displayed output) are reproducible after a kernel restart.
torch.manual_seed(0)

# Two time steps (rows) with three features each (columns).
inputs = torch.tensor([[0.0, -1.0, 2.0],
                       [-3.0, 0.0, 1.0]], dtype=torch.float32)

print(inputs)
# Number of features in each input vector (number of columns).
INPUT_SIZE = 3
# Number of time steps in the sequence (number of rows).
SEQ_LENGTH = 2
# Dimensionality of the hidden state; nn.RNN emits a vector of this size
# for every time step.
HIDDEN_SIZE = 2
# Number of stacked RNN layers.
NUM_LAYERS = 1
# Number of sequences processed together.
BATCH_SIZE = 1
# batch_first=True makes nn.RNN expect input shaped (batch, seq, feature).
inputs = inputs.view(BATCH_SIZE, SEQ_LENGTH, INPUT_SIZE)
rnn = nn.RNN(input_size=INPUT_SIZE, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS, batch_first=True)
# Call the module itself rather than .forward() so registered hooks run.
# Returns (output for every time step, final hidden state).
rnn(inputs)

tensor([[ 0., -1.,  2.],
        [-3.,  0.,  1.]])


(tensor([[[ 0.3187, -0.3320],
          [-0.1620, -0.0676]]], grad_fn=<TransposeBackward1>),
 tensor([[[-0.1620, -0.0676]]], grad_fn=<StackBackward0>))