In [1]:
import torch
from torch import nn


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.4 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/usr/local/Cellar/python@3.10/3.10.16/Frameworks/Python.framework/Versions/3.10/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/local/Cellar/python@3.10/3.10.16/Frameworks/Python.framework/Versions/3.10/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/Users/Eric/PycharmProjects/RecurrentNetworks/.venv/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File

In [23]:
class RecurrentLayer(nn.Module):
    """Plain tanh recurrent layer unrolled over a whole sequence.

    Starting from an all-zero hidden state, every time step applies
    ``h_t = tanh(h_{t-1} @ hidden_w + x_t @ input_w + b)`` and the hidden
    state of each step is returned.  All parameters are initialised to ones.
    """

    def __init__(self, embedded_dim: int, hidden_dim: int):
        """Create the layer parameters.

        Args:
            embedded_dim: Size of the last dimension of the input sequence.
            hidden_dim: Size of the hidden state produced at each step.
        """
        super().__init__()
        self.hidden_dim = hidden_dim

        self.hidden_w: torch.Tensor = nn.Parameter(torch.ones(hidden_dim, hidden_dim))
        self.input_w: torch.Tensor = nn.Parameter(torch.ones(embedded_dim, hidden_dim))
        self.b: torch.Tensor = nn.Parameter(torch.ones(hidden_dim))
        self.tanh = nn.Tanh()

    def forward(self, x_seq: torch.Tensor) -> torch.Tensor:
        """Run the recurrence over ``x_seq``.

        Args:
            x_seq: Tensor of shape ``(batch, seq_len, embedded_dim)``.

        Returns:
            Tensor of shape ``(batch, seq_len, hidden_dim)`` holding the hidden
            state after every step (``seq_len`` may be 0).

        Raises:
            ValueError: If ``x_seq`` is not 3-dimensional.
        """
        if x_seq.dim() != 3:
            raise ValueError(
                f"expected input of shape (batch, seq_len, embedded_dim), got {tuple(x_seq.shape)}"
            )
        # Read the shape from the argument itself, never from a notebook global.
        batch_size, seq_len, _ = x_seq.shape

        # Create the state directly on the input's device (keeping the device
        # index) and in the input's dtype so it matches the matmul operands.
        hidden_state = torch.zeros(
            batch_size, self.hidden_dim, device=x_seq.device, dtype=x_seq.dtype
        )

        # Collect per-step states and stack once instead of re-concatenating
        # the growing result on every iteration.
        hidden_states = []
        for step in range(seq_len):
            z = (
                torch.matmul(hidden_state, self.hidden_w)
                + torch.matmul(x_seq[:, step, :], self.input_w)
                + self.b
            )
            hidden_state = self.tanh(z)
            hidden_states.append(hidden_state)

        if not hidden_states:
            # torch.stack rejects an empty list; return a correctly shaped empty result.
            return x_seq.new_zeros((batch_size, 0, self.hidden_dim))
        return torch.stack(hidden_states, dim=1)

# Smoke test: batch of 4 sequences, 2 steps each, 3 features per step.
x = torch.randn(4, 2, 3)
rnn = RecurrentLayer(embedded_dim=3, hidden_dim=6)
logit = rnn(x)
# Last expression of the cell: display the output shape.
logit.shape

torch.Size([4, 2, 6])

In [32]:
class Gate(nn.Module):
    """Affine map of an input and a hidden state followed by an activation.

    Computes ``activation_function(h @ hidden_w + x @ input_w + b)``.
    Every parameter is initialised to ones.
    """

    def __init__(self, embedded_dim: int, hidden_dim: int, activation_function: nn.Module):
        """Allocate the weights and remember the activation module.

        Args:
            embedded_dim: Size of the last dimension of ``x``.
            hidden_dim: Size of the last dimension of ``h`` and of the output.
            activation_function: Module applied to the pre-activation.
        """
        super().__init__()
        recurrent_shape = (hidden_dim, hidden_dim)
        projection_shape = (embedded_dim, hidden_dim)

        self.hidden_w = nn.Parameter(torch.ones(recurrent_shape))
        self.input_w = nn.Parameter(torch.ones(projection_shape))
        self.b = nn.Parameter(torch.ones(hidden_dim))
        self.activation_function = activation_function

    def forward(self, x: torch.Tensor, h: torch.Tensor) -> torch.Tensor:
        """Return the activated combination of input ``x`` and hidden state ``h``."""
        recurrent_term = h @ self.hidden_w
        input_term = x @ self.input_w
        pre_activation = (recurrent_term + input_term) + self.b
        return self.activation_function(pre_activation)

class LSTMLayer(nn.Module):
    """LSTM layer unrolled over a whole sequence.

    Per step, with ``h`` the previous hidden state and ``c`` the previous cell
    state (both starting at zero)::

        f = forget_gate(x_t, h)
        i = input_gate(x_t, h)
        g = candidate_layer(x_t, h)
        c = f * c + i * g
        o = output_gate(x_t, h)
        h = o * tanh(c)

    The hidden state of every step is returned.
    """

    def __init__(self, embedded_dim: int, hidden_dim: int):
        """Build the four gates.

        Args:
            embedded_dim: Size of the last dimension of the input sequence.
            hidden_dim: Size of the hidden and cell states.
        """
        super().__init__()
        self.hidden_dim = hidden_dim

        self.forget_gate = Gate(embedded_dim, hidden_dim, nn.Sigmoid())
        self.input_gate = Gate(embedded_dim, hidden_dim, nn.Sigmoid())
        self.candidate_layer = Gate(embedded_dim, hidden_dim, nn.Tanh())
        self.output_gate = Gate(embedded_dim, hidden_dim, nn.Sigmoid())
        self.tanh = nn.Tanh()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the LSTM recurrence over ``x``.

        Args:
            x: Tensor of shape ``(batch, seq_len, embedded_dim)``.

        Returns:
            Tensor of shape ``(batch, seq_len, hidden_dim)`` with the hidden
            state after every step (``seq_len`` may be 0).

        Raises:
            ValueError: If ``x`` is not 3-dimensional.
        """
        if x.dim() != 3:
            raise ValueError(
                f"expected input of shape (batch, seq_len, embedded_dim), got {tuple(x.shape)}"
            )
        batch_size, seq_len, _ = x.shape

        # Allocate the states on the input's exact device (index included) and
        # in its dtype, so they match the gate operands.
        state_shape = (batch_size, self.hidden_dim)
        c_t = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        h_t = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        # Gather per-step outputs and stack once (linear instead of quadratic copying).
        hidden_states = []
        for step in range(seq_len):
            x_t = x[:, step, :]
            f_t = self.forget_gate(x_t, h_t)
            i_t = self.input_gate(x_t, h_t)
            candidate_c = self.candidate_layer(x_t, h_t)

            c_t = (f_t * c_t) + (i_t * candidate_c)
            # The output gate still sees the previous hidden state.
            o_t = self.output_gate(x_t, h_t)
            h_t = o_t * self.tanh(c_t)
            hidden_states.append(h_t)

        if not hidden_states:
            # torch.stack rejects an empty list; return a correctly shaped empty result.
            return x.new_zeros((batch_size, 0, self.hidden_dim))
        return torch.stack(hidden_states, dim=1)


# Smoke test: batch of 4 sequences, 2 steps each, 3 features per step.
x = torch.randn(4, 2, 3)
lstm = LSTMLayer(embedded_dim=3, hidden_dim=6)
logit = lstm(x)
# Last expression of the cell: display the output shape.
logit.shape

torch.Size([4, 2, 6])

In [34]:
class GRULayer(nn.Module):
    """GRU layer unrolled over a whole sequence.

    Per step, with ``h`` the previous hidden state (starting at zero)::

        r = reset_gate(x_t, h)
        z = update_gate(x_t, h)
        n = tanh(recurrent(r, h) + x_t @ input_w + bias)
        h = z * h + (1 - z) * n

    where ``recurrent(r, h)`` is ``r * (h @ hidden_w)`` by default and
    ``(r * h) @ hidden_w`` when ``reset_first`` is true.  The candidate
    weights and bias are initialised to ones.
    """

    def __init__(self, embedded_dim: int, hidden_dim: int, reset_first: bool = False):
        """Build the gates and the candidate-state parameters.

        Args:
            embedded_dim: Size of the last dimension of the input sequence.
            hidden_dim: Size of the hidden state.
            reset_first: If true, the reset gate scales the previous hidden
                state before the recurrent matmul; otherwise (default) it
                scales the result of that matmul.
        """
        super().__init__()
        self.hidden_dim = hidden_dim
        self.reset_first = reset_first

        self.reset_gate = Gate(embedded_dim, hidden_dim, nn.Sigmoid())
        self.update_gate = Gate(embedded_dim, hidden_dim, nn.Sigmoid())

        self.hidden_w = nn.Parameter(torch.ones((hidden_dim, hidden_dim)))
        self.input_w = nn.Parameter(torch.ones((embedded_dim, hidden_dim)))
        self.bias = nn.Parameter(torch.ones(hidden_dim))
        self.tanh = nn.Tanh()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the GRU recurrence over ``x``.

        Args:
            x: Tensor of shape ``(batch, seq_len, embedded_dim)``.

        Returns:
            Tensor of shape ``(batch, seq_len, hidden_dim)`` with the hidden
            state after every step (``seq_len`` may be 0).

        Raises:
            ValueError: If ``x`` is not 3-dimensional.
        """
        if x.dim() != 3:
            raise ValueError(
                f"expected input of shape (batch, seq_len, embedded_dim), got {tuple(x.shape)}"
            )
        batch_size, seq_len, _ = x.shape

        # Allocate the state on the input's exact device (index included) and
        # in its dtype, so it matches the matmul operands.
        h_t = torch.zeros((batch_size, self.hidden_dim), device=x.device, dtype=x.dtype)

        # Gather per-step outputs and stack once (linear instead of quadratic copying).
        hidden_states = []
        for step in range(seq_len):
            x_t = x[:, step, :]
            r_t = self.reset_gate(x_t, h_t)
            z_t = self.update_gate(x_t, h_t)

            # Honour the constructor flag: reset the state before or after
            # the recurrent projection.
            if self.reset_first:
                recurrent = torch.matmul(r_t * h_t, self.hidden_w)
            else:
                recurrent = r_t * torch.matmul(h_t, self.hidden_w)
            candidate_h = self.tanh(recurrent + torch.matmul(x_t, self.input_w) + self.bias)

            h_t = (z_t * h_t) + ((1 - z_t) * candidate_h)
            hidden_states.append(h_t)

        if not hidden_states:
            # torch.stack rejects an empty list; return a correctly shaped empty result.
            return x.new_zeros((batch_size, 0, self.hidden_dim))
        return torch.stack(hidden_states, dim=1)

# Smoke test: batch of 4 sequences, 2 steps each, 3 features per step.
x = torch.randn(4, 2, 3)
gru = GRULayer(embedded_dim=3, hidden_dim=6)
logit = gru(x)
# Last expression of the cell: display the output shape.
logit.shape




torch.Size([4, 2, 6])