# RNN

In [1]:
import torch
import torch.nn as nn
from torchinfo import summary
from torch.utils.data import Dataset, DataLoader, TensorDataset

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [13]:
L = 3
N = 1
H_in = 1
H_out = 2
N_layers = 1

class RNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.rnn = nn.RNN(H_in, H_out, N_layers, batch_first=True)

    def forward(self, x):
        out, h_i = self.rnn(x)
        return out, h_i

In [14]:
model = RNN()

In [15]:
model

RNN(
  (rnn): RNN(1, 2, batch_first=True)
)

In [16]:
tensor = torch.rand((N, L, H_in))

In [17]:
summary(model, tensor[0].shape, verbose=2)

Layer (type:depth-idx)                   Output Shape              Param #
RNN                                      [3, 2]                    --
├─RNN: 1-1                               [3, 2]                    10
│    └─weight_ih_l0                                                ├─2
│    └─weight_hh_l0                                                ├─4
│    └─bias_ih_l0                                                  ├─2
│    └─bias_hh_l0                                                  └─2
Total params: 10
Trainable params: 10
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


Layer (type:depth-idx)                   Output Shape              Param #
RNN                                      [3, 2]                    --
├─RNN: 1-1                               [3, 2]                    10
│    └─weight_ih_l0                                                ├─2
│    └─weight_hh_l0                                                ├─4
│    └─bias_ih_l0                                                  ├─2
│    └─bias_hh_l0                                                  └─2
Total params: 10
Trainable params: 10
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

In [13]:
for name, param in model.named_parameters():
  print(f"Name: {name}, Shape: {param.shape}, \n{param}")

Name: rnn.weight_ih_l0, Shape: torch.Size([1, 1]), 
Parameter containing:
tensor([[0.4722]], device='cuda:0', requires_grad=True)
Name: rnn.weight_hh_l0, Shape: torch.Size([1, 1]), 
Parameter containing:
tensor([[-0.7875]], device='cuda:0', requires_grad=True)
Name: rnn.bias_ih_l0, Shape: torch.Size([1]), 
Parameter containing:
tensor([0.0023], device='cuda:0', requires_grad=True)
Name: rnn.bias_hh_l0, Shape: torch.Size([1]), 
Parameter containing:
tensor([0.2934], device='cuda:0', requires_grad=True)
Name: rnn.weight_ih_l1, Shape: torch.Size([1, 1]), 
Parameter containing:
tensor([[0.7062]], device='cuda:0', requires_grad=True)
Name: rnn.weight_hh_l1, Shape: torch.Size([1, 1]), 
Parameter containing:
tensor([[-0.6334]], device='cuda:0', requires_grad=True)
Name: rnn.bias_ih_l1, Shape: torch.Size([1]), 
Parameter containing:
tensor([-0.9629], device='cuda:0', requires_grad=True)
Name: rnn.bias_hh_l1, Shape: torch.Size([1]), 
Parameter containing:
tensor([-0.8364], device='cuda:0', requ

In [None]:
# │    └─weight_ih_l0                                                ├─1
# │    └─weight_hh_l0                                                ├─1
# │    └─bias_ih_l0                                                  ├─1
# │    └─bias_hh_l0                                                  ├─1
│    └─weight_ih_l1                                                ├─1
│    └─weight_hh_l1                                                ├─1
│    └─bias_ih_l1                                                  ├─1
│    └─bias_hh_l1                                                  └─1

In [107]:
from torch.utils.tensorboard import SummaryWriter

In [108]:
writer = SummaryWriter()

In [109]:
writer.add_graph(model, tensor[0].to(device))

In [110]:
model(tensor[0].to(device))

(tensor([[0.2998],
         [0.0704],
         [0.3293]], device='cuda:0', grad_fn=<SqueezeBackward1>),
 tensor([[0.0649],
         [0.3293]], device='cuda:0', grad_fn=<SqueezeBackward1>))

In [124]:
import numpy as np

class SimpleRNN:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        # Inicialização dos pesos e biases
        self.Wxh = np.random.randn(hidden_size, input_size)
        self.Whh = np.random.randn(hidden_size, hidden_size)
        self.Why = np.random.randn(output_size, hidden_size)
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))
        
    def forward(self, inputs):
        # Inicialização do estado oculto
        h_prev = np.zeros((self.hidden_size, 1))
        
        # Lista para armazenar os estados ocultos ao longo do tempo
        self.hs = []
        
        # Loop através da sequência de entrada
        for x in inputs:
            # Atualização do estado oculto
            h_next = np.tanh(np.dot(self.Wxh, x) + np.dot(self.Whh, h_prev) + self.bh)
            # Atualização do estado oculto anterior
            h_prev = h_next
            # Armazenar o estado oculto atual na lista
            self.hs.append(h_next)
        
        # Cálculo da saída
        output = np.dot(self.Why, h_next) + self.by
        
        return output
    
# Parâmetros
input_size = 1
hidden_size = 3
output_size = 1

# Criar instância do modelo SimpleRNN
rnn = SimpleRNN(input_size, hidden_size, output_size)

# Exemplo de entrada
input_data = [np.array([[1.0]])]  # Entrada de forma (tamanho_da_entrada, batch_size)

# Passagem direta (forward pass)
output = rnn.forward(input_data)

print("Saída da RNN:", output)


Saída da RNN: [[-0.18685616]]


In [132]:
input_size = 1
hidden_size = 2
output_size = 1

In [133]:
# Inicialização dos pesos e biases
Wxh = np.random.randn(hidden_size, input_size)
Whh = np.random.randn(hidden_size, hidden_size)
Why = np.random.randn(output_size, hidden_size)
bh = np.zeros((hidden_size, 1))
by = np.zeros((output_size, 1))

In [134]:
Wxh

array([[ 0.09066306],
       [-0.25522501]])

In [135]:
Whh

array([[ 0.66600222, -0.20784111],
       [-0.12338027, -1.59624334]])

In [136]:
Why

array([[ 0.35042311, -0.89052236]])

In [137]:
bh

array([[0.],
       [0.]])

In [138]:
by

array([[0.]])

In [162]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicRNN(nn.Module):
    def __init__(self, n_inputs, n_neurons):
        super(BasicRNN, self).__init__()

        self.Wx = torch.randn(n_inputs, n_neurons) # n_inputs X n_neurons
        self.Wh = torch.randn(n_neurons, n_neurons) # n_neurons X n_neurons

        self.b = torch.zeros(1, n_neurons) # 1 X n_neurons

    def forward(self, x, hidden):
        return torch.tanh(torch.mm(x, self.Wx) + torch.mm(hidden, self.Wh) + self.b)


class CleanBasicRNN(nn.Module):

    def __init__(self, batch_size, n_inputs, n_neurons):
        super(CleanBasicRNN, self).__init__()

        self.rnn = BasicRNN(n_inputs, n_neurons)
        self.hx = torch.randn(batch_size, n_neurons) # initialize hidden state

    def forward(self, X):
        output = []

        # for each time step
        for i in range(2):
            self.hx = self.rnn(X[i], self.hx)
            output.append(self.hx)

        return output, self.hx


In [164]:
class CharRNN(torch.nn.Module):

    def __init__(self,input_size,hidden_size,output_size, n_layers = 1):

        super(CharRNN, self).__init__()
        self.input_size  = input_size
        self.hidden_size = hidden_size
        self.n_layers    = 1

        self.x2h_i = torch.nn.Linear(input_size + hidden_size, hidden_size)
        self.x2h_f = torch.nn.Linear(input_size + hidden_size, hidden_size)
        self.x2h_o = torch.nn.Linear(input_size + hidden_size, hidden_size)
        self.x2h_q = torch.nn.Linear(input_size + hidden_size, hidden_size)
        self.h2o   = torch.nn.Linear(hidden_size, output_size)
        self.sigmoid = torch.nn.Sigmoid()
        self.softmax = torch.nn.Softmax()
        self.tanh    = torch.nn.Tanh()

    def forward(self, input, h_t, c_t):

        combined_input = torch.cat((input,h_t),1)

        i_t = self.sigmoid(self.x2h_i(combined_input))
        f_t = self.sigmoid(self.x2h_f(combined_input))
        o_t = self.sigmoid(self.x2h_o(combined_input))
        q_t = self.tanh(self.x2h_q(combined_input))

        c_t_next = f_t*c_t + i_t*q_t
        h_t_next = o_t*self.tanh(c_t_next)

        output = self.softmax(h_t_next)
        return output, h_t, c_t

    def initHidden(self):
        return torch.autograd.Variable(torch.zeros(1, self.hidden_size))

    def weights_init(self,model):

        classname = model.__class__.__name__
        if classname.find('Linear') != -1:
            model.weight.data.normal_(0.0, 0.02)
            model.bias.data.fill_(0)

In [166]:
model = CharRNN(1,1,1)

In [167]:
model

CharRNN(
  (x2h_i): Linear(in_features=2, out_features=1, bias=True)
  (x2h_f): Linear(in_features=2, out_features=1, bias=True)
  (x2h_o): Linear(in_features=2, out_features=1, bias=True)
  (x2h_q): Linear(in_features=2, out_features=1, bias=True)
  (h2o): Linear(in_features=1, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (softmax): Softmax(dim=None)
  (tanh): Tanh()
)

In [18]:
64 * 64

4096