# Recurrent Neural Networks 

<img src="../images/image8.png" height="400"/>

- Input at time step $t$: $x_t$
- Output at time step $t$: $y_t$
- Hidden state at time step $t$: $h_t$

$$h_t=\tanh(W_{hh}^T h_{t-1} + W_{xh}^T x_t)$$




$$y_t=W_{hy}^T h_t$$

In [1]:
import torch
import torch.nn as nn

In [2]:
# Toy dimensions for a single-layer LSTM forward pass.
seq_len, batch_size = 3, 5
input_size = hidden_size = 10
num_layers = 1

# Random input sequence laid out as [seq_len, batch_size, input_size].
x = torch.randn((seq_len, batch_size, input_size))

# Random initial hidden and cell states: [num_layers, batch_size, hidden_size].
state_shape = (num_layers, batch_size, hidden_size)
h0 = torch.randn(state_shape)
c0 = torch.randn(state_shape)

lstm = nn.LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)

# `output` collects the last layer's hidden state at every time step;
# (hn, cn) are the hidden and cell states after the final step.
output, (hn, cn) = lstm(x, (h0, c0))

print(output.shape)

torch.Size([3, 5, 10])


In [2]:
# Scalar RNN weights, tracked by autograd.
W_hx = torch.tensor(0.5, requires_grad=True)  # input -> hidden
W_hh = torch.tensor(0.8, requires_grad=True)  # hidden -> hidden
W_yh = torch.tensor(1.0, requires_grad=True)  # hidden -> output

x_t = torch.tensor(1.0)          # input at time t
h_t_minus_1 = torch.tensor(0.0)  # initial hidden state
y_t = torch.tensor(0.5)          # ground-truth value (target)

# Forward pass: one recurrent step followed by a linear read-out.
pre_activation = W_hx * x_t + W_hh * h_t_minus_1
h_t = torch.tanh(pre_activation)
y_pred = W_yh * h_t
residual = y_pred - y_t
loss = 0.5 * residual ** 2  # half squared error

# Backward pass: fills in .grad for every weight created with requires_grad=True.
loss.backward()

# Computed gradients. The W_hh gradient is zero here because W_hh only
# multiplies h_t_minus_1, which is 0.
print("Gradiente respecto a W_hx:", W_hx.grad.item())
print("Gradiente respecto a W_hh:", W_hh.grad.item())
print("Gradiente respecto a W_yh:", W_yh.grad.item())


Gradiente respecto a W_hx: -0.029792867600917816
Gradiente respecto a W_hh: -0.0
Gradiente respecto a W_yh: -0.01750630885362625


<img src="../images/image9.png" height="400"/>

# Long short-term memory

- Mitiga el problema de vanishing gradients de las RNN simples (los exploding gradients suelen controlarse además con gradient clipping).
- Flujo ininterrumpido de la gradiente.
- $C_t$: estado celular (almacena información de largo plazo).


- $f_t = \sigma(w_f [h_{t-1}, x_t] + b_f)$, $i_t = \sigma(w_i [h_{t-1}, x_t] + b_i)$, $o_t = \sigma(w_o [h_{t-1}, x_t] + b_o)$
- $\hat{C_{t}} = \tanh(w_c[h_{t-1},x_t]+b_c)$

$$C_t = f_t ⊙ C_{t-1} + i_t ⊙ \hat{C_{t}}$$

$$h_t = o_t ⊙ \tanh(C_t)$$

- Forget gate ($f_t$): Decide cuánto del estado anterior se retiene.
- Input gate ($i_t$): Controla cuánto de la nueva información se agrega.
- Output gate ($o_t$): Controla cuánto del estado celular se expone como estado oculto $h_t$.

In [4]:

# x is laid out as [seq_len, batch_size, input_size];
# seq_len is the number of time steps (t).
seq_len, batch_size, input_size = 3, 5, 10
x = torch.randn((seq_len, batch_size, input_size))

hidden_size, num_layers = 20, 1
lstm = nn.LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)

# Zero initial states, each [num_layers, batch_size, hidden_size].
h0 = torch.zeros((num_layers, batch_size, hidden_size))  # initial hidden state
c0 = torch.zeros_like(h0)                                # initial cell state

output, (hn, cn) = lstm(x, (h0, c0))

# output.shape is torch.Size([3, 5, 20]) ([seq_len, batch_size, hidden_size]);
# the states printed below keep the [num_layers, batch_size, hidden_size] layout.
print(h0.shape, hn.shape, cn.shape, sep="\n")


torch.Size([1, 5, 20])
torch.Size([1, 5, 20])
torch.Size([1, 5, 20])


In [5]:
class SentimentLSTM(nn.Module):
    """LSTM encoder followed by a linear layer that scores each class.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        num_classes: number of output scores per sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        # batch_first=True: inputs are [batch_size, seq_len, input_size].
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return per-class scores computed from the final hidden state.

        Only the last layer's hidden state after the final time step is used;
        the per-step outputs and the cell state are discarded.
        """
        _, final_states = self.lstm(x)
        final_hidden = final_states[0]       # one entry per stacked layer
        top_layer_hidden = final_hidden[-1]  # last hidden layer
        return self.fc(top_layer_hidden)
    


 
# Smoke run: push one random batch through SentimentLSTM and inspect the result.
seq_len, batch_size = 3, 5
input_size, hidden_size = 10, 20
num_layers, num_classes = 1, 2

model = SentimentLSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)

# The model's LSTM is batch-first, so the batch dimension leads:
# [batch_size, seq_len, input_size].
x = torch.randn((batch_size, seq_len, input_size))

y = model(x)  # forward

# One row of class scores per sequence in the batch.
print(y, y.shape, sep="\n")



tensor([[-0.2440, -0.2586],
        [-0.2549, -0.2535],
        [-0.2108, -0.3007],
        [-0.1166, -0.0822],
        [-0.2338, -0.2223]], grad_fn=<AddmmBackward0>)
torch.Size([5, 2])


In [6]:
import torch
import torch.nn as nn

class LSTMCell(nn.Module):
    """One LSTM time step built from four separate linear layers.

    Each layer maps the concatenation [xt, ht] (input_size + hidden_size
    features) to hidden_size features.

    Args:
        input_size: number of features in the input xt.
        hidden_size: number of features in the hidden and cell states.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        concat_size = input_size + hidden_size
        self.wf = nn.Linear(concat_size, hidden_size)  # forget gate
        self.wi = nn.Linear(concat_size, hidden_size)  # input gate
        self.wo = nn.Linear(concat_size, hidden_size)  # output gate
        self.wc = nn.Linear(concat_size, hidden_size)  # candidate cell state

    def forward(self, xt, ht, ct):
        """Advance the cell by one step.

        Args:
            xt: current input; concatenated with ht along dim=1.
            ht: previous hidden state.
            ct: previous cell state.

        Returns:
            Tuple (new hidden state, new cell state).
        """
        joined = torch.cat((xt, ht), dim=1)

        forget_gate = self.wf(joined).sigmoid()
        input_gate = self.wi(joined).sigmoid()
        output_gate = self.wo(joined).sigmoid()
        candidate = self.wc(joined).tanh()

        # Keep part of the old cell state and add the gated candidate.
        next_cell = forget_gate * ct + input_gate * candidate
        # The hidden state is the squashed cell state, filtered by the output gate.
        next_hidden = output_gate * next_cell.tanh()

        return next_hidden, next_cell

In [None]:
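# GRU equations (h~_t is the candidate hidden state, (.) is element-wise product):
# z_t  = sigmoid(W_z * [h_{t-1}, x_t] + b_z)            # update gate
# r_t  = sigmoid(W_r * [h_{t-1}, x_t] + b_r)            # reset gate
# h~_t = tanh(W_h * [r_t (.) h_{t-1}, x_t] + b_h)       # candidate hidden state
# h_t  = (1 - z_t) (.) h_{t-1} + z_t (.) h~_t           # new hidden state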
    
class GRUCell(nn.Module):
    """One GRU time step built from three separate linear layers.

    Each layer maps input_size + hidden_size features to hidden_size features.

    Args:
        input_size: number of features in the input xt.
        hidden_size: number of features in the hidden state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        concat_size = input_size + hidden_size
        self.wz = nn.Linear(concat_size, hidden_size)  # update gate
        self.wr = nn.Linear(concat_size, hidden_size)  # reset gate
        self.wh = nn.Linear(concat_size, hidden_size)  # candidate hidden state

    def forward(self, xt, ht):
        """Advance the cell by one step and return the new hidden state.

        Args:
            xt: current input; concatenated with ht along dim=1.
            ht: previous hidden state.
        """
        joined = torch.cat((xt, ht), dim=1)

        update_gate = self.wz(joined).sigmoid()
        reset_gate = self.wr(joined).sigmoid()

        # The candidate sees the input plus the reset-gated previous hidden state.
        gated_input = torch.cat((xt, reset_gate * ht), dim=1)
        candidate = self.wh(gated_input).tanh()

        # Blend the previous state and the candidate using the update gate.
        return (1 - update_gate) * ht + update_gate * candidate

In [None]:
class LSTMcell(nn.Module):
    def __init__(self, in_chan, n_hidden):
        super(LSTMcell, self).__init__()
        self.in_chan = in_chan
        self.n_hidden = n_hidden

        self.W = nn.Linear(in_chan + n_hidden, 4*n_hidden, bias=True)
        self.Sigmoid = nn.Sigmoid()
        self.Tanh = nn.Tanh()

    def forward(self, xt, ht, ct):

        zt = torch.cat((xt, ht), dim=1)

        fox = self.W(zt)

        it, ft, ot, cct = torch.chunk(fox, 4, dim=1)
        

        ct = ft*ct + it*cct
        ht = ot*self.Tanh(ct)