# Recurrent Neural Networks 

<img src="images/image8.png" height="400"/>

- Input at time step $t$: $x_t$
- Output at time step $t$: $y_t$
- Hidden state at time step $t$: $h_t$

$$h_t=\tanh(W_{hh}^T h_{t-1} + W_{xh}^T x_t)$$




$$y_t=W_{hy}^T h_t$$

In [2]:
import torch
import torch.nn as nn

# Scalar RNN parameters. requires_grad=True makes autograd track them so
# that loss.backward() populates their .grad attributes.
W_hx = torch.tensor(0.5, requires_grad=True)  # input  -> hidden
W_hh = torch.tensor(0.8, requires_grad=True)  # hidden -> hidden
W_yh = torch.tensor(1.0, requires_grad=True)  # hidden -> output

x_t = torch.tensor(1.0)          # input at time step t
h_t_minus_1 = torch.tensor(0.0)  # initial hidden state
y_t = torch.tensor(0.5)          # ground-truth value (target)

# Forward pass: one recurrent step followed by a squared-error loss.
pre_activation = W_hx * x_t + W_hh * h_t_minus_1
h_t = torch.tanh(pre_activation)
y_pred = W_yh * h_t
error = y_pred - y_t
loss = 0.5 * error ** 2

# Backward pass: fills in .grad for every tensor created with
# requires_grad=True.
loss.backward()

# Report the computed gradients. Because the previous hidden state is 0,
# the gradient with respect to W_hh is zero in this example.
for message, weight in (
    ("Gradiente respecto a W_hx:", W_hx),
    ("Gradiente respecto a W_hh:", W_hh),
    ("Gradiente respecto a W_yh:", W_yh),
):
    print(message, weight.grad.item())


Gradiente respecto a W_hx: -0.029792867600917816
Gradiente respecto a W_hh: -0.0
Gradiente respecto a W_yh: -0.01750630885362625


<img src="images/image9.png" height="400"/>

# Long short-term memory

- Mitiga los problemas de desvanecimiento y explosión de la gradiente (vanishing / exploding gradients).
- Flujo ininterrumpido de la gradiente.
- $C_t$: estado celular (almacena información de largo plazo).


- $f_t = \sigma(w_f [h_{t-1}, x_t] + b_f)$, $i_t = \sigma(w_i [h_{t-1}, x_t] + b_i)$, $o_t = \sigma(w_o [h_{t-1}, x_t] + b_o)$
- $\hat{C_{t}} = \tanh(w_c[h_{t-1},x_t]+b_c)$

$$C_t = f_t ⊙ C_{t-1} + i_t ⊙ \hat{C_{t}}$$

$$h_t = o_t ⊙ \tanh(C_t)$$

- Forget gate ($f_t$): Decide cuánto del estado anterior se retiene.
- Input gate ($i_t$): Controla cuánto de la nueva información se agrega.

In [4]:

# Single-layer LSTM mapping 10 input features to 20 hidden units.
lstm = nn.LSTM(input_size=10, hidden_size=20, num_layers=1)

# Random input: sequence length 5, batch size 3, 10 features.
input_seq = torch.randn(5, 3, 10)

# Zero initial states, shaped (num_layers, batch, hidden_size).
h0 = torch.zeros(1, 3, 20)  # initial hidden state
c0 = torch.zeros(1, 3, 20)  # initial cell state
initial_state = (h0, c0)

# Forward pass, then back-propagate a scalar (the sum of all outputs) so
# that every LSTM parameter receives a gradient.
output, (hn, cn) = lstm(input_seq, initial_state)
loss = output.sum()
loss.backward()

print("Gradientes del peso de entrada: ", lstm.weight_ih_l0.grad)


Gradientes del peso de entrada:  tensor([[ 7.3595e-02,  5.1353e-01,  4.8269e-01, -2.7651e-01,  2.7734e-03,
          2.4555e-02,  3.6766e-01,  1.0977e-02,  1.0595e-01,  9.1252e-02],
        [-1.5730e-01, -4.2321e-01, -6.6765e-01,  2.2311e-01, -1.0151e-01,
         -3.1406e-01, -2.3387e-01,  7.5129e-02,  1.5490e-02, -4.1527e-01],
        [ 5.4487e-01,  6.8584e-02,  5.0336e-01, -4.3454e-01,  1.8972e-01,
          3.2093e-01,  2.5845e-01, -1.8462e-01,  5.4906e-01, -1.1708e-01],
        [-1.4148e-01,  3.1261e-01,  2.6108e-01, -3.2385e-01, -6.0244e-01,
          1.5842e-01, -2.4347e-01, -5.7092e-01, -3.3145e-01,  3.1986e-01],
        [ 2.4790e-01, -5.7107e-01, -8.2786e-02,  4.3635e-01,  2.0141e-01,
          1.7559e-01, -6.4008e-01,  2.1406e-01,  4.8573e-01,  3.7510e-02],
        [ 4.3726e-01, -4.8447e-01,  1.0363e-01,  4.1317e-02,  3.2591e-01,
          2.5817e-01,  4.2933e-01, -1.6833e-02,  3.1638e-01, -7.1364e-01],
        [ 1.6832e-01, -3.3658e-01, -7.6101e-02,  1.4695e-01, -4.6314e-01,

In [None]:


class LSTMcell(nn.Module):
    """LSTM cell implemented with one linear layer per gate.

    Each gate reads the concatenation ``[x_t, h_{t-1}]`` and produces
    ``n_hidden`` values:

    * forget gate     ``f_t = sigmoid(wf(z_t))``
    * input gate      ``i_t = sigmoid(wi(z_t))``
    * output gate     ``o_t = sigmoid(wo(z_t))``
    * candidate state ``c~_t = tanh(wc(z_t))``

    Args:
        in_chan: Number of input features per time step.
        n_hidden: Size of the hidden and cell states.
    """

    def __init__(self, in_chan, n_hidden):
        super(LSTMcell, self).__init__()
        self.in_chan = in_chan
        self.n_hidden = n_hidden

        # All gates share the same input width (input features + hidden size).
        self.wf = nn.Linear(in_chan + n_hidden, n_hidden, bias=True)  # forget gate
        self.wi = nn.Linear(in_chan + n_hidden, n_hidden)  # input gate
        self.wo = nn.Linear(in_chan + n_hidden, n_hidden)  # output gate
        self.wc = nn.Linear(in_chan + n_hidden, n_hidden)  # candidate cell state

        self.Sigmoid = nn.Sigmoid()
        self.Tanh = nn.Tanh()

    def forward(self, xt, ht, ct):
        """Advance the cell by one time step.

        Args:
            xt: Input at the current step, expected shape ``(batch, in_chan)``.
            ht: Previous hidden state, expected shape ``(batch, n_hidden)``.
            ct: Previous cell state, expected shape ``(batch, n_hidden)``.

        Returns:
            Tuple ``(ht, ct)`` with the updated hidden and cell states.
        """
        # Gates operate on the input and previous hidden state side by side.
        zt = torch.cat((xt, ht), dim=1)

        ft = self.Sigmoid(self.wf(zt))
        it = self.Sigmoid(self.wi(zt))
        ot = self.Sigmoid(self.wo(zt))
        cct = self.Tanh(self.wc(zt))  # candidate

        # Keep part of the old memory and add part of the new candidate.
        ct = ft * ct + it * cct
        ht = ot * self.Tanh(ct)

        # The original computed these states but never returned them.
        return ht, ct
    
class LSTMcell(nn.Module):
    """LSTM cell whose four gate projections are fused into one linear layer.

    A single ``nn.Linear`` maps the concatenation ``[x_t, h_{t-1}]`` to
    ``4 * n_hidden`` values, which are split into four equal chunks in the
    order: input gate, forget gate, output gate, candidate cell state.

    Args:
        in_chan: Number of input features per time step.
        n_hidden: Size of the hidden and cell states.
    """

    def __init__(self, in_chan, n_hidden):
        super(LSTMcell, self).__init__()
        self.in_chan = in_chan
        self.n_hidden = n_hidden

        # One matrix multiply produces the pre-activations of all four gates.
        self.W = nn.Linear(in_chan + n_hidden, 4 * n_hidden, bias=True)
        self.Sigmoid = nn.Sigmoid()
        self.Tanh = nn.Tanh()

    def forward(self, xt, ht, ct):
        """Advance the cell by one time step.

        Args:
            xt: Input at the current step, expected shape ``(batch, in_chan)``.
            ht: Previous hidden state, expected shape ``(batch, n_hidden)``.
            ct: Previous cell state, expected shape ``(batch, n_hidden)``.

        Returns:
            Tuple ``(ht, ct)`` with the updated hidden and cell states.
        """
        zt = torch.cat((xt, ht), dim=1)

        # Pre-activations for every gate, split along the feature dimension.
        fox = self.W(zt)
        it, ft, ot, cct = torch.chunk(fox, 4, dim=1)

        # Gates must lie in (0, 1) and the candidate in (-1, 1); the original
        # used the raw linear outputs, which is not an LSTM update.
        it = self.Sigmoid(it)
        ft = self.Sigmoid(ft)
        ot = self.Sigmoid(ot)
        cct = self.Tanh(cct)

        ct = ft * ct + it * cct
        ht = ot * self.Tanh(ct)

        # The original computed these states but never returned them.
        return ht, ct
        
