<img src="https://github.com/hernancontigiani/ceia_memorias_especializacion/raw/master/Figures/logoFIUBA.jpg" width="500" align="center">


# Procesamiento de lenguaje natural
## RNN many-to-one

#### Datos
El objetivo es utilizar una serie de secuencias numéricas (datos sintéticos) para poner a prueba el uso de las redes RNN. Este ejemplo se inspiró en otro artículo, lo tienen como referencia en el siguiente link:\
[LINK](https://stackabuse.com/solving-sequence-problems-with-lstm-in-keras/)

In [28]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [7]:
# Build the synthetic dataset.
# Inputs: the integers 1..45 grouped into consecutive triples
# [[1, 2, 3], [4, 5, 6], ...]
triples = np.arange(1, 46).reshape(-1, 3)

# Target: the sum of the three numbers in each input triple
totals = triples.sum(axis=1)

X = triples.reshape(len(triples), 3, 1)   # (n_samples, seq_len, n_features)
y = totals.reshape(-1, 1)                 # (n_samples, 1)

print("datos X:", X)
print("datos y:", y)

datos X: [[[ 1]
  [ 2]
  [ 3]]

 [[ 4]
  [ 5]
  [ 6]]

 [[ 7]
  [ 8]
  [ 9]]

 [[10]
  [11]
  [12]]

 [[13]
  [14]
  [15]]

 [[16]
  [17]
  [18]]

 [[19]
  [20]
  [21]]

 [[22]
  [23]
  [24]]

 [[25]
  [26]
  [27]]

 [[28]
  [29]
  [30]]

 [[31]
  [32]
  [33]]

 [[34]
  [35]
  [36]]

 [[37]
  [38]
  [39]]

 [[40]
  [41]
  [42]]

 [[43]
  [44]
  [45]]]
datos y: [[  6]
 [ 15]
 [ 24]
 [ 33]
 [ 42]
 [ 51]
 [ 60]
 [ 69]
 [ 78]
 [ 87]
 [ 96]
 [105]
 [114]
 [123]
 [132]]


In [4]:
# Train/validation split (20% held out, fixed seed so the split is reproducible)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert every split to a float32 tensor so it can be fed to the RNN
X_train, y_train, X_val, y_val = (
    torch.tensor(split, dtype=torch.float32)
    for split in (X_train, y_train, X_val, y_val)
)

In [8]:
# y was already built as a NumPy array in the data-generation cell, so
# np.asanyarray returns it unchanged; this cell just displays its shape.
y = np.asanyarray(y)
y.shape

(15, 1)

### 2 - Entrenar el modelo

In [9]:
# Shape of a single sample: (seq_len, n_features)
input_shape = X[0].shape
input_shape

(3, 1)

In [10]:
# Size of the model output: one scalar target per input sequence
output_shape = 1
output_shape

1

In [25]:
class LSTMModel(nn.Module):
    """Many-to-one LSTM regressor: LSTM -> ReLU -> fully connected layer.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the LSTM hidden state (per direction).
        output_size: number of values predicted per sequence.
        bidirectional: if True, the LSTM also reads the sequence backwards and
            the linear layer receives ``2 * hidden_size`` features.
    """

    def __init__(self, input_size=1, hidden_size=64, output_size=1, bidirectional=False):
        super().__init__()
        num_directions = 2 if bidirectional else 1
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True, bidirectional=bidirectional)
        self.fc = nn.Linear(hidden_size * num_directions, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of sequences ``(batch, seq_len, input_size)`` to ``(batch, output_size)``."""
        out, (h_n, _) = self.lstm(x)
        if self.lstm.bidirectional:
            # At the last time step the reverse direction has only seen one
            # element, so out[:, -1, :] would discard almost everything it
            # learned. Use the *final* hidden state of each direction instead:
            # h_n[-2] is the forward pass after the last step, h_n[-1] is the
            # backward pass after it has consumed the whole sequence.
            summary = torch.cat((h_n[-2], h_n[-1]), dim=1)
        else:
            summary = out[:, -1, :]   # last output of the sequence
        return self.fc(self.relu(summary))

In [26]:

def train_model_es(model, X_train, y_train, X_val, y_val,
                   epochs=500, batch_size=5, lr=0.005,
                   patience=60, min_delta=1e-4):
    """Train ``model`` with Adam and MSE loss, using early stopping on validation loss.

    Args:
        model: ``nn.Module`` mapping a batch of inputs to predictions.
        X_train, y_train: training tensors, indexed along dimension 0.
        X_val, y_val: validation tensors, evaluated as a single batch each epoch.
        epochs: maximum number of epochs.
        batch_size: mini-batch size (the last batch of an epoch may be smaller).
        lr: learning rate passed to Adam.
        patience: number of consecutive epochs without improvement after which
            training stops.
        min_delta: minimum decrease of the validation loss that counts as an
            improvement.

    Returns:
        dict with the keys ``"train_loss"`` (mean mini-batch loss per epoch) and
        ``"val_loss"`` (validation loss per epoch), one entry per epoch run.

    The model is trained in place and, when at least one epoch ran, ends up
    holding the weights of the epoch with the best validation loss.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    history = {"train_loss": [], "val_loss": []}

    n_samples = X_train.shape[0]
    best_val_loss = float('inf')
    patience_counter = 0
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        perm = torch.randperm(n_samples)   # reshuffle the samples every epoch
        batch_losses = []

        for i in range(0, n_samples, batch_size):
            idx = perm[i:i+batch_size]
            xb, yb = X_train[idx], y_train[idx]

            optimizer.zero_grad()
            outputs = model(xb)
            loss = criterion(outputs, yb)
            loss.backward()
            optimizer.step()
            batch_losses.append(loss.item())

        # Validation
        model.eval()
        with torch.no_grad():
            val_pred = model(X_val)
            val_loss = criterion(val_pred, y_val).item()

        train_loss_mean = np.mean(batch_losses)
        history["train_loss"].append(train_loss_mean)
        history["val_loss"].append(val_loss)

        # Early stopping
        if val_loss + min_delta < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # state_dict() returns references to the live parameter tensors,
            # which the optimizer keeps updating in place. Clone them so the
            # snapshot really freezes the best weights.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f"Early stopping en epoch {epoch+1}")
            break

        if (epoch+1) % 50 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss_mean:.4f}, Val Loss: {val_loss:.4f}")

    # Restore the best model seen during training
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    return history

In [27]:

# Build the default (unidirectional) LSTM model and train it with early stopping
model1 = LSTMModel()
hist1 = train_model_es(model1, X_train, y_train, X_val, y_val)


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x5 and 64x1)

In [None]:
# Training curve: training vs. validation loss per epoch
for history_key, curve_label in (("train_loss", "train"), ("val_loss", "valid")):
    plt.plot(hist1[history_key], label=curve_label)
plt.legend()
plt.title("LSTM ")
plt.show()

In [None]:

# Trial: evaluate the trained model on a sequence outside the 1..45 training range
test_values = [50, 51, 52]
x_test = torch.tensor(test_values, dtype=torch.float32).reshape(1, 3, 1)   # (batch, seq_len, n_features)
y_test = torch.tensor([[sum(test_values)]], dtype=torch.float32)

# Inference only, so gradient tracking is switched off
with torch.no_grad():
    y_hat1 = model1(x_test).item()

print("Modelo Simple y_test:", y_test.item(), "y_hat:", y_hat1)

### 3 - Bidirectional RNN (BRNN)

In [None]:
# Same architecture as before, but the LSTM layer is made bidirectional
# through the `bidirectional=True` constructor flag.
#
# A single-layer bidirectional nn.LSTM holds twice as many parameters as its
# unidirectional counterpart (one set of weights per direction); this can be
# checked with sum(p.numel() for p in model.lstm.parameters()).

# The keyword is `bidirectional`; LSTMModel has no `bidir` parameter, so
# passing it would raise a TypeError.
model2 = LSTMModel(bidirectional=True)
hist2 = train_model_es(model2, X_train, y_train, X_val, y_val)

with torch.no_grad():
    y_hat2 = model2(x_test).item()

print("y_test:", y_test.item())
print("y_hat (bidirectional):", y_hat2)

In [None]:
# The training helper defined in this notebook is `train_model_es`; there is
# no `train_model`. This call trains model2 with early stopping and replaces
# the history stored in hist2.
hist2 = train_model_es(model2, X_train, y_train, X_val, y_val)


In [None]:
# Training curve of the bidirectional model: training vs. validation loss
for history_key, curve_label in (("train_loss", "train"), ("val_loss", "valid")):
    plt.plot(hist2[history_key], label=curve_label)
plt.legend()
plt.title("LSTM Bidireccional")
plt.show()

In [None]:
# Trial: evaluate the bidirectional model on a sequence outside the 1..45 training range
test_values = [50, 51, 52]
x_test = torch.tensor(test_values, dtype=torch.float32).reshape(1, 3, 1)   # (batch, seq_len, n_features)
y_test = torch.tensor([[sum(test_values)]], dtype=torch.float32)

# Inference only, so gradient tracking is switched off
with torch.no_grad():
    y_hat2 = model2(x_test).item()

print("Modelo Bidireccional y_test:", y_test.item(), "y_hat:", y_hat2)

### 4 - Conclusión
Implementar un modelo bidireccional basado en RNN (en este caso LSTM) es muy sencillo. En este ejemplo no se explotó su potencialidad, pero queda como nota de cómo implementar una capa BRNN.