# Econotrend — LSTM Forecast (PyTorch)

Notebook base para cargar datos, preparar secuencias y entrenar un modelo LSTM para pronóstico del índice VIX simulado.
_Archivo de datos esperado_: `econotrend_vix_sim.csv` (columnas: `date`, `vix`).

In [None]:
# --- Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler

import warnings
warnings.filterwarnings("ignore")
print("PyTorch version:", torch.__version__)


In [None]:
# --- Configuration and utilities
DATA_PATH = "econotrend_vix_sim.csv"  # CSV file read by the data-loading cell
LOOKBACK = 10  # window length handed to build_sequences
BATCH_SIZE = 64  # batch size used for both DataLoaders
EPOCHS = 40  # number of iterations of the training loop
LR = 1e-3  # learning rate given to the Adam optimizer
# Use the GPU when PyTorch reports one is available, otherwise the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE  # bare expression; presumably so the notebook cell displays the chosen device


In [None]:
# --- Load data and basic EDA
# Read the CSV and make sure rows are in chronological order before any
# windowing is done on them.
df = pd.read_csv(DATA_PATH, parse_dates=["date"])
df = df.sort_values("date").reset_index(drop=True)

print(df.head())
print("\nResumen:")
print(df.describe())

plt.figure(figsize=(10, 4))
plt.plot(df["date"], df["vix"])
plt.title("Serie VIX simulada")
plt.xlabel("Fecha")
plt.ylabel("VIX")
plt.tight_layout()
plt.show()

# Normalisation to [0, 1].
# The scaler is fitted ONLY on the rows that end up in the training windows so
# that the min/max of the test period do not leak into training. The 0.8 ratio
# must match the chronological 80/20 split applied to the sequences: the
# training windows and their targets cover rows [0, split_idx + LOOKBACK).
# Test values may therefore fall slightly outside [0, 1], which is expected.
n_train_rows = int((len(df) - LOOKBACK) * 0.8) + LOOKBACK
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(df[["vix"]].iloc[:n_train_rows])
vix_scaled = scaler.transform(df[["vix"]]).astype(np.float32)


In [None]:
# --- Window construction (lookback=10) -> prediction of the next step
def build_sequences(series, lookback=10):
    """Slice a series into overlapping input windows and next-step targets.

    Window ``i`` is ``series[i:i + lookback]`` and its target is
    ``series[i + lookback]``.

    Args:
        series: Array-like indexed along its first axis (time). Any trailing
            axes (e.g. a feature axis of size 1) are preserved.
        lookback: Number of consecutive steps in each input window; must be
            at least 1.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(N, lookback, *trailing)``
        and ``y`` has shape ``(N, *trailing)``, with
        ``N = max(len(series) - lookback, 0)``.

    Raises:
        ValueError: If ``lookback`` is smaller than 1.
    """
    if lookback < 1:
        # A zero/negative lookback would silently wrap around via negative
        # indexing and produce meaningless windows.
        raise ValueError(f"lookback must be >= 1, got {lookback}")

    series = np.asarray(series)
    n_windows = len(series) - lookback
    trailing = series.shape[1:]

    if n_windows <= 0:
        # Not enough data for a single window: return empty arrays that still
        # carry the expected rank so downstream indexing does not break.
        empty_X = np.empty((0, lookback) + trailing, dtype=series.dtype)
        empty_y = np.empty((0,) + trailing, dtype=series.dtype)
        return empty_X, empty_y

    X = np.stack([series[start:start + lookback] for start in range(n_windows)])
    # Targets are simply the series shifted by `lookback`; copy so the result
    # does not alias the caller's array.
    y = series[lookback:].copy()
    return X, y

# Turn the scaled series into (window, next value) pairs.
X, y = build_sequences(vix_scaled, lookback=LOOKBACK)
print("Shapes:", X.shape, y.shape)

# 80/20 split that keeps chronological order: the earliest 80% of the windows
# are used for training and the remainder for testing.
split_idx = int(0.8 * len(X))
X_train, y_train = X[:split_idx], y[:split_idx]
X_test, y_test = X[split_idx:], y[split_idx:]

class SeqDataset(Dataset):
    """Dataset pairing input windows with their targets as float32 tensors.

    Both ``X`` and ``y`` must be NumPy arrays (they are converted with
    ``torch.from_numpy``); items are indexed along the first axis.
    """

    def __init__(self, X, y):
        inputs = torch.from_numpy(X)
        targets = torch.from_numpy(y)
        self.X = inputs.to(torch.float32)
        self.y = targets.to(torch.float32)

    def __len__(self):
        # Number of samples is the size of the first axis of the inputs.
        return len(self.X)

    def __getitem__(self, idx):
        sample, target = self.X[idx], self.y[idx]
        return sample, target

# Wrap both splits in DataLoaders; neither is shuffled, so batches follow the
# chronological order of the windows.
train_ds = SeqDataset(X_train, y_train)
test_ds = SeqDataset(X_test, y_test)
loader_kwargs = dict(batch_size=BATCH_SIZE, shuffle=False)
train_loader = DataLoader(train_ds, **loader_kwargs)
test_loader = DataLoader(test_ds, **loader_kwargs)


In [None]:
# --- LSTM model definition
class VIXLSTM(nn.Module):
    """LSTM followed by a linear head that emits one value per input sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied between stacked LSTM layers.
            Ignored when ``num_layers`` is 1, because ``nn.LSTM`` only applies
            dropout between layers and warns otherwise.
    """

    def __init__(self, input_size=1, hidden_size=64, num_layers=2, dropout=0.1):
        super().__init__()
        # nn.LSTM applies dropout after every layer except the last one, so a
        # non-zero value with a single layer has no effect and only triggers a
        # warning; pass 0.0 in that case.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the prediction for each sequence in a batch-first input.

        ``x`` is expected as ``(batch, seq_len, input_size)`` since the LSTM is
        built with ``batch_first=True``; the result has shape ``(batch, 1)``.
        """
        out, _ = self.lstm(x)
        last_step = out[:, -1, :]  # hidden output at the final time step
        return self.fc(last_step)

# Instantiate the network with its default hyper-parameters and move it to the
# selected device (Module.to moves the parameters in place).
model = VIXLSTM()
model.to(DEVICE)
# Mean squared error as training objective, optimised with Adam.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LR)


In [None]:
# --- Training
# One entry per epoch: the sample-weighted mean of the batch MSE values.
train_losses = []

for epoch in range(1, EPOCHS + 1):
    model.train()
    weighted_loss_sum = 0.0
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(DEVICE), batch_y.to(DEVICE)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_x), batch_y)
        batch_loss.backward()
        optimizer.step()

        # criterion returns a per-batch mean; weight it by the batch size so
        # a smaller final batch does not distort the epoch average.
        weighted_loss_sum += batch_loss.item() * batch_x.size(0)

    epoch_loss = weighted_loss_sum / len(train_loader.dataset)
    train_losses.append(epoch_loss)

    # Report on the first epoch and then every fifth one.
    if epoch == 1 or epoch % 5 == 0:
        print(f"Epoch {epoch:03d} | Train MSE: {epoch_loss:.6f}")

# Loss curve
fig, ax = plt.subplots(figsize=(7, 4))
ax.plot(train_losses)
ax.set_title("Curva de entrenamiento (MSE)")
ax.set_xlabel("Época")
ax.set_ylabel("MSE")
fig.tight_layout()
plt.show()


In [None]:
# --- Test-set evaluation + persistence baseline
model.eval()
with torch.no_grad():
    X_test_t = torch.from_numpy(X_test).float().to(DEVICE)
    preds_scaled = model(X_test_t).cpu().numpy()

# Undo the scaling so metrics are expressed in original VIX units.
y_test_inv = scaler.inverse_transform(y_test)
preds_inv = scaler.inverse_transform(preds_scaled)

mae = mean_absolute_error(y_test_inv, preds_inv)
# RMSE is computed as sqrt(MSE) instead of `squared=False`: that keyword was
# deprecated in scikit-learn 1.4 and removed in 1.6, where it raises TypeError.
rmse = float(np.sqrt(mean_squared_error(y_test_inv, preds_inv)))
r2 = r2_score(y_test_inv, preds_inv)

# Persistence baseline: y_hat = last value of the lookback window.
persist_scaled = X_test[:, -1, :]  # (N, 1)
persist_inv = scaler.inverse_transform(persist_scaled)

mae_p = mean_absolute_error(y_test_inv, persist_inv)
rmse_p = float(np.sqrt(mean_squared_error(y_test_inv, persist_inv)))
r2_p = r2_score(y_test_inv, persist_inv)

print(f"MAE (LSTM): {mae:.4f} | RMSE (LSTM): {rmse:.4f} | R2 (LSTM): {r2:.4f}")
print(f"MAE (Persistencia): {mae_p:.4f} | RMSE (Persistencia): {rmse_p:.4f} | R2 (Persistencia): {r2_p:.4f}")

# Plot: actual vs predicted on the test set.
# The target of window i is row i + LOOKBACK of df, so the first test target
# sits at row split_idx + LOOKBACK. (An extra +1 would shift every date by one
# day and make the date slice one element shorter than the plotted values.)
test_start = LOOKBACK + split_idx
test_dates = df["date"].iloc[test_start : test_start + len(y_test_inv)]

plt.figure(figsize=(10, 4))
plt.plot(test_dates, y_test_inv, label="Real")
plt.plot(test_dates, preds_inv, label="Predicho (LSTM)")
plt.title("Comparación real vs predicho (Test)")
plt.xlabel("Fecha")
plt.ylabel("VIX")
plt.legend()
plt.tight_layout()
plt.show()


## Reflexión breve
- La serie simulada presenta **tendencia suave** y **ruido estacional** (semanal y mensual), además de choques aleatorios.
- El LSTM puede capturar parte de las dependencias temporales; sin embargo, en datos financieros reales con alto ruido, el poder predictivo puede acercarse al de un **random walk**. Se recomienda comparar siempre con un baseline fuerte (persistencia) y analizar si las mejoras son **estables** y **estadísticamente significativas**.


## Extras (optativo)
- Probar una **GRU** o **LSTM bidireccional** (cambiar la clase del modelo).
- Añadir **regresores externos** (p. ej., rendimiento del S&P 500 simulado).
- Guardar y recargar pesos con `torch.save(model.state_dict(), "vix_lstm.pt")` y `model.load_state_dict(torch.load("vix_lstm.pt", map_location=DEVICE))` (`map_location` permite cargar en CPU pesos que se guardaron desde GPU).
- Implementar un **pronóstico multi-step** (roll-forward durante 5 días usando la ventana más reciente + predicciones previas).
