In [7]:
import pandas as pd

# Path to the raw dataset
file_path = "C:/archive/jena_climate_2009_2016.csv"

# Load the dataset
df = pd.read_csv(file_path)

# Show the first rows
print("Primeras filas del dataset:")
print(df.head())

# General information (columns, dtypes, non-null counts, memory usage)
print("\nInformación general:")
# DataFrame.info() writes its report directly to stdout and returns None,
# so it must not be wrapped in print() (that would emit a stray "None").
df.info()

# Descriptive statistics (transposed: one row per column)
print("\nEstadísticas descriptivas:")
print(df.describe().T)

# Column names
print("\nColumnas del dataset:")
print(df.columns)

# Count of null values per column
print("\nValores nulos por columna:")
print(df.isnull().sum())


Primeras filas del dataset:
             Date Time  p (mbar)  T (degC)  Tpot (K)  Tdew (degC)  rh (%)  \
0  01.01.2009 00:10:00    996.52     -8.02    265.40        -8.90    93.3   
1  01.01.2009 00:20:00    996.57     -8.41    265.01        -9.28    93.4   
2  01.01.2009 00:30:00    996.53     -8.51    264.91        -9.31    93.9   
3  01.01.2009 00:40:00    996.51     -8.31    265.12        -9.07    94.2   
4  01.01.2009 00:50:00    996.51     -8.27    265.15        -9.04    94.1   

   VPmax (mbar)  VPact (mbar)  VPdef (mbar)  sh (g/kg)  H2OC (mmol/mol)  \
0          3.33          3.11          0.22       1.94             3.12   
1          3.23          3.02          0.21       1.89             3.03   
2          3.21          3.01          0.20       1.88             3.02   
3          3.26          3.07          0.19       1.92             3.08   
4          3.27          3.08          0.19       1.92             3.09   

   rho (g/m**3)  wv (m/s)  max. wv (m/s)  wd (deg)  
0    

In [9]:
import pandas as pd
import numpy as np

# Load the original dataset
file_path = "C:/archive/jena_climate_2009_2016.csv"
df = pd.read_csv(file_path)

# Parse "Date Time" into real datetimes and promote it to the index
df["Date Time"] = pd.to_datetime(df["Date Time"], format="%d.%m.%Y %H:%M:%S")
df = df.set_index("Date Time")

# In both wind-speed columns, turn the erroneous -9999.0 readings into NaN
# and then fill the resulting gaps by interpolation.
for wind_col in ("wv (m/s)", "max. wv (m/s)"):
    df[wind_col] = df[wind_col].replace(-9999.0, np.nan).interpolate()

# Check that no missing values remain
print("Valores faltantes después de limpiar:")
missing_per_column = df.isna().sum()
print(missing_per_column)

# Save the cleaned dataset
output_path = "C:/archive/jena_climate_clean.csv"
df.to_csv(output_path)

print(f"\n Dataset limpio guardado en: {output_path}")


Valores faltantes después de limpiar:
p (mbar)           0
T (degC)           0
Tpot (K)           0
Tdew (degC)        0
rh (%)             0
VPmax (mbar)       0
VPact (mbar)       0
VPdef (mbar)       0
sh (g/kg)          0
H2OC (mmol/mol)    0
rho (g/m**3)       0
wv (m/s)           0
max. wv (m/s)      0
wd (deg)           0
dtype: int64

 Dataset limpio guardado en: C:/archive/jena_climate_clean.csv


In [11]:
import torch
import torch.nn as nn

class LSTMEncoder(nn.Module):
    """
    LSTM encoder for univariate or multivariate sequences.

    Input shape: (batch, seq_len, n_features)
    Output: the tuple (h_n, c_n), meant to initialise the decoder.
    """
    def __init__(self, n_features: int = 1, hidden_size: int = 128, num_layers: int = 2, dropout: float = 0.2):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Dropout is only passed through when the LSTM is stacked (more than one layer).
        inter_layer_dropout = 0.0 if num_layers <= 1 else dropout
        self.lstm = nn.LSTM(n_features,
                            hidden_size,
                            num_layers,
                            batch_first=True,
                            dropout=inter_layer_dropout)

    def forward(self, x):
        """Encode x of shape (batch, seq_len, features) and return (h_n, c_n)."""
        # The per-step outputs are discarded; only the final state is returned.
        _, final_state = self.lstm(x)
        h_n, c_n = final_state
        return h_n, c_n


class LSTMDecoder(nn.Module):
    """
    LSTM decoder that generates a sequence one step at a time.

    Each step takes the previous prediction as input (or the real value when
    teacher forcing is used).
    """
    def __init__(self, out_features: int = 1, hidden_size: int = 128, num_layers: int = 2, dropout: float = 0.2):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Dropout is only passed through when the LSTM is stacked (more than one layer).
        inter_layer_dropout = 0.0 if num_layers <= 1 else dropout
        self.lstm = nn.LSTM(out_features,
                            hidden_size,
                            num_layers,
                            batch_first=True,
                            dropout=inter_layer_dropout)
        self.fc = nn.Linear(hidden_size, out_features)

    def forward(self, y_prev, hidden):
        """
        Run a single decoding step.

        y_prev: (batch, 1, out_features)  -> input for the current step
        hidden: tuple (h_n, c_n) coming from the encoder or from the previous step
        returns: y_pred (batch, 1, out_features) and the updated hidden state
        """
        step_features, next_hidden = self.lstm(y_prev, hidden)  # (batch, 1, hidden_size)
        return self.fc(step_features), next_hidden               # (batch, 1, out_features)


class Seq2SeqLSTM(nn.Module):
    """
    Complete Seq2Seq LSTM (encoder + decoder).

    - input_features: number of features in the input (1 for a univariate series)
    - output_features: number of features in the output (1 for a univariate forecast)
    - hidden_size, num_layers, dropout: LSTM architecture
    - init_scale: half-width of the uniform range used for the final linear layer

    Forward:
      src: (batch, src_len, input_features)
      target_len: number of steps to predict (int, >= 1)
      teacher_forcing_ratio: between 0 and 1; probability, per step, of feeding
        the real target instead of the prediction. Only applied in training
        mode and when target_seq is given.
      target_seq (optional): (batch, target_len, output_features) for teacher forcing.
    Returns:
      outputs: (batch, target_len, output_features)
    """
    def __init__(self,
                 input_features: int = 1,
                 output_features: int = 1,
                 hidden_size: int = 128,
                 num_layers: int = 2,
                 dropout: float = 0.2,
                 init_scale: float = 0.1):
        super().__init__()
        self.encoder = LSTMEncoder(n_features=input_features,
                                   hidden_size=hidden_size,
                                   num_layers=num_layers,
                                   dropout=dropout)
        self.decoder = LSTMDecoder(out_features=output_features,
                                   hidden_size=hidden_size,
                                   num_layers=num_layers,
                                   dropout=dropout)
        # simple, controllable initialisation
        self._init_weights(init_scale)

    def _init_weights(self, scale: float = 0.1):
        """Xavier-initialise every matrix, zero every bias, then shrink the output layer."""
        for _, p in self.named_parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p, gain=1.0)
            else:
                # biases start at zero
                nn.init.constant_(p, 0.0)
        # The decoder always owns an `fc` layer, so initialise it directly:
        # a genuine failure here should surface instead of being swallowed.
        fc = self.decoder.fc
        nn.init.uniform_(fc.weight, -scale, scale)
        if fc.bias is not None:
            nn.init.constant_(fc.bias, 0.0)

    def forward(self, src, target_len: int, teacher_forcing_ratio: float = 0.0, target_seq: torch.Tensor = None):
        """
        src: (batch, src_len, input_features)
        target_len: int (forecast horizon, must be >= 1)
        teacher_forcing_ratio: probability of using the real target as decoder input
        target_seq: (batch, target_len, output_features) - optional, for teacher forcing

        Raises ValueError if target_len < 1, if src has fewer features than the
        decoder emits, or if target_seq is too short for the requested horizon.
        """
        if target_len < 1:
            raise ValueError(f"target_len must be >= 1, got {target_len}")

        out_features = self.decoder.fc.out_features
        if src.size(-1) < out_features:
            raise ValueError(
                f"src has {src.size(-1)} features but the decoder needs at least "
                f"{out_features} to seed its first input"
            )

        # Teacher forcing is a training-only device: at evaluation time feeding
        # ground truth back into the decoder would leak the answer.
        use_teacher_forcing = self.training and target_seq is not None
        if use_teacher_forcing and target_seq.size(1) < target_len:
            raise ValueError(
                f"target_seq has {target_seq.size(1)} steps but target_len is {target_len}"
            )

        # encoder: each state tensor is (num_layers, batch, hidden)
        hidden = self.encoder(src)

        # First decoder input: the last observed step of src, truncated to the
        # first `out_features` channels (assumes those channels are the targets).
        y_prev = src[:, -1:, :out_features]  # (batch, 1, out_features)
        outputs = []

        for t in range(target_len):
            y_pred, hidden = self.decoder(y_prev, hidden)  # (batch, 1, out_features)
            outputs.append(y_pred)
            if use_teacher_forcing and torch.rand(1).item() < teacher_forcing_ratio:
                # feed the real value (teacher forcing)
                y_prev = target_seq[:, t:t+1, :]
            else:
                # feed the prediction back, detached so no gradient flows through the input path
                y_prev = y_pred.detach()

        return torch.cat(outputs, dim=1)  # (batch, target_len, out_features)


# Example usage:
if __name__ == "__main__":
    # Quick shape check: batch of 8, 30 input steps, 1 feature.
    model = Seq2SeqLSTM(
        input_features=1,
        output_features=1,
        hidden_size=64,
        num_layers=2,
        dropout=0.1,
    )
    x = torch.randn(8, 30, 1)
    out = model(
        x,
        target_len=30,
        teacher_forcing_ratio=0.5,
        target_seq=torch.randn(8, 30, 1),
    )
    print("Output shape:", out.shape)  # -> (8, 30, 1)


Output shape: torch.Size([8, 30, 1])


In [None]:
import os
import random
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm  

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler

# Reproducibility configuration
SEED = 42
def seed_everything(seed=SEED):
    """Seed Python, NumPy and PyTorch (CPU and CUDA) and set the cuDNN determinism flags."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

seed_everything()

# Modelo Seq2Seq LSTM
class Seq2SeqLSTM(nn.Module):
    """
    Seq2Seq forecaster: an LSTM encoder, an autoregressive LSTM decoder and a
    linear read-out layer.

    Args:
        input_features: number of features per input time step.
        output_features: number of features per predicted time step.
        hidden_size: hidden size shared by encoder and decoder.
        num_layers: number of stacked LSTM layers (shared by both).
        dropout: dropout between stacked layers; ignored when num_layers == 1.

    When input_features != output_features, a learned linear projection
    (`input_proj`) maps the last observed step to the decoder's input size.
    It is registered once here so that it is trained, saved with the
    state dict and deterministic across forward calls.
    """
    def __init__(self, input_features=1, output_features=1, hidden_size=128, num_layers=2, dropout=0.2):
        super().__init__()
        self.encoder = nn.LSTM(input_size=input_features,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               batch_first=True,
                               dropout=dropout if num_layers > 1 else 0.0)
        self.decoder = nn.LSTM(input_size=output_features,
                               hidden_size=hidden_size,
                               num_layers=num_layers,
                               batch_first=True,
                               dropout=dropout if num_layers > 1 else 0.0)
        self.fc = nn.Linear(hidden_size, output_features)
        # Created last (and only when needed) so the parameters above keep the
        # same registration order as before.
        if input_features != output_features:
            self.input_proj = nn.Linear(input_features, output_features)
        else:
            self.input_proj = None

        # Xavier for weight matrices, zeros for biases.
        for name, p in self.named_parameters():
            if "weight" in name and p.dim() > 1:
                nn.init.xavier_uniform_(p)
            elif "bias" in name:
                nn.init.constant_(p, 0.0)

    def forward(self, src, target_len, teacher_forcing_ratio=0.0, target_seq=None):
        """
        Predict `target_len` future steps from `src`.

        Args:
            src: tensor of shape (batch, src_len, input_features).
            target_len: number of steps to generate (must be >= 1).
            teacher_forcing_ratio: per-step probability of feeding the real
                target instead of the prediction; only applied in training
                mode and when target_seq is given.
            target_seq: optional (batch, target_len, output_features) targets.

        Returns:
            Tensor of shape (batch, target_len, output_features).

        Raises:
            ValueError: if target_len < 1.
        """
        if target_len < 1:
            raise ValueError(f"target_len must be >= 1, got {target_len}")

        # Encoder: keep only the final (hidden, cell) state.
        _, (hidden, cell) = self.encoder(src)

        # Decoder seed: the last observed step, projected when the feature
        # counts differ. The projection is a registered, trainable layer
        # rather than a fresh random one built on every call.
        last_obs = src[:, -1:, :]
        if self.input_proj is not None:
            y_prev = self.input_proj(last_obs)
        else:
            y_prev = last_obs

        outputs = []
        for t in range(target_len):
            out, (hidden, cell) = self.decoder(y_prev, (hidden, cell))
            y_pred = self.fc(out)
            outputs.append(y_pred)
            if self.training and target_seq is not None and torch.rand(1).item() < teacher_forcing_ratio:
                y_prev = target_seq[:, t:t+1, :]
            else:
                # Feed the prediction back, detached from the graph.
                y_prev = y_pred.detach()
        return torch.cat(outputs, dim=1)

# Training parameters
# -- data --
DATA_PATH = Path("C:/archive/jena_climate_clean.csv")
TARGET_COL = "T (degC)"

# -- window lengths, in days --
INPUT_DAYS = 7
OUTPUT_DAYS = 7

# -- model architecture --
HIDDEN_SIZE = 128
NUM_LAYERS = 2
DROPOUT = 0.2

# -- optimisation --
BATCH_SIZE = 64
LR = 1e-3
WEIGHT_DECAY = 1e-5
EPOCHS = 10
PATIENCE = 5
CLIP_GRAD = 1.0
NUM_WORKERS = 0

# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Usando dispositivo:", DEVICE)

# Load the cleaned CSV; the first column is parsed as the datetime index
df = pd.read_csv(DATA_PATH, index_col=0, parse_dates=True)
df = df.iloc[:100000]  # keep only the first 100000 rows

if TARGET_COL not in df.columns:
    raise ValueError(f"Columna objetivo '{TARGET_COL}' no encontrada.")

# Infer the sampling step and the number of steps per day
freq = pd.infer_freq(df.index)
step = None
if freq is not None:
    freq_tdelta = pd.tseries.frequencies.to_offset(freq)
    try:
        # pd.Timedelta(offset) replaces the deprecated `Tick.delta` attribute.
        step = pd.Timedelta(freq_tdelta)
    except (ValueError, TypeError):
        # Offsets without a fixed duration cannot be converted; fall back to
        # the observed spacing below.
        step = None
if step is None:
    # No usable regular frequency: take the most common gap between
    # consecutive timestamps.
    diffs = df.index.to_series().diff().dropna()
    step = diffs.mode().iloc[0]
delta_sec = step.total_seconds()
if delta_sec <= 0:
    # Guards the division below (e.g. an index dominated by duplicate timestamps).
    raise ValueError("No se pudo determinar un paso temporal positivo a partir del índice.")
steps_per_day = int(86400 / delta_sec)
print(f"Pasos por día: {steps_per_day}")

INPUT_LEN = INPUT_DAYS * steps_per_day
OUTPUT_LEN = OUTPUT_DAYS * steps_per_day

# 5) Scaling
values = df[[TARGET_COL]].values.astype(np.float32)
scaler = MinMaxScaler()
# Fit the scaler on the leading 70% of rows only, so the min/max of the
# validation and test periods do not leak into training. Keep this fraction
# in sync with the training fraction used when the windows are split.
scaler_fit_rows = max(1, int(len(values) * 0.7))
scaler.fit(values[:scaler_fit_rows])
# Later rows may fall slightly outside [0, 1]; that is expected.
values_scaled = scaler.transform(values)

# 6) Crear secuencias
def create_sequences(data, input_len, output_len, stride=1):
    """
    Slice `data` into (input window, target window) pairs.

    Args:
        data: array-like indexed by time along axis 0.
        input_len: number of steps in each input window.
        output_len: number of steps in each target window, taken immediately
            after the input window.
        stride: distance between consecutive window starts (default 1, i.e.
            every possible window).

    Returns:
        (xs, ys): arrays of shape (n_windows, input_len, ...) and
        (n_windows, output_len, ...). When `data` is too short for a single
        window, both are empty but keep their trailing dimensions.

    Raises:
        ValueError: if a length is negative or stride < 1.
    """
    if input_len < 0 or output_len < 0:
        raise ValueError("input_len and output_len must be non-negative")
    if stride < 1:
        raise ValueError("stride must be >= 1")

    data = np.asarray(data)
    window = input_len + output_len
    starts = range(0, len(data) - window + 1, stride)

    if len(starts) == 0:
        # Preserve rank and dtype so downstream shape logic still works.
        tail = data.shape[1:]
        return (np.empty((0, input_len) + tail, dtype=data.dtype),
                np.empty((0, output_len) + tail, dtype=data.dtype))

    xs = np.stack([data[i:i + input_len] for i in starts])
    ys = np.stack([data[i + input_len:i + window] for i in starts])
    return xs, ys

X, Y = create_sequences(values_scaled, INPUT_LEN, OUTPUT_LEN)
print("Secuencias creadas:", X.shape, Y.shape)

# Chronological split by window index: 70% train, 15% validation, 15% test.
# NOTE(review): windows are built with stride 1, so windows near each
# boundary share time steps with the neighbouring split — confirm this
# overlap is acceptable for the evaluation.
n = len(X)
train_end = int(n*0.7)
val_end = int(n*0.85)

X_train, X_val, X_test = np.split(X, [train_end, val_end])
Y_train, Y_val, Y_test = np.split(Y, [train_end, val_end])

# Dataset y DataLoader
class TimeSeriesDataset(Dataset):
    """Pairs input windows X with target windows Y, both stored as float32 tensors."""

    def __init__(self, X, Y):
        # torch.tensor copies the data, so the dataset does not alias the source arrays.
        self.X = torch.tensor(X, dtype=torch.float32)
        self.Y = torch.tensor(Y, dtype=torch.float32)

    def __len__(self):
        """Number of (input, target) pairs."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the (input, target) pair at position idx."""
        return self.X[idx], self.Y[idx]

# Settings shared by all three loaders; only the training loader shuffles.
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)
train_loader = DataLoader(TimeSeriesDataset(X_train, Y_train), shuffle=True, **loader_kwargs)
val_loader = DataLoader(TimeSeriesDataset(X_val, Y_val), shuffle=False, **loader_kwargs)
test_loader = DataLoader(TimeSeriesDataset(X_test, Y_test), shuffle=False, **loader_kwargs)

# Model, loss, optimizer and learning-rate scheduler
model = Seq2SeqLSTM(
    input_features=1,
    output_features=1,
    hidden_size=HIDDEN_SIZE,
    num_layers=NUM_LAYERS,
    dropout=DROPOUT,
)
model = model.to(DEVICE)
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
# Halve the learning rate after 2 epochs without improvement in the monitored value.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=2, factor=0.5)

# State for the tqdm-driven training loop with early stopping
best_val_loss = float("inf")
epochs_no_improve = 0
best_path = Path("best_seq2seq_fast.pt")

for epoch in range(1, EPOCHS + 1):
    # ---- training pass ----
    model.train()
    train_loss = 0.0
    train_bar = tqdm(train_loader, desc=f"Epoch {epoch}/{EPOCHS} [Train]", leave=False)
    for Xb, Yb in train_bar:
        Xb = Xb.to(DEVICE)
        Yb = Yb.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(Xb, target_len=OUTPUT_LEN, teacher_forcing_ratio=0.5, target_seq=Yb)
        loss = criterion(outputs, Yb)
        loss.backward()
        # Clip the gradient norm before the optimizer step.
        nn.utils.clip_grad_norm_(model.parameters(), CLIP_GRAD)
        optimizer.step()
        # Weight the batch mean by batch size so the epoch figure is a per-sample mean.
        train_loss += loss.item() * Xb.size(0)
    train_loss = train_loss / len(train_loader.dataset)

    # ---- validation pass (no teacher forcing, no gradients) ----
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        val_bar = tqdm(val_loader, desc=f"Epoch {epoch}/{EPOCHS} [Val]", leave=False)
        for Xb, Yb in val_bar:
            Xb = Xb.to(DEVICE)
            Yb = Yb.to(DEVICE)
            preds = model(Xb, target_len=OUTPUT_LEN, teacher_forcing_ratio=0.0)
            loss = criterion(preds, Yb)
            val_loss += loss.item() * Xb.size(0)
    val_loss = val_loss / len(val_loader.dataset)

    scheduler.step(val_loss)
    print(f"Epoch {epoch:03d} | Train MSE: {train_loss:.6f} | Val MSE: {val_loss:.6f}")

    # ---- checkpointing / early stopping ----
    improved = val_loss < best_val_loss - 1e-8
    if improved:
        best_val_loss = val_loss
        epochs_no_improve = 0
        torch.save(model.state_dict(), best_path)
        continue

    epochs_no_improve += 1
    if epochs_no_improve >= PATIENCE:
        print("Early stopping activado.")
        break

print(f"Mejor modelo guardado en {best_path}")


Usando dispositivo: cpu
Pasos por día: 144
Secuencias creadas: (97985, 1008, 1) (97985, 1008, 1)


Epoch 1/10 [Train]:   5%|▍         | 52/1072 [03:00<57:43,  3.40s/it]  

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch

# 1) Restore the best checkpoint
best_state = torch.load("best_seq2seq_fast.pt", map_location=DEVICE)
model.load_state_dict(best_state)
model.eval()

# 2) Collect predictions and ground truth over the test set
preds_list, trues_list = [], []

with torch.no_grad():
    for Xb, Yb in test_loader:
        Xb = Xb.to(DEVICE)
        Yb = Yb.to(DEVICE)
        preds = model(Xb, target_len=OUTPUT_LEN, teacher_forcing_ratio=0.0)
        preds_list.append(preds.cpu().numpy())
        trues_list.append(Yb.cpu().numpy())

preds_arr = np.concatenate(preds_list, axis=0)  # (N_samples, OUTPUT_LEN, 1)
trues_arr = np.concatenate(trues_list, axis=0)

# 3) Undo the scaling (the scaler expects 2-D input, so flatten and restore the shape)
preds_flat = preds_arr.reshape(-1, 1)
trues_flat = trues_arr.reshape(-1, 1)

preds_rescaled = scaler.inverse_transform(preds_flat).reshape(preds_arr.shape)
trues_rescaled = scaler.inverse_transform(trues_flat).reshape(trues_arr.shape)

# 4) Final metrics, in the original units of the target
residuals = preds_rescaled - trues_rescaled
mae_final = np.mean(np.abs(residuals))
mse_final = np.mean(residuals**2)

print(f"TEST -> MAE: {mae_final:.4f} | MSE: {mse_final:.4f}")

# 5) Comparison plots

# First test sample: real vs predicted
idx = 0
plt.figure(figsize=(12,5))
plt.plot(trues_rescaled[idx, :, 0], label="Real", linewidth=2)
plt.plot(preds_rescaled[idx, :, 0], label="Predicho", linestyle="--")
plt.title(f"Predicción de {OUTPUT_DAYS} días - sample {idx}")
plt.xlabel("Paso temporal")
plt.ylabel(TARGET_COL)
plt.legend()
plt.show()

# 6) Overall comparison: mean over all test samples at each horizon step
mean_true = np.mean(trues_rescaled, axis=0)[:, 0]
mean_pred = np.mean(preds_rescaled, axis=0)[:, 0]
plt.figure(figsize=(12,5))
plt.plot(mean_true, label="Real promedio")
plt.plot(mean_pred, label="Predicho promedio", linestyle="--")
plt.title(f"Promedio sobre todo el test ({len(trues_rescaled)} samples)")
plt.xlabel("Paso temporal")
plt.ylabel(TARGET_COL)
plt.legend()
plt.show()
