In [16]:
import os, math, time
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# Seed the global RNGs so that weight initialisation and DataLoader shuffling
# are repeatable across "Restart & Run All".
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Load the Jena Climate dataset.
df = pd.read_csv("jena_climate_2009_2016.csv")
print("Dữ liệu gốc:", df.shape)
print(df.head())

# Use only a subset of the columns (temperature, pressure, wind speed, max wind speed).
# Temperature is kept first because the windowing code forecasts column 0.
cols = ["T (degC)", "p (mbar)", "wv (m/s)", "max. wv (m/s)"]
data = df[cols].values

# Normalisation.
# The scaler is fitted on the leading 70% of rows only (the same fraction that
# is used for the chronological training split below) and then applied to the
# whole series. Fitting on the full series would leak validation/test min/max
# statistics into the training inputs. Values outside the training range may
# therefore fall slightly outside [0, 1] in the later splits.
# NOTE(review): the wind columns may contain sentinel values (e.g. -9999) that
# would dominate min-max scaling — TODO confirm and clean before scaling.
n_train_rows = int(0.7 * len(data))
scaler = MinMaxScaler()
scaler.fit(data[:n_train_rows])
data_scaled = scaler.transform(data)

def create_sequences(dataset, input_len=72, target_len=24, target_col=0):
    """Slice a 2-D time series into (input window, future target) pairs.

    Window ``i`` uses rows ``[i, i + input_len)`` of every column as input and
    rows ``[i + input_len, i + input_len + target_len)`` of ``target_col`` as
    the target. Lengths are counted in rows (time steps) of ``dataset``.

    Args:
        dataset: array-like of shape (n_rows, n_features).
        input_len: number of consecutive rows in each input window.
        target_len: number of consecutive rows to forecast after the window.
        target_col: column index of the variable to forecast (default 0).

    Returns:
        Tuple ``(X, y)`` with ``X`` of shape (n_windows, input_len, n_features)
        and ``y`` of shape (n_windows, target_len), where
        ``n_windows = n_rows - input_len - target_len + 1``. When the series is
        too short for a single window, correctly shaped empty arrays are
        returned.
    """
    dataset = np.asarray(dataset)

    # +1 so that the final window, whose target ends on the last row, is kept.
    n_windows = len(dataset) - input_len - target_len + 1
    if n_windows <= 0:
        # Keep the rank stable so that callers can still read X.shape[2].
        empty_X = np.empty((0, input_len) + dataset.shape[1:], dtype=dataset.dtype)
        empty_y = np.empty((0, target_len), dtype=dataset.dtype)
        return empty_X, empty_y

    X = np.stack([dataset[i:i + input_len] for i in range(n_windows)])
    y = np.stack([
        dataset[i + input_len:i + input_len + target_len, target_col]
        for i in range(n_windows)
    ])
    return X, y

# Build the supervised windows: 72 input steps followed by 24 target steps.
X, y = create_sequences(data_scaled, input_len=72, target_len=24)
print("Tập mẫu:", X.shape, y.shape)

# Chronological train / val / test split: 70% / 15% / remainder.
train_size = int(0.7 * len(X))
val_size = int(0.15 * len(X))
val_end = train_size + val_size

X_train, X_val, X_test = X[:train_size], X[train_size:val_end], X[val_end:]
y_train, y_val, y_test = y[:train_size], y[train_size:val_end], y[val_end:]

print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

# DataLoaders: only the training loader shuffles its samples.
batch_size = 16


def _to_loader(features, targets, shuffle):
    """Wrap a pair of NumPy arrays as float32 tensors inside a DataLoader."""
    tensors = TensorDataset(torch.tensor(features).float(),
                            torch.tensor(targets).float())
    return DataLoader(tensors, batch_size=batch_size, shuffle=shuffle)


train_loader = _to_loader(X_train, y_train, shuffle=True)
val_loader = _to_loader(X_val, y_val, shuffle=False)
test_loader = _to_loader(X_test, y_test, shuffle=False)


Device: cpu
Dữ liệu gốc: (420551, 15)
             Date Time  p (mbar)  T (degC)  Tpot (K)  Tdew (degC)  rh (%)  \
0  01.01.2009 00:10:00    996.52     -8.02    265.40        -8.90    93.3   
1  01.01.2009 00:20:00    996.57     -8.41    265.01        -9.28    93.4   
2  01.01.2009 00:30:00    996.53     -8.51    264.91        -9.31    93.9   
3  01.01.2009 00:40:00    996.51     -8.31    265.12        -9.07    94.2   
4  01.01.2009 00:50:00    996.51     -8.27    265.15        -9.04    94.1   

   VPmax (mbar)  VPact (mbar)  VPdef (mbar)  sh (g/kg)  H2OC (mmol/mol)  \
0          3.33          3.11          0.22       1.94             3.12   
1          3.23          3.02          0.21       1.89             3.03   
2          3.21          3.01          0.20       1.88             3.02   
3          3.26          3.07          0.19       1.92             3.08   
4          3.27          3.08          0.19       1.92             3.09   

   rho (g/m**3)  wv (m/s)  max. wv (m/s)  wd (de

In [17]:
import torch.optim as optim

def train_and_eval(model, train_loader, val_loader, device, epochs=10, lr=1e-3, target_len=24, model_name='model', teacher_forcing=0.0):
    """Train ``model`` with Adam + MSE and evaluate it after every epoch.

    The model is called as ``model(x, target_len)``. If its ``forward`` method
    declares a ``teacher_forcing_ratio`` parameter, training calls additionally
    pass ``teacher_forcing_ratio=teacher_forcing`` and ``y_true=y_batch``;
    validation calls never do. Outputs with three dimensions have their last
    axis squeezed before being compared with the targets.

    Args:
        model: module to train; moved to ``device``.
        train_loader: iterable of ``(x_batch, y_batch)`` tensors for training.
        val_loader: iterable of ``(x_batch, y_batch)`` tensors for validation.
        device: torch device on which batches and the model are placed.
        epochs: number of passes over ``train_loader``.
        lr: Adam learning rate.
        target_len: forecast horizon forwarded to the model.
        model_name: label used in log lines and checkpoint file names.
        teacher_forcing: ratio forwarded to models that support it.

    Returns:
        dict with per-epoch lists ``'train_loss'`` (mean of the batch losses),
        ``'val_mae'`` and ``'val_rmse'``, in the units of the targets supplied
        by the loaders. An epoch with no training batches records NaN for the
        loss; an epoch with no validation batches records NaN for both metrics.

    Side effects:
        Creates ``checkpoints/`` and writes
        ``checkpoints/{model_name}_epoch{k}.pt`` after every epoch.
    """
    import inspect  # local import keeps this cell self-contained

    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss()
    history = {'train_loss': [], 'val_mae': [], 'val_rmse': []}
    os.makedirs('checkpoints', exist_ok=True)

    # Decide once whether forward() accepts teacher forcing. Inspecting the
    # signature looks at declared parameters only; co_varnames would also
    # match local variables and fails on callables without __code__.
    try:
        forward_params = inspect.signature(model.forward).parameters
    except (TypeError, ValueError):
        forward_params = {}
    supports_teacher_forcing = "teacher_forcing_ratio" in forward_params

    for epoch in range(epochs):
        t0 = time.time()

        # ---- Training ----
        model.train()
        running_loss = 0.0
        n_batches = 0
        for x_batch, y_batch in train_loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            optimizer.zero_grad()
            if supports_teacher_forcing:
                outputs = model(x_batch, target_len,
                                teacher_forcing_ratio=teacher_forcing, y_true=y_batch)
            else:
                outputs = model(x_batch, target_len)

            if outputs.dim() == 3:
                outputs = outputs.squeeze(-1)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            n_batches += 1

        # Count batches explicitly: avoids dividing by zero on an empty loader
        # and does not require the loader to implement __len__.
        train_loss = running_loss / n_batches if n_batches else float('nan')
        history['train_loss'].append(train_loss)

        # ---- Validation ----
        model.eval()
        preds, trues = [], []
        with torch.no_grad():
            for x_val, y_val in val_loader:
                x_val = x_val.to(device)
                y_val = y_val.to(device)
                out = model(x_val, target_len)
                if out.dim() == 3:
                    out = out.squeeze(-1)
                preds.append(out.cpu().numpy())
                trues.append(y_val.cpu().numpy())

        if preds:
            preds = np.concatenate(preds)
            trues = np.concatenate(trues)
            mae = mean_absolute_error(trues, preds)
            rmse = math.sqrt(mean_squared_error(trues, preds))
        else:
            # np.concatenate([]) raises; report NaN for an empty validation set.
            mae = rmse = float('nan')
        history['val_mae'].append(mae)
        history['val_rmse'].append(rmse)

        print(f"[{model_name}] Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f} | MAE: {mae:.4f} | RMSE: {rmse:.4f} | Time: {time.time()-t0:.1f}s")
        torch.save(model.state_dict(), f"checkpoints/{model_name}_epoch{epoch+1}.pt")

    return history


In [None]:
from seq2seq_attention import Seq2SeqAttn
from transformer import TimeSeriesTransformer

# Problem dimensions are read from the prepared training arrays.
input_dim = X_train.shape[2]
target_len = y_train.shape[1]

# Settings shared by several runs below.
lstm_kwargs = dict(input_dim=input_dim, hidden_dim=32, output_dim=1, num_layers=1)
fit_kwargs = dict(epochs=5, lr=1e-3, target_len=target_len)

# 1) Baseline LSTM (labelled "no attention"), trained without teacher forcing.
# NOTE(review): this model is built from the same Seq2SeqAttn class with the
# same arguments as the attention model below; only teacher_forcing differs.
# Confirm whether an attention-free variant was intended.
lstm_base = Seq2SeqAttn(**lstm_kwargs)
hist_lstm = train_and_eval(lstm_base, train_loader, val_loader, device,
                           model_name='LSTM_BASE', teacher_forcing=0.0, **fit_kwargs)

# 2) LSTM + attention, trained with a teacher-forcing ratio of 0.2.
lstm_attn = Seq2SeqAttn(**lstm_kwargs)
hist_attn = train_and_eval(lstm_attn, train_loader, val_loader, device,
                           model_name='LSTM_ATTN', teacher_forcing=0.2, **fit_kwargs)

# 3) Transformer encoder-decoder.
trans_kwargs = dict(
    input_dim=input_dim,
    d_model=64,
    nhead=4,
    num_encoder_layers=2,
    num_decoder_layers=2,
    dim_feedforward=128,
    dropout=0.1,
    target_len=target_len,
    output_dim=1,
)
trans_model = TimeSeriesTransformer(**trans_kwargs)
hist_trans = train_and_eval(trans_model, train_loader, val_loader, device,
                            model_name='TRANSFORMER', **fit_kwargs)
