In [16]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import r2_score, mean_absolute_percentage_error, mean_squared_error
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV

from models.tft import TemporalFusionTransformer
from models.tcn import TemporalConvolutionalNetwork
from models.hfm import HybridForecastingModel
from models.dcf import DistributionalConditionalForecast
from models.pft import ProbabilisticForecastTransformer
from models.cqv import ConditionalQuantileVAE

from loader import train_loader, test_loader, val_loader, scaler

In [2]:
# Run on the GPU when PyTorch reports one as available; otherwise use the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
def _restore_for_inference(model, checkpoint_path, target_device):
    """Prepare one base model for inference from a saved checkpoint.

    Moves ``model`` to ``target_device``, loads the checkpoint stored at
    ``checkpoint_path``, copies the weights found under its
    ``"model_state_dict"`` key into the model and switches the model to eval
    mode.

    Args:
        model: freshly constructed model exposing ``to``, ``load_state_dict``
            and ``eval``.
        checkpoint_path: path of the checkpoint file to read.
        target_device: device the model is moved to and the checkpoint is
            mapped onto.

    Returns:
        The same ``model`` object, ready for inference.
    """
    model.to(target_device)
    # NOTE(review): torch.load unpickles the file, so only checkpoints from a
    # trusted source should be loaded here. If the checkpoints contain nothing
    # but tensors and primitives, passing weights_only=True is safer - confirm
    # against how the checkpoints were saved before switching.
    checkpoint = torch.load(checkpoint_path, map_location=target_device)
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()
    return model


# Each model is built with the hyper-parameters below and immediately restored
# from its checkpoint; the constructor arguments must match the ones used when
# the checkpoint was written, otherwise load_state_dict will reject the weights.
tft = _restore_for_inference(
    TemporalFusionTransformer(
        seq_input_dim=2,
        cal_input_dim=18,
        d_model=64,
        nhead=4,
        num_layers=2,
        output_dim=2,
        dropout=0.1,
    ),
    "checkpoints/TFT.pth",
    device,
)

tcn = _restore_for_inference(
    TemporalConvolutionalNetwork(
        seq_input_dim=2,
        cal_input_dim=18,
        d_model=64,
        nhead=4,
        num_layers=2,
        tcn_channels=[64, 64, 64],
        kernel_size=3,
        dropout=0.1,
        output_dim=2,
    ),
    "checkpoints/TCN.pth",
    device,
)

hfm = _restore_for_inference(
    HybridForecastingModel(
        window_size=30,
        num_series=2,
        static_dim=18,
        d_model=64,
        nhead=4,
        num_layers_transformer=2,
        n_blocks_nhits=3,
        nhits_hidden_dim=128,
        nhits_n_layers=3,
        deepar_num_layers=2,
        dropout=0.1,
        output_dim=2,
    ),
    "checkpoints/HFM.pth",
    device,
)

dcf = _restore_for_inference(
    DistributionalConditionalForecast(
        window_size=30,
        num_series=2,
        static_dim=18,
        latent_dim=32,
        hidden_dim=128,
        dropout=0.1,
        output_dim=2,
    ),
    "checkpoints/DCF.pth",
    device,
)

pft = _restore_for_inference(
    ProbabilisticForecastTransformer(
        window_size=30,
        num_series=2,
        static_dim=18,
        latent_dim=32,
        d_model=64,
        nhead=4,
        num_layers=2,
        hidden_dim=128,
        dropout=0.1,
        output_dim=2,
    ),
    "checkpoints/PFT.pth",
    device,
)

cqv = _restore_for_inference(
    ConditionalQuantileVAE(
        window_size=30,
        num_series=2,
        static_dim=18,
        latent_dim=32,
        hidden_dim=128,
        dropout=0.1,
        output_dim=2,
        num_quantiles=3,
    ),
    "checkpoints/CQV.pth",
    device,
)

print("Models loaded successfully!")

Models loaded successfully!


In [18]:
# Hàm thu thập dự báo từ từng mô hình trên một DataLoader
def get_meta_features(loader, models, device, output_dim=2, z_score=1.96):
    """Run every base model over ``loader`` and stack the outputs as meta-features.

    Each batch from ``loader`` is unpacked as ``(x_seq, x_cal, y)``. ``x_seq``
    and ``x_cal`` are moved to ``device`` and fed to all six base models inside
    ``torch.no_grad()``. The models are called in the fixed order
    tft, tcn, hfm, dcf, pft, cqv.

    * ``"tft"``, ``"tcn"`` and ``"hfm"`` must return a single 2-D tensor, which
      is used as-is.
    * ``"dcf"``, ``"pft"`` and ``"cqv"`` must return a 3-tuple whose first item
      is a 2-D tensor. Its first ``output_dim`` columns are taken as the mean
      and the following columns as a log-variance, from which the band
      ``mean -/+ z_score * exp(0.5 * logvar)`` is derived. For ``"dcf"`` and
      ``"pft"`` every remaining column is read as log-variance; for ``"cqv"``
      only the next ``output_dim`` columns are read and anything after them is
      ignored.

    NOTE(review): the original inline comment described the CQV output as six
    columns for 2 targets x 3 quantiles, yet the code interprets columns
    ``[output_dim : 2 * output_dim]`` as a log-variance. That interpretation is
    preserved here unchanged - confirm it against ConditionalQuantileVAE.

    Args:
        loader: iterable yielding ``(x_seq, x_cal, y)`` tensor batches.
            ``x_cal`` is appended to the features unchanged, so it must be 2-D.
        models: mapping with the keys ``"tft"``, ``"tcn"``, ``"hfm"``,
            ``"dcf"``, ``"pft"`` and ``"cqv"``; each value is called as
            ``model(x_seq, x_cal)``.
        device: device the two model inputs are moved to.
        output_dim: number of leading output columns holding the mean of the
            distributional models. Defaults to 2, the previously hard-coded
            value.
        z_score: multiplier applied to the standard deviation when forming the
            lower/upper band. Defaults to 1.96, the previously hard-coded value.

    Returns:
        ``(meta_features, actuals)`` as numpy arrays. Columns of
        ``meta_features`` are ordered: tft, tcn, hfm, dcf mu/lower/upper,
        pft mu/lower/upper, cqv mu/lower/upper, then the ``x_cal`` columns.
        ``actuals`` is every ``y`` batch concatenated along axis 0.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    point_models = ("tft", "tcn", "hfm")
    # Exclusive upper column bound of the log-variance slice for each
    # distributional model; None keeps every column after the mean.
    logvar_stop = {"dcf": None, "pft": None, "cqv": 2 * output_dim}
    feature_order = list(point_models) + [
        f"{name}_{part}"
        for name in logvar_stop
        for part in ("mu", "lower", "upper")
    ]

    preds = {key: [] for key in feature_order}
    static_feats = []  # x_cal of every batch
    actuals = []

    def _to_numpy(tensor):
        # Bring a tensor back to host memory as a numpy array.
        return tensor.cpu().numpy()

    with torch.no_grad():
        for x_seq, x_cal, y in loader:
            x_seq = x_seq.to(device)
            x_cal = x_cal.to(device)

            # Point forecasts are used exactly as the model returns them.
            for name in point_models:
                preds[name].append(_to_numpy(models[name](x_seq, x_cal)))

            # Distributional forecasts: split into mean / log-variance and
            # turn the log-variance into a symmetric band around the mean.
            for name, stop in logvar_stop.items():
                out, _, _ = models[name](x_seq, x_cal)
                mu = out[:, :output_dim]
                logvar = out[:, output_dim:stop]
                half_width = z_score * torch.exp(0.5 * logvar)
                preds[f"{name}_mu"].append(_to_numpy(mu))
                preds[f"{name}_lower"].append(_to_numpy(mu - half_width))
                preds[f"{name}_upper"].append(_to_numpy(mu + half_width))

            static_feats.append(_to_numpy(x_cal))
            actuals.append(_to_numpy(y))

    if not actuals:
        # np.concatenate on an empty list fails with an opaque message;
        # report the real cause instead.
        raise ValueError(
            "get_meta_features: loader yielded no batches, so there is nothing to stack"
        )

    stacked = [np.concatenate(preds[key], axis=0) for key in feature_order]
    stacked.append(np.concatenate(static_feats, axis=0))
    meta_features = np.concatenate(stacked, axis=1)

    return meta_features, np.concatenate(actuals, axis=0)

# Thu thập meta-features trên tập validation
# Base models keyed by the names get_meta_features looks up.
models = dict(tft=tft, tcn=tcn, hfm=hfm, dcf=dcf, pft=pft, cqv=cqv)

# Meta-features and targets for the validation split, then for the test split.
X_val, y_val = get_meta_features(val_loader, models, device)
X_test, y_test = get_meta_features(test_loader, models, device)

# The stacked features go to the meta-model unchanged; the targets are passed
# through scaler.inverse_transform first (presumably back to original units).
X_meta_val, X_meta_test = X_val, X_test
y_val_orig = scaler.inverse_transform(y_val)
y_test_orig = scaler.inverse_transform(y_test)

# Fit the RidgeCV meta-model on the validation split.
# Candidate penalties are three log-spaced values: 0.1, 1 and 10.
alphas = np.logspace(-1, 1, 3)
meta_model = RidgeCV(alphas=alphas, scoring='neg_mean_squared_error').fit(X_meta_val, y_val_orig)

# Score the meta-model on the held-out test split.
meta_preds = meta_model.predict(X_meta_test)

# NOTE(review): with two target columns these sklearn metrics average over the
# columns by default, so the pooled RMSE mixes both targets' scales.
r2 = r2_score(y_test_orig, meta_preds)
mape = mean_absolute_percentage_error(y_test_orig, meta_preds)
rmse = np.sqrt(mean_squared_error(y_test_orig, meta_preds))
print(
    f"Ensemble (Ridge) Test R-squared: {r2:.4f}",
    f"Ensemble (Ridge) Test MAPE: {mape:.4f}",
    f"Ensemble (Ridge) Test RMSE: {rmse:.4f}",
    sep="\n",
)

Ensemble (Ridge) Test R-squared: 0.9793
Ensemble (Ridge) Test MAPE: 0.2198
Ensemble (Ridge) Test RMSE: 99924.9250


In [31]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
import numpy as np

# Candidate ridge penalties: three log-spaced values, 0.1, 1 and 10.
# NOTE(review): the recorded run selected alpha = 10.0, the upper edge of this
# grid, so a wider range may be worth trying.
alpha_range = np.logspace(-1, 1, 3)
param_grid = dict(alpha=alpha_range)

# 5-fold grid search over alpha on the validation meta-features, scored by
# negative mean squared error.
ridge = Ridge()
grid_search = GridSearchCV(ridge, param_grid, scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_meta_val, y_val_orig)

best_alpha = grid_search.best_params_['alpha']
print("Best alpha found:", best_alpha)

# Refit a plain Ridge meta-model at the selected alpha on the whole validation split.
meta_model = Ridge(alpha=best_alpha)
meta_model.fit(X_meta_val, y_val_orig)

# Score the refitted meta-model on the test split.
meta_preds = meta_model.predict(X_meta_test)
r2 = r2_score(y_test_orig, meta_preds)
mape = mean_absolute_percentage_error(y_test_orig, meta_preds)
rmse = np.sqrt(mean_squared_error(y_test_orig, meta_preds))
print(
    f"Ensemble (Ridge) Test R-squared: {r2:.4f}",
    f"Ensemble (Ridge) Test MAPE: {mape:.4f}",
    f"Ensemble (Ridge) Test RMSE: {rmse:.4f}",
    sep="\n",
)

Best alpha found: 10.0
Ensemble (Ridge) Test R-squared: 0.9793
Ensemble (Ridge) Test MAPE: 0.2198
Ensemble (Ridge) Test RMSE: 99924.1406


In [42]:
from sklearn.linear_model import MultiTaskLassoCV, MultiTaskLasso
import numpy as np
from sklearn.metrics import r2_score, mean_absolute_percentage_error, mean_squared_error
import warnings
# X_meta_val, y_val_orig, X_meta_test and y_test_orig must already exist from
# the earlier stacking cell.

# Candidate penalties: ten log-spaced alphas between 0.1 and 10.
alpha_range = np.logspace(-1, 1, 10)
multi_lasso_cv = MultiTaskLassoCV(alphas=alpha_range, cv=5, random_state=42)

# Warnings are silenced only while the two fits run. A bare
# warnings.filterwarnings("ignore") would stay active for the rest of the
# kernel session and hide warnings raised by every later cell as well.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # 5-fold cross-validated search for the best alpha.
    multi_lasso_cv.fit(X_meta_val, y_val_orig)
    best_alpha = multi_lasso_cv.alpha_
    print("Best alpha found by MultiTaskLassoCV:", best_alpha)

    # Refit a plain MultiTaskLasso at the selected alpha on the whole
    # validation split.
    meta_model = MultiTaskLasso(alpha=best_alpha, random_state=42)
    meta_model.fit(X_meta_val, y_val_orig)

# Score the meta-model on the test split.
meta_preds = meta_model.predict(X_meta_test)

r2 = r2_score(y_test_orig, meta_preds)
mape = mean_absolute_percentage_error(y_test_orig, meta_preds)
rmse = np.sqrt(mean_squared_error(y_test_orig, meta_preds))

print(f"Ensemble (MultiTaskLasso) Test R-squared: {r2:.4f}")
print(f"Ensemble (MultiTaskLasso) Test MAPE: {mape:.4f}")
print(f"Ensemble (MultiTaskLasso) Test RMSE: {rmse:.4f}")

Best alpha found by MultiTaskLassoCV: 10.0
Ensemble (MultiTaskLasso) Test R-squared: 0.9805
Ensemble (MultiTaskLasso) Test MAPE: 0.2213
Ensemble (MultiTaskLasso) Test RMSE: 98042.6328


In [44]:
# Tính chỉ số trên Units và Revenue 

def _column_metrics(y_true, y_pred):
    """Return ``(R-squared, MAPE, RMSE)`` for a single target column."""
    return (
        r2_score(y_true, y_pred),
        mean_absolute_percentage_error(y_true, y_pred),
        np.sqrt(mean_squared_error(y_true, y_pred)),
    )


# Label the report with the estimator that produced `meta_preds`. The label
# used to be hard-coded as "Ridge", which was wrong whenever this cell ran
# after a different meta-model cell (every cell assigns meta_model and
# meta_preds together, so the two always match).
ensemble_label = type(meta_model).__name__

# Units: column 0 of the targets and predictions.
y_test_units = y_test_orig[:, 0]
meta_preds_units = meta_preds[:, 0]
r2_units, mape_units, rmse_units = _column_metrics(y_test_units, meta_preds_units)

# Revenue: column 1 of the targets and predictions.
y_test_revenue = y_test_orig[:, 1]
meta_preds_revenue = meta_preds[:, 1]
r2_revenue, mape_revenue, rmse_revenue = _column_metrics(y_test_revenue, meta_preds_revenue)

for target_name, (r2_t, mape_t, rmse_t) in (
    ("Units", (r2_units, mape_units, rmse_units)),
    ("Revenue", (r2_revenue, mape_revenue, rmse_revenue)),
):
    print(f"Ensemble ({ensemble_label}) Test R-squared ({target_name}): {r2_t:.4f}")
    print(f"Ensemble ({ensemble_label}) Test MAPE ({target_name}): {mape_t:.4f}")
    print(f"Ensemble ({ensemble_label}) Test RMSE ({target_name}): {rmse_t:.4f}")

Ensemble (Ridge) Test R-squared (Units): 0.9794
Ensemble (Ridge) Test MAPE (Units): 0.2342
Ensemble (Ridge) Test RMSE (Units): 18.5565
Ensemble (Ridge) Test R-squared (Revenue): 0.9815
Ensemble (Ridge) Test MAPE (Revenue): 0.2083
Ensemble (Ridge) Test RMSE (Revenue): 138653.2344
