In [3]:
import os
import pandas as pd
import numpy as np
import pickle
from datetime import datetime
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Define the function
def train_meta_models_with_different_windows(
    backtest_dir="Data/BacktestingResults",
    model_dir="Model/Meta",
    output_windows=(100, 50, 25, 10)
):
    """Fit one RidgeCV stacking (meta) model per trailing window of backtest rows.

    The backtest CSVs of the four base models (Arimax, XGBoost, RandomForest,
    LSTM) are read from ``backtest_dir``, inner-joined on ``Date``, and for
    every requested window the last ``window`` rows are used to regress
    ``Actual Price`` on the four ``<Model>_Predicted`` columns. Each fitted
    model is pickled into ``model_dir`` as
    ``meta_model_last_<window>days_<YYYY-MM-DD>.pkl``.

    Args:
        backtest_dir: Directory containing ``<Model>_backtest.csv`` files, each
            with ``Date``, ``Predicted Price`` and ``Actual Price`` columns.
        model_dir: Directory the pickled meta-models are written to; created
            if it does not exist.
        output_windows: Iterable of positive integers, the number of most
            recent merged rows to train each meta-model on.

    Returns:
        dict keyed by ``"<window>_days"``. Each value holds ``"Model Path"``,
        ``"Coefficients"`` (feature name -> coefficient), ``"Intercept"`` and
        ``"Evaluation Metrics"`` (``MAE``, ``RMSE``, ``R²``). The metrics are
        in-sample: they are computed on the same rows the model was fitted on.

    Raises:
        TypeError: If a window is not an integer.
        ValueError: If a window is not positive, or if fewer rows are
            available for a window than the number of cross-validation folds.
        FileNotFoundError: If a backtest CSV is missing (raised by pandas).
    """
    import warnings  # local import so the block does not rely on a new file-level import

    cv_folds = 5

    # Validate the windows before any I/O. DataFrame.tail(-n) returns every row
    # *except* the first n, so a negative window would silently train on almost
    # the whole history and be saved under a misleading file name.
    windows = list(output_windows)
    for window in windows:
        if isinstance(window, bool) or not isinstance(window, (int, np.integer)):
            raise TypeError(f"Window sizes must be integers, got {window!r}")
        if window <= 0:
            raise ValueError(f"Window sizes must be positive, got {window!r}")

    model_files = {
        "Arimax": os.path.join(backtest_dir, "Arimax_backtest.csv"),
        "XGBoost": os.path.join(backtest_dir, "XGBoost_backtest.csv"),
        "RandomForest": os.path.join(backtest_dir, "RandomForest_backtest.csv"),
        "LSTM": os.path.join(backtest_dir, "LSTM_backtest.csv"),
    }
    feature_cols = [f"{name}_Predicted" for name in model_files]

    # Load all predictions. Duplicate dates are dropped per file (first row
    # wins) *before* merging, so repeated dates cannot multiply rows across the
    # four successive joins.
    model_predictions = []
    for model_name, filepath in model_files.items():
        df = pd.read_csv(filepath, parse_dates=["Date"])
        df = df.rename(columns={"Predicted Price": f"{model_name}_Predicted"})
        df = df[["Date", f"{model_name}_Predicted", "Actual Price"]]
        model_predictions.append(df.drop_duplicates(subset="Date"))

    # Merge all model predictions. The target ("Actual Price") is taken from
    # the first file only; the other files contribute just their prediction.
    ensemble_df = model_predictions[0][["Date", "Actual Price"]]
    for df in model_predictions:
        ensemble_df = ensemble_df.merge(df.drop(columns="Actual Price"), on="Date", how="inner")

    # Rows with a missing prediction or target cannot be used by the regressor.
    ensemble_df = (
        ensemble_df
        .dropna(subset=feature_cols + ["Actual Price"])
        .sort_values("Date")
        .reset_index(drop=True)
    )

    # Check every window against the available data before training anything,
    # so a bad window does not leave a partial set of pickles behind.
    available_rows = len(ensemble_df)
    for window in windows:
        usable_rows = min(window, available_rows)
        if usable_rows < cv_folds:
            raise ValueError(
                f"Window {window} provides only {usable_rows} usable rows; "
                f"at least {cv_folds} are required for {cv_folds}-fold cross-validation"
            )
        if window > available_rows:
            warnings.warn(
                f"Window {window} exceeds the {available_rows} merged rows available; "
                f"the model is trained on {available_rows} rows instead",
                stacklevel=2,
            )

    results = {}
    today_str = datetime.now().strftime("%Y-%m-%d")
    os.makedirs(model_dir, exist_ok=True)

    for window in windows:
        df_window = ensemble_df.tail(window).copy()

        X = df_window[feature_cols]
        y = df_window["Actual Price"]

        # The regularisation strength is chosen by 5-fold CV over a log grid.
        meta_model = RidgeCV(alphas=np.logspace(-4, 4, 50), cv=cv_folds)
        meta_model.fit(X, y)

        # In-sample fit quality: predictions are made on the training rows.
        y_pred = meta_model.predict(X)
        metrics = {
            "MAE": mean_absolute_error(y, y_pred),
            "RMSE": np.sqrt(mean_squared_error(y, y_pred)),
            "R²": r2_score(y, y_pred)
        }

        model_filename = f"meta_model_last_{window}days_{today_str}.pkl"
        model_path = os.path.join(model_dir, model_filename)
        with open(model_path, "wb") as f:
            pickle.dump(meta_model, f)

        results[f"{window}_days"] = {
            "Model Path": model_path,
            "Coefficients": dict(zip(X.columns, meta_model.coef_)),
            "Intercept": meta_model.intercept_,
            "Evaluation Metrics": metrics
        }

    return results

# Example call (left commented out here; the function is actually invoked in a later cell):
# results = train_meta_models_with_different_windows()
# results


In [4]:
def print_formatted_meta_model_results(results: dict):
    """Print a human-readable summary of the per-window meta-model results.

    Args:
        results: Mapping of window key (e.g. ``"100_days"``) to a dict with
            ``"Model Path"``, ``"Coefficients"`` (name -> value),
            ``"Intercept"`` and ``"Evaluation Metrics"`` (``MAE``, ``RMSE``,
            ``R²``).

    Returns:
        None. The summary is written to standard output.
    """
    key_suffix = "_days"

    print("\nMeta-Model Training Summary Across Time Windows")
    print("───────────────────────────────────────────────────────────────")

    for window, info in results.items():
        # Keys look like "100_days"; strip the suffix so the header reads
        # "Last 100 Days" rather than "Last 100_days Days".
        window_label = str(window)
        if window_label.endswith(key_suffix):
            window_label = window_label[: -len(key_suffix)]

        print(f"\nWindow: Last {window_label} Days")
        print(f"Model Path : {info['Model Path']}")

        print("Coefficients:")
        for k, v in info["Coefficients"].items():
            print(f"   - {k:<24}: {float(v):.4f}")

        print(f"Intercept   : {float(info['Intercept']):.4f}")

        print("Evaluation Metrics:")
        print(f"   - MAE       : {float(info['Evaluation Metrics']['MAE']):.4f}")
        print(f"   - RMSE      : {float(info['Evaluation Metrics']['RMSE']):.4f}")
        print(f"   - R²        : {float(info['Evaluation Metrics']['R²']):.4f}")

    print("───────────────────────────────────────────────────────────────\n")


In [5]:
# Train one meta-model per look-back window, then print the formatted summary.
results = train_meta_models_with_different_windows(
    backtest_dir="Data/BacktestingResults",
    model_dir="Model/Meta",
    output_windows=[100, 50, 25, 10],
)
print_formatted_meta_model_results(results)



Meta-Model Training Summary Across Time Windows
───────────────────────────────────────────────────────────────

Window: Last 100_days Days
Model Path : Model/Meta\meta_model_last_100days_2025-06-24.pkl
Coefficients:
   - Arimax_Predicted        : 0.9476
   - XGBoost_Predicted       : -0.1610
   - RandomForest_Predicted  : 0.1574
   - LSTM_Predicted          : 0.0452
Intercept   : 0.7797
Evaluation Metrics:
   - MAE       : 0.1517
   - RMSE      : 0.1912
   - R²        : 0.9976

Window: Last 50_days Days
Model Path : Model/Meta\meta_model_last_50days_2025-06-24.pkl
Coefficients:
   - Arimax_Predicted        : 0.8551
   - XGBoost_Predicted       : -0.2454
   - RandomForest_Predicted  : 0.2926
   - LSTM_Predicted          : 0.0554
Intercept   : 3.2878
Evaluation Metrics:
   - MAE       : 0.1705
   - RMSE      : 0.2122
   - R²        : 0.9901

Window: Last 25_days Days
Model Path : Model/Meta\meta_model_last_25days_2025-06-24.pkl
Coefficients:
   - Arimax_Predicted        : 0.6054
   - X