In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score, mean_squared_error

In [2]:
# Relative paths to the result folders that are passed to extractGeneral()
# and evaluate() in later cells.
# NOTE(review): presumably each folder holds one result CSV per stock for
# that model - confirm against the code that writes these files.
LSTMPATH = "Data/LSTM"
RFPATH = "Data/Random Forest"
TRANSFORMERPATH = "Data/Transformer"
WLSPATH = "Data/WLS"

In [3]:
def extractGeneral(folder_path, output_csv='summary_metrics.csv'):
    """Collect the first-row metrics of every CSV in a folder into one summary CSV.

    For each ``.csv`` file in ``folder_path`` the first data row is read and
    its ``stock_id``, ``model_name``, ``mse``, ``qlike`` and ``r^2`` values
    become one row of the summary. The summary is written to
    ``folder_path/output_csv`` without an index column.

    Args:
        folder_path: Directory containing the per-file result CSVs.
        output_csv: File name of the summary, created inside ``folder_path``.

    Returns:
        None. Progress and errors are reported on stdout.

    Files that are empty, unreadable, or lack one of the expected columns are
    skipped; unreadable / malformed files are reported with their error.
    """
    metric_columns = ['stock_id', 'model_name', 'mse', 'qlike', 'r^2']
    summary_rows = []
    # Sorted so the row order of the summary does not depend on the
    # arbitrary order in which the filesystem lists directory entries.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith('.csv'):
            continue
        # The summary lives in the same folder it is built from. Skip it so
        # that re-running the function does not ingest its own previous
        # output and append a duplicate row.
        if file_name == output_csv:
            continue
        file_path = os.path.join(folder_path, file_name)
        try:
            df = pd.read_csv(file_path)
            if df.empty:
                continue
            # Metrics are taken from the first row only.
            summary_rows.append(df.iloc[0][metric_columns])
        except Exception as e:
            # Best effort: one bad file must not abort the whole summary.
            print(f"Error processing {file_name}: {e}")
    if summary_rows:
        summary_df = pd.DataFrame(summary_rows)
        summary_df.to_csv(os.path.join(folder_path, output_csv), index=False)
        print(f"Summary saved to {output_csv}")
    else:
        print("No data extracted.")

In [6]:
# Write the per-file metrics summary CSV into the WLS results folder.
extractGeneral(WLSPATH)

Summary saved to summary_metrics.csv


In [7]:
# Write the per-file metrics summary CSV into the Transformer results folder.
extractGeneral(TRANSFORMERPATH)

Summary saved to summary_metrics.csv


In [8]:
# Write the per-file metrics summary CSV into the Random Forest results folder.
extractGeneral(RFPATH)

Summary saved to summary_metrics.csv


In [None]:
# Write the per-file metrics summary CSV into the LSTM results folder.
# NOTE(review): this cell has no execution count in the saved notebook, so
# the LSTM summary may not have been generated in that run - confirm.
extractGeneral(LSTMPATH)

In [7]:
def qlike_loss(actual,forecast,eps=1e-8):
    """Mean QLIKE loss between actual and forecast values.

    Both inputs are floored at ``eps`` before use, so the ratio
    ``actual / forecast`` is never a division by zero and its logarithm is
    always defined. The per-element loss is ``ratio - log(ratio) - 1``, which
    is zero when the floored values are equal; the mean over all elements is
    returned.
    """
    def floored(values):
        # Lower bound only; there is no upper clip.
        return np.clip(values, eps, None)

    ratio = floored(actual) / floored(forecast)
    per_element = ratio - np.log(ratio) - 1.0
    return np.mean(per_element)

def evaluate(folder_path):
    """Print pooled R², MSE and QLIKE over all prediction CSVs in a folder.

    Every ``.csv`` file in ``folder_path`` that contains both a ``pred_vol``
    and a ``true_vol`` column contributes all of its rows. The three metrics
    are computed once over the pooled values, not per file. Files that fail
    to load are reported on stdout and skipped. If no usable values are
    found, a message is printed instead of the metrics.

    Returns:
        None. All results are written to stdout.
    """
    forecasts, targets = [], []
    csv_names = [entry for entry in os.listdir(folder_path) if entry.endswith('.csv')]
    for file_name in csv_names:
        file_path = os.path.join(folder_path, file_name)
        try:
            frame = pd.read_csv(file_path)
            # Only files that carry both columns take part in the pooling.
            if all(col in frame.columns for col in ('pred_vol', 'true_vol')):
                forecasts.extend(frame['pred_vol'].values)
                targets.extend(frame['true_vol'].values)
        except Exception as e:
            print(f"Error processing {file_name}: {e}")

    if not (forecasts and targets):
        print("No valid prediction data found.")
        return

    y_true, y_pred = np.array(targets), np.array(forecasts)

    # Compute every metric before printing anything, so a failure in one of
    # them does not leave a partial report behind.
    r2 = r2_score(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    qlike = qlike_loss(y_true, y_pred)

    report_lines = (
        f"Combined Evaluation Metrics:",
        f"R²   : {r2:.9f}",
        f"MSE  : {mse:.9f}",
        f"QLIKE: {qlike:.9f}",
    )
    for line in report_lines:
        print(line)

In [8]:
# Pooled metrics over every prediction CSV in the Transformer results folder.
evaluate(TRANSFORMERPATH)

Combined Evaluation Metrics:
R²   : 0.638204994
MSE  : 0.000000171
QLIKE: 0.206424960


In [9]:
# Pooled metrics over every prediction CSV in the Random Forest results folder.
evaluate(RFPATH)

Combined Evaluation Metrics:
R²   : 0.635772536
MSE  : 0.000000177
QLIKE: 0.204956429


In [10]:
# Pooled metrics over every prediction CSV in the WLS results folder.
evaluate(WLSPATH)

Combined Evaluation Metrics:
R²   : 0.591312317
MSE  : 0.000000204
QLIKE: 0.376301507


In [11]:
# Pooled metrics over every prediction CSV in the LSTM results folder.
evaluate(LSTMPATH)

Combined Evaluation Metrics:
R²   : 0.628302419
MSE  : 0.000000156
QLIKE: 0.258099956
