In [44]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# # Define paths
# output_dir_non_winsor = './OoS_metrics_rf/non_winsorized'
# output_dir_winsor = './OoS_metrics_rf/winsorized'


In [7]:
## pdc off

import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


custom_colors = {
    'CatBoost': 'blue',
    'LightGBM': 'green',
    'Random Forest': 'orange',
    'SVM': 'purple',
    'XGBoost': 'pink'
}

# List of folder paths and their labels for different horizons
horizons = ['tau_1', 'tau_3', 'tau_6', 'tau_9']
folders_template = [
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper_updated\\OoS_metrics_Catboost_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'CatBoost'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper_updated\\OoS_metrics_LightGBM_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'LightGBM'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper_updated\\OoS_metrics_rf_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'Random Forest'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper_updated\\OoS_metrics_GBM_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'GBM'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper_updated\\OoS_metrics_xgboost_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'XGBoost')
    
]

# Create a folder to save the graphs and combined CSVgdp_paper_updated
output_folder = 'graph_pdc_off'
os.makedirs(output_folder, exist_ok=True)

# List of metrics to plot
metrics = ['rmse', 'mae', 'mad', 'r_sq']

# Mapping for nice Y-axis labels
metric_labels = {
    'rmse': 'RMSE',
    'mae': 'MAE',
    'mad': 'MAD',
    'r_sq': 'R²'
}

# Initialize a DataFrame to store all combined metrics
combined_summary_df = pd.DataFrame()

# Iterate over each horizon and plot the metrics
for horizon in horizons:
    print(f"\n\033[1mHorizon: {horizon}\033[0m")
    for metric in metrics:
        plt.figure(figsize=(14, 8))  # Updated size for better visibility
        data_list = []
        model_labels = []
        metric_data_summary = []

        print(f"\n\033[1m{metric.upper()}\033[0m")
        for folder_template, model_name in folders_template:
            file_path = folder_template.format(horizon)
            
            # Load the CSV file
            try:
                data = pd.read_csv(file_path)
                data = data.dropna(subset=['ssample_end_date'])
                # Collect data for the current metric
                data_list.append(data[metric])
                model_labels.append(model_name)

                # Calculate and print raw median and IQR
                median = data[metric].median()
                iqr = data[metric].quantile(0.75) - data[metric].quantile(0.25)
                print(f"Model: {model_name} - Median: {median:.10f}, IQR: {iqr:.10f}")
                
                # Append to summary list
                metric_data_summary.append({
                    'Horizon': horizon,
                    'Metric': metric,
                    'Model': model_name,
                    'Median': median,
                    'IQR': iqr
                })

            except FileNotFoundError:
                print(f"File not found: {file_path}")
                continue

        # Plot all models' data for the current metric with custom colors
        box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
        for patch, model_name in zip(box['boxes'], model_labels):
            patch.set_facecolor(custom_colors.get(model_name, 'grey'))

        # Customize Y-axis for R-squared to show negative scale if applicable
        if metric == 'r_sq':
            plt.ylim(-1, 1)
            plt.axhline(0, color='red', linestyle='--', linewidth=0.5)

        # Enhance visibility of axis labels
        plt.xlabel('Model', fontsize=14)
        plt.ylabel(metric_labels.get(metric, metric), fontsize=14)
        plt.xticks(fontsize=12)
        plt.yticks(fontsize=12)
        
        # Save the figure without title
        file_name = f'{output_folder}/{metric}_{horizon}.png'
        plt.grid(True, linestyle='--', alpha=0.5)
        plt.savefig(file_name, bbox_inches='tight')
        plt.close()

        # Append the metric data to the combined summary DataFrame
        combined_summary_df = pd.concat([combined_summary_df, pd.DataFrame(metric_data_summary)], ignore_index=True)

# Save the combined metrics data as a single CSV
combined_csv_file_name = f'{output_folder}/combined_metrics_summary.csv'
combined_summary_df.to_csv(combined_csv_file_name, index=False)
print(f"Combined summary CSV saved: {combined_csv_file_name}")


[1mHorizon: tau_1[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0001498102, IQR: 0.0002586318
Model: LightGBM - Median: 0.0006989237, IQR: 0.0014414482
Model: Random Forest - Median: 0.0001553500, IQR: 0.0002539505
Model: GBM - Median: 0.0000693380, IQR: 0.0001521635
Model: XGBoost - Median: 0.0001116698, IQR: 0.0002548106

[1mMAE[0m
Model: CatBoost - Median: 0.0001498102, IQR: 0.0002586318
Model: LightGBM - Median: 0.0006989237, IQR: 0.0014414482
Model: Random Forest - Median: 0.0001553500, IQR: 0.0002539505
Model: GBM - Median: 0.0000693380, IQR: 0.0001521635
Model: XGBoost - Median: 0.0001116698, IQR: 0.0002548106

[1mMAD[0m


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


Model: CatBoost - Median: 0.0001498102, IQR: 0.0002586318
Model: LightGBM - Median: 0.0006989237, IQR: 0.0014414482
Model: Random Forest - Median: 0.0001553500, IQR: 0.0002539505
Model: GBM - Median: 0.0000693380, IQR: 0.0001521635
Model: XGBoost - Median: 0.0001116698, IQR: 0.0002548106

[1mR_SQ[0m
Model: CatBoost - Median: 0.9935100783, IQR: 0.0504651038
Model: LightGBM - Median: 0.8173439321, IQR: 0.4251673859
Model: Random Forest - Median: 0.9924215808, IQR: 0.0275250163
Model: GBM - Median: 0.9980328973, IQR: 0.0170972610
Model: XGBoost - Median: 0.9945610405, IQR: 0.0222239739


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mHorizon: tau_3[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0002533806, IQR: 0.0003939246
Model: LightGBM - Median: 0.0012423862, IQR: 0.0015437367
Model: Random Forest - Median: 0.0002113448, IQR: 0.0003894245
Model: GBM - Median: 0.0001287312, IQR: 0.0002199007
Model: XGBoost - Median: 0.0002229829, IQR: 0.0006000963

[1mMAE[0m
Model: CatBoost - Median: 0.0002357206, IQR: 0.0002889680
Model: LightGBM - Median: 0.0010724744, IQR: 0.0013767688
Model: Random Forest - Median: 0.0001775426, IQR: 0.0002712855
Model: GBM - Median: 0.0001096571, IQR: 0.0001717593
Model: XGBoost - Median: 0.0001865691, IQR: 0.0005439987

[1mMAD[0m
Model: CatBoost - Median: 0.0002357206, IQR: 0.0002889680
Model: LightGBM - Median: 0.0010724744, IQR: 0.0013767688


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


Model: Random Forest - Median: 0.0001775426, IQR: 0.0002712855
Model: GBM - Median: 0.0001096571, IQR: 0.0001717593
Model: XGBoost - Median: 0.0001865691, IQR: 0.0005439987

[1mR_SQ[0m
Model: CatBoost - Median: 0.9896702694, IQR: 0.0313759204
Model: LightGBM - Median: 0.8040663108, IQR: 0.3718721266
Model: Random Forest - Median: 0.9916069521, IQR: 0.0184566839
Model: GBM - Median: 0.9971702471, IQR: 0.0092120599
Model: XGBoost - Median: 0.9907380072, IQR: 0.0436174154

[1mHorizon: tau_6[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0003302499, IQR: 0.0004112985
Model: LightGBM - Median: 0.0012024935, IQR: 0.0011911639
Model: Random Forest - Median: 0.0003139269, IQR: 0.0004360669
Model: GBM - Median: 0.0001756658, IQR: 0.0003266678
Model: XGBoost - Median: 0.0002556783, IQR: 0.0007835758


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAE[0m
Model: CatBoost - Median: 0.0002651631, IQR: 0.0002795260
Model: LightGBM - Median: 0.0010362228, IQR: 0.0008965564
Model: Random Forest - Median: 0.0002522868, IQR: 0.0002540609
Model: GBM - Median: 0.0001207170, IQR: 0.0001618129
Model: XGBoost - Median: 0.0002159449, IQR: 0.0003538090

[1mMAD[0m
Model: CatBoost - Median: 0.0002651631, IQR: 0.0002795260
Model: LightGBM - Median: 0.0010362228, IQR: 0.0008965564
Model: Random Forest - Median: 0.0002522868, IQR: 0.0002540609
Model: GBM - Median: 0.0001207170, IQR: 0.0001618129
Model: XGBoost - Median: 0.0002159449, IQR: 0.0003538090

[1mR_SQ[0m
Model: CatBoost - Median: 0.9855469176, IQR: 0.0255210120
Model: LightGBM - Median: 0.7987176141, IQR: 0.2674339867


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


Model: Random Forest - Median: 0.9872618455, IQR: 0.0248963382
Model: GBM - Median: 0.9965864272, IQR: 0.0097519197
Model: XGBoost - Median: 0.9898091199, IQR: 0.0536349543

[1mHorizon: tau_9[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0004254394, IQR: 0.0004145962
Model: LightGBM - Median: 0.0012522939, IQR: 0.0012391488
Model: Random Forest - Median: 0.0004130207, IQR: 0.0004455364
Model: GBM - Median: 0.0003580344, IQR: 0.0004507380
Model: XGBoost - Median: 0.0003470628, IQR: 0.0007319115

[1mMAE[0m
Model: CatBoost - Median: 0.0002987883, IQR: 0.0002944021
Model: LightGBM - Median: 0.0009683433, IQR: 0.0007785673


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


Model: Random Forest - Median: 0.0002738885, IQR: 0.0002553538
Model: GBM - Median: 0.0001689566, IQR: 0.0002396514
Model: XGBoost - Median: 0.0002823804, IQR: 0.0003591419

[1mMAD[0m
Model: CatBoost - Median: 0.0002987883, IQR: 0.0002944021
Model: LightGBM - Median: 0.0009683433, IQR: 0.0007785673
Model: Random Forest - Median: 0.0002738885, IQR: 0.0002553538
Model: GBM - Median: 0.0001689566, IQR: 0.0002396514
Model: XGBoost - Median: 0.0002823804, IQR: 0.0003591419


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mR_SQ[0m
Model: CatBoost - Median: 0.9790125471, IQR: 0.0376052393
Model: LightGBM - Median: 0.7780081549, IQR: 0.2529356281
Model: Random Forest - Median: 0.9806361688, IQR: 0.0444611464
Model: GBM - Median: 0.9896228345, IQR: 0.0304635093
Model: XGBoost - Median: 0.9822993221, IQR: 0.0646291623
Combined summary CSV saved: graph_pdc_off/combined_metrics_summary.csv


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


In [5]:
### pdc gt


import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set a color palette for the boxplots

custom_colors = {
    'CatBoost': 'blue',
    'LightGBM': 'green',
    'Random Forest': 'orange',
    'SVM': 'purple',
    'XGBoost': 'pink'
}

# List of folder paths and their labels for different horizons
horizons = ['tau_1', 'tau_3', 'tau_6', 'tau_9']

folders_template = [
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper_updated\\OoS_metrics_Catboost_gt\\winsorized\\OoS_metrics_{}_winsorized.csv', 'CatBoost'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper_updated\\OoS_metrics_LightGBM_gt\\winsorized\\OoS_metrics_{}_winsorized.csv', 'LightGBM'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper_updated\\OoS_metrics_rf_gt\\winsorized\\OoS_metrics_{}_winsorized.csv', 'Random Forest'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper_updated\\OoS_metrics_GBM_gt\\winsorized\\OoS_metrics_{}_winsorized.csv', 'GBM'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper_updated\\OoS_metrics_xgboost_gt\\winsorized\\OoS_metrics_{}_winsorized.csv', 'XGBoost')
]

# Create a folder to save the graphs and combined CSV
output_folder = 'graph_pdc_gt'
os.makedirs(output_folder, exist_ok=True)

# List of metrics to plot
metrics = ['rmse', 'mae', 'mad', 'r_sq']

# Mapping for nice Y-axis labels
metric_labels = {
    'rmse': 'RMSE',
    'mae': 'MAE',
    'mad': 'MAD',
    'r_sq': 'R²'
}

# Initialize a DataFrame to store all combined metrics
combined_summary_df = pd.DataFrame()

# Iterate over each horizon and plot the metrics
for horizon in horizons:
    print(f"\n\033[1mHorizon: {horizon}\033[0m")
    for metric in metrics:
        plt.figure(figsize=(14, 8))  # Updated size for better visibility
        data_list = []
        model_labels = []
        metric_data_summary = []

        print(f"\n\033[1m{metric.upper()}\033[0m")
        for folder_template, model_name in folders_template:
            file_path = folder_template.format(horizon)
            
            # Load the CSV file
            try:
                data = pd.read_csv(file_path)
                data = data.dropna(subset=['ssample_end_date'])
                # Collect data for the current metric
                data_list.append(data[metric])
                model_labels.append(model_name)

                # Calculate and print raw median and IQR
                median = data[metric].median()
                iqr = data[metric].quantile(0.75) - data[metric].quantile(0.25)
                print(f"Model: {model_name} - Median: {median:.10f}, IQR: {iqr:.10f}")
                
                # Append to summary list
                metric_data_summary.append({
                    'Horizon': horizon,
                    'Metric': metric,
                    'Model': model_name,
                    'Median': median,
                    'IQR': iqr
                })

            except FileNotFoundError:
                print(f"File not found: {file_path}")
                continue

        # Plot all models' data for the current metric with custom colors
        box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
        for patch, model_name in zip(box['boxes'], model_labels):
            patch.set_facecolor(custom_colors.get(model_name, 'grey'))

        # Customize Y-axis for R-squared to show negative scale if applicable
        if metric == 'r_sq':
            plt.ylim(-1, 1)
            plt.axhline(0, color='red', linestyle='--', linewidth=0.5)

        # Enhance visibility of axis labels
        plt.xlabel('Model', fontsize=14)
        plt.ylabel(metric_labels.get(metric, metric), fontsize=14)
        plt.xticks(fontsize=12)
        plt.yticks(fontsize=12)
        
        # Save the figure without title
        file_name = f'{output_folder}/{metric}_{horizon}.png'
        plt.grid(True, linestyle='--', alpha=0.5)
        plt.savefig(file_name, bbox_inches='tight')
        plt.close()

        # Append the metric data to the combined summary DataFrame
        combined_summary_df = pd.concat([combined_summary_df, pd.DataFrame(metric_data_summary)], ignore_index=True)

# Save the combined metrics data as a single CSV
combined_csv_file_name = f'{output_folder}/combined_metrics_summary.csv'
combined_summary_df.to_csv(combined_csv_file_name, index=False)
print(f"Combined summary CSV saved: {combined_csv_file_name}")



[1mHorizon: tau_1[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0001204683, IQR: 0.0001722886
Model: LightGBM - Median: 0.0006116745, IQR: 0.0013085218
Model: Random Forest - Median: 0.0001427265, IQR: 0.0002817477
Model: GBM - Median: 0.0000711435, IQR: 0.0001534643
Model: XGBoost - Median: 0.0001142797, IQR: 0.0002461331

[1mMAE[0m
Model: CatBoost - Median: 0.0001204683, IQR: 0.0001722886
Model: LightGBM - Median: 0.0006116745, IQR: 0.0013085218
Model: Random Forest - Median: 0.0001427265, IQR: 0.0002817477
Model: GBM - Median: 0.0000711435, IQR: 0.0001534643
Model: XGBoost - Median: 0.0001142797, IQR: 0.0002461331


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAD[0m
Model: CatBoost - Median: 0.0001204683, IQR: 0.0001722886
Model: LightGBM - Median: 0.0006116745, IQR: 0.0013085218
Model: Random Forest - Median: 0.0001427265, IQR: 0.0002817477
Model: GBM - Median: 0.0000711435, IQR: 0.0001534643
Model: XGBoost - Median: 0.0001142797, IQR: 0.0002461331

[1mR_SQ[0m
Model: CatBoost - Median: 0.9972119702, IQR: 0.0266090539
Model: LightGBM - Median: 0.8610356156, IQR: 0.4624950823
Model: Random Forest - Median: 0.9927109566, IQR: 0.0391935258
Model: GBM - Median: 0.9986915794, IQR: 0.0087539676
Model: XGBoost - Median: 0.9945298550, IQR: 0.0210415639

[1mHorizon: tau_3[0m

[1mRMSE[0m


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


Model: CatBoost - Median: 0.0002127644, IQR: 0.0003153377
Model: LightGBM - Median: 0.0010383809, IQR: 0.0012146999
Model: Random Forest - Median: 0.0002316564, IQR: 0.0004480742
Model: GBM - Median: 0.0001365800, IQR: 0.0003003676
Model: XGBoost - Median: 0.0002050849, IQR: 0.0003720324

[1mMAE[0m
Model: CatBoost - Median: 0.0001803933, IQR: 0.0002108286
Model: LightGBM - Median: 0.0008451376, IQR: 0.0008609899
Model: Random Forest - Median: 0.0001991974, IQR: 0.0003139322
Model: GBM - Median: 0.0001200811, IQR: 0.0002014705
Model: XGBoost - Median: 0.0001785677, IQR: 0.0003186061


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAD[0m
Model: CatBoost - Median: 0.0001803933, IQR: 0.0002108286
Model: LightGBM - Median: 0.0008451376, IQR: 0.0008609899
Model: Random Forest - Median: 0.0001991974, IQR: 0.0003139322
Model: GBM - Median: 0.0001200811, IQR: 0.0002014705
Model: XGBoost - Median: 0.0001785677, IQR: 0.0003186061

[1mR_SQ[0m
Model: CatBoost - Median: 0.9939238532, IQR: 0.0130363118
Model: LightGBM - Median: 0.8574186757, IQR: 0.2787814533
Model: Random Forest - Median: 0.9917237211, IQR: 0.0212610908
Model: GBM - Median: 0.9960026818, IQR: 0.0078673157
Model: XGBoost - Median: 0.9914421278, IQR: 0.0202252567


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mHorizon: tau_6[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0002243555, IQR: 0.0002459254
Model: LightGBM - Median: 0.0013433158, IQR: 0.0011044675
Model: Random Forest - Median: 0.0003155361, IQR: 0.0004228666
Model: GBM - Median: 0.0001934334, IQR: 0.0003638679
Model: XGBoost - Median: 0.0002442392, IQR: 0.0007934329

[1mMAE[0m
Model: CatBoost - Median: 0.0001868071, IQR: 0.0001513334
Model: LightGBM - Median: 0.0010955645, IQR: 0.0008238171
Model: Random Forest - Median: 0.0002661055, IQR: 0.0003029591
Model: GBM - Median: 0.0001413574, IQR: 0.0001851461
Model: XGBoost - Median: 0.0002061815, IQR: 0.0003660538


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAD[0m
Model: CatBoost - Median: 0.0001868071, IQR: 0.0001513334
Model: LightGBM - Median: 0.0010955645, IQR: 0.0008238171
Model: Random Forest - Median: 0.0002661055, IQR: 0.0003029591
Model: GBM - Median: 0.0001413574, IQR: 0.0001851461
Model: XGBoost - Median: 0.0002061815, IQR: 0.0003660538

[1mR_SQ[0m
Model: CatBoost - Median: 0.9936193714, IQR: 0.0143197460
Model: LightGBM - Median: 0.7920188228, IQR: 0.2949539340
Model: Random Forest - Median: 0.9826328893, IQR: 0.0326855781
Model: GBM - Median: 0.9948451580, IQR: 0.0205953085
Model: XGBoost - Median: 0.9904685267, IQR: 0.0539116290

[1mHorizon: tau_9[0m

[1mRMSE[0m


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


Model: CatBoost - Median: 0.0002618857, IQR: 0.0003856190
Model: LightGBM - Median: 0.0013247313, IQR: 0.0011189887
Model: Random Forest - Median: 0.0004439445, IQR: 0.0005323537
Model: GBM - Median: 0.0002471105, IQR: 0.0005714959
Model: XGBoost - Median: 0.0003189286, IQR: 0.0007319115

[1mMAE[0m
Model: CatBoost - Median: 0.0001990752, IQR: 0.0002129588
Model: LightGBM - Median: 0.0010834825, IQR: 0.0007910646
Model: Random Forest - Median: 0.0002790083, IQR: 0.0002826542
Model: GBM - Median: 0.0001624854, IQR: 0.0002166421
Model: XGBoost - Median: 0.0002673838, IQR: 0.0003602236


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAD[0m
Model: CatBoost - Median: 0.0001990752, IQR: 0.0002129588
Model: LightGBM - Median: 0.0010834825, IQR: 0.0007910646
Model: Random Forest - Median: 0.0002790083, IQR: 0.0002826542
Model: GBM - Median: 0.0001624854, IQR: 0.0002166421
Model: XGBoost - Median: 0.0002673838, IQR: 0.0003602236

[1mR_SQ[0m
Model: CatBoost - Median: 0.9925522171, IQR: 0.0274296787
Model: LightGBM - Median: 0.7425022323, IQR: 0.2922721169
Model: Random Forest - Median: 0.9765899184, IQR: 0.0459104716
Model: GBM - Median: 0.9923569560, IQR: 0.0414701574
Model: XGBoost - Median: 0.9833254223, IQR: 0.0678704161


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


Combined summary CSV saved: graph_pdc_gt/combined_metrics_summary.csv


In [8]:
## pdc get gt off


import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set a color palette for the boxplots

custom_colors = {
    'CatBoost': 'blue',
    'LightGBM': 'green',
    'Random Forest': 'orange',
    'SVM': 'purple',
    'XGBoost': 'pink'
}

# List of folder paths and their labels for different horizons
horizons = ['tau_1', 'tau_3', 'tau_6', 'tau_9']

# List of folder paths and their labels for different horizons
folders_template = [
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper_updated\\OoS_metrics_Catboost_gt_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'CatBoost'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper_updated\\OoS_metrics_LightGBM_gt_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'LightGBM'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper_updated\\OoS_metrics_rf_gt_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'Random Forest'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper_updated\\OoS_metrics_GBM_off_gt\\winsorized\\OoS_metrics_{}_winsorized.csv', 'GBM'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper_updated\\OoS_metrics_xgboost_gt_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'XGBoost')
]

# Create a folder to save the graphs and combined CSV
output_folder = 'graph_pdc_gt_off'
os.makedirs(output_folder, exist_ok=True)

# List of metrics to plot
metrics = ['rmse', 'mae', 'mad', 'r_sq']

# Mapping for nice Y-axis labels
metric_labels = {
    'rmse': 'RMSE',
    'mae': 'MAE',
    'mad': 'MAD',
    'r_sq': 'R²'
}

# Initialize a DataFrame to store all combined metrics
combined_summary_df = pd.DataFrame()

# Iterate over each horizon and plot the metrics
for horizon in horizons:
    print(f"\n\033[1mHorizon: {horizon}\033[0m")
    for metric in metrics:
        plt.figure(figsize=(14, 8))  # Updated size for better visibility
        data_list = []
        model_labels = []
        metric_data_summary = []

        print(f"\n\033[1m{metric.upper()}\033[0m")
        for folder_template, model_name in folders_template:
            file_path = folder_template.format(horizon)
            
            # Load the CSV file
            try:
                data = pd.read_csv(file_path)
                data = data.dropna(subset=['ssample_end_date'])
                # Collect data for the current metric
                data_list.append(data[metric])
                model_labels.append(model_name)

                # Calculate and print raw median and IQR
                median = data[metric].median()
                iqr = data[metric].quantile(0.75) - data[metric].quantile(0.25)
                print(f"Model: {model_name} - Median: {median:.10f}, IQR: {iqr:.10f}")
                
                # Append to summary list
                metric_data_summary.append({
                    'Horizon': horizon,
                    'Metric': metric,
                    'Model': model_name,
                    'Median': median,
                    'IQR': iqr
                })

            except FileNotFoundError:
                print(f"File not found: {file_path}")
                continue

        # Plot all models' data for the current metric with custom colors
        box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
        for patch, model_name in zip(box['boxes'], model_labels):
            patch.set_facecolor(custom_colors.get(model_name, 'grey'))

        # Customize Y-axis for R-squared to show negative scale if applicable
        if metric == 'r_sq':
            plt.ylim(-1, 1)
            plt.axhline(0, color='red', linestyle='--', linewidth=0.5)

        # Enhance visibility of axis labels
        plt.xlabel('Model', fontsize=14)
        plt.ylabel(metric_labels.get(metric, metric), fontsize=14)
        plt.xticks(fontsize=12)
        plt.yticks(fontsize=12)
        
        # Save the figure without title
        file_name = f'{output_folder}/{metric}_{horizon}.png'
        plt.grid(True, linestyle='--', alpha=0.5)
        plt.savefig(file_name, bbox_inches='tight')
        plt.close()

        # Append the metric data to the combined summary DataFrame
        combined_summary_df = pd.concat([combined_summary_df, pd.DataFrame(metric_data_summary)], ignore_index=True)

# Save the combined metrics data as a single CSV
combined_csv_file_name = f'{output_folder}/combined_metrics_summary.csv'
combined_summary_df.to_csv(combined_csv_file_name, index=False)
print(f"Combined summary CSV saved: {combined_csv_file_name}")


[1mHorizon: tau_1[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0001330241, IQR: 0.0002645719
Model: LightGBM - Median: 0.0006633648, IQR: 0.0014068901
Model: Random Forest - Median: 0.0001615137, IQR: 0.0002557363
Model: GBM - Median: 0.0000752996, IQR: 0.0001498024
Model: XGBoost - Median: 0.0001116698, IQR: 0.0002529876

[1mMAE[0m
Model: CatBoost - Median: 0.0001330241, IQR: 0.0002645719
Model: LightGBM - Median: 0.0006633648, IQR: 0.0014068901
Model: Random Forest - Median: 0.0001615137, IQR: 0.0002557363
Model: GBM - Median: 0.0000752996, IQR: 0.0001498024
Model: XGBoost - Median: 0.0001116698, IQR: 0.0002529876


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAD[0m
Model: CatBoost - Median: 0.0001330241, IQR: 0.0002645719
Model: LightGBM - Median: 0.0006633648, IQR: 0.0014068901
Model: Random Forest - Median: 0.0001615137, IQR: 0.0002557363
Model: GBM - Median: 0.0000752996, IQR: 0.0001498024
Model: XGBoost - Median: 0.0001116698, IQR: 0.0002529876

[1mR_SQ[0m
Model: CatBoost - Median: 0.9922323609, IQR: 0.0338228197
Model: LightGBM - Median: 0.8387649017, IQR: 0.4295075322
Model: Random Forest - Median: 0.9924215808, IQR: 0.0278407936
Model: GBM - Median: 0.9979041446, IQR: 0.0161543457
Model: XGBoost - Median: 0.9943257565, IQR: 0.0249498009


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mHorizon: tau_3[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0002464207, IQR: 0.0003566170
Model: LightGBM - Median: 0.0011876968, IQR: 0.0015708448
Model: Random Forest - Median: 0.0002071082, IQR: 0.0003948615
Model: GBM - Median: 0.0001332484, IQR: 0.0002198974
Model: XGBoost - Median: 0.0002296427, IQR: 0.0005968399

[1mMAE[0m
Model: CatBoost - Median: 0.0002107423, IQR: 0.0002727228
Model: LightGBM - Median: 0.0010174358, IQR: 0.0014610627
Model: Random Forest - Median: 0.0001703297, IQR: 0.0002631401
Model: GBM - Median: 0.0001085748, IQR: 0.0001693528
Model: XGBoost - Median: 0.0001933940, IQR: 0.0005433208


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAD[0m
Model: CatBoost - Median: 0.0002107423, IQR: 0.0002727228
Model: LightGBM - Median: 0.0010174358, IQR: 0.0014610627
Model: Random Forest - Median: 0.0001703297, IQR: 0.0002631401
Model: GBM - Median: 0.0001085748, IQR: 0.0001693528
Model: XGBoost - Median: 0.0001933940, IQR: 0.0005433208

[1mR_SQ[0m
Model: CatBoost - Median: 0.9915222987, IQR: 0.0183094161
Model: LightGBM - Median: 0.8207166749, IQR: 0.3040865037
Model: Random Forest - Median: 0.9919602389, IQR: 0.0185119053
Model: GBM - Median: 0.9971125664, IQR: 0.0093236017
Model: XGBoost - Median: 0.9905506378, IQR: 0.0431553615

[1mHorizon: tau_6[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0003302499, IQR: 0.0003564509
Model: LightGBM - Median: 0.0011834668, IQR: 0.0011655770


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


Model: Random Forest - Median: 0.0003139269, IQR: 0.0004389060
Model: GBM - Median: 0.0001652941, IQR: 0.0003674319
Model: XGBoost - Median: 0.0002556783, IQR: 0.0007835758

[1mMAE[0m
Model: CatBoost - Median: 0.0002585327, IQR: 0.0002572837
Model: LightGBM - Median: 0.0010264705, IQR: 0.0008949370
Model: Random Forest - Median: 0.0002522868, IQR: 0.0002299371
Model: GBM - Median: 0.0001226164, IQR: 0.0001576017
Model: XGBoost - Median: 0.0002159449, IQR: 0.0003538090

[1mMAD[0m


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


Model: CatBoost - Median: 0.0002585327, IQR: 0.0002572837
Model: LightGBM - Median: 0.0010264705, IQR: 0.0008949370
Model: Random Forest - Median: 0.0002522868, IQR: 0.0002299371
Model: GBM - Median: 0.0001226164, IQR: 0.0001576017
Model: XGBoost - Median: 0.0002159449, IQR: 0.0003538090

[1mR_SQ[0m
Model: CatBoost - Median: 0.9847594920, IQR: 0.0236470639
Model: LightGBM - Median: 0.8036488070, IQR: 0.2590038209
Model: Random Forest - Median: 0.9869206955, IQR: 0.0254725875
Model: GBM - Median: 0.9965981121, IQR: 0.0163149240
Model: XGBoost - Median: 0.9898091199, IQR: 0.0536349543


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mHorizon: tau_9[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0004254394, IQR: 0.0004193544
Model: LightGBM - Median: 0.0012589013, IQR: 0.0012391488
Model: Random Forest - Median: 0.0004130207, IQR: 0.0004369404
Model: GBM - Median: 0.0003580344, IQR: 0.0004458225
Model: XGBoost - Median: 0.0003470628, IQR: 0.0007319115

[1mMAE[0m
Model: CatBoost - Median: 0.0002975885, IQR: 0.0003017501
Model: LightGBM - Median: 0.0009683433, IQR: 0.0007884604
Model: Random Forest - Median: 0.0002741140, IQR: 0.0002545458
Model: GBM - Median: 0.0001719580, IQR: 0.0002332915
Model: XGBoost - Median: 0.0002823804, IQR: 0.0003591419


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAD[0m
Model: CatBoost - Median: 0.0002975885, IQR: 0.0003017501
Model: LightGBM - Median: 0.0009683433, IQR: 0.0007884604
Model: Random Forest - Median: 0.0002741140, IQR: 0.0002545458
Model: GBM - Median: 0.0001719580, IQR: 0.0002332915
Model: XGBoost - Median: 0.0002823804, IQR: 0.0003591419

[1mR_SQ[0m
Model: CatBoost - Median: 0.9796429489, IQR: 0.0375533892
Model: LightGBM - Median: 0.7835192876, IQR: 0.2382786621
Model: Random Forest - Median: 0.9794630423, IQR: 0.0439579232
Model: GBM - Median: 0.9895149859, IQR: 0.0300460968
Model: XGBoost - Median: 0.9822993221, IQR: 0.0658664890
Combined summary CSV saved: graph_pdc_gt_off/combined_metrics_summary.csv


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


In [49]:
## h2o off



import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set a color palette for the boxplots

custom_colors = {
    'CatBoost': 'blue',
    'LightGBM': 'green',
    'Random Forest': 'orange',
    'SVM': 'purple',
    'XGBoost': 'pink'
}

# List of folder paths and their labels for different horizons
horizons = ['tau_1', 'tau_3', 'tau_6', 'tau_9']

folders_template = [
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper\\OoS_metrics_h2o_Catboost_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'CatBoost'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper\\OoS_metrics_h2o_LightGBM_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'LightGBM'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper\\OoS_metrics_h2o_RF_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'Random Forest'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper\\OoS_metrics_h2o_xgboost_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'XGBoost'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper\\OoS_metrics_SVM_h2o_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'SVM')
]

# Create a folder to save the graphs and combined CSV
output_folder = 'graph_h2o_off'
os.makedirs(output_folder, exist_ok=True)

# List of metrics to plot
metrics = ['rmse', 'mae', 'mad', 'r_sq']

# Mapping for nice Y-axis labels
metric_labels = {
    'rmse': 'RMSE',
    'mae': 'MAE',
    'mad': 'MAD',
    'r_sq': 'R²'
}

# Initialize a DataFrame to store all combined metrics
combined_summary_df = pd.DataFrame()

# Iterate over each horizon and plot the metrics
for horizon in horizons:
    print(f"\n\033[1mHorizon: {horizon}\033[0m")
    for metric in metrics:
        plt.figure(figsize=(14, 8))  # Updated size for better visibility
        data_list = []
        model_labels = []
        metric_data_summary = []

        print(f"\n\033[1m{metric.upper()}\033[0m")
        for folder_template, model_name in folders_template:
            file_path = folder_template.format(horizon)
            
            # Load the CSV file
            try:
                data = pd.read_csv(file_path)
                data = data.dropna(subset=['ssample_end_date'])
                # Collect data for the current metric
                data_list.append(data[metric])
                model_labels.append(model_name)

                # Calculate and print raw median and IQR
                median = data[metric].median()
                iqr = data[metric].quantile(0.75) - data[metric].quantile(0.25)
                print(f"Model: {model_name} - Median: {median:.10f}, IQR: {iqr:.10f}")
                
                # Append to summary list
                metric_data_summary.append({
                    'Horizon': horizon,
                    'Metric': metric,
                    'Model': model_name,
                    'Median': median,
                    'IQR': iqr
                })

            except FileNotFoundError:
                print(f"File not found: {file_path}")
                continue

        # Plot all models' data for the current metric with custom colors
        box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
        for patch, model_name in zip(box['boxes'], model_labels):
            patch.set_facecolor(custom_colors.get(model_name, 'grey'))

        # Customize Y-axis for R-squared to show negative scale if applicable
        if metric == 'r_sq':
            plt.ylim(-1, 1)
            plt.axhline(0, color='red', linestyle='--', linewidth=0.5)

        # Enhance visibility of axis labels
        plt.xlabel('Model', fontsize=14)
        plt.ylabel(metric_labels.get(metric, metric), fontsize=14)
        plt.xticks(fontsize=12)
        plt.yticks(fontsize=12)
        
        # Save the figure without title
        file_name = f'{output_folder}/{metric}_{horizon}.png'
        plt.grid(True, linestyle='--', alpha=0.5)
        plt.savefig(file_name, bbox_inches='tight')
        plt.close()

        # Append the metric data to the combined summary DataFrame
        combined_summary_df = pd.concat([combined_summary_df, pd.DataFrame(metric_data_summary)], ignore_index=True)

# Save the combined metrics data as a single CSV
combined_csv_file_name = f'{output_folder}/combined_metrics_summary.csv'
combined_summary_df.to_csv(combined_csv_file_name, index=False)
print(f"Combined summary CSV saved: {combined_csv_file_name}")


[1mHorizon: tau_1[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0002163963, IQR: 0.0006530687
Model: LightGBM - Median: 0.0017047845, IQR: 0.0025111478
Model: Random Forest - Median: 0.0001119224, IQR: 0.0002378336
Model: XGBoost - Median: 0.0017047845, IQR: 0.0025111479
Model: SVM - Median: 0.0017796115, IQR: 0.0024152585

[1mMAE[0m
Model: CatBoost - Median: 0.0002163963, IQR: 0.0006530687
Model: LightGBM - Median: 0.0017047845, IQR: 0.0025111478
Model: Random Forest - Median: 0.0001119224, IQR: 0.0002378336
Model: XGBoost - Median: 0.0017047845, IQR: 0.0025111479
Model: SVM - Median: 0.0017796115, IQR: 0.0024152585


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAD[0m
Model: CatBoost - Median: 0.0002163963, IQR: 0.0006530687
Model: LightGBM - Median: 0.0017047845, IQR: 0.0025111478
Model: Random Forest - Median: 0.0001119224, IQR: 0.0002378336
Model: XGBoost - Median: 0.0017047845, IQR: 0.0025111479
Model: SVM - Median: 0.0017796115, IQR: 0.0024152585

[1mR_SQ[0m
Model: CatBoost - Median: 0.9550925033, IQR: 0.2199308071
Model: LightGBM - Median: -0.0000000005, IQR: 0.0000000174
Model: Random Forest - Median: 0.9864417634, IQR: 0.0907996793
Model: XGBoost - Median: -0.0000000023, IQR: 0.0000000513
Model: SVM - Median: -0.0998770109, IQR: 1.0940517382


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mHorizon: tau_3[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0004805973, IQR: 0.0006801843
Model: LightGBM - Median: 0.0023854827, IQR: 0.0015009848
Model: Random Forest - Median: 0.0001887328, IQR: 0.0005473992
Model: XGBoost - Median: 0.0023854827, IQR: 0.0015009848
Model: SVM - Median: 0.0022257952, IQR: 0.0018389612

[1mMAE[0m
Model: CatBoost - Median: 0.0003873154, IQR: 0.0004801238
Model: LightGBM - Median: 0.0018244846, IQR: 0.0014738961
Model: Random Forest - Median: 0.0001531546, IQR: 0.0003384108
Model: XGBoost - Median: 0.0018244846, IQR: 0.0014738961
Model: SVM - Median: 0.0019755002, IQR: 0.0016475403


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAD[0m
Model: CatBoost - Median: 0.0003873154, IQR: 0.0004801238
Model: LightGBM - Median: 0.0018244846, IQR: 0.0014738961
Model: Random Forest - Median: 0.0001531546, IQR: 0.0003384108
Model: XGBoost - Median: 0.0018244846, IQR: 0.0014738961
Model: SVM - Median: 0.0019755002, IQR: 0.0016475403

[1mR_SQ[0m
Model: CatBoost - Median: 0.9352122001, IQR: 0.1236330185
Model: LightGBM - Median: -0.0000000001, IQR: 0.0000000038
Model: Random Forest - Median: 0.9836757551, IQR: 0.0573714000
Model: XGBoost - Median: -0.0000000003, IQR: 0.0000000119
Model: SVM - Median: -0.0402155711, IQR: 0.3485541965


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mHorizon: tau_6[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0004527641, IQR: 0.0005610386
Model: LightGBM - Median: 0.0022363838, IQR: 0.0009901457
Model: Random Forest - Median: 0.0003554693, IQR: 0.0005007576
Model: XGBoost - Median: 0.0022363838, IQR: 0.0009901456
Model: SVM - Median: 0.0022835047, IQR: 0.0011994649

[1mMAE[0m
Model: CatBoost - Median: 0.0003169480, IQR: 0.0003536689
Model: LightGBM - Median: 0.0017961029, IQR: 0.0010262858
Model: Random Forest - Median: 0.0002491761, IQR: 0.0002782262
Model: XGBoost - Median: 0.0017961029, IQR: 0.0010262858
Model: SVM - Median: 0.0018603430, IQR: 0.0011582328


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAD[0m
Model: CatBoost - Median: 0.0003169480, IQR: 0.0003536689
Model: LightGBM - Median: 0.0017961029, IQR: 0.0010262858
Model: Random Forest - Median: 0.0002491761, IQR: 0.0002782262
Model: XGBoost - Median: 0.0017961029, IQR: 0.0010262858
Model: SVM - Median: 0.0018603430, IQR: 0.0011582328

[1mR_SQ[0m
Model: CatBoost - Median: 0.9565789117, IQR: 0.1211928010
Model: LightGBM - Median: -0.0000000003, IQR: 0.0000000033
Model: Random Forest - Median: 0.9706890885, IQR: 0.0711394455
Model: XGBoost - Median: -0.0000000004, IQR: 0.0000000080
Model: SVM - Median: -0.0122632975, IQR: 0.2294317979


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mHorizon: tau_9[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0005711333, IQR: 0.0006793061
Model: LightGBM - Median: 0.0021479063, IQR: 0.0009608083
Model: Random Forest - Median: 0.0003000807, IQR: 0.0004501699
Model: XGBoost - Median: 0.0021479063, IQR: 0.0009608083
Model: SVM - Median: 0.0021582265, IQR: 0.0010754770

[1mMAE[0m
Model: CatBoost - Median: 0.0003984791, IQR: 0.0003476986
Model: LightGBM - Median: 0.0017947353, IQR: 0.0007964495
Model: Random Forest - Median: 0.0002343359, IQR: 0.0002265562
Model: XGBoost - Median: 0.0017947353, IQR: 0.0007964495
Model: SVM - Median: 0.0017929465, IQR: 0.0007819703


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAD[0m
Model: CatBoost - Median: 0.0003984791, IQR: 0.0003476986
Model: LightGBM - Median: 0.0017947353, IQR: 0.0007964495
Model: Random Forest - Median: 0.0002343359, IQR: 0.0002265562
Model: XGBoost - Median: 0.0017947353, IQR: 0.0007964495
Model: SVM - Median: 0.0017929465, IQR: 0.0007819703

[1mR_SQ[0m
Model: CatBoost - Median: 0.9278229622, IQR: 0.0946278670
Model: LightGBM - Median: -0.0000000004, IQR: 0.0000000032
Model: Random Forest - Median: 0.9704085916, IQR: 0.0594904452
Model: XGBoost - Median: -0.0000000002, IQR: 0.0000000069
Model: SVM - Median: -0.0250489948, IQR: 0.1609963833


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


Combined summary CSV saved: graph_h2o_off/combined_metrics_summary.csv


In [50]:
## h2o gt

import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set a color palette for the boxplots
custom_colors = {
    'CatBoost': 'blue',
    'LightGBM': 'green',
    'Random Forest': 'orange',
    'SVM': 'purple',
    'XGBoost': 'pink'
}

folders_template = [
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper\\OoS_metrics_h2o_Catboost_gt\\winsorized\\OoS_metrics_{}_winsorized.csv', 'CatBoost'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper\\OoS_metrics_h2o_LightGBM_gt\\winsorized\\OoS_metrics_{}_winsorized.csv', 'LightGBM'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper\\OoS_metrics_h2o_RF_gt\\winsorized\\OoS_metrics_{}_winsorized.csv', 'Random Forest'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper\\OoS_metrics_h2o_xgboost_gt\\winsorized\\OoS_metrics_{}_winsorized.csv', 'XGBoost'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper\\OoS_metrics_SVM_h2o_gt\\winsorized\\OoS_metrics_{}_winsorized.csv', 'SVM')
]

# List of folder paths and their labels for different horizons
horizons = ['tau_1', 'tau_3', 'tau_6', 'tau_9']

output_folder = 'graph_h2o_gt'
os.makedirs(output_folder, exist_ok=True)

# List of metrics to plot
metrics = ['rmse', 'mae', 'mad', 'r_sq']

# Mapping for nice Y-axis labels
metric_labels = {
    'rmse': 'RMSE',
    'mae': 'MAE',
    'mad': 'MAD',
    'r_sq': 'R²'
}

# Initialize a DataFrame to store all combined metrics
combined_summary_df = pd.DataFrame()

# Iterate over each horizon and plot the metrics
for horizon in horizons:
    print(f"\n\033[1mHorizon: {horizon}\033[0m")
    for metric in metrics:
        plt.figure(figsize=(14, 8))  # Updated size for better visibility
        data_list = []
        model_labels = []
        metric_data_summary = []

        print(f"\n\033[1m{metric.upper()}\033[0m")
        for folder_template, model_name in folders_template:
            file_path = folder_template.format(horizon)
            
            # Load the CSV file
            try:
                data = pd.read_csv(file_path)
                data = data.dropna(subset=['ssample_end_date'])
                # Collect data for the current metric
                data_list.append(data[metric])
                model_labels.append(model_name)

                # Calculate and print raw median and IQR
                median = data[metric].median()
                iqr = data[metric].quantile(0.75) - data[metric].quantile(0.25)
                print(f"Model: {model_name} - Median: {median:.10f}, IQR: {iqr:.10f}")
                
                # Append to summary list
                metric_data_summary.append({
                    'Horizon': horizon,
                    'Metric': metric,
                    'Model': model_name,
                    'Median': median,
                    'IQR': iqr
                })

            except FileNotFoundError:
                print(f"File not found: {file_path}")
                continue

        # Plot all models' data for the current metric with custom colors
        box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
        for patch, model_name in zip(box['boxes'], model_labels):
            patch.set_facecolor(custom_colors.get(model_name, 'grey'))

        # Customize Y-axis for R-squared to show negative scale if applicable
        if metric == 'r_sq':
            plt.ylim(-1, 1)
            plt.axhline(0, color='red', linestyle='--', linewidth=0.5)

        # Enhance visibility of axis labels
        plt.xlabel('Model', fontsize=14)
        plt.ylabel(metric_labels.get(metric, metric), fontsize=14)
        plt.xticks(fontsize=12)
        plt.yticks(fontsize=12)
        
        # Save the figure without title
        file_name = f'{output_folder}/{metric}_{horizon}.png'
        plt.grid(True, linestyle='--', alpha=0.5)
        plt.savefig(file_name, bbox_inches='tight')
        plt.close()

        # Append the metric data to the combined summary DataFrame
        combined_summary_df = pd.concat([combined_summary_df, pd.DataFrame(metric_data_summary)], ignore_index=True)

# Save the combined metrics data as a single CSV
combined_csv_file_name = f'{output_folder}/combined_metrics_summary.csv'
combined_summary_df.to_csv(combined_csv_file_name, index=False)
print(f"Combined summary CSV saved: {combined_csv_file_name}")


[1mHorizon: tau_1[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0122688671, IQR: 0.0145927643
Model: LightGBM - Median: 0.0135249305, IQR: 0.0266794615
Model: Random Forest - Median: 0.0048753382, IQR: 0.0060850849
Model: XGBoost - Median: 0.0101576352, IQR: 0.0152131020
Model: SVM - Median: 0.0038097226, IQR: 0.0057012649

[1mMAE[0m
Model: CatBoost - Median: 0.0122688671, IQR: 0.0145927643
Model: LightGBM - Median: 0.0135249305, IQR: 0.0266794615
Model: Random Forest - Median: 0.0048753382, IQR: 0.0060850849
Model: XGBoost - Median: 0.0101576352, IQR: 0.0152131020
Model: SVM - Median: 0.0038097226, IQR: 0.0057012649


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAD[0m
Model: CatBoost - Median: 0.0122688671, IQR: 0.0145927643
Model: LightGBM - Median: 0.0135249305, IQR: 0.0266794615
Model: Random Forest - Median: 0.0048753382, IQR: 0.0060850849
Model: XGBoost - Median: 0.0101576352, IQR: 0.0152131020
Model: SVM - Median: 0.0038097226, IQR: 0.0057012649

[1mR_SQ[0m
Model: CatBoost - Median: 0.9686662553, IQR: 0.1217938045
Model: LightGBM - Median: 0.9463695607, IQR: 0.2102384177
Model: Random Forest - Median: 0.9920781399, IQR: 0.0357289643
Model: XGBoost - Median: 0.9764248224, IQR: 0.2235009902
Model: SVM - Median: 0.9968524320, IQR: 0.0177518398


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mHorizon: tau_3[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0150585932, IQR: 0.0095831257
Model: LightGBM - Median: 0.0244726493, IQR: 0.0284131706
Model: Random Forest - Median: 0.0073650462, IQR: 0.0092439668
Model: XGBoost - Median: 0.0149177307, IQR: 0.0337069215
Model: SVM - Median: 0.0041595662, IQR: 0.0029259541

[1mMAE[0m
Model: CatBoost - Median: 0.0132521444, IQR: 0.0081268100
Model: LightGBM - Median: 0.0206267635, IQR: 0.0189848898
Model: Random Forest - Median: 0.0062864358, IQR: 0.0069371362
Model: XGBoost - Median: 0.0128052324, IQR: 0.0221857387
Model: SVM - Median: 0.0036505767, IQR: 0.0026188645


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAD[0m
Model: CatBoost - Median: 0.0132521444, IQR: 0.0081268100
Model: LightGBM - Median: 0.0206267635, IQR: 0.0189848898
Model: Random Forest - Median: 0.0062864358, IQR: 0.0069371362
Model: XGBoost - Median: 0.0128052324, IQR: 0.0221857387
Model: SVM - Median: 0.0036505767, IQR: 0.0026188645

[1mR_SQ[0m
Model: CatBoost - Median: 0.9602286989, IQR: 0.0589471867
Model: LightGBM - Median: 0.9169810540, IQR: 0.1970507330
Model: Random Forest - Median: 0.9893578380, IQR: 0.0301689406
Model: XGBoost - Median: 0.9472425685, IQR: 0.1659546486
Model: SVM - Median: 0.9973651439, IQR: 0.0056855218


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mHorizon: tau_6[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0212155948, IQR: 0.0124328693
Model: LightGBM - Median: 0.0312105923, IQR: 0.0231289445
Model: Random Forest - Median: 0.0103317328, IQR: 0.0107700633
Model: XGBoost - Median: 0.0250328977, IQR: 0.0382561411
Model: SVM - Median: 0.0050564652, IQR: 0.0034262737


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAE[0m
Model: CatBoost - Median: 0.0176546293, IQR: 0.0092588146
Model: LightGBM - Median: 0.0233351412, IQR: 0.0169897531
Model: Random Forest - Median: 0.0083868553, IQR: 0.0062820991
Model: XGBoost - Median: 0.0177355381, IQR: 0.0190511157
Model: SVM - Median: 0.0041484795, IQR: 0.0024189012

[1mMAD[0m
Model: CatBoost - Median: 0.0176546293, IQR: 0.0092588146
Model: LightGBM - Median: 0.0233351412, IQR: 0.0169897531
Model: Random Forest - Median: 0.0083868553, IQR: 0.0062820991
Model: XGBoost - Median: 0.0177355381, IQR: 0.0190511157
Model: SVM - Median: 0.0041484795, IQR: 0.0024189012

[1mR_SQ[0m
Model: CatBoost - Median: 0.9399380923, IQR: 0.0455725379
Model: LightGBM - Median: 0.8515688618, IQR: 0.1929166347
Model: Random Forest - Median: 0.9826300254, IQR: 0.0305104267
Model: XGBoost - Median: 0.9244719066, IQR: 0.1756423674
Model: SVM - Median: 0.9968150701, IQR: 0.0039003700


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mHorizon: tau_9[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0197797452, IQR: 0.0159178872
Model: LightGBM - Median: 0.0373225533, IQR: 0.0325345540
Model: Random Forest - Median: 0.0105483531, IQR: 0.0175478572
Model: XGBoost - Median: 0.0317045768, IQR: 0.0494648643
Model: SVM - Median: 0.0058665107, IQR: 0.0043822506


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAE[0m
Model: CatBoost - Median: 0.0165508546, IQR: 0.0107897478
Model: LightGBM - Median: 0.0261997072, IQR: 0.0193824354
Model: Random Forest - Median: 0.0083187166, IQR: 0.0092945226
Model: XGBoost - Median: 0.0215233437, IQR: 0.0273655388
Model: SVM - Median: 0.0046163047, IQR: 0.0029680366

[1mMAD[0m
Model: CatBoost - Median: 0.0165508546, IQR: 0.0107897478
Model: LightGBM - Median: 0.0261997072, IQR: 0.0193824354
Model: Random Forest - Median: 0.0083187166, IQR: 0.0092945226
Model: XGBoost - Median: 0.0215233437, IQR: 0.0273655388
Model: SVM - Median: 0.0046163047, IQR: 0.0029680366

[1mR_SQ[0m
Model: CatBoost - Median: 0.9545939870, IQR: 0.0774850209
Model: LightGBM - Median: 0.8455987751, IQR: 0.1839993864
Model: Random Forest - Median: 0.9821534416, IQR: 0.0303561013
Model: XGBoost - Median: 0.8704339925, IQR: 0.2638120735
Model: SVM - Median: 0.9965967972, IQR: 0.0041814899


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


Combined summary CSV saved: graph_h2o_gt/combined_metrics_summary.csv


In [51]:
## h2o gt off


import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set a color palette for the boxplots
custom_colors = {
    'CatBoost': 'blue',
    'LightGBM': 'green',
    'Random Forest': 'orange',
    'SVM': 'purple',
    'XGBoost': 'pink'
}

folders_template = [
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper\\OoS_metrics_h2o_Catboost_gt_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'CatBoost'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper\\OoS_metrics_h2o_LightGBM_gt_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'LightGBM'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper\\OoS_metrics_h2o_RF_gt_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'Random Forest'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper\\OoS_metrics_h2o_xgboost_gt_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'XGBoost'),
    ('C:\\Users\\shafiullah.qureshi\\OneDrive - Government of Alberta\\Documents\\nowcast_gdp\\gdp_paper\\OoS_metrics_SVM_h2o_gt_off\\winsorized\\OoS_metrics_{}_winsorized.csv', 'SVM')
]

# List of folder paths and their labels for different horizons
horizons = ['tau_1', 'tau_3', 'tau_6', 'tau_9']

output_folder = 'graph_h2o_gt_off'
os.makedirs(output_folder, exist_ok=True)

# List of metrics to plot
metrics = ['rmse', 'mae', 'mad', 'r_sq']

# Mapping for nice Y-axis labels
metric_labels = {
    'rmse': 'RMSE',
    'mae': 'MAE',
    'mad': 'MAD',
    'r_sq': 'R²'
}

# Initialize a DataFrame to store all combined metrics
combined_summary_df = pd.DataFrame()

# Iterate over each horizon and plot the metrics
for horizon in horizons:
    print(f"\n\033[1mHorizon: {horizon}\033[0m")
    for metric in metrics:
        plt.figure(figsize=(14, 8))  # Updated size for better visibility
        data_list = []
        model_labels = []
        metric_data_summary = []

        print(f"\n\033[1m{metric.upper()}\033[0m")
        for folder_template, model_name in folders_template:
            file_path = folder_template.format(horizon)
            
            # Load the CSV file
            try:
                data = pd.read_csv(file_path)
                data = data.dropna(subset=['ssample_end_date'])
                # Collect data for the current metric
                data_list.append(data[metric])
                model_labels.append(model_name)

                # Calculate and print raw median and IQR
                median = data[metric].median()
                iqr = data[metric].quantile(0.75) - data[metric].quantile(0.25)
                print(f"Model: {model_name} - Median: {median:.10f}, IQR: {iqr:.10f}")
                
                # Append to summary list
                metric_data_summary.append({
                    'Horizon': horizon,
                    'Metric': metric,
                    'Model': model_name,
                    'Median': median,
                    'IQR': iqr
                })

            except FileNotFoundError:
                print(f"File not found: {file_path}")
                continue

        # Plot all models' data for the current metric with custom colors
        box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
        for patch, model_name in zip(box['boxes'], model_labels):
            patch.set_facecolor(custom_colors.get(model_name, 'grey'))

        # Customize Y-axis for R-squared to show negative scale if applicable
        if metric == 'r_sq':
            plt.ylim(-1, 1)
            plt.axhline(0, color='red', linestyle='--', linewidth=0.5)

        # Enhance visibility of axis labels
        plt.xlabel('Model', fontsize=14)
        plt.ylabel(metric_labels.get(metric, metric), fontsize=14)
        plt.xticks(fontsize=12)
        plt.yticks(fontsize=12)
        
        # Save the figure without title
        file_name = f'{output_folder}/{metric}_{horizon}.png'
        plt.grid(True, linestyle='--', alpha=0.5)
        plt.savefig(file_name, bbox_inches='tight')
        plt.close()

        # Append the metric data to the combined summary DataFrame
        combined_summary_df = pd.concat([combined_summary_df, pd.DataFrame(metric_data_summary)], ignore_index=True)

# Save the combined metrics data as a single CSV
combined_csv_file_name = f'{output_folder}/combined_metrics_summary.csv'
combined_summary_df.to_csv(combined_csv_file_name, index=False)
print(f"Combined summary CSV saved: {combined_csv_file_name}")



[1mHorizon: tau_1[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0088925497, IQR: 0.0113498252
Model: LightGBM - Median: 0.0060521865, IQR: 0.0225504168
Model: Random Forest - Median: 0.0031501516, IQR: 0.0055914786
Model: XGBoost - Median: 0.0144043167, IQR: 0.0434433995
Model: SVM - Median: 0.0033706017, IQR: 0.0042222050


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAE[0m
Model: CatBoost - Median: 0.0088925497, IQR: 0.0113498252
Model: LightGBM - Median: 0.0060521865, IQR: 0.0225504168
Model: Random Forest - Median: 0.0031501516, IQR: 0.0055914786
Model: XGBoost - Median: 0.0144043167, IQR: 0.0434433995
Model: SVM - Median: 0.0033706017, IQR: 0.0042222050

[1mMAD[0m
Model: CatBoost - Median: 0.0088925497, IQR: 0.0113498252
Model: LightGBM - Median: 0.0060521865, IQR: 0.0225504168
Model: Random Forest - Median: 0.0031501516, IQR: 0.0055914786
Model: XGBoost - Median: 0.0144043167, IQR: 0.0434433995
Model: SVM - Median: 0.0033706017, IQR: 0.0042222050


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mR_SQ[0m
Model: CatBoost - Median: 0.9611859646, IQR: 0.1200672870
Model: LightGBM - Median: 0.9675393982, IQR: 0.1780547964
Model: Random Forest - Median: 0.9938414508, IQR: 0.0509772632
Model: XGBoost - Median: 0.8764364740, IQR: 0.3425277906
Model: SVM - Median: 0.9955108210, IQR: 0.0087297856


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mHorizon: tau_3[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0122588129, IQR: 0.0110253560
Model: LightGBM - Median: 0.0160137081, IQR: 0.0197887390
Model: Random Forest - Median: 0.0058642542, IQR: 0.0070846438
Model: XGBoost - Median: 0.0311160851, IQR: 0.0373863491
Model: SVM - Median: 0.0040285790, IQR: 0.0027916600


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAE[0m
Model: CatBoost - Median: 0.0106864513, IQR: 0.0096203101
Model: LightGBM - Median: 0.0126031483, IQR: 0.0153227219
Model: Random Forest - Median: 0.0051978502, IQR: 0.0065162192
Model: XGBoost - Median: 0.0228463437, IQR: 0.0316546662
Model: SVM - Median: 0.0036674540, IQR: 0.0023779908

[1mMAD[0m
Model: CatBoost - Median: 0.0106864513, IQR: 0.0096203101
Model: LightGBM - Median: 0.0126031483, IQR: 0.0153227219


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


Model: Random Forest - Median: 0.0051978502, IQR: 0.0065162192
Model: XGBoost - Median: 0.0228463437, IQR: 0.0316546662
Model: SVM - Median: 0.0036674540, IQR: 0.0023779908

[1mR_SQ[0m
Model: CatBoost - Median: 0.9545465657, IQR: 0.0712364915
Model: LightGBM - Median: 0.9180141839, IQR: 0.1670884197


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


Model: Random Forest - Median: 0.9902717504, IQR: 0.0425845707
Model: XGBoost - Median: 0.7422597210, IQR: 0.3145597780
Model: SVM - Median: 0.9946831467, IQR: 0.0073532113

[1mHorizon: tau_6[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0157782945, IQR: 0.0217249108


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


Model: LightGBM - Median: 0.0202675201, IQR: 0.0517946731
Model: Random Forest - Median: 0.0058685419, IQR: 0.0127334475
Model: XGBoost - Median: 0.0395430785, IQR: 0.0583704097
Model: SVM - Median: 0.0053836459, IQR: 0.0050920318


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAE[0m
Model: CatBoost - Median: 0.0127531578, IQR: 0.0148498816
Model: LightGBM - Median: 0.0144715257, IQR: 0.0252776512
Model: Random Forest - Median: 0.0050573462, IQR: 0.0060160338
Model: XGBoost - Median: 0.0292567887, IQR: 0.0381900182
Model: SVM - Median: 0.0045695699, IQR: 0.0040338104

[1mMAD[0m
Model: CatBoost - Median: 0.0127531578, IQR: 0.0148498816
Model: LightGBM - Median: 0.0144715257, IQR: 0.0252776512
Model: Random Forest - Median: 0.0050573462, IQR: 0.0060160338


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


Model: XGBoost - Median: 0.0292567887, IQR: 0.0381900182
Model: SVM - Median: 0.0045695699, IQR: 0.0040338104

[1mR_SQ[0m
Model: CatBoost - Median: 0.9424807094, IQR: 0.1273535369
Model: LightGBM - Median: 0.8897304376, IQR: 0.2479954534
Model: Random Forest - Median: 0.9901679668, IQR: 0.0489146458
Model: XGBoost - Median: 0.6541421526, IQR: 0.3521069510
Model: SVM - Median: 0.9948505918, IQR: 0.0072909984


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mHorizon: tau_9[0m

[1mRMSE[0m
Model: CatBoost - Median: 0.0130452181, IQR: 0.0168897641
Model: LightGBM - Median: 0.0238778986, IQR: 0.0470038673
Model: Random Forest - Median: 0.0072679865, IQR: 0.0322968604
Model: XGBoost - Median: 0.0448481004, IQR: 0.0550542641
Model: SVM - Median: 0.0045959877, IQR: 0.0043383152


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)



[1mMAE[0m
Model: CatBoost - Median: 0.0098088268, IQR: 0.0121500626
Model: LightGBM - Median: 0.0167602984, IQR: 0.0277331385
Model: Random Forest - Median: 0.0058122672, IQR: 0.0143564929
Model: XGBoost - Median: 0.0338730515, IQR: 0.0339033780
Model: SVM - Median: 0.0038875207, IQR: 0.0029231033

[1mMAD[0m
Model: CatBoost - Median: 0.0098088268, IQR: 0.0121500626
Model: LightGBM - Median: 0.0167602984, IQR: 0.0277331385
Model: Random Forest - Median: 0.0058122672, IQR: 0.0143564929
Model: XGBoost - Median: 0.0338730515, IQR: 0.0339033780
Model: SVM - Median: 0.0038875207, IQR: 0.0029231033

[1mR_SQ[0m
Model: CatBoost - Median: 0.9640526555, IQR: 0.0676756497
Model: LightGBM - Median: 0.8606386259, IQR: 0.2481256191
Model: Random Forest - Median: 0.9864670220, IQR: 0.1187697062
Model: XGBoost - Median: 0.4953649883, IQR: 0.3644357248


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)


Model: SVM - Median: 0.9948268075, IQR: 0.0058272198
Combined summary CSV saved: graph_h2o_gt_off/combined_metrics_summary.csv


  box = plt.boxplot(data_list, vert=True, patch_artist=True, labels=model_labels)
