In [1]:
import os
import glob
import pandas as pd

from src.visualization.charts import GlobalModelCharts, GlobalModelVariantsComparisonCharts
from src.config import Config

1. For comparison

In [2]:
def fill_group(group):
    basic_val = group.loc[group["Model"].str.lower() == "basic", "record_count"]
    if not basic_val.empty and pd.notnull(basic_val.iloc[0]):
        group["record_count"] = group["record_count"].fillna(basic_val.iloc[0])
    return group

def fill_record_count_by_country(df):
    df["record_count"] = pd.to_numeric(df["record_count"], errors='coerce')
    return df.groupby("country", group_keys=False).apply(fill_group)

def process_and_save(dfs, output_filename):
    df_combined = pd.concat(dfs, ignore_index=True)
    df_combined = fill_record_count_by_country(df_combined)
    
    sort_order = {
        "basic": 1,
        "arimax": 2,
        "arimax_pca": 3,
        "lightgbm": 4,
        "lstm": 5,
        "arimax_lstm": 6,
        "arimax_pca_lstm": 7,
        "lightgbm_lstm": 8,
        "lightgbm_pca_lstm": 9
    }
    df_combined["sort_order"] = df_combined["Model"].map(sort_order)
    df_combined["sort_order"] = df_combined["sort_order"].fillna(99)
    
    df_combined = df_combined.sort_values(by=["country", "sort_order"])
    df_combined = df_combined.drop(columns=["sort_order"])
    
    df_combined = df_combined[["country", "record_count", "MAE", "MAPE", "RMSE", "R2", "Model"]]
    df_combined.to_csv(output_filename, index=False)

    return df_combined

base_dir = "../../output/metrics"
dfs_co2 = []
dfs_co2_pc = []

for model_folder in os.listdir(base_dir):
    folder_path = os.path.join(base_dir, model_folder)
    if not os.path.isdir(folder_path):
        continue

    csv_files = glob.glob(os.path.join(folder_path, "*.csv"))
    
    for csv_file in csv_files:
        filename = os.path.basename(csv_file).lower()
        
        if "test" in filename and "per_capita" not in filename:
            metric_type = "co2"
        elif "test" in filename and "co2_per_capita" in filename:
            metric_type = "co2_per_capita"
        else:
            continue 

        model_name = ""
        folder_lower = model_folder.lower()
        if folder_lower == "arimax":
            model_name = "arimax" if "pca" not in filename else "arimax_pca"
        elif folder_lower == "hybrid":
            if "arimax" in filename:
                model_name = "arimax_lstm" if "pca" not in filename else "arimax_pca_lstm"
            elif "lightgbm" in filename:
                model_name = "lightgbm_lstm" if "pca" not in filename else "lightgbm_pca_lstm"
        elif folder_lower == "lightgbm":
            model_name = "lightgbm"
        elif folder_lower == "lstm":
            model_name = "lstm"
        elif folder_lower == "basic":
            model_name = "basic"
        else:
            model_name = folder_lower

        df = pd.read_csv(csv_file)
        df["Model"] = model_name
        
        if metric_type == "co2":
            dfs_co2.append(df)
        else:
            dfs_co2_pc.append(df)

basic_present_in_co2_pc = any(df["Model"].str.lower().eq("basic").any() for df in dfs_co2_pc)
if not basic_present_in_co2_pc:
    basic_dfs = [df for df in dfs_co2 if df["Model"].str.lower().eq("basic").any()]
    if basic_dfs:
        df_basic = pd.concat(basic_dfs, ignore_index=True)
        df_basic["variant"] = "co2_per_capita"
        dfs_co2_pc.append(df_basic)

df_co2 = process_and_save(dfs_co2, "../../output/metrics/combined_co2_test_metrics.csv")
df_co2_pc = process_and_save(dfs_co2_pc, "../../output/metrics/combined_co2_per_capita_test_metrics.csv")

df_co2['MAPE'] = df_co2['MAPE'].str.rstrip('%').astype(float) / 100.0
df_co2_pc['MAPE'] = df_co2_pc['MAPE'].str.rstrip('%').astype(float) / 100.0

metrics = ['MAE', 'RMSE', 'R2']
for col in metrics:
    df_co2[col] = df_co2[col].astype(str).str.replace(',', '').astype(float)
    df_co2_pc[col] = df_co2_pc[col].astype(str).str.replace(',', '').astype(float)

df_co2['variant'] = 'co2'
df_co2_pc['variant'] = 'co2_per_capita'

df_merged = pd.concat([df_co2, df_co2_pc], ignore_index=True)

df_countries = df_merged[df_merged['country'] != 'Overall']
df_overall = df_merged[df_merged['country'] == 'Overall']


  return df.groupby("country", group_keys=False).apply(fill_group)
  return df.groupby("country", group_keys=False).apply(fill_group)


2. co2

In [3]:
config = Config()

variant = 'co2'
combined_csv_path = os.path.join(config.predictions, f'combined_results_{variant}.csv')
combined_df = pd.read_csv(combined_csv_path)

output_dir = os.path.join(config.predictions, variant)
charts = GlobalModelCharts(combined_df, df_countries, output_dir, variant)
charts.generate_country_individual_charts()
charts.generate_country_combined_chart()
charts.generate_country_barplot_charts()
charts.generate_best_model_barplots()
charts.generate_best_model_barplots(exclude_basic=True)
charts.generate_heatmap_mape()
charts.generate_overall_barchart_charts()

3. co2_per_capita

In [4]:
variant = 'co2_per_capita'
combined_csv_path = os.path.join(config.predictions, f'combined_results_{variant}.csv')
combined_df = pd.read_csv(combined_csv_path)

output_dir = os.path.join(config.predictions, variant)
charts = GlobalModelCharts(combined_df, df_countries, output_dir, variant)
charts.generate_country_individual_charts()
charts.generate_country_combined_chart()
charts.generate_country_barplot_charts()
charts.generate_best_model_barplots()
charts.generate_best_model_barplots(exclude_basic=True)
charts.generate_heatmap_mape()
charts.generate_overall_barchart_charts()

4. co2 vs co2_per_capita

In [5]:
variants_charts = GlobalModelVariantsComparisonCharts(df_countries, config.predictions)
variants_charts.generate_boxplot_metric()