In [0]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np



In [0]:
# Forecast horizons shown on the x-axis (1, 2 and 3 months ahead)
horizons = [1, 2, 3]

# Every table below maps a model name to one score per horizon, in the same
# order as `horizons`. Model order is kept identical across tables so that
# plot colours and legend entries line up between panels.

# --- Populated dataset -------------------------------------------------------

# RMSE per model
rmse_data_populated = dict([
    ('Naive Forecasting', [627, 840, 985]),
    ('XGBoost', [1325, 1481, 1646]),
    ('Linear Regression', [1120.53, 1375.09, 1558.43]),
    ('Linear Regression Per Customer', [1681, 1863, 1936]),
])

# MAE per model
mae_data_populated = dict([
    ('Naive Forecasting', [79, 116, 149]),
    ('XGBoost', [178, 242, 307]),
    ('Linear Regression', [248, 333, 408]),
    ('Linear Regression Per Customer', [252, 288, 312]),
])

# MAPE per model
mape_data_populated = dict([
    ('Naive Forecasting', [39, 51, 58]),
    ('XGBoost', [157, 217, 294]),
    ('Linear Regression', [496, 681, 847]),
    ('Linear Regression Per Customer', [56, 62, 71]),
])

# R² per model
r2_data_populated = dict([
    ('Naive Forecasting', [0.92, 0.85, 0.80]),
    ('XGBoost', [0.76, 0.70, 0.65]),
    ('Linear Regression', [0.79, 0.69, 0.61]),
    ('Linear Regression Per Customer', [0.65, 0.61, 0.58]),
])

# --- Filtered dataset --------------------------------------------------------

# RMSE per model
rmse_data_filtered = dict([
    ('Naive Forecasting', [850, 1150, 1349]),
    ('XGBoost', [2474, 2724, 3015]),
    ('Linear Regression', [1603, 1973, 2222]),
    ('Linear Regression Per Customer', [1643, 1818, 1880]),
])

# MAE per model
mae_data_filtered = dict([
    ('Naive Forecasting', [112, 163, 205]),
    ('XGBoost', [261, 356, 443]),
    ('Linear Regression', [321, 456, 570]),
    ('Linear Regression Per Customer', [241, 270, 287]),
])

# MAPE per model
mape_data_filtered = dict([
    ('Naive Forecasting', [37, 48, 56]),
    ('XGBoost', [212, 312, 411]),
    ('Linear Regression', [489, 801, 1060]),
    ('Linear Regression Per Customer', [49, 55, 64]),
])

# R² per model
r2_data_filtered = dict([
    ('Naive Forecasting', [0.93, 0.88, 0.84]),
    ('XGBoost', [0.74, 0.68, 0.61]),
    ('Linear Regression', [0.82, 0.73, 0.66]),
    ('Linear Regression Per Customer', [0.57, 0.53, 0.51]),
])

# Line-chart comparison: one column per metric, one row per dataset
# (top row = populated dataset, bottom row = filtered dataset).
#
# Each entry is (metric label, y-limits, populated table, filtered table).
# The y-limits are fixed per metric so the two rows of a column can be
# compared directly by eye.
line_panels = [
    ('RMSE', (0, 3100), rmse_data_populated, rmse_data_filtered),
    ('MAE', (0, 600), mae_data_populated, mae_data_filtered),
    ('MAPE', (0, 1100), mape_data_populated, mape_data_filtered),
    ('R²', (0, 1.0), r2_data_populated, r2_data_filtered),
]

# Create subplots (2 rows, 4 columns)
fig, axs = plt.subplots(2, 4, figsize=(20, 10), sharex=True)

for col, (metric, y_limits, populated, filtered) in enumerate(line_panels):
    rows = [('Populated Dataset', populated), ('Filtered Dataset', filtered)]
    for row, (dataset_name, table) in enumerate(rows):
        ax = axs[row, col]
        # One line per model; the label feeds the shared figure legend below.
        for model, scores in table.items():
            ax.plot(horizons, scores, marker='o', label=model)
        ax.set_title('{} - {}'.format(metric, dataset_name))
        ax.set_ylabel(metric)
        ax.grid(True)
        ax.set_ylim(*y_limits)

# The x-axis is shared, so only the bottom row needs a label and ticks
for ax in axs[1, :]:
    ax.set_xlabel('Forecast Horizon (months)')
    ax.set_xticks(horizons)

# Add a single legend in the upper right corner of the figure. Every panel
# plots the same models in the same order, so the first panel's handles
# describe all of them.
handles, labels = axs[0, 0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper right', bbox_to_anchor=(1, 0.95), title='Models')

# Adjust layout: keep the right 15% free for the legend and the top 5% for
# the figure title
fig.tight_layout(rect=[0, 0.03, 0.85, 0.95])
fig.suptitle('Model Comparison Across Different Metrics and Datasets', fontsize=16)

# Show the plot
plt.show()


In [0]:
# Bar-chart settings.
#
# `horizons` and the eight metric tables (`rmse_data_populated`,
# `mae_data_populated`, `mape_data_populated`, `r2_data_populated` and their
# `*_filtered` counterparts) are intentionally not redefined in this cell.
# The copies that used to be here were value-for-value identical to the ones
# defined in the line-plot cell above; keeping a single definition means the
# two figures can never drift apart. Run the line-plot cell first.
bar_width = 0.2  # Width of the bars

# Lay out a 2 x 4 grid of axes that all share one x-axis
fig, axs = plt.subplots(nrows=2, ncols=4, sharex=True, figsize=(20, 10))

# Function to create bar plots
def create_bar_plot(ax, data, title, ylabel, x_labels=None, width=None):
    """Draw a grouped bar chart of per-model scores on ``ax``.

    Parameters
    ----------
    ax : object with the matplotlib Axes drawing methods used here
        (``bar``, ``set_title``, ``set_ylabel``, ``set_xticks``,
        ``set_xticklabels``, ``grid``).
    data : dict
        Maps a model name to a sequence of scores, one per x position.
        Iteration order determines the left-to-right order of the bars
        inside each group.
    title : str
        Title of the panel.
    ylabel : str
        Label of the y-axis.
    x_labels : sequence, optional
        Tick labels, one per group. Defaults to the notebook-level
        ``horizons`` so existing four-argument calls behave as before.
    width : float, optional
        Width of a single bar. Defaults to the notebook-level ``bar_width``.
    """
    tick_labels = list(horizons if x_labels is None else x_labels)
    single_width = bar_width if width is None else width

    # Left edge of each group: 0, 1, 2, ...
    group_start = np.arange(len(tick_labels))
    for offset, (model, values) in enumerate(data.items()):
        # Shift each model by one bar width so bars in a group sit side by side
        ax.bar(group_start + offset * single_width, values, width=single_width, label=model)

    ax.set_title(title)
    ax.set_ylabel(ylabel)
    # Centre the tick under the middle of each group of bars
    ax.set_xticks(group_start + single_width * (len(data) - 1) / 2)
    ax.set_xticklabels(tick_labels)
    ax.grid(axis='y')

# Bar-chart comparison: one column per metric, top row = populated-months
# dataset, bottom row = filtered dataset.
# Each entry is (metric label, populated table, filtered table).
bar_panels = [
    ('RMSE', rmse_data_populated, rmse_data_filtered),
    ('MAE', mae_data_populated, mae_data_filtered),
    ('MAPE', mape_data_populated, mape_data_filtered),
    ('R²', r2_data_populated, r2_data_filtered),
]

for col, (metric, populated, filtered) in enumerate(bar_panels):
    create_bar_plot(axs[0, col], populated, '{} - Populated Months Dataset'.format(metric), metric)
    create_bar_plot(axs[1, col], filtered, '{} - Filtered Dataset'.format(metric), metric)

    # Give both datasets of a metric the same y-range so bar heights can be
    # compared between the two rows (the line-plot figure does the same with
    # fixed limits).
    shared_top = max(axs[0, col].get_ylim()[1], axs[1, col].get_ylim()[1])
    for ax in axs[:, col]:
        ax.set_ylim(top=shared_top)

# The x-axis is shared, so only the bottom row carries the axis label
for ax in axs[1, :]:
    ax.set_xlabel('Forecast Horizon (months)')

# Add a single legend in the upper right corner of the figure. Every panel
# draws the same models in the same order, so the first panel's handles
# describe all of them.
handles, labels = axs[0, 0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper right', bbox_to_anchor=(1, 0.95), title='Models')

# Adjust layout: keep the right 15% free for the legend and the top 5% for
# the figure title
fig.tight_layout(rect=[0, 0.03, 0.85, 0.95])
fig.suptitle('Model Comparison Across Different Metrics and Datasets', fontsize=16)

# Show the plot
plt.show()
