In [None]:
import os
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
import math
import pandas as pd  # ensure pd is imported

# ------------------------------------------------------------
# Paths (relative to repo root)
# ------------------------------------------------------------
DATA_DIR = "data"
PLOT_DIR = "plots"

# CSV outputs live under DATA_DIR; PNG plots go to PLOT_DIR.
SCORING_PATH = os.path.join(DATA_DIR, "model_scoring_summary.csv")
PRED_PATH = os.path.join(DATA_DIR, "final_predictions_detailed.csv")

# Create every output directory up front. DATA_DIR is included because
# DataFrame.to_csv does not create missing parent directories, so writing
# SCORING_PATH / PRED_PATH would otherwise fail on a fresh checkout.
for _output_dir in (DATA_DIR, PLOT_DIR):
    os.makedirs(_output_dir, exist_ok=True)

# ------------------------------------------------------------
# 1) Create scoring summary
# ------------------------------------------------------------
# Column layout of the scoring summary. Passing it to the DataFrame
# constructor guarantees these columns exist even when no group has any
# validation/test rows, so the sort below cannot raise KeyError on an
# empty result.
metric_columns = ['Make', 'Body_Type', 'MAE', 'RMSE', 'N_val_test_rows']
holdout_splits = ['Validation', 'Test']
metrics_list = []

for (make, body), group_df in final_predictions.groupby(['Make', 'Body_Type']):
    # Score only the held-out rows; rows from any other split are ignored.
    val_test = group_df[group_df['Split'].isin(holdout_splits)]
    if val_test.empty:
        continue

    y_true = val_test['Units_Sold']
    y_pred = val_test['Predicted_Units_Sold']

    mae = mean_absolute_error(y_true, y_pred)
    # RMSE is derived as the square root of the mean squared error.
    rmse = math.sqrt(mean_squared_error(y_true, y_pred))
    n = len(val_test)

    metrics_list.append({
        'Make': make,
        'Body_Type': body,
        'MAE': mae,
        'RMSE': rmse,
        'N_val_test_rows': n,
    })

# Lowest-MAE groups first; later code relies on this ordering when it takes
# metrics_df.head(top_n).
metrics_df = (
    pd.DataFrame(metrics_list, columns=metric_columns)
    .sort_values('MAE')
    .reset_index(drop=True)
)
metrics_df.to_csv(SCORING_PATH, index=False)
print(f"Saved scoring summary to: {SCORING_PATH}")
print(metrics_df.head(10), "\n")

# ------------------------------------------------------------
# 2) Plotting function
# ------------------------------------------------------------
def plot_group_predictions(group_df, make, body, save_dir=None, show=True):
    """Plot actual vs. predicted units sold over time for one group.

    Args:
        group_df: DataFrame with the rows of a single Make/Body_Type group.
            The columns read here are 'Year_Month', 'Units_Sold',
            'Predicted_Units_Sold' and 'Split'.
        make: Make label, used in the plot title and the output file name.
        body: Body-type label, used in the plot title and the output file name.
        save_dir: Directory the PNG is written to (created if missing).
            When falsy, the figure is not saved.
        show: If true, display the figure with ``plt.show()``; otherwise the
            figure is closed after any save.
    """
    dfp = group_df.sort_values('Year_Month').copy()
    fig, ax = plt.subplots(figsize=(10, 4))

    ax.plot(dfp['Year_Month'], dfp['Units_Sold'], label='Actual', linewidth=1.5)
    ax.plot(dfp['Year_Month'], dfp['Predicted_Units_Sold'], linestyle='--', label='Predicted')

    # Shade the time range covered by each split with a grey band. The band's
    # vertical extent follows that split's own Units_Sold range (+/- 1).
    for split_label, colour, alpha in [
        ('Train', '0.92', 0.4),
        ('Validation', '0.85', 0.25),
        ('Test', '0.78', 0.2),
    ]:
        subset = dfp[dfp['Split'] == split_label]
        if subset.empty:
            continue
        # Only the Validation band gets a legend entry; an empty label keeps
        # the other bands out of the legend.
        ax.fill_between(subset['Year_Month'],
                        subset['Units_Sold'].min() - 1,
                        subset['Units_Sold'].max() + 1,
                        facecolor=colour, alpha=alpha,
                        label=split_label if split_label == 'Validation' else "")

    # \u2014 is an em dash; written as an escape so the title cannot be
    # corrupted again by a wrong file encoding.
    ax.set_title(f"Actual vs Predicted \u2014 {make} / {body}")
    ax.set_xlabel("Date")
    ax.set_ylabel("Units Sold")
    ax.legend(loc='upper left')
    ax.grid(axis='y', alpha=0.25)

    fig.tight_layout()

    if save_dir:
        # savefig does not create missing directories.
        os.makedirs(save_dir, exist_ok=True)
        # Spaces and slashes are replaced so the labels form a single file name.
        safe_name = f"{make}__{body}".replace(" ", "_").replace("/", "_")
        save_path = os.path.join(save_dir, f"{safe_name}.png")
        fig.savefig(save_path, dpi=200)
        print(f"Saved plot: {save_path}")

    if show:
        plt.show()
    else:
        plt.close(fig)

# ------------------------------------------------------------
# 3) Bulk plotting: the top_n groups with the lowest MAE, or with the most validation/test rows
# ------------------------------------------------------------
plot_strategy = 'mae'
top_n = 5

# Pick which groups to plot: 'mae' takes the first top_n rows of metrics_df
# as it is currently ordered; any other value takes the top_n groups with the
# most validation/test rows.
if plot_strategy != 'mae':
    selected = metrics_df.sort_values('N_val_test_rows', ascending=False).head(top_n)
else:
    selected = metrics_df.head(top_n)

# Draw and save one figure per selected (Make, Body_Type) pair.
for make, body in zip(selected['Make'], selected['Body_Type']):
    same_make = final_predictions['Make'] == make
    same_body = final_predictions['Body_Type'] == body
    group_df = final_predictions[same_make & same_body]
    if len(group_df) > 0:
        plot_group_predictions(group_df, make, body, save_dir=PLOT_DIR, show=True)

print(f"Plots saved to: {PLOT_DIR}\n")

# ------------------------------------------------------------
# 4) Save final_predictions
# ------------------------------------------------------------
# Write the full prediction table to PRED_PATH; index=False leaves the
# DataFrame index out of the CSV.
final_predictions.to_csv(
    path_or_buf=PRED_PATH,
    index=False,
)
print(f"Saved final predictions to: {PRED_PATH}\n")
