In [None]:
# Plot event-month NEQUICK model VTEC against real data (SAITS-imputed), with accuracy metrics (final version)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os
import ipywidgets as widgets
from IPython.display import display
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# ==== INPUT LOCATIONS (edit these to point at your own files) ====
nequick_path = "/content/drive/MyDrive/ISMR-SASTRA/EVENT_MONTHS_NEQUICK_DATA.xlsx"
real_path = "/content/drive/MyDrive/ISMR-SASTRA/EVENT MONTHS-REAL DATA.xlsx"
model_path = "/content/drive/MyDrive/ISMR-SASTRA/saits_vtec_model.pkl"

# ==== RESTORE THE TRAINED SAITS MODEL ====
# NOTE(review): unpickling can execute arbitrary code -- only load a model
# file that comes from a source you trust.
with open(model_path, mode="rb") as f:
    saits_model = pickle.load(f)

# ==== OPEN BOTH WORKBOOKS ====
# The workbook handles are kept at module level because plot_and_metrics
# reads a sheet from each of them every time it is called.
nequick_xls = pd.ExcelFile(nequick_path)
real_xls = pd.ExcelFile(real_path)

# Month label shown to the user -> worksheet name inside both workbooks
months = dict(
    March="MARCH",
    June="JUNE",
    September="SEPTEMBER",
    December="DECEMBER",
)

# ==== FUNCTION TO EXTRACT VTEC FOR FULL MONTH ====
def get_month_vtec(xls, sheet):
    """Read one worksheet as a float32 array with exactly 24 rows.

    The sheet is read without a header row, so every cell is treated as
    data. Per the surrounding script, rows are hours and columns are days.

    Args:
        xls: Workbook source accepted by ``pd.read_excel`` (the script
            passes an open ``pd.ExcelFile``).
        sheet: Name of the worksheet to read.

    Returns:
        A ``float32`` array of shape ``(24, n_columns)``. Sheets with
        fewer than 24 rows are padded at the bottom with NaN; sheets with
        more are cut down to their first 24 rows.
    """
    rows_wanted = 24
    grid = pd.read_excel(xls, sheet_name=sheet, header=None).to_numpy(dtype=np.float32)
    rows_found = grid.shape[0]

    if rows_found > rows_wanted:
        # Too many rows: keep only the leading 24.
        return grid[:rows_wanted, :]

    if rows_found < rows_wanted:
        # Too few rows: append NaN rows so downstream code can rely on 24.
        shortfall = rows_wanted - rows_found
        return np.pad(grid, ((0, shortfall), (0, 0)), constant_values=np.nan)

    return grid

# ==== FUNCTION TO PLOT AND CALCULATE METRICS ====
def _impute_real_days(vtec_real):
    """Return a copy of *vtec_real* with NaN gaps filled by the SAITS model.

    Each column (one day) is passed to ``saits_model.predict`` on its own.
    Observed values are always kept; only NaN positions take the model's
    output. Columns without any NaN are left untouched and never sent to
    the model, since its output would be discarded for them anyway.
    """
    filled = vtec_real.copy()
    for day_idx in range(vtec_real.shape[1]):
        day_data = vtec_real[:, day_idx]
        missing = np.isnan(day_data)
        if not missing.any():
            continue
        pred = saits_model.predict({"X": day_data.reshape(1, 24, 1)})
        imputed = pred["imputation"].flatten()
        filled[:, day_idx] = np.where(missing, imputed, day_data)
    return filled


def _hourly_metrics(reference, model):
    """Return ``(mae, mse, rmse, r2, n_used)`` over hours finite in both inputs.

    The sklearn metric functions reject NaN/inf input, so hours where
    either series is not finite are dropped first. When no hour survives,
    every metric is NaN and ``n_used`` is 0.
    """
    usable = np.isfinite(reference) & np.isfinite(model)
    n_used = int(usable.sum())
    if n_used == 0:
        nan = float("nan")
        return nan, nan, nan, nan, 0

    ref, mod = reference[usable], model[usable]
    mse = mean_squared_error(ref, mod)
    return mean_absolute_error(ref, mod), mse, np.sqrt(mse), r2_score(ref, mod), n_used


def plot_and_metrics(month_label):
    """Plot hourly-averaged NEQUICK vs. real VTEC for a month and print metrics.

    The real data is gap-filled with the SAITS model first, then both
    datasets are averaged across their columns (days) for each of the 24
    rows (hours). The two curves are drawn on one figure and MAE, MSE,
    RMSE and R² of NEQUICK against the real average are printed.

    Args:
        month_label: A key of the module-level ``months`` mapping
            (e.g. ``"March"``).

    Raises:
        KeyError: If *month_label* is not a key of ``months``.
    """
    sheet = months[month_label]

    # ---- Load NEQUICK and REAL data ----
    vtec_nequick = get_month_vtec(nequick_xls, sheet)
    vtec_real = get_month_vtec(real_xls, sheet)

    # ---- Impute missing values in real data ----
    vtec_real_imputed = _impute_real_days(vtec_real)

    # ---- Compute monthly averages (per hour, across days) ----
    nequick_avg = np.nanmean(vtec_nequick, axis=1)
    real_avg = np.nanmean(vtec_real_imputed, axis=1)

    # ---- Calculate metrics ----
    # An hour can still be NaN here (e.g. NaN-padded rows of a short sheet),
    # so the metrics are restricted to hours present in both averages.
    mae, mse, rmse, r2, hours_used = _hourly_metrics(real_avg, nequick_avg)

    # ---- Plot ----
    plt.figure(figsize=(12, 6))
    hours = range(1, 25)
    plt.plot(hours, nequick_avg, color='red', lw=2, label='NEQUICK Avg')
    plt.plot(hours, real_avg, color='blue', lw=2, label='Real VTEC (SAITS Imputed) Avg')

    plt.xlabel("Hour (1–24)")
    plt.ylabel("VTEC (TECU)")
    plt.title(f"Averaged VTEC Comparison - {month_label}")
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.xticks(hours)

    # Derive the axis top from finite values only: a NaN peak would make
    # both plt.ylim and int() fail. With no finite data, keep autoscaling.
    plotted = np.concatenate([nequick_avg, real_avg])
    plotted = plotted[np.isfinite(plotted)]
    if plotted.size:
        peak = plotted.max()
        plt.ylim(0, peak + 20)
        plt.yticks(np.arange(0, int(peak) + 21, 10))

    plt.tight_layout()
    plt.show()

    # ---- Print metrics ----
    print(f"Metrics for {month_label}:")
    if hours_used < len(real_avg):
        skipped = len(real_avg) - hours_used
        print(f"Note: {skipped} hour(s) without data in both series were excluded.")
    print(f"MAE  : {mae:.2f} TECU")
    print(f"MSE  : {mse:.2f}")
    print(f"RMSE : {rmse:.2f} TECU")
    print(f"R²   : {r2:.3f}")

# ==== MONTH SELECTOR ====
# One dropdown entry per key of `months`; the selection starts on March.
month_dropdown = widgets.Dropdown(
    description="Select Month:",
    options=list(months),
    value="March",
)

# Call plot_and_metrics with the dropdown's current value as month_label.
widgets.interact(plot_and_metrics, month_label=month_dropdown)

