In [2]:
import os
import json
import joblib
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

# Paths
data_dir = r"E:\Codes\Projects\ML\air_quality\data\cleaned_city_data"
models_dir = r"E:\Codes\Projects\ML\air_quality\models"

# Load city-to-model mapping. Each value is either a model file name or a
# dict carrying that name under "model_file"; the evaluation loop accepts both.
mapping_file = os.path.join(models_dir, "city_model_map.json")
# JSON is UTF-8 by specification; pin the encoding so the platform default
# (e.g. cp1252 on Windows) cannot mis-decode non-ASCII city names.
with open(mapping_file, "r", encoding="utf-8") as f:
    city_model_map = json.load(f)

# Store evaluation results: one {"City", "RMSE", "Test_Size"} dict per city
eval_results = []

# Evaluate every city's saved model on the last 20% of that city's AQI series.
# A city is skipped (with a printed warning) when its data file, AQI column,
# or model file is missing, or when fewer than 30 valid AQI rows remain.
for city, model_info in city_model_map.items():
    print(f"\n=== Evaluating {city} ===")

    # A mapping entry is either a dict carrying the file name under
    # "model_file" or the file name itself.
    if isinstance(model_info, dict):
        model_file = model_info.get("model_file")
    else:
        model_file = model_info  # already a string

    # Data files are named after the city with spaces replaced by underscores.
    data_file = os.path.join(data_dir, f"{city.replace(' ', '_')}.csv")

    if not os.path.exists(data_file):
        print(f"⚠️ Data file for {city} not found. Skipping.")
        continue

    # Load and prepare data
    df = pd.read_csv(data_file, parse_dates=['Date'], index_col='Date')
    if 'AQI' not in df.columns:
        print(f"⚠️ No AQI column for {city}. Skipping.")
        continue

    # Non-numeric AQI entries become NaN and are dropped with the missing ones.
    df['AQI'] = pd.to_numeric(df['AQI'], errors='coerce')
    df.dropna(subset=['AQI'], inplace=True)

    if len(df) < 30:
        print(f"⚠️ Not enough data for {city}. Skipping.")
        continue

    # Train-Test split (80/20) by row position.
    # NOTE(review): assumes the CSV rows are already in chronological order.
    train_size = int(len(df) * 0.8)
    train, test = df.iloc[:train_size], df.iloc[train_size:]

    # Resolve the saved model. A mapping entry without a usable file name
    # (missing "model_file" key, None, empty string, non-string) is treated
    # like a missing file; otherwise os.path.join would raise TypeError and
    # abort the evaluation of every remaining city.
    if isinstance(model_file, str) and model_file:
        model_path = os.path.join(models_dir, model_file)
    else:
        model_path = None
    if model_path is None or not os.path.exists(model_path):
        print(f"⚠️ Model file for {city} not found. Skipping.")
        continue

    # joblib.load unpickles arbitrary objects; only load model files that
    # come from a trusted source.
    model = joblib.load(model_path)

    # Predict for the test period
    # NOTE(review): the forecast presumably starts right after the data the
    # saved model was fitted on, so this is a true hold-out score only if the
    # model was trained on the first 80% of the series. Confirm against the
    # training script.
    forecast = model.predict(n_periods=len(test))

    # Calculate RMSE
    rmse = np.sqrt(mean_squared_error(test['AQI'], forecast))
    print(f"✅ RMSE: {rmse:.2f}")

    eval_results.append({
        "City": city,
        "RMSE": rmse,
        "Test_Size": len(test)
    })

# Save results to CSV, best (lowest RMSE) first.
# The column schema is pinned so that a run in which every city was skipped
# still produces a frame with an "RMSE" column; without it, sort_values raises
# KeyError on the column-less empty frame and nothing is written.
results_df = pd.DataFrame(
    eval_results, columns=["City", "RMSE", "Test_Size"]
).sort_values(by="RMSE")
results_path = os.path.join(models_dir, "Model_Evaluation_Results.csv")
results_df.to_csv(results_path, index=False)

print("\n=== Evaluation Complete ===")
print(results_df)
print(f"\n💾 Results saved to: {results_path}")



=== Evaluating Ahmedabad ===
✅ RMSE: 262.79

=== Evaluating Aizawl ===
✅ RMSE: 5.44

=== Evaluating Amaravati ===
✅ RMSE: 68.80

=== Evaluating Amritsar ===
✅ RMSE: 121.77

=== Evaluating Bengaluru ===
✅ RMSE: 34.41

=== Evaluating Bhopal ===
✅ RMSE: 33.21

=== Evaluating Brajrajnagar ===
✅ RMSE: 61.54

=== Evaluating Chandigarh ===
✅ RMSE: 26.41

=== Evaluating Chennai ===
✅ RMSE: 36.97

=== Evaluating Coimbatore ===
✅ RMSE: 35.83

=== Evaluating Delhi ===
✅ RMSE: 114.07

=== Evaluating Ernakulam ===
✅ RMSE: 8.23

=== Evaluating Gurugram ===
✅ RMSE: 130.26

=== Evaluating Guwahati ===
✅ RMSE: 142.90

=== Evaluating Hyderabad ===
✅ RMSE: 46.64

=== Evaluating Jaipur ===
✅ RMSE: 51.94

=== Evaluating Jorapokhar ===
✅ RMSE: 55.00

=== Evaluating Kochi ===
✅ RMSE: 23.19

=== Evaluating Kolkata ===
✅ RMSE: 127.95

=== Evaluating Lucknow ===
✅ RMSE: 97.84

=== Evaluating Mumbai ===
✅ RMSE: 54.05

=== Evaluating Patna ===
✅ RMSE: 101.70

=== Evaluating Shillong ===
✅ RMSE: 12.04

=== Evalua