## Dual Model Approach
This approach passes each input variable to every model that can use it. Two models are chained for prediction:
- Climate Model
- Consumption Model

In this version, the climate model takes Latitude, Longitude and Month as input and outputs a climate score.
The consumption model then takes Latitude, Longitude, Month, Lane Count and that climate score as input to predict the monthly consumption.

In [None]:
import numpy as np
import pandas as pd
import joblib
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Relative paths of the two serialized estimators
climate_model_path = "../../Model/Exports/Climate Model/knn_model.joblib"
consumption_model_path = "../../Model/Exports/Consumption Model/random_forest_consumption_model_location_month_lane_climate.pkl"

# Deserialize each estimator once at module level so the functions below can reuse them.
# NOTE: joblib.load unpickles the file, so these paths must only point at trusted artifacts.
climate_model = joblib.load(climate_model_path)
consumption_model = joblib.load(consumption_model_path)

# Define functions
def predict_climate_score(lat, lon, month):
    """Return the climate model's prediction for one location and month.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        month: Month value passed to the model as a feature.

    Returns:
        The first (and only) prediction of the module-level ``climate_model``,
        converted to ``float``.
    """
    # The model is queried with a single-row 2-D float array laid out as
    # (lat, lon, month).
    features = np.array([[lat, lon, month]], dtype=float)
    prediction = climate_model.predict(features)
    return float(prediction[0])

def predict_consumption(lat, lon, lane_count, month):
    """Predict consumption for one location, lane count and month.

    The climate score is obtained from ``predict_climate_score`` and supplied
    to the consumption model as the ``ClimateType`` feature.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        lane_count: Value for the ``Lane Count`` feature.
        month: Value for the ``Month`` feature.

    Returns:
        The first (and only) prediction of the module-level
        ``consumption_model``, converted to ``float``.
    """
    # One-element columns produce a single-row frame; the column names and
    # their order are the ones handed to the consumption model.
    features = {
        "Latitude": [lat],
        "Longitude": [lon],
        "Lane Count": [lane_count],
        "Month": [month],
        "ClimateType": [predict_climate_score(lat, lon, month)],
    }
    single_row = pd.DataFrame(features)

    prediction = consumption_model.predict(single_row)
    return float(prediction[0])

def predict_consumption_year(lat, lon, lane_count):
    """Predict consumption for months 1 through 12 and their sum.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        lane_count: Lane count forwarded to ``predict_consumption``.

    Returns:
        A dict with two keys: ``"Monthly"`` is the list of the twelve
        per-month predictions in month order, and ``"Total"`` is their sum.
    """
    per_month = [
        predict_consumption(lat, lon, lane_count, month)
        for month in range(1, 13)
    ]
    return {"Total": sum(per_month), "Monthly": per_month}


# Performs a test from an input dataset following the clean format
def test_model(data):
    """Evaluate the two-stage prediction pipeline against labelled data.

    Predictions are made in batch: the climate model scores every row in one
    call and the consumption model then predicts every row in one call. This
    avoids building a one-row DataFrame and invoking both estimators once per
    input row.

    Args:
        data: DataFrame containing the columns ``Latitude``, ``Longitude``,
            ``Lane Count``, ``Month`` and
            ``Estimated Monthly Consumption (kWh)``.

    Returns:
        A dict with the feature frame (``"X"``), the true targets (``"Y"``),
        the predictions as a Series aligned to the input index
        (``"Prediction"``), and the ``"MAE"``, ``"MSE"`` and ``"R2"`` metrics.

    Raises:
        KeyError: If any of the required columns is missing from ``data``.
    """
    X = data[["Latitude", "Longitude", "Lane Count", "Month"]]
    y_true = data["Estimated Monthly Consumption (kWh)"]

    # Stage 1: climate score for every row, using the same (lat, lon, month)
    # float layout that predict_climate_score sends for a single row.
    climate_inputs = X[["Latitude", "Longitude", "Month"]].to_numpy(dtype=float)
    climate_scores = np.asarray(
        climate_model.predict(climate_inputs), dtype=float
    ).ravel()

    # Stage 2: appending ClimateType after the four base columns reproduces
    # the column names and order that predict_consumption uses.
    features = X.assign(ClimateType=climate_scores)
    predictions = np.asarray(
        consumption_model.predict(features), dtype=float
    ).ravel()

    # Keep the input index so predictions line up with X and y_true.
    y_pred = pd.Series(predictions, index=X.index)

    # Compute evaluation metrics
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    return {
        "X": X,
        "Y": y_true,
        "Prediction": y_pred,
        "MAE": mae,
        "MSE": mse,
        "R2": r2,
    }

    


## Run

In [2]:
# Example run: a single-month prediction with every argument named explicitly.
# NOTE(review): 125.07623 lies outside the valid latitude range [-90, 90] —
# possibly a typo for 25.07623; confirm before trusting the recorded output.
predict_consumption(lat=125.07623, lon=121.419053, lane_count=3, month=4)

1095.905

## Test

In [3]:
# Load the cleaned dataset and print the pipeline's R2 score on it.
path = "../../Clean Data/Clean_Electricity_Data_With_Cost.csv"
df = pd.read_csv(path)

# NOTE(review): the output recorded under this cell (a TypeError about a
# missing 'data' argument) does not match this call, which passes df — the
# cell probably needs to be re-run.
print(test_model(data=df)["R2"])

TypeError: test_model() missing 1 required positional argument: 'data'