## Dual Model Approach
This approach feeds every available input variable to each of the two models used for prediction:
- Climate Model
- Consumption Model

In this version, the climate model takes Latitude, Longitude and Month as input and outputs a climate score.
The consumption model then takes Latitude, Longitude, Month, the climate score and the lane count as input and outputs the predicted monthly consumption.

In [None]:
import numpy as np
import pandas as pd
import joblib
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


# Load models
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code — only load model files that come from a trusted source.

# Climate model: queried in predict_climate_score with one
# [Latitude, Longitude, Month] row.
climate_model_path = "../../Model/Exports/Climate Model/knn_model.joblib"
climate_model = joblib.load(climate_model_path)

# Consumption model: queried in predict_consumption with the columns
# Latitude, Longitude, Lane Count, Month and ClimateType.
# The file has a .pkl extension but is loaded through joblib as well.
consumption_model_path = "../../Model/Exports/Consumption Model/random_forest_consumption_model_location_month_lane_climate.pkl"
consumption_model = joblib.load(consumption_model_path)

# Define functions
def predict_climate_score(lat, lon, month):
    """Return the climate model's prediction for one location and month.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        month: Month to predict for.

    Returns:
        The first (only) prediction of ``climate_model`` as a Python float.
    """
    # The model is queried with a single-row, three-column float array.
    features = np.array([[lat, lon, month]], dtype=float)
    predictions = climate_model.predict(features)
    return float(predictions[0])

def predict_consumption(lat, lon, lane_count, month):
    """Predict the consumption for one location, lane count and month.

    The climate score is first obtained from ``predict_climate_score`` and
    then passed to the consumption model under the ``ClimateType`` column.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        lane_count: Value for the "Lane Count" feature.
        month: Month to predict for.

    Returns:
        The first (only) prediction of ``consumption_model`` as a Python float.
    """
    # Column order is kept exactly as the model is queried in this file.
    features = {
        "Latitude": lat,
        "Longitude": lon,
        "Lane Count": lane_count,
        "Month": month,
        "ClimateType": predict_climate_score(lat, lon, month),
    }
    single_row = pd.DataFrame([features])
    predictions = consumption_model.predict(single_row)
    return float(predictions[0])

def predict_consumption_year(lat, lon, lane_count):
    """Predict the consumption of every month (1 through 12) and their sum.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        lane_count: Value for the "Lane Count" feature.

    Returns:
        A dict with "Total" (sum of the twelve monthly predictions) and
        "Monthly" (list of the twelve predictions, month 1 first).
    """
    per_month = [
        predict_consumption(lat, lon, lane_count, month)
        for month in range(1, 13)
    ]

    # Accumulate explicitly, left to right, starting from 0.
    yearly_total = 0
    for value in per_month:
        yearly_total += value

    return {"Total": yearly_total, "Monthly": per_month}


import sys
# NOTE(review): sys.path entries are searched as directories (or zip archives),
# so appending a ".py" file name does not by itself make that module
# importable — confirm whether the containing directory was intended.
sys.path.append('Cost_Estimation.py')
# NOTE(review): the module imported here is "Const_Estimation" while the path
# above says "Cost_Estimation" — one of the two is probably a typo; verify
# against the actual file name on disk.
import Const_Estimation as cost

import importlib 
# Re-execute the cost module so that edits made to it while the notebook
# kernel is running are picked up without restarting.
importlib.reload(cost)

# Performs a test from an input dataset following the clean format
def test_model(data):
    """Score the consumption and cost predictions against a labelled dataset.

    Consumption is predicted row by row with ``predict_consumption``; the
    cost is then derived from the *predicted* consumption through
    ``cost.calculate_electricity_bill``.

    Args:
        data: DataFrame with the columns "Latitude", "Longitude",
            "Lane Count", "Month", "Estimated Monthly Consumption (kWh)"
            and "Estimated Monthly Amount (Before Tax)".

    Returns:
        A dict with two entries:
        "Consumption" -> {"X", "Y", "Prediction", "MAE", "MSE", "R2"} and
        "Cost" -> {"Y", "Prediction", "MAE", "MSE", "R2"}.
    """

    def _consumption_for(row):
        # One model query per dataset row.
        return predict_consumption(
            row["Latitude"], row["Longitude"], row["Lane Count"], row["Month"]
        )

    def _bill_for(row):
        # The bill is computed from the month and the predicted consumption.
        return cost.calculate_electricity_bill(
            int(row["Month"]), float(row["Predicted Consumption"])
        )

    def _scores(actual, predicted):
        # Same three metrics for both targets, in a fixed key order.
        return {
            "MAE": mean_absolute_error(actual, predicted),
            "MSE": mean_squared_error(actual, predicted),
            "R2": r2_score(actual, predicted),
        }

    X = data[["Latitude", "Longitude", "Lane Count", "Month"]]
    actual_consumption = data["Estimated Monthly Consumption (kWh)"]
    actual_cost = data["Estimated Monthly Amount (Before Tax)"]

    # Monthly consumption, predicted per row.
    predicted_consumption = X.apply(_consumption_for, axis=1)

    # Monthly cost, predicted from a copy of the features plus the
    # predicted consumption (the input frame itself is left untouched).
    cost_inputs = X.copy()
    cost_inputs["Predicted Consumption"] = predicted_consumption
    predicted_cost = cost_inputs.apply(_bill_for, axis=1)

    consumption_scores = _scores(actual_consumption, predicted_consumption)
    cost_scores = _scores(actual_cost, predicted_cost)

    return {
        "Consumption": {
            "X": X,
            "Y": actual_consumption,
            "Prediction": predicted_consumption,
            **consumption_scores,
        },
        "Cost": {
            "Y": actual_cost,
            "Prediction": predicted_cost,
            **cost_scores,
        },
    }

    


## Run

In [9]:
# Example run
# Arguments are (lat, lon, lane_count, month).
# NOTE(review): the latitude passed here, 125.07623, lies outside the usual
# [-90, 90] degree range — possibly 25.07623 was intended; confirm before
# relying on this example.
predict_consumption(125.07623, 121.419053, 3, 4)

1095.905

## Test

In [10]:
# Evaluate both prediction stages on the cleaned dataset and report R2.
path = "../../Clean Data/Clean_Electricity_Data_With_Cost.csv"
df = pd.read_csv(path)

results = test_model(df)

for label in ("Consumption", "Cost"):
    print(label + " R2: ", results[label]["R2"])

Consumption R2:  0.9347905224611793
Cost R2:  0.7356168276239905
