<a href="https://colab.research.google.com/github/Rajanshr04/MTP/blob/main/ML_Model_Generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Data Preprocessing and Surrogate Model Training Script

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import r2_score, mean_squared_error, root_mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBRegressor
import joblib

# Load the dataset
data = pd.read_csv("/content/Dataset-HRB-8hrs.csv")  # Adjust path as needed in Google Colab

# Define input and output columns
input_columns = ["COOL_SETPOINTS", "ROOF_R_LEVELS", "WALL_R_LEVELS", "WIN_U_LEVELS", "COP_LEVELS"]
target_energy = "annual_energy_kwh"
target_unmet = "unmet_hours"

X = data[input_columns].copy()
y_energy = data[target_energy].copy()
y_unmet = data[target_unmet].copy()

# Split ONCE, before any scaling. Passing both targets in the same call
# guarantees that the two models are trained and tested on exactly the same
# rows, instead of relying on two separate calls that happen to share a seed.
(
    X_train_raw,
    X_test_raw,
    y_energy_train,
    y_energy_test,
    y_unmet_train,
    y_unmet_test,
) = train_test_split(X, y_energy, y_unmet, test_size=0.2, random_state=42)

# Normalize input features. The scaler is fit on the training rows only so
# that no statistics (min/max) from the held-out rows leak into preprocessing;
# the test rows are then transformed with the already-fitted scaler.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled view of the full input table, kept under its original name for any
# later code that uses it.
X_scaled = scaler.transform(X)

# Both targets share the same feature split; these aliases keep the names the
# unmet-hours model code expects.
X_train_u, X_test_u = X_train, X_test

# Save the scaler for use during optimization
dump_scaler = joblib.dump(scaler, "scaler.pkl")

# Both surrogate models use the same XGBoost hyperparameters, so they are
# declared once and unpacked into each regressor.
xgb_params = {
    "n_estimators": 100,
    "learning_rate": 0.1,
    "max_depth": 5,
    "random_state": 42,
}

# Energy surrogate: fit on the training split, then persist to disk.
model_energy = XGBRegressor(**xgb_params)
model_energy.fit(X_train, y_energy_train)
joblib.dump(model_energy, "best_energy_model.pkl")

# Unmet-hours surrogate: same procedure with the unmet-hours target.
model_unmet = XGBRegressor(**xgb_params)
model_unmet.fit(X_train_u, y_unmet_train)
joblib.dump(model_unmet, "best_unmet_model.pkl")

# Evaluate both models on their held-out test split.
# Predictions are computed up front, then each (label, truth, prediction)
# triple is scored with R² and RMSE.
energy_test_pred = model_energy.predict(X_test)
unmet_test_pred = model_unmet.predict(X_test_u)

evaluations = (
    ("Energy", y_energy_test, energy_test_pred),
    ("Unmet Hours", y_unmet_test, unmet_test_pred),
)
for label, y_true, y_pred in evaluations:
    r2 = r2_score(y_true, y_pred)
    rmse = root_mean_squared_error(y_true, y_pred)
    print(f"{label} Model Test R²: {r2:.3f}, RMSE: {rmse:.2f}")


# Sanity check: print each model's prediction for the first test row.
print("\nSample prediction outputs:")
first_energy_row = X_test[:1]
first_unmet_row = X_test_u[:1]
print("Energy Model Prediction Sample:", model_energy.predict(first_energy_row))
print("Unmet Hours Model Prediction Sample:", model_unmet.predict(first_unmet_row))



Energy Model Test R²: 0.999, RMSE: 12976.26
Unmet Hours Model Test R²: 0.999, RMSE: 6.09

Sample prediction outputs:
Energy Model Prediction Sample: [5313580.5]
Unmet Hours Model Prediction Sample: [3980.4536]
