<a href="https://colab.research.google.com/github/SunbirdAI/lamwo-electrification-project/blob/main/notebooks/rank_minigrid_villages_PUE/predict_PUE_and_rank_candidate_mg_villages.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Predict PUE and rank villages based on PUE score

Inference code for predicting the PUE score of a village. The predicted scores can also be used to rank candidate villages and prioritise the order of deployment.

In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import pickle

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor

## Load and instantiate trained models

In [3]:
!unzip ranking_models.zip

Archive:  ranking_models.zip
  inflating: lin_reg.pkl             
  inflating: poly_features.pkl       
  inflating: poly_reg.pkl            
  inflating: rf_reg.pkl              
  inflating: xgb_reg.pkl             


Features used in training

In [4]:
# Column names the models expect. The order matters: predict_pue selects
# the input columns in exactly this order before calling the models.
features = [
    'building_count',
    'permanent_building_count',
    'educational_facilities',
    'health_facilities',
    'social_facilities',
    'services',
    'primary_roads',
    'secondary_roads',
    'tertiary_roads',
    'unclassified_roads',
    'percentage_crop_land',
    'percentage_built_area',
    'mean_pvout_solar_radiation',
    'mean_wind_speed',
]

Load models

In [5]:
def _load_pickle(path):
    """Return the object unpickled from the binary file at `path`."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# NOTE: unpickling can execute code stored in the file, so only load
# model archives that come from a trusted source.
lin_reg = _load_pickle('lin_reg.pkl')
poly_reg = _load_pickle('poly_reg.pkl')
poly = _load_pickle('poly_features.pkl')  # the saved PolynomialFeatures object
rf_reg = _load_pickle('rf_reg.pkl')
xgb_reg = _load_pickle('xgb_reg.pkl')

Function to prepare input data and make predictions

In [6]:
def predict_pue(new_data, lin_reg, poly_reg, poly, rf_reg, xgb_reg, features):
    """
    Predict PUE scores (`winch_prob`) for new data using all four models.

    Args:
        new_data (pd.DataFrame or dict): Input rows. A dict of scalars is
            treated as a single row. A dict with at least one list-like
            value (list, tuple, array, Series) is treated as
            column -> values and yields one row per element. Columns that
            are not listed in `features` are ignored.
        lin_reg, poly_reg, rf_reg, xgb_reg: Trained models exposing `.predict`.
        poly: Loaded PolynomialFeatures object; its `.transform` output is
            what `poly_reg` predicts on.
        features: Sequence of feature names, in the column order handed to
            the models.

    Returns:
        dict: Model label -> prediction. When a model returns exactly one
        prediction the value is that single element; otherwise it is the
        prediction array as returned by the model.

    Raises:
        ValueError: If a required feature is missing from `new_data`, or if
            `new_data` contains no rows.
    """
    if isinstance(new_data, dict):
        # A dict of scalars is one row and must be wrapped in a list.
        # A dict holding list-like values is already column-oriented;
        # wrapping it would produce a single row of list cells that the
        # models cannot consume.
        column_oriented = any(
            pd.api.types.is_list_like(value) for value in new_data.values()
        )
        new_data = pd.DataFrame(new_data) if column_oriented else pd.DataFrame([new_data])

    # Ensure all required features are present
    missing_features = [f for f in features if f not in new_data.columns]
    if missing_features:
        raise ValueError(f"Missing features in new data: {missing_features}")

    # Select and order features. list() keeps a tuple of names from being
    # interpreted as a single (multi-level) column key.
    X_new = new_data[list(features)]

    # Fail early with a clear message instead of an error raised from
    # inside one of the model libraries.
    if len(X_new) == 0:
        raise ValueError("No rows to predict on: new data is empty")

    def _scalar_if_single(pred):
        # Single-row input: hand back the lone element so callers can
        # format it directly; batches keep the full array.
        return pred[0] if len(pred) == 1 else pred

    pred_lin = lin_reg.predict(X_new)
    X_new_poly = poly.transform(X_new)  # expand features with the loaded transformer
    pred_poly = poly_reg.predict(X_new_poly)
    pred_rf = rf_reg.predict(X_new)
    pred_xgb = xgb_reg.predict(X_new)

    return {
        "Linear Regression": _scalar_if_single(pred_lin),
        "Polynomial Regression (degree 2)": _scalar_if_single(pred_poly),
        "Random Forest Regression": _scalar_if_single(pred_rf),
        "XGBoost Regression": _scalar_if_single(pred_xgb),
    }

## Example usage

In [7]:
# One example input row. 'Capacity' and 'mean_ndvi' are not listed in
# `features`, so predict_pue drops them when it selects the model columns.
sample_data = dict(
    Capacity=0.04,
    building_count=300,
    permanent_building_count=280,
    educational_facilities=10,
    health_facilities=0,
    social_facilities=0,
    services=0,
    primary_roads=1,
    secondary_roads=1,
    tertiary_roads=2,
    unclassified_roads=100,
    percentage_crop_land=95.0,
    percentage_built_area=85.0,
    mean_pvout_solar_radiation=1650.0,
    mean_wind_speed=18.5,
    mean_ndvi=0.35,
)

Predict PUE using loaded models

In [8]:
# Run every loaded model on the sample row and print one line per model.
# A ValueError (for example, a missing feature column) is reported as a
# message instead of a traceback.
try:
    predictions = predict_pue(
        sample_data, lin_reg, poly_reg, poly, rf_reg, xgb_reg, features
    )
    print("\nPUE Predictions for Sample Data:")
    for model in predictions:
        pred = predictions[model]
        print(f"{model}: {pred:.4f}")  # four decimal places per score
except ValueError as e:
    print(f"Error: {e}")


PUE Predictions for Sample Data:
Linear Regression: 1.7560
Polynomial Regression (degree 2): 207.8101
Random Forest Regression: 0.8290
XGBoost Regression: 0.8650
