<a href="https://colab.research.google.com/github/SunbirdAI/lamwo-electrification-project/blob/main/notebooks/rank_minigrid_villages_PUE/rank_candidate_minigrids_lamwo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Predict the PUE value for current minigrid candidate villages and rank them

In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import pickle

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor

### Load and instantiate models

In [2]:
!unzip ranking_models.zip


Archive:  ranking_models.zip
  inflating: lin_reg.pkl             
  inflating: poly_features.pkl       
  inflating: poly_reg.pkl            
  inflating: rf_reg.pkl              
  inflating: xgb_reg.pkl             


Features used in training

In [3]:
# Input columns for the models; the list order is the column order used when
# the feature matrix is selected from the input data.
features = [
    'building_count',
    'permanent_building_count',
    'educational_facilities',
    'health_facilities',
    'social_facilities',
    'services',
    'primary_roads',
    'secondary_roads',
    'tertiary_roads',
    'unclassified_roads',
    'percentage_crop_land',
    'percentage_built_area',
    'mean_pvout_solar_radiation',
    'mean_wind_speed',
]

Load the saved models and the saved `PolynomialFeatures` transformer

In [4]:
def _load_pickle(path):
    """Return the object unpickled from the binary file at `path`."""
    # NOTE: unpickling can execute arbitrary code -- only load files you trust.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


lin_reg = _load_pickle('lin_reg.pkl')
poly_reg = _load_pickle('poly_reg.pkl')
poly = _load_pickle('poly_features.pkl')  # the saved PolynomialFeatures object
rf_reg = _load_pickle('rf_reg.pkl')
xgb_reg = _load_pickle('xgb_reg.pkl')

Helper function to prepare input data and make predictions

In [5]:
def predict_pue(new_data, lin_reg, poly_reg, poly, rf_reg, xgb_reg, features):
    """
    Predict PUE scores (`winch_prob`) for new data using all four models.

    Args:
        new_data (pd.DataFrame or dict): New input data. Must contain a
            'village_id' column/key plus every name in `features`; any other
            columns are ignored. A dict is treated as a single row.
        lin_reg, poly_reg, rf_reg, xgb_reg: Trained models exposing `.predict`.
            `poly_reg` receives the output of `poly.transform`; the other
            three receive the selected feature columns directly.
        poly: Loaded PolynomialFeatures object (must expose `.transform`).
        features: List of feature names; also the column order passed to the
            models.

    Returns:
        pd.DataFrame: One row per input row with columns 'village_id',
        'PUE_Linear_Regression', 'PUE_Polynomial_Regression',
        'PUE_Random_Forest' and 'PUE_XGBoost'.

    Raises:
        ValueError: If 'village_id' or any entry of `features` is missing
            from `new_data`.
    """
    # Convert input to DataFrame if it's a dict (one row)
    if isinstance(new_data, dict):
        new_data = pd.DataFrame([new_data])

    # Ensure all required features are present
    missing_features = [f for f in features if f not in new_data.columns]
    if missing_features:
        raise ValueError(f"Missing features in new data: {missing_features}")

    # 'village_id' is not a model input but labels the output rows. Check it
    # up front so a missing id fails with the same exception type as a missing
    # feature, before any model is run, instead of a KeyError at the very end.
    if 'village_id' not in new_data.columns:
        raise ValueError("Missing required column in new data: 'village_id'")

    # Select and order features
    X_new = new_data[features]

    # Predictions
    pred_lin = lin_reg.predict(X_new)
    X_new_poly = poly.transform(X_new)  # Use loaded poly object to transform data
    pred_poly = poly_reg.predict(X_new_poly)  # Use loaded poly_reg to predict
    pred_rf = rf_reg.predict(X_new)
    pred_xgb = xgb_reg.predict(X_new)

    # Create result DataFrame (indexed like `new_data` via the village_id Series)
    result = pd.DataFrame({
        'village_id': new_data['village_id'],
        'PUE_Linear_Regression': pred_lin,
        'PUE_Polynomial_Regression': pred_poly,
        'PUE_Random_Forest': pred_rf,
        'PUE_XGBoost': pred_xgb
    })

    return result

### Load candidate minigrid village data and score and rank villages

In [6]:
# Candidate villages table. predict_pue() reads its 'village_id' column and
# every column named in `features`.
test_data = pd.read_csv('candidate_villages_testdata.csv')

In [7]:
# (rows, columns) of the loaded table
test_data.shape

(39, 24)

In [8]:
# Let a ValueError from predict_pue propagate. Catching and only printing it
# would leave `predictions` undefined (or stale from an earlier run), so the
# ranking cell would fail with a NameError or silently rank old results.
predictions = predict_pue(test_data, lin_reg, poly_reg, poly, rf_reg, xgb_reg, features)

Sort by `PUE_XGBoost` in descending order and assign ranks starting at 1 (change the sort column to rank by another model's predictions)

In [9]:
# Order villages from highest to lowest XGBoost prediction, then number the
# rows 1..N in that order.
predictions_sorted = (
    predictions
    .sort_values(by='PUE_XGBoost', ascending=False)
    .assign(rank=lambda ordered: range(1, len(ordered) + 1))
)

Display sorted predictions

In [10]:
# Heading line followed by the plain-text rendering of the ranked table.
print("\nSorted PUE Predictions for Test Villages:", predictions_sorted, sep="\n")


Sorted PUE Predictions for Test Villages:
    village_id  PUE_Linear_Regression  PUE_Polynomial_Regression  \
11     5502905               0.884534                   0.830000   
12     5502906               0.231352                   1.890198   
15     5502922               0.472419                  29.824818   
14     5502921               0.306374                  20.837079   
1      5502037               0.250373                   5.558291   
17     5502938               0.524639                   0.662512   
30     5503121               0.454082                   1.282865   
21     5502952               0.513112                   0.677888   
37     5503193               0.643437                   1.881900   
0      5501324               0.442622                   1.084241   
9      5502901               0.560988                  11.339806   
27     5503092               0.495880                  -0.873721   
32     5503134               0.443797                   0.628766   
10   

Save ranked minigrid villages

In [11]:
# Write the ranked table to CSV; index=False leaves the DataFrame's row index
# out of the file.
predictions_sorted.to_csv(path_or_buf='village_pue_predictions.csv', index=False)
print("\nPredictions saved to 'village_pue_predictions.csv'")


Predictions saved to 'village_pue_predictions.csv'
