<a href="https://colab.research.google.com/github/SunbirdAI/lamwo-electrification-project/blob/main/notebooks/predicting_minigrid_villages/predict_minigrid_village.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Predict whether a village is a suitable location for a minigrid or not

Each model outputs the probability that a village is a viable location for a minigrid deployment. By adjusting the cut-off threshold applied to that probability, we control how confident a model must be before a village is classified as a minigrid site.

In [1]:
import pandas as pd
import numpy as np
import pickle

Set up minigrid predictor class

In [2]:
class MinigridPredictor:
    """Classify villages as 'Minigrid' / 'Not Minigrid' using pickled models.

    Holds two classifiers (``log_reg`` and ``rf``) and a ``scaler`` loaded from
    pickle files, plus the ordered list of feature names that incoming data is
    aligned to before scaling.
    """

    def __init__(self, log_reg_path, rf_path, scaler_path, feature_names):
        """Initialize predictor with trained models and scaler.

        Args:
            log_reg_path: Path to the pickled model stored as ``self.log_reg``.
            rf_path: Path to the pickled model stored as ``self.rf``.
            scaler_path: Path to the pickled scaler stored as ``self.scaler``.
            feature_names: Ordered feature/column names the input is aligned to.
        """
        self.log_reg = self._load_pickle(log_reg_path)
        self.rf = self._load_pickle(rf_path)
        self.scaler = self._load_pickle(scaler_path)
        self.feature_names = feature_names
        # Index 0 / 1 correspond to the integer predictions produced by predict().
        self.classes = ['Not Minigrid', 'Minigrid']

    @staticmethod
    def _load_pickle(path):
        """Unpickle one object from ``path``, closing the file afterwards."""
        # SECURITY: pickle.load can execute arbitrary code; only load files
        # that come from a trusted source.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def preprocess_input(self, data):
        """Preprocess input data for prediction.

        Args:
            data: A dict describing one village, or a DataFrame with one row
                per village. The caller's object is never modified.

        Returns:
            The result of ``self.scaler.transform`` applied to a frame whose
            columns are exactly ``self.feature_names`` (in that order), with
            missing columns and missing values replaced by 0.
        """
        if isinstance(data, dict):
            data = pd.DataFrame([data])

        # reindex returns a new frame: columns absent from the input are
        # created (as NaN), extra columns are dropped, and the order follows
        # feature_names. The caller's DataFrame is left untouched.
        features = data.reindex(columns=self.feature_names)

        # Fill before casting: casting a column that still contains NaN to int
        # would raise.
        features = features.fillna(0)
        if 'contains_protected_area' in features.columns:
            features = features.astype({'contains_protected_area': int})

        return self.scaler.transform(features)

    def predict(self, data, model_type='random_forest', threshold=0.5):
        """Make predictions using specified model and threshold.

        Args:
            data: Dict or DataFrame accepted by ``preprocess_input``.
            model_type: ``'random_forest'`` selects ``self.rf``; any other
                value selects ``self.log_reg``.
            threshold: A row is predicted as class 1 when its class-1
                probability is greater than or equal to this value.

        Returns:
            Tuple ``(predictions, probabilities)``: the 0/1 integer predictions
            and the class-1 probabilities they were derived from.
        """
        data_scaled = self.preprocess_input(data)
        # NOTE: unrecognised model_type values fall back to self.log_reg
        # (kept for backward compatibility), so a typo is not reported.
        model = self.rf if model_type == 'random_forest' else self.log_reg

        # Get probability of minigrid class (class 1)
        probabilities = model.predict_proba(data_scaled)[:, 1]
        predictions = (probabilities >= threshold).astype(int)

        return predictions, probabilities

    def get_prediction_df(self, data, model_type='random_forest', threshold=0.5):
        """Return predictions with probabilities as DataFrame.

        The frame has one row per input row and two columns:
        ``Minigrid_Probability`` (class-1 probability) and ``Prediction``
        (the label from ``self.classes`` for the thresholded prediction).
        """
        predictions, probabilities = self.predict(data, model_type, threshold)

        return pd.DataFrame({
            'Minigrid_Probability': probabilities,
            'Prediction': [self.classes[p] for p in predictions]
        })


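Extract the trained models (the archive `minigrid_village_predictor_models.zip` must be present in the working directory)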

In [3]:
!unzip minigrid_village_predictor_models.zip

Archive:  minigrid_village_predictor_models.zip
   creating: models/
  inflating: models/rf_model.pkl     
  inflating: models/feature_names.txt  
  inflating: models/log_reg_model.pkl  
  inflating: models/scaler.pkl       


Load feature names

In [4]:
# The file holds the feature names as comma-separated text. Strip whitespace
# from each entry (and skip empty ones) so that a trailing newline or a space
# after a comma cannot end up inside a feature name and stop it from matching
# the corresponding input column.
with open('models/feature_names.txt', 'r') as f:
    feature_names = [name.strip() for name in f.read().split(',') if name.strip()]

Initialize predictor

In [5]:
# Build the predictor from the artifacts extracted into models/ and the
# feature-name list read above.
predictor = MinigridPredictor(
    feature_names=feature_names,
    scaler_path='models/scaler.pkl',
    rf_path='models/rf_model.pkl',
    log_reg_path='models/log_reg_model.pkl',
)

## Example 1: Single instance prediction with different thresholds

In [6]:
# One village described by the feature values the predictor expects.
single_village = {
    'facilities': 1,
    'grid_extension': 0,
    'existing_grid': 0,
    'mean_ndvi': 0.3,
    'mean_wind_speed': 1.2,
    'mean_pvout_solar_radiation': 1600,
    'building_count': 200,
    'permanent_building_count': 50,
    'educational_facilities': 1,
    'health_facilities': 0,
    'social_facilities': 0,
    'services': 0,
    'primary_roads': 0,
    'secondary_roads': 0,
    'tertiary_roads': 1,
    'unclassified_roads': 2,
    'percentage_crop_land': 20.0,
    'percentage_built_area': 5.0,
    'contains_protected_area': False
}

# (display label, model_type argument) for each model to demonstrate.
MODELS = (
    ('Random Forest', 'random_forest'),
    ('Logistic Regression', 'logistic_regression'),
)
# Cut-offs applied to the same probability to show how the label changes.
THRESHOLDS = (0.3, 0.5, 0.7)

# One loop for both models instead of two copy-pasted loops; the printed
# output is unchanged.
for label, model_type in MODELS:
    print(f"\nSingle Instance Prediction ({label}):")
    for threshold in THRESHOLDS:
        pred_df = predictor.get_prediction_df(single_village, model_type, threshold)
        print(f"Threshold = {threshold}:")
        print(pred_df)


Single Instance Prediction (Random Forest):
Threshold = 0.3:
   Minigrid_Probability Prediction
0              0.634229   Minigrid
Threshold = 0.5:
   Minigrid_Probability Prediction
0              0.634229   Minigrid
Threshold = 0.7:
   Minigrid_Probability    Prediction
0              0.634229  Not Minigrid

Single Instance Prediction (Logistic Regression):
Threshold = 0.3:
   Minigrid_Probability Prediction
0              0.909964   Minigrid
Threshold = 0.5:
   Minigrid_Probability Prediction
0              0.909964   Minigrid
Threshold = 0.7:
   Minigrid_Probability Prediction
0              0.909964   Minigrid


## Example 2: Batch prediction

In [7]:
# First record: no facilities or roads, few buildings, contains a protected area.
sparse_village = {
    'facilities': 0,
    'grid_extension': 0,
    'existing_grid': 0,
    'mean_ndvi': 0.0,
    'mean_wind_speed': 0.0,
    'mean_pvout_solar_radiation': 1550,
    'building_count': 50,
    'permanent_building_count': 10,
    'educational_facilities': 0,
    'health_facilities': 0,
    'social_facilities': 0,
    'services': 0,
    'primary_roads': 0,
    'secondary_roads': 0,
    'tertiary_roads': 0,
    'unclassified_roads': 0,
    'percentage_crop_land': 15.0,
    'percentage_built_area': 2.0,
    'contains_protected_area': True
}

# Second record: several facilities and roads, many buildings, no protected area.
dense_village = {
    'facilities': 2,
    'grid_extension': 1,
    'existing_grid': 0,
    'mean_ndvi': 0.4,
    'mean_wind_speed': 1.5,
    'mean_pvout_solar_radiation': 1650,
    'building_count': 500,
    'permanent_building_count': 100,
    'educational_facilities': 1,
    'health_facilities': 1,
    'social_facilities': 0,
    'services': 1,
    'primary_roads': 1,
    'secondary_roads': 1,
    'tertiary_roads': 2,
    'unclassified_roads': 3,
    'percentage_crop_land': 30.0,
    'percentage_built_area': 15.0,
    'contains_protected_area': False
}

# One row per village, in the order listed.
batch_data = pd.DataFrame([sparse_village, dense_village])

print("\nBatch Prediction (Random Forest) with Threshold=0.3:")
batch_pred_df = predictor.get_prediction_df(
    batch_data,
    model_type='random_forest',
    threshold=0.3,
)
print(batch_pred_df)


Batch Prediction (Random Forest) with Threshold=0.3:
   Minigrid_Probability    Prediction
0              0.067089  Not Minigrid
1              0.289363  Not Minigrid
