In [None]:
# ==========================================================
# ENERGY NEED SCORE (v2 with Recommendation Layer)
# ==========================================================

import geopandas as gpd
import pandas as pd
import numpy as np
import os
from datetime import datetime

# --- Step 1: Load Cluster Data ---
# Only the read itself sits inside the try, so a failure in the informational
# prints below is never misreported as a load error.
try:
    clusters = gpd.read_file("../data_processed/clusters.geojson")
except Exception as e:
    # The exception type is kept as FileNotFoundError for compatibility, but
    # the original error is chained explicitly so whatever gpd.read_file
    # actually raised stays visible as the direct cause in the traceback.
    raise FileNotFoundError(f" Could not load clusters.geojson: {e}") from e
else:
    print(f" Loaded {len(clusters)} clusters.")
    print(f"Columns available in file:\n{list(clusters.columns)}")

# --- Step 2: Identify Available Columns ---
def find_column(columns, keywords):
    """Return the first column whose name contains one of *keywords*.

    Matching is a case-insensitive substring search. Keywords are tried in
    the order given, so an earlier keyword always wins over a later one no
    matter where the matching columns sit in *columns*. Among columns that
    match the same keyword, the one appearing first in *columns* is returned.

    Args:
        columns: Iterable of column labels. Non-string labels are compared
            through ``str(label)`` instead of raising.
        keywords: Iterable of substrings to look for, highest priority first.

    Returns:
        The matching label exactly as it appears in *columns*, or ``None``
        when nothing matches.
    """
    # Materialize once: the labels are scanned once per keyword.
    labels = list(columns)
    for key in keywords:
        needle = key.lower()
        for col in labels:
            if needle in str(col).lower():
                return col
    return None

# Resolve each scoring input to a column of the loaded file by keyword match.
road_col = find_column(clusters.columns, ["road", "length"])
power_col = find_column(clusters.columns, ["power", "line", "electric"])
grid_col = find_column(clusters.columns, ["grid", "distance", "dist"])

print(f"Detected → Road: {road_col}, Power: {power_col}, Grid/Distance: {grid_col}")

# Substring matching can resolve two roles to the same column (for example a
# "dist_to_power_km" column contains both "power" and "dist"). Such a column
# then feeds more than one term of the score, so report it instead of letting
# it pass silently. Detection itself is left unchanged.
roles_by_col = {}
for role, col in (("Road", road_col), ("Power", power_col), ("Grid/Distance", grid_col)):
    if col is not None:
        roles_by_col.setdefault(col, []).append(role)
for col, roles in roles_by_col.items():
    if len(roles) > 1:
        print(f" Warning: column '{col}' matched multiple roles → {', '.join(roles)}")

# --- Step 3: Fill Missing Data ---
# Replace missing values with 0 in every detected input column. The role name
# travels with the column so that a missing input is reported by name; the
# detected value alone would only ever print "None".
# NOTE(review): for a distance column, filling with 0 treats "unknown" as
# "zero distance" — confirm this is the intended default.
for role, col in (("road", road_col), ("power", power_col), ("grid/distance", grid_col)):
    if col and col in clusters.columns:
        clusters[col] = clusters[col].fillna(0)
    else:
        print(f" Warning: {role} column not found → {col}")

# --- Step 4: Normalization Function ---
def normalize(series):
    """Min-max scale *series* so its smallest value is 0 and its largest is 1.

    The division is exact: no epsilon is added to the denominator, so the
    maximum maps to exactly 1.0 and data with a very small range is not
    distorted. When the range is not strictly positive (all values equal, or
    min/max are NaN because there is nothing to measure), every entry is
    mapped to 0.0, with NaN entries staying NaN.

    Args:
        series: Numeric pandas Series (anything offering ``min()``/``max()``
            and element-wise arithmetic).

    Returns:
        A float series of the same length and index as *series*.
    """
    lowest = series.min()
    span = series.max() - lowest
    # `not span > 0` also catches a NaN span, which `span <= 0` would miss.
    if not span > 0:
        return (series - lowest) * 0.0
    return (series - lowest) / span

# --- Step 5: Compute Base Energy Need Score ---
# Scale each detected input with normalize(); a missing input falls back to a
# constant so the score can still be computed.
clusters["road_norm"] = normalize(clusters[road_col]) if road_col else 0
clusters["power_norm"] = normalize(clusters[power_col]) if power_col else 0
# Farther from the grid = higher need, so the scaled distance is used as-is.
# It used to be inverted (1 - normalize(...)), which gave the most remote
# clusters the lowest contribution from this term. Without a distance column
# every cluster receives the maximum contribution (1).
clusters["grid_norm"] = normalize(clusters[grid_col]) if grid_col else 1

# Weighted sum on a 0-100 scale: low road_norm (weight 0.5) and low power_norm
# (weight 0.3) raise the score, as does high grid_norm (weight 0.2).
# NOTE(review): the power term assumes power_col measures the amount of
# existing power infrastructure; if detection resolves it to a distance
# column, the sign of that term is wrong — confirm against the input schema.
clusters["Score"] = (
    0.5 * (1 - clusters["road_norm"]) +
    0.3 * (1 - clusters["power_norm"]) +
    0.2 * clusters["grid_norm"]
) * 100

print(" Energy Need Score computed successfully.")

# --- Step 6: Categorize Energy Need Level ---
def categorize_score(score):
    """Map a numeric need score to its qualitative band.

    Bands are checked from the highest floor down: >= 80 is "Very High",
    >= 60 "High", >= 40 "Moderate", >= 20 "Low". Anything that clears none
    of the floors (including a value for which every comparison is false)
    is "Very Low".
    """
    bands = (
        (80, "Very High"),
        (60, "High"),
        (40, "Moderate"),
        (20, "Low"),
    )
    for floor, label in bands:
        if score >= floor:
            return label
    return "Very Low"

# Label every cluster with the band its score falls into.
clusters["Need_Level"] = clusters["Score"].map(categorize_score)

# --- Step 7: Recommend Electrification Strategy (overwrite any old field) ---
def recommend_strategy(row):
    """Pick the electrification strategy for one cluster.

    Only ``row["Need_Level"]`` is consulted. Any level not listed in the
    table below yields "Monitor Only".
    """
    strategy_by_level = {
        "Very High": "Connect to Main Grid",
        "High": "Mini-grid (Hybrid Solar/Diesel)",
        "Moderate": "Mini-grid (Solar)",
        "Low": "Off-grid Solar Home Systems",
    }
    return strategy_by_level.get(row["Need_Level"], "Monitor Only")

# Always overwrite existing recommendation field
# Derive the strategy row by row; assigning to the column replaces any
# "recommendation" values that came in with the input file.
strategy_per_cluster = clusters.apply(recommend_strategy, axis="columns")
clusters["recommendation"] = strategy_per_cluster

# --- Step 8: Save Outputs ---
processed_dir = "../data_processed"
os.makedirs(processed_dir, exist_ok=True)

# Backend data: the full scored layer plus a slim CSV of the headline columns.
scored_geojson_path = "../data_processed/clusters_scored_v2.geojson"
clusters.to_file(scored_geojson_path, driver="GeoJSON")

summary_columns = ["cluster_id", "Score", "Need_Level", "recommendation"]
clusters[summary_columns].to_csv("../data_processed/scores_v2.csv", index=False)

# Frontend copy used for visualization; its folder is created on demand.
frontend_data_path = "../frontend/public/data/clusters_scored.geojson"
os.makedirs(os.path.dirname(frontend_data_path), exist_ok=True)
clusters.to_file(frontend_data_path, driver="GeoJSON")

# Report what was written, one line per artifact.
for report_line in (
    " Files saved successfully:",
    " - data_processed/clusters_scored_v2.geojson",
    " - data_processed/scores_v2.csv",
    " - frontend/public/data/clusters_scored.geojson",
):
    print(report_line)

# --- Step 9: Summary Output ---
# Descriptive statistics of the final score column.
print("\n Summary of Energy Need Scores:")
score_stats = clusters["Score"].describe()
print(score_stats)

# Highest-scoring clusters first; head() keeps the top five rows.
print("\n Top 5 High-Need Clusters:")
ranked = clusters[["cluster_id", "Score", "recommendation"]].sort_values(
    by="Score", ascending=False
)
print(ranked.head())

# Timestamp the completion message with the local wall-clock time.
finished_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"\nEnergy Need + Recommendation ready for visualization ({finished_at}).")

🚀 Starting Phase 3 — Energy Need Scoring and Recommendation...
✅ Loaded 20 clusters.
Columns available in file:
['lon', 'lat', 'cluster_id', 'total_road_km', 'dist_to_power_km', 'pop_index', 'norm_road_km', 'norm_dist_power', 'norm_pop', 'energy_need_score', 'recommendation', 'geometry']
Detected → Road: total_road_km, Power: dist_to_power_km, Grid/Distance: dist_to_power_km
✅ Energy Need Score computed successfully.
💾 Files saved successfully:
 - data_processed/clusters_scored_v2.geojson
 - data_processed/scores_v2.csv
 - frontend/public/data/clusters_scored.geojson

📊 Summary of Energy Need Scores:
count    20.000000
mean     66.488928
std      14.419745
min      47.411648
25%      52.772961
50%      66.804120
75%      72.928607
max      99.633669
Name: Score, dtype: float64

🔥 Top 5 High-Need Clusters:
    cluster_id      Score                   recommendation
16          16  99.633669             Connect to Main Grid
9            9  88.431609             Connect to Main Grid
3     