In [1]:
# ============================================
# ⚡️ PHASE 3: ENERGY NEED SCORE + RECOMMENDATION FIELD
# ============================================

import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.ops import nearest_points
from shapely.geometry import Point
import warnings
warnings.filterwarnings("ignore")

print("✅ Libraries loaded successfully.")


# --- Step 1. Load Preprocessed Data ---
clusters = gpd.read_file("../data_processed/road_clusters.geojson")
power = gpd.read_file("../data_processed/power_lines_clean.geojson")
roads = gpd.read_file("../data_processed/roads_clustered.geojson")

print(f"Loaded {len(clusters)} clusters, {len(roads)} roads, {len(power)} power lines.")


# --- Step 2. Compute Distance to Nearest Power Line ---
# Convert both to metric CRS for distance calculations
clusters = clusters.to_crs(3857)
power = power.to_crs(3857)

# Combine all power line geometries into one
power_union = power.geometry.unary_union

# Compute distances (in km)
clusters["dist_to_power_km"] = clusters.geometry.apply(lambda g: g.distance(power_union) / 1000)
clusters = clusters.to_crs(4326)

print("✅ Computed distance to nearest power line (km).")


# --- Step 3. Create a Population Proxy ---
# Synthetic population value based on random distribution (0–1 scale)
# Later can be replaced with real raster extraction (e.g., WorldPop)
np.random.seed(42)
clusters["pop_index"] = np.random.uniform(0.3, 1.0, len(clusters))

print("✅ Added synthetic population proxy (pop_index).")


# --- Step 4. Normalize Metrics for Scoring ---
def normalize(series):
    return (series - series.min()) / (series.max() - series.min())

clusters["norm_road_km"] = normalize(clusters["total_road_km"])
clusters["norm_dist_power"] = normalize(clusters["dist_to_power_km"])
clusters["norm_pop"] = normalize(clusters["pop_index"])

print("✅ Normalized all metrics for comparability.")


# --- Step 5. Compute Composite Energy Need Score ---
# High population + low road density + far from power line = high need
clusters["energy_need_score"] = (
    (clusters["norm_pop"] * 0.4) +
    ((1 - clusters["norm_road_km"]) * 0.3) +
    (clusters["norm_dist_power"] * 0.3)
)

# Scale 0–100 for readability
clusters["energy_need_score"] = (clusters["energy_need_score"] * 100).round(2)

print("✅ Computed composite Energy Need Score (0–100 scale).")


# --- Step 6. Create a Recommendation Field ---
def classify_need(score):
    if score >= 75:
        return "🔴 Critical Priority"
    elif score >= 50:
        return "🟠 High Priority"
    elif score >= 25:
        return "🟡 Moderate Priority"
    else:
        return "🟢 Low Priority"

clusters["recommendation"] = clusters["energy_need_score"].apply(classify_need)

print("✅ Added qualitative recommendation field.")


# --- Step 7. Export Outputs ---
clusters.to_file("../data_processed/clusters.geojson", driver="GeoJSON")
clusters[["cluster_id", "energy_need_score", "recommendation"]].to_csv(
    "../data_processed/scores.csv", index=False
)

print("💾 Saved:")
print(" - data_processed/clusters.geojson")
print(" - data_processed/scores.csv")


# --- Step 8. Quick Summary ---
print("\n📊 Energy Need Score Summary:")
print(clusters[["energy_need_score"]].describe())
print("\n🗺 Distribution by Priority:")
print(clusters["recommendation"].value_counts())

print("\n🎯 Phase 3 complete – clusters now scored and categorized.")

✅ Libraries loaded successfully.
Loaded 20 clusters, 725553 roads, 1475 power lines.
✅ Computed distance to nearest power line (km).
✅ Added synthetic population proxy (pop_index).
✅ Normalized all metrics for comparability.
✅ Computed composite Energy Need Score (0–100 scale).
✅ Added qualitative recommendation field.
💾 Saved:
 - data_processed/clusters.geojson
 - data_processed/scores.csv

📊 Energy Need Score Summary:
       energy_need_score
count          20.000000
mean           35.969500
std            15.229303
min             7.260000
25%            27.735000
50%            35.580000
75%            43.462500
max            67.920000

🗺 Distribution by Priority:
recommendation
🟡 Moderate Priority    13
🟢 Low Priority          4
🟠 High Priority         3
Name: count, dtype: int64

🎯 Phase 3 complete – clusters now scored and categorized.
