In [None]:
# # Pin NumPy to < 2 and reinstall compatible wheels
# %pip install -q "numpy<2"

# # Force-reinstall all geo deps to match the NumPy ABI
# %pip install -q --force-reinstall \
#   "scipy==1.10.*" \
#   "networkx==2.8.8" \
#   "shapely==2.0.*" \
#   "pyproj==3.6.*" \
#   "rtree==1.2.*" \
#   "fiona==1.9.*" \
#   "geopandas==0.14.*" \
#   "osmnx==1.9.4" \
#   folium

# # (Optional) GEE API if you use the NDVI script
# %pip install -q earthengine-api


GEEE toke: 4/1AVGzR1AS0MVg6BzSSkfBv0SS0xIVdGA7KB0veQckyspF7QvKqr5zTmdmOp8

## Location specs

# Green Spaces Build 


In [None]:
# narayanganj_green_access_ndvi_osm.py
# Green access + site candidates + planner-friendly context layers.
# FAST version with batched Earth Engine calls and robustness fixes.
#
# Deps:
#   pip install earthengine-api folium osmnx geopandas shapely networkx rtree
#   # If you hit NumPy 2.x ABI issues with GeoPandas/Shapely wheels:
#   # pip install "numpy<2" && pip install --force-reinstall geopandas shapely pyproj fiona rtree
#
# First run will prompt a browser for Earth Engine auth (ee.Authenticate()).

import os, tempfile, base64
import math
import warnings
from datetime import date
from models.llms import groq_api

try:
    import ee
    import folium
    import networkx as nx
    import osmnx as ox
    import geopandas as gpd
    from shapely.geometry import Point, LineString, Polygon, MultiPolygon
    from shapely.ops import unary_union
except Exception as e:
    raise SystemExit(
        f"\nImport error: {e}\n\n"
        "This often happens when GeoPandas/Shapely wheels were built for NumPy 1.x but you're on NumPy 2.x.\n"
        "Quick fix (in a clean venv):\n"
        "  pip install 'numpy<2'\n"
        "  pip install --force-reinstall geopandas shapely pyproj fiona rtree\n"
        "Or use a fresh 'conda create -n aoi python=3.11' env and install the deps.\n"
    )

warnings.filterwarnings("ignore", category=UserWarning)

# ----------------------------
# FEATURE FLAGS (toggle heavy layers quickly)
# ----------------------------
DO_POP   = True   # population in 10-min walkshed
DO_HAND  = True   # DEM low-lying proxy + slope
DO_SMAP  = True   # SMAP soil moisture
DO_HEAT  = True   # MODIS/ECOSTRESS heat
DO_SOIL  = True   # Soil properties
DO_COUNTS= True   # Nearby counts (kept for popups/CSV; still not printed)

# ----------------------------
# SETTINGS
# ----------------------------
PLACE = "Narayanganj, Dhaka Division, Bangladesh"

# NDVI
NDVI_GREEN_MIN = 0.35

# Walking thresholds
T5 = 5 * 60
T10 = 10 * 60
WALK_MPS = 1.3

# OSM green tags
GREEN_TAGS = {
    "leisure": ["park", "garden"],
    "landuse": ["recreation_ground", "grass"],
    "natural": ["wood"],
}

EDGE_BUFFER_M = 25
TOP_N_CANDIDATES = 20

# GEE composite tries
DATE_TRIES = [
    ("2025-09-01", "2025-09-25", 20),
    ("2025-08-01", "2025-09-25", 40),
    ("2025-06-01", "2025-09-25", 80),
]

# Context radii / scales (tuned for speed)
SITE_BUFFER_M = 800
SITE_BUFFER_M_FALLBACK = 1200
WATER_STATS_RADIUS_M = 150

CURRENT_YEAR = date.today().year
HEAT_START = f"{CURRENT_YEAR}-04-01"
HEAT_END   = f"{CURRENT_YEAR}-06-30"

SMAP_DAYS = 30

# EE scales
POP_SCALE   = 200   # coarser than 100 m → faster population sums
DEM_SCALE   = 30
MODIS_SCALE = 1000
ECOS_SCALE  = 100
SOIL_SCALE  = 250
JRC_SCALE   = 30
SMAP_SCALE  = 9000

# DEM neighborhood for 5th percentile (HAND proxy)
HAND_RADIUS_M = 1500  # was 2000 → faster

# Output
DOWNLOADS = os.path.expanduser("~/Downloads")
OUT_HTML = os.path.join(DOWNLOADS, "narayanganj_green_access_ndvi_osm.html")
OUT_CSV  = os.path.join(DOWNLOADS, "narayanganj_site_context.csv")

# ----------------------------
# EE INIT
# ----------------------------
def ee_init_headless():
    sa = os.environ["EE_SERVICE_ACCOUNT"]       # ee-runner@<project>.iam.gserviceaccount.com
    key_b64 = os.environ["EE_KEY_B64"]          # base64 of the JSON key

    # Write key to a temp file
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
        f.write(base64.b64decode(key_b64).decode("utf-8"))
        key_path = f.name

    creds = ee.ServiceAccountCredentials(sa, key_path)
    ee.Initialize(credentials=creds)

# ----------------------------
# NDVI → green polygons
# ----------------------------
def choose_s2_composite(aoi_geom):
    for (start, end, cloud) in DATE_TRIES:
        s2sr = (ee.ImageCollection("COPERNICUS/S2_SR")
                .filterBounds(aoi_geom).filterDate(start, end)
                .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", cloud)))
        if s2sr.size().getInfo() > 0:
            return s2sr.median(), f"S2_SR {start}..{end} cloud<{cloud}%"
        s2l1c = (ee.ImageCollection("COPERNICUS/S2")
                 .filterBounds(aoi_geom).filterDate(start, end)
                 .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", cloud)))
        if s2l1c.size().getInfo() > 0:
            return s2l1c.median(), f"S2_L1C {start}..{end} cloud<{cloud}%"
    raise SystemExit("No recent Sentinel-2 scenes for AOI after fallbacks.")

def gee_green_polygons(aoi_geom, ndvi_min=NDVI_GREEN_MIN, scale=30, max_features=700):
    composite, desc = choose_s2_composite(aoi_geom)
    print("GEE composite picked:", desc)
    ndvi = composite.normalizedDifference(["B8", "B4"]).rename("NDVI")
    green_mask = ndvi.gte(ndvi_min).selfMask()
    vectors = green_mask.reduceToVectors(
        geometry=aoi_geom, scale=scale, geometryType="polygon",
        bestEffort=True, maxPixels=1e13
    ).limit(max_features)
    fc = vectors.getInfo()
    feats = fc.get("features", [])
    if not feats:
        return gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")
    gdf = gpd.GeoDataFrame.from_features(feats, crs="EPSG:4326")
    return gdf[gdf.geometry.type.isin(["Polygon", "MultiPolygon"])].copy()

# ----------------------------
# EE helpers (batched reducers)
# ----------------------------
def ee_geom_from_shapely(geom):
    return ee.Geometry(geom.__geo_interface__)

def fc_from_buffers(ids, lonlats, radius_m):
    feats = []
    for cid, (lon, lat) in zip(ids, lonlats):
        feats.append(ee.Feature(ee.Geometry.Point([lon, lat]).buffer(radius_m), {"cid": int(cid)}))
    return ee.FeatureCollection(feats)

def fc_from_polys(ids, shapely_polys):
    feats = []
    for cid, shp in zip(ids, shapely_polys):
        if shp is None: 
            continue
        if isinstance(shp, (Polygon, MultiPolygon)):
            feats.append(ee.Feature(ee_geom_from_shapely(shp), {"cid": int(cid)}))
    return ee.FeatureCollection(feats)

def reduce_regions_to_dict(image, fc, reducer, scale):
    """Run reduceRegions and return dict: cid -> properties dict (including band-named keys)."""
    out = {}
    try:
        coll = image.reduceRegions(collection=fc, reducer=reducer, scale=scale)
        data = coll.getInfo().get("features", [])
        for f in data:
            props = f.get("properties", {})
            cid = int(props.get("cid"))
            out[cid] = props
    except Exception:
        pass
    return out

def first_number(props, preferred_keys=None):
    """Get a numeric value from a properties dict, preferring certain keys."""
    if not isinstance(props, dict):
        return None
    if preferred_keys:
        for k in preferred_keys:
            if k in props:
                try:
                    return float(props[k])
                except Exception:
                    pass
    for k, v in props.items():
        if k == "cid": 
            continue
        try:
            return float(v)
        except Exception:
            continue
    return None

# ----------------------------
# DATASETS (images)
# ----------------------------
def jrc_occurrence_img():
    try:
        return ee.Image("JRC/GSW1_4/GlobalSurfaceWater").select("occurrence")
    except Exception:
        return None

def soil_sources_images():
    out = []
    # SoilGrids 2020
    try:
        img = ee.Image("ISRIC/SoilGrids/2020")
        bands = img.bandNames().getInfo()
        def pick(tokens):
            for b in bands:
                bl = b.lower()
                if all(t in bl for t in tokens): return b
            return None
        bmap = {
            "soil_ph":   pick(("phh2o","0-5","mean")) or pick(("phh2o","0-5cm","mean")),
            "soil_clay": pick(("clay","0-5","mean"))  or pick(("clay","0-5cm","mean")),
            "soil_sand": pick(("sand","0-5","mean"))  or pick(("sand","0-5cm","mean")),
            "soil_soc":  pick(("soc","0-5","mean"))   or pick(("soc","0-5cm","mean")),
        }
        selects = [b for b in bmap.values() if b]
        if selects:
            out.append(("SoilGrids2020", img.select(selects)))
    except Exception:
        pass

    # SoilGrids 250m
    try:
        img = ee.Image("ISRIC/SoilGrids/250m")
        bands = img.bandNames().getInfo()
        def pick(tokens):
            for b in bands:
                bl = b.lower()
                if all(t in bl for t in tokens): return b
            return None
        bmap = {
            "soil_ph":   pick(("phh2o","0-5","mean")) or pick(("phh2o","0-5cm","mean")),
            "soil_clay": pick(("clay","0-5","mean"))  or pick(("clay","0-5cm","mean")),
            "soil_sand": pick(("sand","0-5","mean"))  or pick(("sand","0-5cm","mean")),
            "soil_soc":  pick(("soc","0-5","mean"))   or pick(("soc","0-5cm","mean")),
        }
        selects = [b for b in bmap.values() if b]
        if selects:
            out.append(("SoilGrids250m", img.select(selects)))
    except Exception:
        pass

    # OpenLandMap fallbacks
    try:
        ph   = ee.Image("OpenLandMap/SOL/SOL_PH-H2O_USDA-4C1A2A_M/v02")
        clay = ee.Image("OpenLandMap/SOL/SOL_TEXTURE-CLAY_USDA-3A1A1A_M/v02")
        sand = ee.Image("OpenLandMap/SOL/SOL_TEXTURE-SAND_USDA-3A1A1A_M/v02")
        soc  = ee.Image("OpenLandMap/SOL/SOL_ORGANIC-CARBON_USDA-6A1C_M/v02")
        out.append(("OpenLandMap", ph.addBands([clay, sand, soc])))
    except Exception:
        pass

    return out

def population_image(aoi):
    for yr in [2025, 2023, 2022, 2021, 2020, 2019]:
        try:
            col = ee.ImageCollection("WorldPop/GP/100m/pop").filterBounds(aoi).filter(ee.Filter.eq('year', yr))
            if col.size().getInfo() > 0:
                img = col.mosaic()
                bname = img.bandNames().getInfo()[0]
                return img.select(bname, ["pop"])
        except Exception:
            pass
    try:
        img = ee.Image("JRC/GHSL/P2019/POP_GLOBE_R2019A")
        bands = [b for b in img.bandNames().getInfo() if "2020" in b or "2015" in b]
        if bands:
            return img.select(bands[0], ["pop"])
    except Exception:
        pass
    try:
        col = ee.ImageCollection("CIESIN/GPWv411/GPW_Population_Count").filter(ee.Filter.eq("year", 2020))
        img = col.first()
        if img:
            b = img.bandNames().getInfo()[0]
            return img.select(b, ["pop"])
    except Exception:
        pass
    return None

def dem_and_slope():
    try:
        dem = ee.Image("COPERNICUS/DEM/GLO30")
    except Exception:
        dem = ee.Image("USGS/SRTMGL1_003")
    slope = ee.Terrain.slope(dem).rename("slope")
    return dem.rename("elevation"), slope

def heat_images(aoi):
    modis = None
    eco = None
    try:
        col = (ee.ImageCollection("MODIS/061/MOD11A2")
               .filterBounds(aoi).filterDate(HEAT_START, HEAT_END).select("LST_Day_1km"))
        if col.size().getInfo() > 0:
            modisK = col.mean()
            modis = modisK.multiply(0.02).subtract(273.15).rename("LST_modis_C")
    except Exception:
        pass
    try:
        col = (ee.ImageCollection("NASA/JPL/ECOSTRESS/L2_LSTE")
               .filterBounds(aoi).filterDate(HEAT_START, HEAT_END).select("LST"))
        if col.size().getInfo() > 0:
            ecoK = col.mean()
            eco = ecoK.subtract(273.15).rename("LST_eco_C")
    except Exception:
        pass
    return modis, eco

def smap_image(aoi):
    try:
        end = ee.Date(date.today().isoformat())
        start = end.advance(-SMAP_DAYS, 'day')
        col = (ee.ImageCollection("NASA/SMAP/SPL3SMP_E")
               .filterBounds(aoi).filterDate(start, end).select("soil_moisture"))
        if col.size().getInfo() == 0:
            return None
        return col.mean().rename("soil_moisture")
    except Exception:
        return None

# ----------------------------
# INTERPRETERS
# ----------------------------
def fmt_meters_and_walk(min_meters):
    if min_meters is None:
        return "unknown distance"
    minutes = (min_meters / WALK_MPS) / 60.0
    return f"~{round(min_meters):,} m (~{round(minutes,1)} min walk)"

def interpret_water_occurrence(p):
    if p is None:  return "No satellite evidence of open water nearby (or data unavailable)."
    if p < 5:      return "Very rarely wet — area is usually dry."
    if p < 20:     return "Occasionally wet — may pond during heavy rain."
    if p < 50:     return "Seasonally wet — expect water presence in some months."
    return "Frequently/permanently wet — likely near river/pond or flood-prone."

def interpret_ph(ph):
    if ph is None:       return "Soil pH unknown here."
    if ph < 5.5:         return f"Acidic (pH {ph}) — choose tolerant species."
    if ph <= 7.5:        return f"Near neutral (pH {ph}) — good for most plants."
    return f"Alkaline (pH {ph}) — choose tolerant species."

def interpret_texture(sand_pct, clay_pct):
    if sand_pct is None or clay_pct is None:
        return "Soil texture unknown."
    if sand_pct >= 60 and clay_pct < 20:
        return f"Sandy ({sand_pct}% sand) — drains fast; add organic matter."
    if clay_pct >= 35:
        return f"Clayey ({clay_pct}% clay) — slow drainage; raised beds help."
    return f"Loamy mix (sand {sand_pct}%, clay {clay_pct}%) — generally good."

def interpret_distance_to_water(d):
    if d is None: return "Distance to water unknown."
    if d < 100:   return "Very close to water (<100 m)."
    if d < 500:   return "Near water (100–500 m)."
    return "Far from water (>500 m)."

def interpret_density(building_pct, road_km_km2):
    msgs = []
    if building_pct is not None:
        if building_pct < 5:   msgs.append("very low building coverage")
        elif building_pct < 20:msgs.append("low building coverage")
        elif building_pct < 40:msgs.append("moderate building coverage")
        else:                  msgs.append("dense built-up surroundings")
    if road_km_km2 is not None:
        if road_km_km2 < 5:    msgs.append("sparse road network")
        elif road_km_km2 < 15: msgs.append("moderate road network")
        else:                  msgs.append("very dense road network")
    return "; ".join(msgs) if msgs else "Urban density unknown."

def interpret_hand_proxy(hm, slope_deg, gsw_mean):
    parts = []
    if hm is None:
        parts.append("Low-lying risk unknown")
    else:
        if hm < 1:       parts.append("Very low ground (likely ponding)")
        elif hm < 3:     parts.append("Low ground")
        elif hm < 7:     parts.append("Moderate elevation")
        else:            parts.append("High relative elevation")
    if slope_deg is not None:
        if slope_deg < 1: parts.append("very flat; slow drainage")
        elif slope_deg < 3:parts.append("gentle slope")
        else:              parts.append("noticeable slope")
    if gsw_mean is not None and gsw_mean >= 20:
        parts.append("historic water nearby")
    return " • ".join(parts)

def interpret_heat(modis_c, eco_c):
    val = eco_c if eco_c is not None else modis_c
    if val is None: return "Heat unknown."
    if val >= 42:   return f"Heat: very high (~{val}°C daytime LST)."
    if val >= 38:   return f"Heat: high (~{val}°C)."
    if val >= 34:   return f"Heat: moderate (~{val}°C)."
    return f"Heat: mild (~{val}°C)."

# ----------------------------
# OSM helpers
# ----------------------------
POI_CATEGORIES = {
    "schools":      ("amenity", ["school"]),
    "colleges":     ("amenity", ["college"]),
    "universities": ("amenity", ["university"]),
    "hospitals":    ("amenity", ["hospital"]),
    "clinics":      ("amenity", ["clinic"]),
    "pharmacies":   ("amenity", ["pharmacy"]),
    "markets":      ("amenity", ["marketplace"]),
    "libraries":    ("amenity", ["library"]),
    "community":    ("amenity", ["community_centre"]),
    "police":       ("amenity", ["police"]),
    "fire":         ("amenity", ["fire_station"]),
    "worship":      ("amenity", ["place_of_worship"]),
    "playgrounds":  ("leisure", ["playground"]),
    "sports":       ("leisure", ["sports_centre"]),
    "parks_gardens":("leisure", ["park", "garden"]),
    "supermarkets": ("shop", ["supermarket"]),
}

OSM_CONTEXT_TAGS = {
    "amenity": list({v for _, vs in [POI_CATEGORIES[k] for k in POI_CATEGORIES if POI_CATEGORIES[k][0] == "amenity"] for v in vs}),
    "leisure": list({v for _, vs in [POI_CATEGORIES[k] for k in POI_CATEGORIES if POI_CATEGORIES[k][0] == "leisure"] for v in vs}),
    "shop":    ["supermarket"],
    "building": True,
    "natural": ["water", "wetland", "wood"],
    "waterway": True,
    "landuse": ["residential", "commercial", "industrial", "retail", "recreation_ground", "grass"],
}

def fetch_osm_context(aoi_polygon):
    layers = []
    for k, v in OSM_CONTEXT_TAGS.items():
        try:
            g = osm_features_from_polygon(aoi_polygon, tags={k: v})
            if g is not None and not g.empty:
                layers.append(g)
        except Exception:
            pass
    if not layers:
        empty = gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")
        return empty, empty, empty

    base_crs = getattr(layers[0], "crs", None) or "EPSG:4326"
    all_feats = gpd.GeoDataFrame(gpd.pd.concat(layers, ignore_index=True), crs=base_crs)
    all_feats = all_feats[all_feats.geometry.notna()].copy()

    is_water = (
        (all_feats.get("natural").isin(["water", "wetland"])) |
        (all_feats.get("waterway").notna())
    ).fillna(False)

    is_building = all_feats.get("building").notna().fillna(False)

    water_gdf = all_feats[is_water].copy()
    buildings_gdf = all_feats[is_building].copy()
    pois_gdf = all_feats[~is_water].copy()
    return pois_gdf.to_crs(epsg=4326), water_gdf.to_crs(epsg=4326), buildings_gdf.to_crs(epsg=4326)

def count_features_in_buffer(pois_proj, buffer_geom, key, values):
    if pois_proj is None or pois_proj.empty or key not in pois_proj.columns:
        return 0
    subset = pois_proj[pois_proj[key].isin(values)]
    if subset.empty:
        return 0
    try:
        idx = subset.sindex
        cand_idx = list(idx.intersection(buffer_geom.bounds))
        subset = subset.iloc[cand_idx]
    except Exception:
        minx, miny, maxx, maxy = buffer_geom.bounds
        subset = subset[
            (subset.geometry.bounds["maxx"] >= minx) &
            (subset.geometry.bounds["minx"] <= maxx) &
            (subset.geometry.bounds["maxy"] >= miny) &
            (subset.geometry.bounds["miny"] <= maxy)
        ]
    if subset.empty:
        return 0
    return int(subset.intersects(buffer_geom).sum())

def building_and_road_density(buildings_proj, edges_proj, buffer_geom):
    try:
        area_m2 = float(buffer_geom.area)
        bldg_pct = None
        road_density = None

        if buildings_proj is not None and not buildings_proj.empty:
            try:
                idx = buildings_proj.sindex
                cand_idx = list(idx.intersection(buffer_geom.bounds))
                bsub = buildings_proj.iloc[cand_idx]
            except Exception:
                bsub = buildings_proj
            bsub = bsub[bsub.geometry.intersects(buffer_geom)]
            if not bsub.empty:
                inter = bsub.geometry.intersection(buffer_geom)
                built_area = float(inter.area.sum())
                if area_m2 > 0:
                    bldg_pct = round(100.0 * built_area / area_m2, 1)

        if edges_proj is not None and not edges_proj.empty:
            try:
                idx = edges_proj.sindex
                cand_idx = list(idx.intersection(buffer_geom.bounds))
                esub = edges_proj.iloc[cand_idx]
            except Exception:
                esub = edges_proj
            esub = esub[esub.geometry.intersects(buffer_geom)]
            if not esub.empty:
                ilen = esub.geometry.intersection(buffer_geom).length.sum()  # meters
                km = float(ilen) / 1000.0
                km2 = area_m2 / 1e6
                if km2 > 0:
                    road_density = round(km / km2, 1)
        return bldg_pct, road_density
    except Exception:
        return None, None

# ----------------------------
# CORE LOGIC
# ----------------------------
def line_midpoint(geom: LineString):
    try:
        return geom.interpolate(0.5, normalized=True)
    except Exception:
        if geom.geom_type == "LineString" and len(geom.coords) >= 2:
            (x1, y1), (x2, y2) = geom.coords[0], geom.coords[-1]
            return Point((x1 + x2) / 2.0, (y1 + y2) / 2.0)
        return geom.centroid

def make_iso_polygon(edges_subset, buffer_m=EDGE_BUFFER_M):
    if edges_subset is None or edges_subset.empty:
        return None
    buffered = edges_subset.geometry.buffer(buffer_m)
    merged = unary_union(list(buffered.values))
    return gpd.GeoSeries([merged], crs=edges_subset.crs)

def edges_within_time(Gp, edges_gdf, source_node, cutoff_s):
    times = nx.single_source_dijkstra_path_length(Gp, source=source_node, cutoff=cutoff_s, weight="time_s")
    nodes_in = set(times.keys())
    mask = edges_gdf.apply(lambda r: (r["u"] in nodes_in) or (r["v"] in nodes_in), axis=1)
    return edges_gdf[mask].copy(), times

# ----------------------------
# MAIN
# ----------------------------
def main():
    ee_init_headless()

    # Soil sources & images
    soil_imgs = soil_sources_images() if DO_SOIL else []
    dem_img, slope_img = dem_and_slope() if DO_HAND else (None, None)

    # OSMnx
    ox.settings.log_console = True
    ox.settings.use_cache = True
    ox.settings.timeout = 180

    print("Geocoding AOI…")
    aoi = ox.geocode_to_gdf(PLACE)
    if aoi.empty:
        raise SystemExit("Could not geocode the AOI name.")
    aoi_polygon = aoi.geometry.iloc[0]
    aoi_bounds = aoi.to_crs(epsg=4326).total_bounds
    gee_aoi = ee.Geometry.Rectangle(list(aoi_bounds))

    print("Downloading pedestrian network…")
    G = ox.graph_from_polygon(aoi_polygon, network_type="walk", simplify=True)

    print("Projecting graph to local metric CRS…")
    Gp = ox.project_graph(G)
    nodes_gdf, edges_gdf = ox.graph_to_gdfs(Gp)
    if "u" not in edges_gdf.columns or "v" not in edges_gdf.columns:
        edges_gdf = edges_gdf.reset_index()
    graph_crs = nodes_gdf.crs

    # OSM green areas
    print("Downloading OSM green areas…")
    green_layers = []
    for k, v in GREEN_TAGS.items():
        try:
            g = osm_features_from_polygon(aoi_polygon, tags={k: v})
            if g is not None and not g.empty:
                green_layers.append(g)
        except Exception:
            pass
    osm_greens = None
    if green_layers:
        base_crs = getattr(green_layers[0], "crs", None) or "EPSG:4326"
        osm_greens = gpd.GeoDataFrame(gpd.pd.concat(green_layers, ignore_index=True), crs=base_crs)
        osm_greens = osm_greens[osm_greens.geometry.type.isin(["Polygon", "MultiPolygon"])].copy()

    # NDVI greens
    print("Vectorizing NDVI green polygons (GEE)…")
    ndvi_greens = gee_green_polygons(gee_aoi, ndvi_min=NDVI_GREEN_MIN, scale=30, max_features=700)

    greens_list = []
    if osm_greens is not None and not osm_greens.empty:
        greens_list.append(osm_greens.to_crs(epsg=4326))
    if ndvi_greens is not None and not ndvi_greens.empty:
        greens_list.append(ndvi_greens.to_crs(epsg=4326))
    if not greens_list:
        raise SystemExit("No green polygons found from OSM or NDVI.")

    greens_all = gpd.GeoDataFrame(gpd.pd.concat(greens_list, ignore_index=True), crs="EPSG:4326")
    print(f"Green polygons: OSM={0 if osm_greens is None else len(osm_greens)} | NDVI={len(ndvi_greens)} | merged={len(greens_all)}")

    # Project greens
    greens_poly_proj = greens_all.to_crs(graph_crs)

    # Destination nodes from green centroids
    print("Computing destination nodes from green centroids…")
    greens_poly_proj["centroid"] = greens_poly_proj.geometry.centroid
    dest_nodes = set()
    for c in greens_poly_proj["centroid"]:
        try:
            dest_nodes.add(ox.distance.nearest_nodes(Gp, X=c.x, Y=c.y))
        except Exception:
            pass
    if not dest_nodes:
        raise SystemExit("No destination nodes from green centroids.")

    # Edge costs
    print("Assigning time costs to edges…")
    for u, v, k, data in Gp.edges(keys=True, data=True):
        length_m = float(data.get("length", 0.0)) or 0.0
        data["time_s"] = length_m / WALK_MPS

    # Multi-source Dijkstra (reverse trick)
    print("Running multi-source shortest path (Dijkstra)…")
    Gr = Gp.reverse()
    min_time_s = nx.multi_source_dijkstra_path_length(Gr, sources=list(dest_nodes), weight="time_s")

    def covered_by_threshold(u, v, threshold_s):
        tu = min_time_s.get(u, math.inf); tv = min_time_s.get(v, math.inf)
        return (tu <= threshold_s) or (tv <= threshold_s)

    def both_beyond_10(u, v):
        return (min_time_s.get(u, math.inf) > T10) and (min_time_s.get(v, math.inf) > T10)

    print("Classifying edges by coverage…")
    edges_gdf["covered_5min"] = edges_gdf.apply(lambda r: covered_by_threshold(r["u"], r["v"], T5), axis=1)
    edges_gdf["covered_10min"] = edges_gdf.apply(lambda r: covered_by_threshold(r["u"], r["v"], T10), axis=1)
    edges_gdf["uncovered_10min"] = edges_gdf.apply(lambda r: both_beyond_10(r["u"], r["v"]), axis=1)
    uncovered = edges_gdf[edges_gdf["uncovered_10min"]].copy()
    print(f"Uncovered road segments >10 min: {len(uncovered)}")

    # Isochrones (for display)
    print("Building isochrone polygons…")
    iso5_edges = edges_gdf[edges_gdf["covered_5min"]]
    iso10_edges = edges_gdf[edges_gdf["covered_10min"]]
    iso5_poly = make_iso_polygon(iso5_edges, buffer_m=EDGE_BUFFER_M)
    iso10_poly = make_iso_polygon(iso10_edges, buffer_m=EDGE_BUFFER_M)

    # Candidates: midpoints of longest uncovered segments (dedup)
    print("Selecting candidate micro-park points…")
    uncovered["length_m"] = uncovered.geometry.length
    candidates = uncovered.sort_values("length_m", ascending=False).head(3 * TOP_N_CANDIDATES).copy()
    candidates["midpt"] = candidates.geometry.apply(line_midpoint)
    cand_proj = gpd.GeoDataFrame(geometry=candidates["midpt"], crs=edges_gdf.crs)
    cand_wgs84 = cand_proj.to_crs(epsg=4326)  # <-- FIX: transform the GeoDataFrame (not Points)
    cand_wgs84["xy_round"] = cand_wgs84.geometry.apply(lambda g: (round(g.x, 6), round(g.y, 6)))
    cand_wgs84 = cand_wgs84.drop_duplicates(subset="xy_round").head(TOP_N_CANDIDATES).copy()
    cand_proj = cand_wgs84.to_crs(edges_gdf.crs)[["geometry"]].copy()  # keep projected too

    # OSM context
    print("Downloading OSM context layers…")
    pois_wgs84, water_wgs84, buildings_wgs84 = fetch_osm_context(aoi_polygon)
    pois_proj = pois_wgs84.to_crs(graph_crs) if not pois_wgs84.empty else pois_wgs84
    water_proj = water_wgs84.to_crs(graph_crs) if not water_wgs84.empty else water_wgs84
    buildings_proj = buildings_wgs84.to_crs(graph_crs) if not buildings_wgs84.empty else buildings_wgs84
    water_union = unary_union(list(water_proj.geometry.values)) if (water_proj is not None and not water_proj.empty) else None

    # Assemble candidate basics
    ids = list(range(1, len(cand_proj) + 1))
    # Use the already-transformed WGS84 points for lon/lat lists  <-- FIXED
    lonlats = [(pt.x, pt.y) for pt in cand_wgs84.geometry]
    nearest_nodes = [ox.distance.nearest_nodes(Gp, X=geom.x, Y=geom.y) for geom in cand_proj.geometry]

    # Build 10-min isochrone polygons once (local; 20 sites → OK)
    iso_polys = []
    for nid in nearest_nodes:
        edges_iso, _times = edges_within_time(Gp, edges_gdf, nid, T10)
        iso_polys.append(make_iso_polygon(edges_iso, buffer_m=EDGE_BUFFER_M).iloc[0] if (edges_iso is not None and not edges_iso.empty) else None)

    # ----------------------------
    # BATCHED EE CALLS
    # ----------------------------
    print("\nComputing batched metrics (EE)…")

    # Population (sum over isochrone polygons)
    pop_results = {}
    if DO_POP:
        pop_img = population_image(gee_aoi)
        if pop_img is not None:
            fc_iso = fc_from_polys(ids, iso_polys)
            pop_results = reduce_regions_to_dict(
                image=pop_img, fc=fc_iso, reducer=ee.Reducer.sum(), scale=POP_SCALE
            )
        else:
            print("Population raster not available; walkshed population will be 'n/a'.")

    # JRC water (mean and max in 150 m) — do as two simple batched calls (robust)
    jrc_mean_results = {}
    jrc_max_results = {}
    occ = jrc_occurrence_img()
    if occ is not None:
        fc_water = fc_from_buffers(ids, lonlats, WATER_STATS_RADIUS_M)
        jrc_mean_results = reduce_regions_to_dict(
            image=occ, fc=fc_water, reducer=ee.Reducer.mean(), scale=JRC_SCALE
        )
        jrc_max_results = reduce_regions_to_dict(
            image=occ, fc=fc_water, reducer=ee.Reducer.max(), scale=JRC_SCALE
        )

    # Soil (buffered mean 150 m) with fallbacks (run per source, but batched)
    soil_results = {}
    if DO_SOIL and soil_imgs:
        fc_soil = fc_from_buffers(ids, lonlats, 150)
        for label, img in soil_imgs:
            tmp = reduce_regions_to_dict(
                image=img, fc=fc_soil, reducer=ee.Reducer.mean(), scale=SOIL_SCALE
            )
            for cid in ids:
                if cid in tmp and cid not in soil_results:
                    # store label + properties dict
                    soil_results[cid] = (label, tmp[cid])
    for cid in ids:
        if cid not in soil_results:
            soil_results[cid] = (None, {})

    # DEM low-lying proxy & slope (batched)
    hand_results = {}
    slope_results = {}
    if DO_HAND and dem_img is not None and slope_img is not None:
        kernel = ee.Kernel.circle(HAND_RADIUS_M, 'meters')
        try:
            p5 = dem_img.reduceNeighborhood(ee.Reducer.percentile([5]), kernel)
            hand_img = dem_img.subtract(p5).rename("hand_proxy")
            fc_hand = fc_from_buffers(ids, lonlats, 30)   # small buffer
            fc_slope = fc_from_buffers(ids, lonlats, 60)  # slope a bit larger
            hand_results = reduce_regions_to_dict(
                image=hand_img, fc=fc_hand, reducer=ee.Reducer.mean(), scale=DEM_SCALE
            )
            slope_results = reduce_regions_to_dict(
                image=slope_img, fc=fc_slope, reducer=ee.Reducer.mean(), scale=DEM_SCALE
            )
        except Exception:
            pass

    # Heat (batched)
    heat_modis = {}
    heat_eco = {}
    if DO_HEAT:
        modis_img, eco_img = heat_images(gee_aoi)
        fc_heat = fc_from_buffers(ids, lonlats, 300)
        if modis_img is not None:
            heat_modis = reduce_regions_to_dict(
                image=modis_img, fc=fc_heat, reducer=ee.Reducer.mean(), scale=MODIS_SCALE
            )
        if eco_img is not None:
            heat_eco = reduce_regions_to_dict(
                image=eco_img, fc=fc_heat, reducer=ee.Reducer.mean(), scale=ECOS_SCALE
            )

    # SMAP (batched)
    smap_results = {}
    if DO_SMAP:
        smap_img = smap_image(gee_aoi)
        if smap_img is not None:
            fc_smap = fc_from_buffers(ids, lonlats, 800)  # trimmed buffer
            smap_results = reduce_regions_to_dict(
                image=smap_img, fc=fc_smap, reducer=ee.Reducer.mean(), scale=SMAP_SCALE
            )

    # ----------------------------
    # Local OSM-derived metrics (fast w/ sindex)
    # ----------------------------
    print("Computing local OSM metrics…")
    all_counts = []
    all_build_road = []
    for cid, proj_geom in zip(ids, cand_proj.geometry):
        radius_used = SITE_BUFFER_M
        buf = proj_geom.buffer(radius_used)

        def compute_counts(buffer_geom):
            counts_local = {}
            if not DO_COUNTS or pois_proj is None or pois_proj.empty:
                for label in POI_CATEGORIES: counts_local[label] = 0
                return counts_local
            for label, (key, values) in POI_CATEGORIES.items():
                counts_local[label] = count_features_in_buffer(pois_proj, buffer_geom, key, values)
            return counts_local

        counts = compute_counts(buf)
        if DO_COUNTS and sum(counts.values()) == 0 and SITE_BUFFER_M_FALLBACK and SITE_BUFFER_M_FALLBACK > SITE_BUFFER_M:
            radius_used = SITE_BUFFER_M_FALLBACK
            buf = proj_geom.buffer(radius_used)
            counts = compute_counts(buf)

        bldg_pct, road_density = building_and_road_density(buildings_proj, edges_gdf, buf)
        all_counts.append((radius_used, counts))
        all_build_road.append((bldg_pct, road_density))

    # ----------------------------
    # Assemble per-candidate results
    # ----------------------------
    print("\nBuilding Folium map…")
    aoi_latlon = aoi.to_crs(epsg=4326)
    center = [aoi_latlon.geometry.iloc[0].centroid.y, aoi_latlon.geometry.iloc[0].centroid.x]

    edges_latlon = edges_gdf.to_crs(epsg=4326)
    uncovered_latlon = uncovered.to_crs(epsg=4326)
    greens_latlon = greens_poly_proj.to_crs(epsg=4326)
    cand_latlon_final = cand_proj.to_crs(epsg=4326)  # for plotting markers
    iso5_latlon = iso5_poly.to_crs(epsg=4326) if iso5_poly is not None else None
    iso10_latlon = iso10_poly.to_crs(epsg=4326) if iso10_poly is not None else None

    m = folium.Map(location=center, zoom_start=12, control_scale=True, tiles="cartodbpositron")
    folium.GeoJson(
        greens_latlon[["geometry"]],
        name=f"Green areas (OSM + NDVI≥{NDVI_GREEN_MIN:.2f})",
        style_function=lambda _: {"color": "#2e7d32", "weight": 1, "fillColor": "#66bb6a", "fillOpacity": 0.35},
    ).add_to(m)
    if iso10_latlon is not None:
        folium.GeoJson(iso10_latlon.__geo_interface__, name="Within 10 min of green",
                       style_function=lambda _: {"color": "#ff9800", "weight": 1, "fillColor": "#ffcc80", "fillOpacity": 0.25}).add_to(m)
    if iso5_latlon is not None:
        folium.GeoJson(iso5_latlon.__geo_interface__, name="Within 5 min of green",
                       style_function=lambda _: {"color": "#1976d2", "weight": 1, "fillColor": "#90caf9", "fillOpacity": 0.25}).add_to(m)
    folium.GeoJson(
        uncovered_latlon[["geometry"]],
        name="Road segments beyond 10 min (need green access)",
        style_function=lambda _: {"color": "#e53935", "weight": 2, "opacity": 0.9},
    ).add_to(m)

    description = ""

    description += ("\n================= Candidate Site Context =================\n")
    summary_rows = []

    for idx, (cid, latlon_geom, proj_geom) in enumerate(zip(ids, cand_latlon_final.geometry, cand_proj.geometry), start=1):
        lat, lon = latlon_geom.y, latlon_geom.x

        # Lookups from batched dicts
        pop_val = first_number(pop_results.get(cid, {}), ["pop", "sum"]) if DO_POP else None

        gsw_mean = first_number(jrc_mean_results.get(cid, {}), ["occurrence", "mean"])
        gsw_max  = first_number(jrc_max_results.get(cid, {}), ["occurrence", "max"])

        soil_label, soil_props = soil_results.get(cid, (None, {}))
        def _get(d, keys):
            for k in d.keys():
                lk = k.lower()
                if all(t in lk for t in keys):
                    try:
                        return float(d[k])
                    except Exception:
                        pass
            return None
        soil_ph   = round(_get(soil_props, ("ph",))        , 2) if _get(soil_props, ("ph",))         is not None else None
        soil_clay = round(_get(soil_props, ("clay",))      , 1) if _get(soil_props, ("clay",))       is not None else None
        soil_sand = round(_get(soil_props, ("sand",))      , 1) if _get(soil_props, ("sand",))       is not None else None
        soil_soc  = round(_get(soil_props, ("org","carb")) , 1) if _get(soil_props, ("org","carb"))  is not None else None
        if soil_ph is None and soil_clay is None and soil_sand is None and soil_soc is None:
            soil_label = None

        hand_val  = first_number(hand_results.get(cid, {}), ["hand_proxy", "mean"]) if DO_HAND else None
        slope_val = first_number(slope_results.get(cid, {}), ["slope", "mean"]) if DO_HAND else None
        modis_c   = first_number(heat_modis.get(cid, {}), ["LST_modis_C", "mean"]) if DO_HEAT else None
        eco_c     = first_number(heat_eco.get(cid, {}), ["LST_eco_C", "mean"]) if DO_HEAT else None
        smap_sm   = first_number(smap_results.get(cid, {}), ["soil_moisture", "mean"]) if DO_SMAP else None

        # Distance to OSM water
        dist_to_water_m = None
        if water_union is not None:
            try:
                dist_to_water_m = round(float(proj_geom.distance(water_union)), 1)
            except Exception:
                pass

        # Counts & urban form (local)
        radius_used, counts = all_counts[idx-1]
        bldg_pct, road_density = all_build_road[idx-1]

        # Interpretations
        water_distance_msg = f"{interpret_distance_to_water(dist_to_water_m)} — {fmt_meters_and_walk(dist_to_water_m)}."
        water_occ_msg = interpret_water_occurrence(gsw_mean)
        ph_msg = interpret_ph(soil_ph)
        texture_msg = interpret_texture(soil_sand, soil_clay)
        density_msg = interpret_density(bldg_pct, road_density)
        heat_msg = interpret_heat(modis_c, eco_c)
        hand_msg = interpret_hand_proxy(hand_val, slope_val, gsw_mean)

        # ---- CLEAN CONSOLE OUTPUT (no nearby-counts line) ----
        description += (f"\n📍 Candidate #{cid}  (Lat, Lon: {lat:.6f}, {lon:.6f})\n")
        if DO_POP and pop_val is not None:
            description += (f"  People within a 10-min walk (estimated): ~{int(pop_val):,}\n")
        description += (f"  Water: nearest mapped water is {fmt_meters_and_walk(dist_to_water_m)}. {water_distance_msg}\n")
        description += (f"  Water presence (satellite history ≤{WATER_STATS_RADIUS_M} m): mean {gsw_mean}, max {gsw_max}. {water_occ_msg}\n")
        description += (f"  Soil (0–5 cm): pH={soil_ph} (src: {soil_label}), clay%={soil_clay}, sand%={soil_sand}, SOC g/kg={soil_soc}.\n")
        description += (f"  ↳ {ph_msg}  |  {texture_msg}\n")
        if DO_HAND:
            description += (f"  Terrain: HAND-proxy {hand_val} m; slope ~{slope_val}°. {hand_msg}\n")
        if DO_HEAT:
            description += (f"  Heat (Apr–Jun): MODIS≈{modis_c}°C"
                  f"{' | ECOSTRESS≈'+str(eco_c)+'°C' if eco_c is not None else ''}. {heat_msg}\n")
        if DO_SMAP and smap_sm is not None:
            description += (f"  Soil moisture (SMAP {SMAP_DAYS}-day mean): {smap_sm} m³/m³\n")
        if bldg_pct is not None or road_density is not None:
            description += (f"  Urban form: building cover ~{bldg_pct if bldg_pct is not None else 'n/a'}% "
                  f"& roads ~{road_density if road_density is not None else 'n/a'} km/km² → {density_msg}\n")

        print(description.strip())

        llm_res = groq_api.inference(description)
        llm_res_dict = groq_api.parse_response(llm_res)
        if not llm_res_dict:
            print("  (Empty from parser.)\n")
        # Map popup (keeps counts)
        popup_html = f"""
        <div style='font-size:12px;line-height:1.35'>
          <i>Lat, Lon:</i> {lat:.6f}, {lon:.6f}<br>
          <b>People within 10-min walk:</b> {('{:,}'.format(int(pop_val)) if (DO_POP and pop_val is not None) else 'n/a')}<br>
          <b>Nearby (≤{radius_used} m)</b><br>
          schools:{counts.get('schools',0)} · colleges:{counts.get('colleges',0)} · universities:{counts.get('universities',0)}<br>
          hospitals:{counts.get('hospitals',0)} · clinics:{counts.get('clinics',0)} · pharmacies:{counts.get('pharmacies',0)}<br>
          markets:{counts.get('markets',0)} · worship:{counts.get('worship',0)} · supermarkets:{counts.get('supermarkets',0)}<br>
          playgrounds:{counts.get('playgrounds',0)} · sports:{counts.get('sports',0)} · parks/gardens:{counts.get('parks_gardens',0)}<br>
          <b>Water</b><br>
          nearest: {fmt_meters_and_walk(dist_to_water_m)}<br>
          JRC occurrence (≤{WATER_STATS_RADIUS_M} m, mean/max): {gsw_mean} / {gsw_max}<br>
          <i>{water_occ_msg}</i><br>
          <b>Soil (0–5 cm)</b><br>
          pH: {soil_ph} (src: {soil_label}) — <i>{ph_msg}</i><br>
          texture: sand {soil_sand}% / clay {soil_clay}% — <i>{texture_msg}</i><br>
          SOC: {soil_soc} g/kg<br>
          <b>Terrain</b><br>
          HAND-proxy: {hand_val} m · slope: {slope_val}°<br>
          <i>{hand_msg}</i><br>
          <b>Heat (Apr–Jun)</b><br>
          MODIS: {modis_c}°C{(' · ECOSTRESS: '+str(eco_c)+'°C') if eco_c is not None else ''}<br>
          <i>{heat_msg}</i><br>
          <b>Soil moisture</b><br>
          SMAP {SMAP_DAYS}-day mean: {smap_sm} m³/m³<br>
          <b>Urban form</b><br>
          buildings ~{bldg_pct if bldg_pct is not None else 'n/a'}% · roads ~{road_density if road_density is not None else 'n/a'} km/km²<br>
          <i>{density_msg}</i>
        </div>
        """
        folium.CircleMarker(
            location=(lat, lon), radius=6, color="#2962FF",
            fill=True, fill_color="#2962FF", fill_opacity=0.95,
            popup=folium.Popup(popup_html, max_width=380),
        ).add_to(m)

        # CSV row
        row = {
            "candidate_id": cid,
            "lat": lat, "lon": lon,
            "radius_used_m": radius_used,
            "people_walk10": int(pop_val) if (DO_POP and pop_val is not None) else None,
            "dist_to_osm_water_m": dist_to_water_m,
            "gsw_occ_mean": gsw_mean, "gsw_occ_max": gsw_max,
            "soil_ph_0_5cm": soil_ph, "soil_clay_pct_0_5cm": soil_clay,
            "soil_sand_pct_0_5cm": soil_sand, "soil_soc_gkg_0_5cm": soil_soc,
            "soil_source": soil_label,
            "hand_proxy_m": hand_val, "slope_deg": slope_val,
            "modis_lst_C": modis_c, "ecostress_lst_C": eco_c,
            "smap_sm_m3m3": smap_sm,
            "building_cover_pct": bldg_pct, "road_km_per_km2": road_density,
        }
        for k in POI_CATEGORIES.keys():
            row[f"cnt_{k}__{radius_used}m"] = counts.get(k, 0)
        summary_rows.append(row)

    # Legend & controls
    legend_html = f"""
    <div style="position: fixed; bottom: 18px; left: 18px; z-index:9999; background: white;
                padding: 10px 12px; border: 1px solid #ccc; border-radius: 6px; font-size: 13px;">
      <b>Legend</b><br>
      <span style="display:inline-block;width:12px;height:12px;background:#66bb6a;border:1px solid #2e7d32;"></span>
      Green areas (OSM + NDVI≥{NDVI_GREEN_MIN:.2f})<br>
      <span style="display:inline-block;width:12px;height:12px;background:#ffcc80;border:1px solid #ff9800;"></span>
      ≤ 10 min walk<br>
      <span style="display:inline-block;width:12px;height:12px;background:#90caf9;border:1px solid #1976d2;"></span>
      ≤ 5 min walk<br>
      <span style="display:inline-block;width:18px;height:2px;background:#e53935;vertical-align:middle;display:inline-block;"></span>
      Uncovered roads (> 10 min)<br>
      <span style="display:inline-block;width:12px;height:12px;background:#2962FF;border:1px solid #2962FF;"></span>
      Candidate micro-park
    </div>
    """
    m.get_root().html.add_child(folium.Element(legend_html))
    folium.LayerControl(collapsed=False).add_to(m)

    # Save map
    out_cwd = "narayanganj_green_access_ndvi_osm.html"
    m.save(out_cwd)
    print(f"\n✅ Saved map in current folder: {out_cwd}")
    try:
        os.makedirs(DOWNLOADS, exist_ok=True)
        m.save(OUT_HTML)
        print(f"✅ Also saved to: {OUT_HTML}")
    except Exception as e:
        print("Could not save to ~/Downloads:", e)

    # Save CSV
    try:
        df = gpd.pd.DataFrame(summary_rows)
        df.to_csv(OUT_CSV, index=False)
        print(f"✅ Site context CSV saved to: {OUT_CSV}")
    except Exception as e:
        print("Could not write CSV:", e)

    print("\nDone.")

# ----------------------------
# OSMNX COMPAT HELPERS IMPORT
# ----------------------------
try:
    from osmnx.features import features_from_polygon as osm_features_from_polygon
except Exception:
    try:
        from osmnx import geometries_from_polygon as osm_features_from_polygon
    except Exception:
        raise SystemExit("Your osmnx version is missing polygon geometries. Please: pip install --upgrade osmnx.")

if __name__ == "__main__":
    main()


Geocoding AOI…
Downloading pedestrian network…


  return _nominatim_request(params=params, request_type=request_type)
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})


Projecting graph to local metric CRS…
Downloading OSM green areas…


  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})


Vectorizing NDVI green polygons (GEE)…



Attention required for COPERNICUS/S2_SR! You are using a deprecated asset.
To make sure your code keeps working, please update it.
Learn more: https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S2_SR


Attention required for COPERNICUS/S2! You are using a deprecated asset.
To make sure your code keeps working, please update it.
Learn more: https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S2



GEE composite picked: S2_SR 2025-06-01..2025-09-25 cloud<80%
Green polygons: OSM=25 | NDVI=700 | merged=725
Computing destination nodes from green centroids…
Assigning time costs to edges…
Running multi-source shortest path (Dijkstra)…
Classifying edges by coverage…
Uncovered road segments >10 min: 32884
Building isochrone polygons…
Selecting candidate micro-park points…
Downloading OSM context layers…


  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  this_pause = _get_overpass_pause(overpass_endpoint)
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  this_pause = _get_overpass_pause(overpass_endpoint)
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  this_pause = _get_overpass_pause(overpass_endpoint)
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  this_pause = _get_overpass_pause(overpass_endpoint)
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  this_pause = _get_overpass_pause(overpass_endpoint)
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  this_pause = _get_overpass_pause(overpass_endpoint)
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(da


Computing batched metrics (EE)…
Population raster not available; walkshed population will be 'n/a'.
Computing local OSM metrics…

Building Folium map…


📍 Candidate #1  (Lat, Lon: 23.864477, 90.557852)
  Water: nearest mapped water is ~262 m (~3.4 min walk). Near water (100–500 m). — ~262 m (~3.4 min walk).
  Water presence (satellite history ≤150 m): mean 1.2826086956521743, max 2.0. Very rarely wet — area is usually dry.
  Soil (0–5 cm): pH=None (src: None), clay%=None, sand%=None, SOC g/kg=None.
  ↳ Soil pH unknown here.  |  Soil texture unknown.
  Terrain: HAND-proxy None m; slope ~None°. Low-lying risk unknown
  Heat (Apr–Jun): MODIS≈28.974218750000034°C. Heat: mild (~28.974218750000034°C).
  Urban form: building cover ~n/a% & roads ~2.7 km/km² → sparse road network

📍 Candidate #2  (Lat, Lon: 23.762769, 90.507250)
  Water: nearest mapped water is ~45 m (~0.6 min walk). Very close to water (<100 m). — ~45 m (~0.6 min walk).
  Water presence (satellite history ≤150 m): mean 62.556

## Data Sources 

* **Greenery (NDVI)**
  From **Sentinel-2** (Copernicus/ESA). The MSI sensor has **13 bands** at 10–60 m; NDVI is computed from the **red** (`B4`) and **NIR** (`B8`) bands.
  *Source: European Space Agency.*

* **Water presence (“JRC water occurrence”)**
  From the **JRC Global Surface Water** dataset, derived from ~30 m **Landsat** imagery (1984 → present). The “occurrence” score is the **% of years a pixel appeared as water**.
  *Source: Google for Developers (JRC GSW).*

* **Soil properties (pH, sand/clay, SOC)**
  From **ISRIC SoilGrids**. These are **machine-learning predictions** built from ~230k soil profiles plus many covariates (including remote sensing); it’s not a single satellite sensor. We sample the **0–5 cm** layer.
  *Source: ISRIC.*

* **“Traffic” vs what we really compute**
  The code reports **road network density (km/km²)** from **OpenStreetMap** geometry near each site. That’s a **built-environment proxy**, **not live traffic**. (Live traffic requires other data sources/APIs.)

* **Population exposure**
  **WorldPop (~100 m)** and/or **GHSL POP** to estimate **how many people** benefit within a walk-shed. Both are **gridded population surfaces**.

* **DEM-based indices (e.g., HAND)**
  Using **Copernicus DEM GLO-30** or **MERIT DEM** to flag **low-lying / flood-prone** spots; combine with **JRC water history**.

* **Soil moisture**
  **SMAP** for **surface wetness background**.

* **Heat (MODIS/ECOSTRESS)**
  Compute **average summer daytime LST** around each candidate and add a **“heat score.”**


# Air Quality

In [None]:
# narayanganj_aq_hotspots_readable.py
# Hotspots → Top-3 area-wise clusters with concave envelopes + summaries
# (population, plain-language AQ severity + seasonality, sensitive sites inside,
# named industries/point sources inside). Speed-optimized.

import markdown
from models.llms import groq_api

import os, tempfile, base64
import math
from datetime import date, timedelta

import ee
import folium
from folium.plugins import MiniMap, Fullscreen, MousePosition, MeasureControl
from shapely.geometry import Point, MultiPoint, box
from shapely.ops import unary_union

import re
import dotenv

dotenv.load_dotenv()

# OSM / Geo deps
try:
    import osmnx as ox
    import geopandas as gpd
except Exception as e:
    raise SystemExit(
        f"Import error: {e}\nInstall: pip install osmnx geopandas rtree\n"
        "If NumPy 2.x issues: pip install 'numpy<2' && reinstall geopandas shapely pyproj fiona rtree"
    )

# ------------------ CONFIG ------------------
AOI_BBOX = [90.32, 23.70, 90.52, 23.86]  # (W,S,E,N)

DAYS_BACK = 60
END = date.today()
START = END - timedelta(days=DAYS_BACK)

# EE sampling scale + limits (tuned for speed)
SCALE_M = 1200
MAX_POINTS = 3000
MAX_HOTSPOTS = 120
EE_TILE_SCALE = 4

# Weights for combined AQ index (z-space)
W_NO2 = 0.6
W_PM25 = 0.6
W_CO  = 0.3

# Hotspot selection
Z_THRESHOLD = 1.0
PCTL_THRESHOLD = 85.0

# Clustering
EPS_METERS = 1500.0
MIN_SAMPLES = 6

# Concave envelope controls
ALPHA_M = 1200            # “tightness” (m)
MIN_ENVELOPE_POINTS = 5   # min pts to build polygon
MIN_POLY_AREA_M2 = 2000   # drop tiny artifacts (~0.002 km²)

# Severity buckets
SEVERE_Z = 2.0
HIGH_Z   = 1.0
ELEV_Z   = 0.5

COLORS = {
    "severe": "#d32f2f",
    "high":   "#fb8c00",
    "elev":   "#ffd54f",
    "envelope": "#673ab7"
}

USER = os.getenv("USER") or os.getenv("USERNAME") or "user"
OUT_HTML = f"/Users/{USER}/Downloads/narayanganj_aq_hotspots_readable.html"

# ------------------ EE INIT ------------------
def ee_init_headless():
    sa = os.environ["EE_SERVICE_ACCOUNT"]       # ee-runner@<project>.iam.gserviceaccount.com
    key_b64 = os.environ["EE_KEY_B64"]          # base64 of the JSON key

    # Write key to a temp file
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
        f.write(base64.b64decode(key_b64).decode("utf-8"))
        key_path = f.name

    creds = ee.ServiceAccountCredentials(sa, key_path)
    ee.Initialize(credentials=creds)

# ------------------ UTIL ------------------
def season_windows(today: date):
    y = today.year
    if today.month >= 4:
        dry_start = date(y - 1, 12, 1); dry_end = date(y, 3, 31)
    else:
        dry_start = date(y - 2, 12, 1); dry_end = date(y - 1, 3, 31)
    if (today.month, today.day) >= (9, 15):
        mon_start = date(y, 6, 1); mon_end = date(y, 9, 15)
    else:
        mon_start = date(y - 1, 6, 1); mon_end = date(y - 1, 9, 15)
    return dry_start, dry_end, mon_start, mon_end

def utm_crs_from_bbox(bbox):
    minx, miny, maxx, maxy = bbox
    lon_c = (minx + maxx) / 2.0
    lat_c = (miny + maxy) / 2.0
    zone = int((lon_c + 180) // 6) + 1
    epsg = 32600 + zone if lat_c >= 0 else 32700 + zone
    return f"EPSG:{epsg}"

# Plain-language helpers
def z_to_level_text(z):
    if z is None:
        return "n/a"
    if z >= 2.0: return "Very high (well above typical)"
    if z >= 1.0: return "High (above typical)"
    if z >= 0.5: return "Slightly elevated"
    if z > -0.5: return "Around typical"
    return "Below typical"

def seasonality_plain(zd, zm):
    """Return a readable sentence comparing monsoon vs dry."""
    if zd is None or zm is None:
        return "not enough data"
    diff = zm - zd  # monsoon minus dry
    if diff > 0.25:
        trend = "worse in the monsoon than the dry season"
    elif diff < -0.25:
        trend = "worse in the dry season than the monsoon"
    else:
        trend = "similar between seasons"
    return f"{trend} (monsoon {zm:.2f}, dry {zd:.2f} in standardized units; Δ={diff:+.2f})"

# ------------------ EE IMAGES ------------------
def build_mean_images(aoi, start_iso, end_iso):
    no2 = (ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_NO2")
           .filterBounds(aoi).filterDate(start_iso, end_iso)
           .select("tropospheric_NO2_column_number_density")
           .mean().rename("no2").unmask(0)).clip(aoi)
    co = (ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_CO")
          .filterBounds(aoi).filterDate(start_iso, end_iso)
          .select("CO_column_number_density")
          .mean().rename("co").unmask(0)).clip(aoi)
    aod = (ee.ImageCollection("MODIS/061/MCD19A2_GRANULES")
           .filterBounds(aoi).filterDate(start_iso, end_iso)
           .select("Optical_Depth_047")
           .mean().rename("aod").unmask(0)).clip(aoi)
    pm25 = aod.multiply(60.0).rename("pm25")
    return no2, pm25, co

def image_to_z(img, aoi, band_name):
    stats_mean = img.reduceRegion(
        reducer=ee.Reducer.mean(), geometry=aoi, scale=SCALE_M,
        maxPixels=1e13, bestEffort=True, tileScale=EE_TILE_SCALE
    )
    stats_std = img.reduceRegion(
        reducer=ee.Reducer.stdDev(), geometry=aoi, scale=SCALE_M,
        maxPixels=1e13, bestEffort=True, tileScale=EE_TILE_SCALE
    )
    mean_val = stats_mean.get(band_name)
    std_val  = stats_std.get(band_name)
    mean_num = ee.Number(ee.Algorithms.If(mean_val, mean_val, 0))
    std_num  = ee.Number(ee.Algorithms.If(std_val,  std_val,  1)).max(1e-6)
    return img.subtract(mean_num).divide(std_num).rename(f"{band_name}_z")

def combined_z_image(aoi, start_iso, end_iso):
    no2, pm25, co = build_mean_images(aoi, start_iso, end_iso)
    no2z = image_to_z(no2, aoi, "no2")
    pmz  = image_to_z(pm25, aoi, "pm25")
    coz  = image_to_z(co, aoi, "co")
    comb = (no2z.multiply(W_NO2).add(pmz.multiply(W_PM25)).add(coz.multiply(W_CO))).rename("aq_index_z")
    return no2z, pmz, coz, comb

# ------------------ POPULATION ------------------
def population_image(aoi):
    for yr in [2025, 2023, 2022, 2021, 2020, 2019]:
        try:
            col = ee.ImageCollection("WorldPop/GP/100m/pop").filterBounds(aoi).filter(ee.Filter.eq('year', yr))
            if col.size().getInfo() > 0:
                img = col.mosaic()
                bname = img.bandNames().getInfo()[0]
                return img.select(bname, ["pop"])
        except Exception:
            pass
    try:
        img = ee.Image("JRC/GHSL/P2019/POP_GLOBE_R2019A")
        bands = [b for b in img.bandNames().getInfo() if "2020" in b or "2015" in b]
        if bands:
            return img.select(bands[0], ["pop"])
    except Exception:
        pass
    try:
        col = ee.ImageCollection("CIESIN/GPWv411/GPW_Population_Count").filter(ee.Filter.eq("year", 2020))
        img = col.first()
        if img:
            b = img.bandNames().getInfo()[0]
            return img.select(b, ["pop"])
    except Exception:
        pass
    return None

def reduce_region_sum(image, region, scale):
    try:
        val = image.reduceRegion(
            reducer=ee.Reducer.sum(), geometry=region, scale=scale,
            maxPixels=1e13, bestEffort=True, tileScale=EE_TILE_SCALE
        ).get("pop")
        return float(ee.Number(ee.Algorithms.If(val, val, 0)).getInfo())
    except Exception:
        return None

def ee_means_in_poly(img_dict, poly):
    geom = ee.Geometry(poly.__geo_interface__)
    bands = []
    for name, im in img_dict.items():
        bands.append(im.rename(name))
    stack = bands[0]
    for i in range(1, len(bands)):
        stack = stack.addBands(bands[i])
    try:
        vals = stack.reduceRegion(
            reducer=ee.Reducer.mean(), geometry=geom, scale=SCALE_M,
            maxPixels=1e13, bestEffort=True, tileScale=EE_TILE_SCALE
        )
        out = {}
        for name in img_dict.keys():
            v = vals.get(name)
            out[name] = float(ee.Number(ee.Algorithms.If(v, v, 0)).getInfo())
        return out
    except Exception:
        return {name: None for name in img_dict.keys()}

# ------------------ SAMPLING & STATS ------------------
def sample_grid(aoi, img_stack, scale_m=SCALE_M, max_points=MAX_POINTS):
    fc = img_stack.sample(region=aoi, scale=scale_m, geometries=True)
    feats = fc.limit(max_points).getInfo().get("features", [])
    rows = []
    for f in feats:
        geom = f.get("geometry", {})
        if geom.get("type") != "Point": continue
        lon, lat = geom["coordinates"]
        p = f.get("properties", {})
        no2, pm25, co = p.get("no2"), p.get("pm25"), p.get("co")
        if None in (no2, pm25, co): continue
        rows.append({"lat": float(lat), "lon": float(lon),
                     "no2": float(no2), "pm25": float(pm25), "co": float(co)})
    return rows

def zscores(vals):
    good = [v for v in vals if v is not None and math.isfinite(v)]
    if len(good) < 2: return [0.0 for _ in vals]
    mean = sum(good)/len(good)
    var  = sum((v-mean)**2 for v in good)/len(good)
    std  = math.sqrt(max(var, 1e-12))
    return [0.0 if (v is None or not math.isfinite(v)) else (v-mean)/std for v in vals]

def p_rank(all_vals, v):
    s = sorted(all_vals)
    if not s: return 0.0
    cnt = sum(1 for x in s if x <= v)
    return 100.0 * cnt / len(s)

def haversine_m(lat1, lon1, lat2, lon2):
    R = 6371000.0
    p1, p2 = math.radians(lat1), math.radians(lat2)
    dphi = p2 - p1
    dl = math.radians(lon2 - lon1)
    a = math.sin(dphi/2)**2 + math.cos(p1)*math.cos(p2)*math.sin(dl/2)**2
    return 2*R*math.asin(math.sqrt(a))

def cluster_dbscan(points, eps_m=EPS_METERS, min_samples=MIN_SAMPLES):
    n = len(points)
    if n == 0: return []
    buckets = {}
    for i, p in enumerate(points):
        key = (int(p["lat"]/0.01), int(p["lon"]/0.01))  # ~1.1 km cells
        buckets.setdefault(key, []).append(i)
    visited = [False]*n
    clusters = [-1]*n
    nbrs = [[] for _ in range(n)]
    for key, idxs in buckets.items():
        kx, ky = key
        cand = []
        for dx in (-1,0,1):
            for dy in (-1,0,1):
                cand += buckets.get((kx+dx, ky+dy), [])
        for i in idxs:
            for j in cand:
                if j <= i: continue
                if haversine_m(points[i]["lat"], points[i]["lon"], points[j]["lat"], points[j]["lon"]) <= eps_m:
                    nbrs[i].append(j); nbrs[j].append(i)
    cid = 0
    for i in range(n):
        if visited[i]: continue
        visited[i] = True
        if len(nbrs[i]) + 1 < min_samples:
            clusters[i] = -1; continue
        clusters[i] = cid
        seeds = list(nbrs[i]); k = 0
        while k < len(seeds):
            j = seeds[k]
            if not visited[j]:
                visited[j] = True
                if len(nbrs[j]) + 1 >= min_samples:
                    for q in nbrs[j]:
                        if q not in seeds: seeds.append(q)
            if clusters[j] < 0: clusters[j] = cid
            k += 1
        cid += 1
    return clusters

def ensure_clusters(hotspots):
    clusters = cluster_dbscan(hotspots, eps_m=EPS_METERS, min_samples=MIN_SAMPLES)
    if not any(c >= 0 for c in clusters):
        clusters = cluster_dbscan(hotspots, eps_m=EPS_METERS*1.6, min_samples=max(3, MIN_SAMPLES-2))
    if not any(c >= 0 for c in clusters):
        clusters = [0 for _ in hotspots]
    return clusters

# ------------------ OSM HELPERS ------------------
def aoi_polygon_wgs84():
    minx, miny, maxx, maxy = AOI_BBOX
    return box(minx, miny, maxx, maxy)

def osm_geoms_from_polygon(aoi_poly_wgs84, tags_dict):
    ox.settings.use_cache = True
    ox.settings.timeout = 180
    try:
        from osmnx.features import features_from_polygon as osm_features_from_polygon
    except Exception:
        try:
            from osmnx import geometries_from_polygon as osm_features_from_polygon
        except Exception:
            raise SystemExit("OSMnx missing polygon geometries. pip install --upgrade osmnx")
    layers = []
    for k, v in tags_dict.items():
        try:
            g = osm_features_from_polygon(aoi_poly_wgs84, tags={k: v})
            if g is not None and not g.empty:
                layers.append(g)
        except Exception:
            pass
    if not layers:
        return gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")
    base_crs = layers[0].crs or "EPSG:4326"
    all_feats = gpd.GeoDataFrame(gpd.pd.concat(layers, ignore_index=True), crs=base_crs)
    all_feats = all_feats[all_feats.geometry.notna()].copy()
    return all_feats.to_crs(epsg=4326)

def count_sensitive_inside(sens_gdf, polygon):
    if sens_gdf is None or sens_gdf.empty:
        return dict(schools=0, clinics=0, hospitals=0, elder_homes=0)
    try:
        idx = sens_gdf.sindex
        sub = sens_gdf.iloc[list(idx.intersection(polygon.bounds))]
        inside = sub[sub.geometry.intersects(polygon)]
    except Exception:
        inside = sens_gdf[sens_gdf.geometry.intersects(polygon)]
    res = dict(schools=0, clinics=0, hospitals=0, elder_homes=0)
    if inside.empty:
        return res
    if "amenity" in inside.columns:
        res["schools"] = int((inside["amenity"]=="school").sum())
        res["clinics"] = int(((inside["amenity"]=="clinic") | (inside["amenity"]=="doctors")).sum())
        res["hospitals"] = int((inside["amenity"]=="hospital").sum())
    if "social_facility" in inside.columns:
        res["elder_homes"] = int(inside["social_facility"].isin(["nursing_home","assisted_living"]).sum())
    return res

def friendly_unnamed(row):
    """Produce a readable label when OSM has no name."""
    # Try a few informative tags to hint the type
    for key in ("industrial","power","man_made","landuse","waterway","harbour","amenity"):
        val = row.get(key)
        if isinstance(val, str) and val.strip():
            if key == "power" and val == "plant":
                return "Unnamed facility (power plant)"
            if key == "industrial":
                return "Unnamed facility (industrial)"
            if key == "man_made":
                return f"Unnamed facility (man_made={val})"
            if key == "waterway":
                return f"Unnamed riverside/port feature ({val})"
            if key == "harbour":
                return "Unnamed harbour/jetty"
            if key == "landuse" and val == "industrial":
                return "Unnamed facility (industrial landuse)"
            if key == "amenity":
                return f"Unnamed amenity ({val})"
            return f"Unnamed facility ({key}={val})"
    return "Unnamed facility (OSM)"

def list_osm_names_in_poly(gdf, polygon, max_show=40):
    if gdf is None or gdf.empty:
        return [], 0
    try:
        idx = gdf.sindex
        cand = gdf.iloc[list(idx.intersection(polygon.bounds))]
        inside = cand[cand.geometry.intersects(polygon)]
    except Exception:
        inside = gdf[gdf.geometry.intersects(polygon)]
    count = len(inside)
    if count == 0:
        return [], 0
    name_cols = [c for c in ["name", "name:en", "operator", "brand"] if c in inside.columns]
    names = []
    for _, r in inside.iterrows():
        nm = None
        for c in name_cols:
            val = r.get(c)
            if isinstance(val, str) and val.strip():
                nm = val.strip(); break
        if nm is None:
            nm = friendly_unnamed(r)
        names.append(nm)
    names = sorted(set(names))
    if len(names) > max_show:
        names = names[:max_show] + [f"... (+{len(set(names))-max_show} more)"]
    return names, count

# ------------------ ENVELOPES (concave polygons) ------------------
def build_concave_envelopes(hotspots, clusters, metric_crs, alpha_m=ALPHA_M, min_pts=MIN_ENVELOPE_POINTS):
    """Returns dict cid -> list[Polygon in WGS84], with tiny/degenerate parts removed."""
    by_cluster = {}
    for hp, cid in zip(hotspots, clusters):
        if cid < 0: continue
        by_cluster.setdefault(cid, []).append(hp)

    out = {}
    for cid, pts in by_cluster.items():
        if len(pts) < min_pts:
            continue
        pts_wgs = gpd.GeoSeries([Point(p["lon"], p["lat"]) for p in pts], crs="EPSG:4326").to_crs(metric_crs)
        buf = pts_wgs.buffer(alpha_m)
        merged = unary_union(list(buf.values))
        shell = merged.buffer(-alpha_m)
        geom = shell if not shell.is_empty else merged.convex_hull
        polys = []
        if geom.geom_type == "Polygon":
            polys = [geom]
        elif geom.geom_type == "MultiPolygon":
            polys = list(geom.geoms)
        kept = [g for g in polys if float(g.area) >= MIN_POLY_AREA_M2]
        if not kept:
            continue
        kept_wgs = gpd.GeoSeries(kept, crs=metric_crs).to_crs(epsg=4326).tolist()
        out[cid] = kept_wgs
    return out

# ------------------ MAP ------------------
def build_map(aoi_bbox, hotspots, selected_cluster_polys, result):
    lat_c = (aoi_bbox[1] + aoi_bbox[3]) / 2.0
    lon_c = (aoi_bbox[0] + aoi_bbox[2]) / 2.0
    m = folium.Map(location=[lat_c, lon_c], zoom_start=12,
                   tiles="cartodbpositron", control_scale=True)

    # Cluster polygons (Top-3)
    for rank, (cid, poly) in enumerate(selected_cluster_polys, start=1):
        area_marking = folium.GeoJson(
            data=poly.__geo_interface__,
            name=f"Top cluster #{rank} (cluster {cid})",
            style_function=lambda _ : {"color": COLORS["envelope"], "weight": 3, "fillColor": COLORS["envelope"], "fillOpacity": 0.10},
            tooltip=f"Top cluster #{rank} (cluster {cid})",
            popup=f"Test popup for cluster {cid}"
        )

        tags_html = f"""
            <div style="margin-bottom: 0.5em;">
            <span style="background:#007bff;color:white;padding:3px 7px;border-radius:5px;margin-right:5px;">
                Cluster ID: {cid}
            </span>
            <span style="background:#28a745;color:white;padding:3px 7px;border-radius:5px;">
                Rank: {rank}
            </span>
            </div>
            """

        description = result[cid]
        description = markdown.markdown(description, extensions=['extra', 'toc', 'tables'])
        description = tags_html + description

        folium.Popup(f"{description}", max_width=700, max_height = 500).add_to(area_marking)

        area_marking.add_to(m)

    # Hotspots belonging to selected clusters only
    kept_cids = {cid for cid, _ in selected_cluster_polys}
    for hp in hotspots:
        cid = hp.get("_cid")
        if cid not in kept_cids:
            continue
        sev = 'severe' if hp["aq_index_z"] >= SEVERE_Z else ('high' if hp["aq_index_z"] >= HIGH_Z else ('elev' if hp["aq_index_z"] >= ELEV_Z else None))
        if sev is None: 
            continue
        color = COLORS[sev]
        radius = 6 if sev == "elev" else (8 if sev == "high" else 10)
        z_dry = hp.get("aq_z_dry"); z_mon = hp.get("aq_z_monsoon")
        season_txt = seasonality_plain(z_dry, z_mon)
        hint = hp.get("driver_hint","Mixed drivers")
        popup_html = (
            f"<b>{sev.upper()} hotspot</b><br>"
            f"Current level: {z_to_level_text(hp['aq_index_z'])} (z≈{hp['aq_index_z']:.2f}; 0≈typical)<br>"
            f"NO₂ z: {hp['no2_z']:.2f} | PM₂.₅ z: {hp['pm25_z']:.2f} | CO z: {hp['co_z']:.2f}<br>"
            f"<b>Likely driver:</b> {hint}<br>"
            f"<b>Seasonality:</b> {season_txt}"
        )
        folium.CircleMarker(
            location=(hp["lat"], hp["lon"]),
            radius=radius,
            color=color, fill=True, fill_color=color, fill_opacity=0.95,
            tooltip=f"{sev.upper()} hotspot", popup=popup_html
        ).add_to(m)

    MiniMap(toggle_display=True, position="bottomright").add_to(m)
    Fullscreen().add_to(m)
    MousePosition(position="topright", separator=" | ", prefix="Lat/Lon:").add_to(m)
    MeasureControl(position="topright", primary_length_unit='kilometers').add_to(m)

    legend = """
    <div style="position: fixed; bottom: 18px; left: 18px; z-index:9999; background: white;
                padding: 10px 12px; border: 1px solid #ccc; border-radius: 6px; font-size: 13px;">
      <b>Top hotspot clusters (area-wise)</b><br>
      <span style="display:inline-block;width:12px;height:12px;background:#d32f2f;border:1px solid #d32f2f;"></span>
      Severe (≥ 2σ above city typical)<br>
      <span style="display:inline-block;width:12px;height:12px;background:#fb8c00;border:1px solid #fb8c00;"></span>
      High (1–2σ)<br>
      <span style="display:inline-block;width:12px;height:12px;background:#ffd54f;border:1px solid #ffd54f;"></span>
      Elevated (0.5–1σ)<br>
      <span style="display:inline-block;width:12px;height:12px;background:#673ab7;border:1px solid #673ab7;"></span>
      Cluster polygon(s)
    </div>
    """
    m.get_root().html.add_child(folium.Element(legend))
    folium.LayerControl(collapsed=False).add_to(m)
    return m

# ------------------ MAIN ------------------
def main():
    print("Initializing Earth Engine…")
    ee_init_headless()

    aoi = ee.Geometry.Rectangle(AOI_BBOX)
    start_iso, end_iso = str(START), str(END)
    print(f"AOI: {AOI_BBOX} | Window: {start_iso} → {end_iso}")

    # Current-window images & z stacks
    no2_img, pm25_img, co_img = build_mean_images(aoi, start_iso, end_iso)
    stack = no2_img.addBands(pm25_img).addBands(co_img)
    no2z_now, pmz_now, coz_now, aqz_now_img = combined_z_image(aoi, start_iso, end_iso)

    # Sample grid (capped)
    rows = sample_grid(aoi, stack, scale_m=SCALE_M, max_points=MAX_POINTS)
    if not rows:
        raise SystemExit("No samples. Try expanding AOI or increasing DAYS_BACK.")

    # Z-scores (current window)
    no2_z = zscores([r["no2"] for r in rows])
    pm25_z = zscores([r["pm25"] for r in rows])
    co_z   = zscores([r["co"] for r in rows])
    aq_raw = [W_NO2*n + W_PM25*p + W_CO*c for n, p, c in zip(no2_z, pm25_z, co_z)]
    aq_index_z = zscores(aq_raw)

    # Hotspot selection (cap)
    def prc(vs, v): return p_rank(vs, v)
    pcts = [prc(aq_index_z, v) for v in aq_index_z]
    candidates = []
    for r, nz, pz, cz, az, pr in zip(rows, no2_z, pm25_z, co_z, aq_index_z, pcts):
        if (az >= Z_THRESHOLD) or (pr >= PCTL_THRESHOLD):
            if (nz >= 1.0) and (cz >= 1.0):
                driver = "Traffic / combustion (high NO₂ + CO)"
            elif (pz >= 1.0) and (nz < 0.5):
                driver = "Dust / construction / open burning (high PM proxy, low NO₂)"
            elif (nz >= 1.0) and (cz < 0.5):
                driver = "Point sources / industry (high NO₂, low CO)"
            else:
                driver = "Mixed drivers"
            candidates.append({
                "lat": r["lat"], "lon": r["lon"],
                "no2_z": nz, "pm25_z": pz, "co_z": cz,
                "aq_index_z": az, "percentile": pr,
                "driver_hint": driver
            })
    if not candidates:
        raise SystemExit("No hotspots met the threshold; relax Z_THRESHOLD/PCTL_THRESHOLD.")

    candidates.sort(key=lambda x: x["aq_index_z"], reverse=True)
    hotspots = candidates[:MAX_HOTSPOTS]

    # Cluster (with fallback)
    clusters = ensure_clusters(hotspots)
    for hp, cid in zip(hotspots, clusters):
        hp["_cid"] = cid

    # Seasonality quick-look (batched)
    dry_start, dry_end, mon_start, mon_end = season_windows(date.today())
    _, _, _, aqz_dry_img = combined_z_image(aoi, str(dry_start), str(dry_end))
    _, _, _, aqz_mon_img = combined_z_image(aoi, str(mon_start), str(mon_end))

    def fc_from_points(hps):
        feats = [ee.Feature(ee.Geometry.Point([hp["lon"], hp["lat"]]), {"idx": i})
                 for i, hp in enumerate(hps)]
        return ee.FeatureCollection(feats)

    pts_fc = fc_from_points(hotspots)
    aq_dry_coll = aqz_dry_img.sampleRegions(collection=pts_fc, scale=SCALE_M, geometries=False, tileScale=EE_TILE_SCALE)
    aq_mon_coll = aqz_mon_img.sampleRegions(collection=pts_fc, scale=SCALE_M, geometries=False, tileScale=EE_TILE_SCALE)
    aq_dry = aq_dry_coll.getInfo().get("features", [])
    aq_mon = aq_mon_coll.getInfo().get("features", [])
    for feat in aq_dry:
        i = int(feat["properties"]["idx"])
        val = feat["properties"].get("aq_index_z")
        hotspots[i]["aq_z_dry"] = float(val) if val is not None else None
    for feat in aq_mon:
        i = int(feat["properties"]["idx"])
        val = feat["properties"].get("aq_index_z")
        hotspots[i]["aq_z_monsoon"] = float(val) if val is not None else None

    # OSM context
    aoi_poly = aoi_polygon_wgs84()
    try:
        sens_all = osm_geoms_from_polygon(aoi_poly, {"amenity": ["school","clinic","hospital","doctors"],
                                                     "social_facility": ["nursing_home","assisted_living"]})
    except Exception:
        sens_all = gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")
    try:
        ind_all  = osm_geoms_from_polygon(aoi_poly, {
            "landuse": ["industrial"], "industrial": True, "man_made": ["works","chimney"],
            "power": ["plant","generator"], "harbour": ["yes"], "waterway": ["dock"]
        })
        extra_port = osm_geoms_from_polygon(aoi_poly, {"man_made": ["pier"], "landuse": ["port"],
                                                       "harbour": ["yes"], "waterway": ["dock"]})
        if ind_all is not None and not ind_all.empty and extra_port is not None and not extra_port.empty:
            ind_all = gpd.GeoDataFrame(gpd.pd.concat([ind_all, extra_port], ignore_index=True), crs=ind_all.crs or "EPSG:4326")
        elif (ind_all is None or ind_all.empty) and extra_port is not None and not extra_port.empty:
            ind_all = extra_port
    except Exception:
        ind_all = gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")

    # Build concave envelopes per cluster (filter tiny/degenerate)
    metric_crs = utm_crs_from_bbox(AOI_BBOX)
    envelopes_by_cid = build_concave_envelopes(hotspots, clusters, metric_crs, alpha_m=ALPHA_M, min_pts=MIN_ENVELOPE_POINTS)
    if not envelopes_by_cid:
        envelopes_by_cid = build_concave_envelopes(hotspots, [0]*len(hotspots), metric_crs, alpha_m=ALPHA_M, min_pts=3)

    # Union to single polygon per cluster & compute area
    cluster_union = {}
    cluster_area_km2 = {}
    for cid, polys in envelopes_by_cid.items():
        if not polys: 
            continue
        polys_proj = gpd.GeoSeries(polys, crs="EPSG:4326").to_crs(metric_crs)
        union_geom = unary_union(list(polys_proj.values))
        area_km2 = float(union_geom.area / 1e6)
        if area_km2 <= 0:
            continue
        union_wgs = gpd.GeoSeries([union_geom], crs=metric_crs).to_crs(epsg=4326).iloc[0]
        cluster_union[cid] = union_wgs
        cluster_area_km2[cid] = area_km2

    # Pick Top-3 clusters by area
    top_cids = sorted(cluster_area_km2.keys(), key=lambda c: cluster_area_km2[c], reverse=True)[:3]
    selected = [(cid, cluster_union[cid]) for cid in top_cids]

    # ---------- Print summaries for Top-3 (plain language) ----------
    description_string = ""

    description_string += ("\n================= Top bad air quality hotspot clusters (area-wise) =================\n")
    description_string += ("(Note: z (σ) = standardized units; 0 means ‘typical’ for the city in the last 60 days.)\n")
    pop_img = population_image(aoi)
    z_imgs_now = {"no2_z": no2z_now, "pm25_z": pmz_now, "co_z": coz_now, "aq_index_z": aqz_now_img}

    for rank, (cid, poly) in enumerate(selected, start=1):
        area_km2 = cluster_area_km2[cid]
        pop_sum = reduce_region_sum(pop_img, ee.Geometry(poly.__geo_interface__), scale=200) if pop_img is not None else None
        means_now = ee_means_in_poly(z_imgs_now, poly)
        zn = means_now.get("aq_index_z")
        zd = ee_means_in_poly({"aq_index_z": aqz_dry_img}, poly).get("aq_index_z")
        zm = ee_means_in_poly({"aq_index_z": aqz_mon_img}, poly).get("aq_index_z")
        sens_inside = count_sensitive_inside(sens_all, poly)
        ind_names, ind_count = list_osm_names_in_poly(ind_all, poly, max_show=60)

        description_string += (f"\nTop cluster #{rank} (cluster {cid})\n")
        description_string += (f"• Area: ~{area_km2:.2f} km²\n")
        description_string += (f"• People living inside: {(f'{int(pop_sum):,}' if pop_sum is not None else 'n/a')}\n")
        if zn is not None:
            description_string += (f"• Current level: {z_to_level_text(zn)} (z≈{zn:.2f}; 0≈typical)\n")
        else:
            description_string += ("• Current level: n/a\n")
        description_string += (f"• Seasonality: {seasonality_plain(zd, zm)}\n")
        description_string += (f"• Sensitive sites inside: schools:{sens_inside.get('schools',0)}, "
              f"clinics:{sens_inside.get('clinics',0)}, hospitals:{sens_inside.get('hospitals',0)}, "
              f"elder homes:{sens_inside.get('elder_homes',0)}\n")
        description_string += (f"• Industrial/port/point-source features inside: {ind_count}\n")
        if ind_count > 0:
            description_string += ("  Names / tags:\n")
            for nm in ind_names:
                description_string += (f"   - {nm}\n")

    print(description_string)
    result = groq_api.inference(description_string)
    result = groq_api.parse_llm_response(result[0])


    # ---------- Map with Top-3 only ----------
    m = build_map(AOI_BBOX, hotspots, selected, result)
    os.makedirs(os.path.dirname(OUT_HTML), exist_ok=True)
    m.save(OUT_HTML)
    print(f"\n✅ Saved: {OUT_HTML}")

if __name__ == "__main__":
    main()


## Testing

In [None]:
print("Initializing Earth Engine…")
ee_init_headless()

aoi = ee.Geometry.Rectangle(AOI_BBOX)
start_iso, end_iso = str(START), str(END)
print(f"AOI: {AOI_BBOX} | Window: {start_iso} → {end_iso}")

# Current-window images & z stacks
no2_img, pm25_img, co_img = build_mean_images(aoi, start_iso, end_iso)
stack = no2_img.addBands(pm25_img).addBands(co_img)
no2z_now, pmz_now, coz_now, aqz_now_img = combined_z_image(aoi, start_iso, end_iso)

# Sample grid (capped)
rows = sample_grid(aoi, stack, scale_m=SCALE_M, max_points=MAX_POINTS)
if not rows:
    raise SystemExit("No samples. Try expanding AOI or increasing DAYS_BACK.")

# Z-scores (current window)
no2_z = zscores([r["no2"] for r in rows])
pm25_z = zscores([r["pm25"] for r in rows])
co_z   = zscores([r["co"] for r in rows])
aq_raw = [W_NO2*n + W_PM25*p + W_CO*c for n, p, c in zip(no2_z, pm25_z, co_z)]
aq_index_z = zscores(aq_raw)

# Hotspot selection (cap)
def prc(vs, v): return p_rank(vs, v)
pcts = [prc(aq_index_z, v) for v in aq_index_z]
candidates = []
for r, nz, pz, cz, az, pr in zip(rows, no2_z, pm25_z, co_z, aq_index_z, pcts):
    if (az >= Z_THRESHOLD) or (pr >= PCTL_THRESHOLD):
        if (nz >= 1.0) and (cz >= 1.0):
            driver = "Traffic / combustion (high NO₂ + CO)"
        elif (pz >= 1.0) and (nz < 0.5):
            driver = "Dust / construction / open burning (high PM proxy, low NO₂)"
        elif (nz >= 1.0) and (cz < 0.5):
            driver = "Point sources / industry (high NO₂, low CO)"
        else:
            driver = "Mixed drivers"
        candidates.append({
            "lat": r["lat"], "lon": r["lon"],
            "no2_z": nz, "pm25_z": pz, "co_z": cz,
            "aq_index_z": az, "percentile": pr,
            "driver_hint": driver
        })
if not candidates:
    raise SystemExit("No hotspots met the threshold; relax Z_THRESHOLD/PCTL_THRESHOLD.")

candidates.sort(key=lambda x: x["aq_index_z"], reverse=True)
hotspots = candidates[:MAX_HOTSPOTS]

# Cluster (with fallback)
clusters = ensure_clusters(hotspots)
for hp, cid in zip(hotspots, clusters):
    hp["_cid"] = cid

# Seasonality quick-look (batched)
dry_start, dry_end, mon_start, mon_end = season_windows(date.today())
_, _, _, aqz_dry_img = combined_z_image(aoi, str(dry_start), str(dry_end))
_, _, _, aqz_mon_img = combined_z_image(aoi, str(mon_start), str(mon_end))

def fc_from_points(hps):
    feats = [ee.Feature(ee.Geometry.Point([hp["lon"], hp["lat"]]), {"idx": i})
                for i, hp in enumerate(hps)]
    return ee.FeatureCollection(feats)

pts_fc = fc_from_points(hotspots)
aq_dry_coll = aqz_dry_img.sampleRegions(collection=pts_fc, scale=SCALE_M, geometries=False, tileScale=EE_TILE_SCALE)
aq_mon_coll = aqz_mon_img.sampleRegions(collection=pts_fc, scale=SCALE_M, geometries=False, tileScale=EE_TILE_SCALE)
aq_dry = aq_dry_coll.getInfo().get("features", [])
aq_mon = aq_mon_coll.getInfo().get("features", [])
for feat in aq_dry:
    i = int(feat["properties"]["idx"])
    val = feat["properties"].get("aq_index_z")
    hotspots[i]["aq_z_dry"] = float(val) if val is not None else None
for feat in aq_mon:
    i = int(feat["properties"]["idx"])
    val = feat["properties"].get("aq_index_z")
    hotspots[i]["aq_z_monsoon"] = float(val) if val is not None else None

# OSM context
aoi_poly = aoi_polygon_wgs84()
try:
    sens_all = osm_geoms_from_polygon(aoi_poly, {"amenity": ["school","clinic","hospital","doctors"],
                                                    "social_facility": ["nursing_home","assisted_living"]})
except Exception:
    sens_all = gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")
try:
    ind_all  = osm_geoms_from_polygon(aoi_poly, {
        "landuse": ["industrial"], "industrial": True, "man_made": ["works","chimney"],
        "power": ["plant","generator"], "harbour": ["yes"], "waterway": ["dock"]
    })
    extra_port = osm_geoms_from_polygon(aoi_poly, {"man_made": ["pier"], "landuse": ["port"],
                                                    "harbour": ["yes"], "waterway": ["dock"]})
    if ind_all is not None and not ind_all.empty and extra_port is not None and not extra_port.empty:
        ind_all = gpd.GeoDataFrame(gpd.pd.concat([ind_all, extra_port], ignore_index=True), crs=ind_all.crs or "EPSG:4326")
    elif (ind_all is None or ind_all.empty) and extra_port is not None and not extra_port.empty:
        ind_all = extra_port
except Exception:
    ind_all = gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")

# Build concave envelopes per cluster (filter tiny/degenerate)
metric_crs = utm_crs_from_bbox(AOI_BBOX)
envelopes_by_cid = build_concave_envelopes(hotspots, clusters, metric_crs, alpha_m=ALPHA_M, min_pts=MIN_ENVELOPE_POINTS)
if not envelopes_by_cid:
    envelopes_by_cid = build_concave_envelopes(hotspots, [0]*len(hotspots), metric_crs, alpha_m=ALPHA_M, min_pts=3)

# Union to single polygon per cluster & compute area
cluster_union = {}
cluster_area_km2 = {}
for cid, polys in envelopes_by_cid.items():
    if not polys: 
        continue
    polys_proj = gpd.GeoSeries(polys, crs="EPSG:4326").to_crs(metric_crs)
    union_geom = unary_union(list(polys_proj.values))
    area_km2 = float(union_geom.area / 1e6)
    if area_km2 <= 0:
        continue
    union_wgs = gpd.GeoSeries([union_geom], crs=metric_crs).to_crs(epsg=4326).iloc[0]
    cluster_union[cid] = union_wgs
    cluster_area_km2[cid] = area_km2

# Pick Top-3 clusters by area
top_cids = sorted(cluster_area_km2.keys(), key=lambda c: cluster_area_km2[c], reverse=True)[:3]
selected = [(cid, cluster_union[cid]) for cid in top_cids]

# ---------- Print summaries for Top-3 (plain language) ----------
description_string = ""

description_string += ("\n================= Top bad air quality hotspot clusters (area-wise) =================\n")
description_string += ("(Note: z (σ) = standardized units; 0 means ‘typical’ for the city in the last 60 days.)\n")
pop_img = population_image(aoi)
z_imgs_now = {"no2_z": no2z_now, "pm25_z": pmz_now, "co_z": coz_now, "aq_index_z": aqz_now_img}

for rank, (cid, poly) in enumerate(selected, start=1):
    area_km2 = cluster_area_km2[cid]
    pop_sum = reduce_region_sum(pop_img, ee.Geometry(poly.__geo_interface__), scale=200) if pop_img is not None else None
    means_now = ee_means_in_poly(z_imgs_now, poly)
    zn = means_now.get("aq_index_z")
    zd = ee_means_in_poly({"aq_index_z": aqz_dry_img}, poly).get("aq_index_z")
    zm = ee_means_in_poly({"aq_index_z": aqz_mon_img}, poly).get("aq_index_z")
    sens_inside = count_sensitive_inside(sens_all, poly)
    ind_names, ind_count = list_osm_names_in_poly(ind_all, poly, max_show=60)

    description_string += (f"\nTop cluster #{rank} (cluster {cid})\n")
    description_string += (f"• Area: ~{area_km2:.2f} km²\n")
    description_string += (f"• People living inside: {(f'{int(pop_sum):,}' if pop_sum is not None else 'n/a')}\n")
    if zn is not None:
        description_string += (f"• Current level: {z_to_level_text(zn)} (z≈{zn:.2f}; 0≈typical)\n")
    else:
        description_string += ("• Current level: n/a\n")
    description_string += (f"• Seasonality: {seasonality_plain(zd, zm)}\n")
    description_string += (f"• Sensitive sites inside: schools:{sens_inside.get('schools',0)}, "
            f"clinics:{sens_inside.get('clinics',0)}, hospitals:{sens_inside.get('hospitals',0)}, "
            f"elder homes:{sens_inside.get('elder_homes',0)}\n")
    description_string += (f"• Industrial/port/point-source features inside: {ind_count}\n")
    if ind_count > 0:
        description_string += ("  Names / tags:\n")
        for nm in ind_names:
            description_string += (f"   - {nm}\n")

print(description_string)


Initializing Earth Engine…
AOI: [90.32, 23.7, 90.52, 23.86] | Window: 2025-07-31 → 2025-09-29


  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})



(Note: z (σ) = standardized units; 0 means ‘typical’ for the city in the last 60 days.)

Top cluster #1 (cluster 1)
• Area: ~4.50 km²
• People living inside: n/a
• Current level: High (above typical) (z≈1.27; 0≈typical)
• Seasonality: worse in the monsoon than the dry season (monsoon 0.94, dry -0.74 in standardized units; Δ=+1.68)
• Sensitive sites inside: schools:61, clinics:7, hospitals:13, elder homes:0
• Industrial/port/point-source features inside: 2
  Names / tags:
   - Luna plastic
   - k tex indco ltd

Top cluster #2 (cluster 0)
• Area: ~2.96 km²
• People living inside: n/a
• Current level: High (above typical) (z≈1.72; 0≈typical)
• Seasonality: worse in the monsoon than the dry season (monsoon 0.86, dry -1.71 in standardized units; Δ=+2.57)
• Sensitive sites inside: schools:2, clinics:0, hospitals:0, elder homes:0
• Industrial/port/point-source features inside: 2
  Names / tags:
   - Unnamed facility (industrial landuse)
   - আমিনবাজার ঘাট

Top cluster #3 (cluster 2)
• Area: 

In [3]:
result = groq_api.inference(description_string)


In [4]:
print(description_string)


(Note: z (σ) = standardized units; 0 means ‘typical’ for the city in the last 60 days.)

Top cluster #1 (cluster 1)
• Area: ~4.50 km²
• People living inside: n/a
• Current level: High (above typical) (z≈1.27; 0≈typical)
• Seasonality: worse in the monsoon than the dry season (monsoon 0.94, dry -0.74 in standardized units; Δ=+1.68)
• Sensitive sites inside: schools:61, clinics:7, hospitals:13, elder homes:0
• Industrial/port/point-source features inside: 2
  Names / tags:
   - Luna plastic
   - k tex indco ltd

Top cluster #2 (cluster 0)
• Area: ~2.96 km²
• People living inside: n/a
• Current level: High (above typical) (z≈1.72; 0≈typical)
• Seasonality: worse in the monsoon than the dry season (monsoon 0.86, dry -1.71 in standardized units; Δ=+2.57)
• Sensitive sites inside: schools:2, clinics:0, hospitals:0, elder homes:0
• Industrial/port/point-source features inside: 2
  Names / tags:
   - Unnamed facility (industrial landuse)
   - আমিনবাজার ঘাট

Top cluster #3 (cluster 2)
• Area: 

In [5]:
result[0]

'<| Decision for cluster/node 1 |>\n• **Area:** ~4.50\u202fkm²  \n• **People living inside:** Data unavailable  \n• **Current level:** High (z\u202f≈\u202f1.27) – above city average  \n• **Seasonality:** Worse during monsoon (monsoon\u202f=\u202f0.94 vs dry\u202f=\u202f‑0.74, Δ\u202f=\u202f+1.68)  \n• **Sensitive sites inside:** 61 schools, 7 clinics, 13 hospitals, 0 elder homes  \n• **Industrial/port/point‑source features inside:** 2 (Luna plastic, K Tex Indco Ltd)  \n\n#### AI interpretations :\n\n**1. Suitability Assessment**  \n- **Strengths**  \n  - Large area allows for diversified interventions (e.g., green belt, low‑impact development).  \n  - Presence of many health & education facilities implies existing community hubs that can be leveraged for outreach.  \n  - Industrial sources are limited to two; potential for targeted source‑control measures.  \n\n- **Weaknesses**  \n  - High pollutant concentrations and monsoon‑driven exacerbation indicate persistent exposure risk.  \n  

In [6]:
result = parse_llm_response(result[0])


In [7]:
result

{1: '• **Area:** ~4.50\u202fkm²  \n• **People living inside:** Data unavailable  \n• **Current level:** High (z\u202f≈\u202f1.27) – above city average  \n• **Seasonality:** Worse during monsoon (monsoon\u202f=\u202f0.94 vs dry\u202f=\u202f‑0.74, Δ\u202f=\u202f+1.68)  \n• **Sensitive sites inside:** 61 schools, 7 clinics, 13 hospitals, 0 elder homes  \n• **Industrial/port/point‑source features inside:** 2 (Luna plastic, K Tex Indco Ltd)  \n\n#### AI interpretations :\n\n**1. Suitability Assessment**  \n- **Strengths**  \n  - Large area allows for diversified interventions (e.g., green belt, low‑impact development).  \n  - Presence of many health & education facilities implies existing community hubs that can be leveraged for outreach.  \n  - Industrial sources are limited to two; potential for targeted source‑control measures.  \n\n- **Weaknesses**  \n  - High pollutant concentrations and monsoon‑driven exacerbation indicate persistent exposure risk.  \n  - Lack of data on residential d

In [10]:


# ---------- Map with Top-3 only ----------
m = build_map(AOI_BBOX, hotspots, selected, result)
os.makedirs(os.path.dirname(OUT_HTML), exist_ok=True)
m.save(OUT_HTML)
print(f"\n✅ Saved: {OUT_HTML}")



✅ Saved: /Users/Dipankar Mitra/Downloads/narayanganj_aq_hotspots_readable.html


## Satellite data this script uses (and what for)

* **Sentinel-5P / TROPOMI – NO₂ (L3)**
  **ID:** `COPERNICUS/S5P/OFFL/L3_NO2` → **Mean tropospheric NO₂ column** over the last `DAYS_BACK` days.
  **Used for:** NO₂ **z-score** and the **combined AQ hotspot index**; also **seasonality** (dry vs monsoon).

* **Sentinel-5P / TROPOMI – CO (L3)**
  **ID:** `COPERNICUS/S5P/OFFL/L3_CO` → **Mean CO column** over the last `DAYS_BACK` days.
  **Used for:** CO **z-score** and the **combined AQ hotspot index**; also **seasonality**.

* **MODIS MAIAC – Aerosol Optical Depth (AOD, 0.47 µm)**
  **ID:** `MODIS/061/MCD19A2_GRANULES` (band `Optical_Depth_047`) → **Mean AOD** (~1 km).
  **Used for:** Rough **PM₂.₅ proxy** (`PM₂.₅ ≈ AOD × 60`), its **z-score**, and contribution to the **combined AQ index**; also **seasonality**.

* **Population exposure (gridded, satellite-derived products)**
  **Preferred:** `WorldPop/GP/100m/pop` (year closest to present).
  **Fallbacks:** `JRC/GHSL/P2019/POP_GLOBE_R2019A`, `CIESIN/GPWv411/GPW_Population_Count`.
  **Used for:** **Sum of residents** inside each hotspot polygon (exposure). *(These are modelled/gridded layers informed by multiple sources, not a single sensor.)*

> Non-satellite layers: **OpenStreetMap** for sensitive sites and industrial/port features (counts & names); clustering/envelopes are geometric operations on hotspot points.


# heat Island

In [None]:
# narayanganj_uhi_hotspots.py
# Urban Heat Island (UHI) hotspots → Top-3 area-wise clusters with concave envelopes.
# Per-cluster metrics printed in plain language + Folium map.
#
# Metrics:
# - Population inside
# - Sensitive sites (schools, clinics, hospitals, elder homes)
# - Vulnerability: children %, elderly % (WorldPop ages if available); "informal housing" proxy (roof m²/person)
# - Impervious % (ESA WorldCover)
# - Tree canopy % (WorldCover) & mean NDVI (Sentinel-2)
# - Roof area & cool-roof potential (large roofs)
# - Building height/density proxy (OSM)
# - Distance to water & "blue corridor" hint (OSM water)
# - Night LST & Day–Night delta (MODIS)
# - Seasonality (pre/monsoon/post; MODIS)
# - Extreme hot periods count (8-day MODIS above local 90th percentile)
# - Apparent temperature / heat index proxy (ERA5 air temp + RH)
#
# Speed notes:
# - Uses coarse reduceRegion stats (tileScale=4, bestEffort) per polygon
# - Keeps sampling grid capped; clustering in pure Python with neighborhood bucketing
# - Uses ESA WorldCover & MODIS/ERA5 instead of very high-res rasters for heavy stats

import os
import math
from datetime import date, timedelta

import ee
import folium
from folium.plugins import MiniMap, Fullscreen, MousePosition, MeasureControl
from shapely.geometry import Point, box
from shapely.ops import unary_union

# Geo / OSM
try:
    import osmnx as ox
    import geopandas as gpd
except Exception as e:
    raise SystemExit(
        f"Import error: {e}\nInstall: pip install osmnx geopandas rtree\n"
        "If NumPy 2.x issues: pip install 'numpy<2' && reinstall geopandas shapely pyproj fiona rtree"
    )

# ------------------ CONFIG ------------------
AOI_BBOX = [90.32, 23.70, 90.52, 23.86]  # (W,S,E,N) — Narayanganj
DAYS_BACK = 60
END = date.today()
START = END - timedelta(days=DAYS_BACK)

# Sampling / EE
SCALE_M = 1000        # for LST sampling grid (MODIS native ~1 km)
MAX_POINTS = 5000
EE_TILE_SCALE = 4

# Hotspot selection
Z_THRESHOLD = 1.0
PCTL_THRESHOLD = 85.0

# Clustering
EPS_METERS = 1500.0
MIN_SAMPLES = 6

# Concave envelope (morphological alpha-shape)
ALPHA_M = 1200
MIN_ENVELOPE_POINTS = 5
MIN_POLY_AREA_M2 = 2000  # drop tiny artifacts

# Severity buckets by LST z-score
SEVERE_Z = 2.0
HIGH_Z   = 1.5
ELEV_Z   = 1.0

COLORS = {
    "severe": "#b71c1c",  # dark red
    "high":   "#e53935",  # red
    "elev":   "#fb8c00",  # orange
    "envelope": "#6a1b9a",# purple
}

USER = os.getenv("USER") or os.getenv("USERNAME") or "user"
OUT_HTML = f"/Users/{USER}/Downloads/narayanganj_uhi_hotspots.html"

# ------------------ EE INIT ------------------
def ee_init_headless():
    sa = os.environ["EE_SERVICE_ACCOUNT"]       # ee-runner@<project>.iam.gserviceaccount.com
    key_b64 = os.environ["EE_KEY_B64"]          # base64 of the JSON key

    # Write key to a temp file
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
        f.write(base64.b64decode(key_b64).decode("utf-8"))
        key_path = f.name

    creds = ee.ServiceAccountCredentials(sa, key_path)
    ee.Initialize(credentials=creds)

# ------------------ UTILITIES ------------------
def utm_crs_from_bbox(bbox):
    minx, miny, maxx, maxy = bbox
    lon_c = (minx + maxx) / 2.0
    lat_c = (miny + maxy) / 2.0
    zone = int((lon_c + 180) // 6) + 1
    epsg = 32600 + zone if lat_c >= 0 else 32700 + zone
    return f"EPSG:{epsg}"

def aoi_polygon_wgs84():
    minx, miny, maxx, maxy = AOI_BBOX
    return box(minx, miny, maxx, maxy)

def zscores(vals):
    good = [v for v in vals if v is not None and math.isfinite(v)]
    if len(good) < 2: return [0.0 for _ in vals]
    mean = sum(good)/len(good)
    var  = sum((v-mean)**2 for v in good)/len(good)
    std  = math.sqrt(max(var, 1e-12))
    return [0.0 if (v is None or not math.isfinite(v)) else (v-mean)/std for v in vals]

def p_rank(all_vals, v):
    s = sorted(all_vals)
    if not s: return 0.0
    cnt = sum(1 for x in s if x <= v)
    return 100.0 * cnt / len(s)

def haversine_m(lat1, lon1, lat2, lon2):
    R = 6371000.0
    p1, p2 = math.radians(lat1), math.radians(lat2)
    dphi = p2 - p1
    dl = math.radians(lon2 - lon1)
    a = math.sin(dphi/2)**2 + math.cos(p1)*math.cos(p2)*math.sin(dl/2)**2
    return 2*R*math.asin(math.sqrt(a))

def cluster_dbscan(points, eps_m=EPS_METERS, min_samples=MIN_SAMPLES):
    n = len(points)
    if n == 0: return []
    # light spatial bucketing (~1.1km) to prune distance calcs
    buckets = {}
    for i, p in enumerate(points):
        key = (int(p["lat"]/0.01), int(p["lon"]/0.01))
        buckets.setdefault(key, []).append(i)
    visited = [False]*n
    clusters = [-1]*n
    nbrs = [[] for _ in range(n)]
    for key, idxs in buckets.items():
        kx, ky = key
        cand = []
        for dx in (-1,0,1):
            for dy in (-1,0,1):
                cand += buckets.get((kx+dx, ky+dy), [])
        for i in idxs:
            for j in cand:
                if j <= i: continue
                if haversine_m(points[i]["lat"], points[i]["lon"], points[j]["lat"], points[j]["lon"]) <= eps_m:
                    nbrs[i].append(j); nbrs[j].append(i)
    cid = 0
    for i in range(n):
        if visited[i]: continue
        visited[i] = True
        if len(nbrs[i]) + 1 < min_samples:
            clusters[i] = -1; continue
        clusters[i] = cid
        seeds = list(nbrs[i]); k = 0
        while k < len(seeds):
            j = seeds[k]
            if not visited[j]:
                visited[j] = True
                if len(nbrs[j]) + 1 >= min_samples:
                    for q in nbrs[j]:
                        if q not in seeds: seeds.append(q)
            if clusters[j] < 0: clusters[j] = cid
            k += 1
        cid += 1
    return clusters

def ensure_clusters(hotspots):
    clusters = cluster_dbscan(hotspots, eps_m=EPS_METERS, min_samples=MIN_SAMPLES)
    if not any(c >= 0 for c in clusters):
        clusters = cluster_dbscan(hotspots, eps_m=EPS_METERS*1.6, min_samples=max(3, MIN_SAMPLES-2))
    if not any(c >= 0 for c in clusters):
        clusters = [0 for _ in hotspots]
    return clusters

def build_concave_envelopes(hotspots, clusters, metric_crs, alpha_m=ALPHA_M, min_pts=MIN_ENVELOPE_POINTS):
    """Returns dict cid -> list[Polygon in WGS84], tiny parts removed."""
    by_cluster = {}
    for hp, cid in zip(hotspots, clusters):
        if cid < 0: continue
        by_cluster.setdefault(cid, []).append(hp)
    out = {}
    for cid, pts in by_cluster.items():
        if len(pts) < min_pts: continue
        pts_wgs = gpd.GeoSeries([Point(p["lon"], p["lat"]) for p in pts], crs="EPSG:4326").to_crs(metric_crs)
        buf = pts_wgs.buffer(alpha_m)
        merged = unary_union(list(buf.values))
        shell = merged.buffer(-alpha_m)
        geom = shell if not shell.is_empty else merged.convex_hull
        polys = []
        if geom.geom_type == "Polygon": polys=[geom]
        elif geom.geom_type == "MultiPolygon": polys=list(geom.geoms)
        kept = [g for g in polys if float(g.area) >= MIN_POLY_AREA_M2]
        if not kept: continue
        kept_wgs = gpd.GeoSeries(kept, crs=metric_crs).to_crs(epsg=4326).tolist()
        out[cid] = kept_wgs
    return out

def severity_from_z(z):
    if z >= SEVERE_Z: return "severe"
    if z >= HIGH_Z:   return "high"
    if z >= ELEV_Z:   return "elev"
    return None

def z_to_level_text(z):
    if z is None: return "n/a"
    if z >= 2.0: return "Very high (well above typical)"
    if z >= 1.0: return "High (above typical)"
    if z >= 0.5: return "Slightly elevated"
    if z > -0.5: return "Around typical"
    return "Below typical"

def season_bands_today():
    """Return (pre_monsoon, monsoon, post_monsoon) windows as (start_iso,end_iso)."""
    y = date.today().year
    pre = (date(y,3,1),  date(y,5,31))
    mon = (date(y,6,1),  date(y,9,15))
    post= (date(y,9,16), date(y,11,30))
    # convert to string ISO
    return [tuple(map(str, w)) for w in (pre, mon, post)]

# ------------------ EARTH ENGINE IMAGES ------------------
def lst_day_mean(aoi, start_iso, end_iso):
    coll = (ee.ImageCollection("MODIS/061/MOD11A2")
            .filterBounds(aoi).filterDate(start_iso, end_iso)
            .select("LST_Day_1km").map(lambda img: img.updateMask(img.gt(0))))
    lst_c = coll.mean().multiply(0.02).subtract(273.15).rename("lst_day_c").clip(aoi)
    return lst_c

def lst_night_mean(aoi, start_iso, end_iso):
    coll = (ee.ImageCollection("MODIS/061/MOD11A2")
            .filterBounds(aoi).filterDate(start_iso, end_iso)
            .select("LST_Night_1km").map(lambda img: img.updateMask(img.gt(0))))
    lst_c = coll.mean().multiply(0.02).subtract(273.15).rename("lst_night_c").clip(aoi)
    return lst_c

def lst_day_daily_collection(aoi, start_iso, end_iso):
    coll = (ee.ImageCollection("MODIS/061/MOD11A1")
            .filterBounds(aoi).filterDate(start_iso, end_iso)
            .select("LST_Day_1km").map(lambda img: img.updateMask(img.gt(0))))
    return coll

def sentinel2_ndvi_recent(aoi, months_back=6):
    start = date.today() - timedelta(days=months_back*30)
    end   = date.today()
    # S2 SR (surface reflectance), simple cloud mask via QA60
    def mask_s2_sr(img):
        qa = img.select('QA60')
        cloud = qa.bitwiseAnd(1<<10).Or(qa.bitwiseAnd(1<<11))
        return img.updateMask(cloud.eq(0))
    coll = (ee.ImageCollection("COPERNICUS/S2_SR")
            .filterBounds(aoi).filterDate(str(start), str(end))
            .map(mask_s2_sr))
    med = coll.median()
    ndvi = med.normalizedDifference(['B8','B4']).rename('ndvi').clip(aoi)
    return ndvi

def worldcover_map(year=2021):
    # ESA WorldCover v200 (2021/2020). Built-up class = 50, Tree cover = 10
    try:
        img = ee.Image("ESA/WorldCover/v200").select('Map')
        return img
    except Exception:
        try:
            return ee.Image("ESA/WorldCover/v100").select('Map')
        except Exception:
            return None

def population_image(aoi):
    # Prefer WorldPop; fall back to GHSL/GPW if needed
    for yr in [2025, 2023, 2022, 2021, 2020, 2019]:
        try:
            col = (ee.ImageCollection("WorldPop/GP/100m/pop")
                   .filterBounds(aoi).filter(ee.Filter.eq('year', yr)))
            if col.size().getInfo() > 0:
                img = col.mosaic()
                bname = img.bandNames().getInfo()[0]
                return img.select(bname, ["pop"])
        except Exception:
            pass
    try:
        img = ee.Image("JRC/GHSL/P2019/POP_GLOBE_R2019A")
        bands = [b for b in img.bandNames().getInfo() if "2020" in b or "2015" in b]
        if bands: return img.select(bands[0], ["pop"])
    except Exception:
        pass
    try:
        col = ee.ImageCollection("CIESIN/GPWv411/GPW_Population_Count").filter(ee.Filter.eq("year", 2020))
        img = col.first()
        if img:
            b = img.bandNames().getInfo()[0]
            return img.select(b, ["pop"])
    except Exception:
        pass
    return None

def worldpop_children_elderly(aoi):
    """Try to assemble children% and elderly% rasters (defensive). Returns (child_img, elder_img) as fractions 0..1 or (None,None)."""
    # WorldPop has age-sex layers by country; availability varies.
    candidates = [
        "WorldPop/GP/100m/pop_age_sex",  # generic
        "WorldPop/GP/100m/pop_age_sex_cons_unadj",
        "WorldPop/GP/100m/pop_age_sex_unadj"
    ]
    for ds in candidates:
        try:
            col = ee.ImageCollection(ds).filterBounds(aoi)
            if col.size().getInfo() == 0: continue
            # Heuristic: sum young (0-4, 5-9) and elderly (65+). Band names differ; try common patterns.
            first = col.first()
            bands = first.bandNames().getInfo()
            # guess bands
            young_bands = [b for b in bands if any(k in b.lower() for k in ["0","1-4","0-4","5-9"])]
            elder_bands = [b for b in bands if any(k in b.lower() for k in ["65","65-69","70","75","80","85"])]
            total = first.reduce(ee.Reducer.sum())
            young = first.select(young_bands).reduce(ee.Reducer.sum())
            elder = first.select(elder_bands).reduce(ee.Reducer.sum())
            child_frac = young.divide(total).rename("child_frac")
            elder_frac = elder.divide(total).rename("elder_frac")
            return child_frac, elder_frac
        except Exception:
            continue
    return None, None

# ------------------ REDUCERS ------------------
def reduce_mean(image, geom, scale):
    try:
        val = image.reduceRegion(ee.Reducer.mean(), geom, scale=scale,
                                 maxPixels=1e13, bestEffort=True, tileScale=EE_TILE_SCALE)
        b = image.bandNames().getInfo()[0]
        v = val.get(b)
        return float(ee.Number(ee.Algorithms.If(v, v, None)).getInfo())
    except Exception:
        return None

def reduce_sum(image, geom, scale):
    try:
        val = image.reduceRegion(ee.Reducer.sum(), geom, scale=scale,
                                 maxPixels=1e13, bestEffort=True, tileScale=EE_TILE_SCALE)
        v = val.get(image.bandNames().get(0))
        return float(ee.Number(ee.Algorithms.If(v, v, 0)).getInfo())
    except Exception:
        return None

def fraction_of_mask(mask_img, geom, scale):
    """mask_img: 1 where class present, else 0. Returns fraction 0..1."""
    try:
        stats = mask_img.reduceRegion(ee.Reducer.mean(), geom, scale=scale,
                                      maxPixels=1e13, bestEffort=True, tileScale=EE_TILE_SCALE)
        v = stats.get(mask_img.bandNames().get(0))
        return float(ee.Number(ee.Algorithms.If(v, v, 0)).getInfo())
    except Exception:
        return None

# ------------------ OSM ------------------
def osm_geoms_from_polygon(aoi_poly_wgs84, tags_dict):
    ox.settings.use_cache = True
    ox.settings.timeout = 180
    try:
        from osmnx.features import features_from_polygon as osm_features_from_polygon
    except Exception:
        try:
            from osmnx import geometries_from_polygon as osm_features_from_polygon
        except Exception:
            raise SystemExit("OSMnx missing polygon geometries. pip install --upgrade osmnx")
    layers = []
    for k, v in tags_dict.items():
        try:
            g = osm_features_from_polygon(aoi_poly_wgs84, tags={k: v})
            if g is not None and not g.empty:
                layers.append(g)
        except Exception:
            pass
    if not layers:
        return gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")
    base_crs = layers[0].crs or "EPSG:4326"
    all_feats = gpd.GeoDataFrame(gpd.pd.concat(layers, ignore_index=True), crs=base_crs)
    all_feats = all_feats[all_feats.geometry.notna()].copy()
    return all_feats.to_crs(epsg=4326)

def count_sensitive_inside(sens_gdf, polygon):
    if sens_gdf is None or sens_gdf.empty:
        return dict(schools=0, clinics=0, hospitals=0, elder_homes=0)
    try:
        idx = sens_gdf.sindex
        sub = sens_gdf.iloc[list(idx.intersection(polygon.bounds))]
        inside = sub[sub.geometry.intersects(polygon)]
    except Exception:
        inside = sens_gdf[sens_gdf.geometry.intersects(polygon)]
    res = dict(schools=0, clinics=0, hospitals=0, elder_homes=0)
    if inside.empty:
        return res
    if "amenity" in inside.columns:
        res["schools"] = int((inside["amenity"]=="school").sum())
        res["clinics"] = int(((inside["amenity"]=="clinic") | (inside["amenity"]=="doctors")).sum())
        res["hospitals"] = int((inside["amenity"]=="hospital").sum())
    if "social_facility" in inside.columns:
        res["elder_homes"] = int(inside["social_facility"].isin(["nursing_home","assisted_living"]).sum())
    return res

# ------------------ HEAT INDEX ------------------
def heat_index_c_from_t_rh(t_air_c, rh_pct):
    """NOAA HI formula (approx). Inputs: air T in °C, RH in %."""
    if (t_air_c is None) or (rh_pct is None): return None
    # Convert to F
    T = t_air_c * 9/5 + 32.0
    R = max(0.0, min(100.0, rh_pct))
    HI = (-42.379 + 2.04901523*T + 10.14333127*R
          - 0.22475541*T*R - 0.00683783*T*T - 0.05481717*R*R
          + 0.00122874*T*T*R + 0.00085282*T*R*R - 0.00000199*T*T*R*R)
    # adjustments (ignored for brevity)
    hi_c = (HI - 32.0) * 5/9
    return hi_c

# ------------------ MAP ------------------
def build_map(aoi_bbox, hotspots, selected_cluster_polys):
    lat_c = (aoi_bbox[1] + aoi_bbox[3]) / 2.0
    lon_c = (aoi_bbox[0] + aoi_bbox[2]) / 2.0
    m = folium.Map(location=[lat_c, lon_c], zoom_start=12,
                   tiles="cartodbpositron", control_scale=True)

    # Cluster polygons (Top-3)
    for rank, (cid, poly) in enumerate(selected_cluster_polys, start=1):
        folium.GeoJson(
            data=poly.__geo_interface__,
            name=f"Hot zone #{rank} (cluster {cid})",
            style_function=lambda _ : {"color": COLORS["envelope"], "weight": 3, "fillColor": COLORS["envelope"], "fillOpacity": 0.10},
            tooltip=f"Hot zone #{rank} (cluster {cid})"
        ).add_to(m)

    # Hotspot markers only for selected clusters
    kept_cids = {cid for cid, _ in selected_cluster_polys}
    for hp in hotspots:
        if hp.get("_cid") not in kept_cids: continue
        sev = severity_from_z(hp["lst_z"])
        if sev is None: continue
        color = COLORS[sev]
        radius = 6 if sev == "elev" else (8 if sev == "high" else 10)
        folium.CircleMarker(
            location=(hp["lat"], hp["lon"]),
            radius=radius,
            color=color, fill=True, fill_color=color, fill_opacity=0.95,
            tooltip=f"{sev.upper()} UHI hotspot",
            popup=(f"<b>{sev.upper()} UHI hotspot</b><br>"
                   f"Surface temp (day): {hp['lst_c']:.1f} °C<br>"
                   f"Vs city typical: {z_to_level_text(hp['lst_z'])} (z≈{hp['lst_z']:.2f})")
        ).add_to(m)

    MiniMap(toggle_display=True, position="bottomright").add_to(m)
    Fullscreen().add_to(m)
    MousePosition(position="topright", separator=" | ", prefix="Lat/Lon:").add_to(m)
    MeasureControl(position="topright", primary_length_unit='kilometers').add_to(m)

    legend = f"""
    <div style="position: fixed; bottom: 18px; left: 18px; z-index:9999; background: white;
                padding: 10px 12px; border: 1px solid #ccc; border-radius: 6px; font-size: 13px;">
      <b>Urban Heat Island Hotspots</b> (last {DAYS_BACK} days)<br>
      <span style="display:inline-block;width:12px;height:12px;background:{COLORS['severe']};border:1px solid {COLORS['severe']};"></span>
      Severe (≥{SEVERE_Z}σ) &nbsp;
      <span style="display:inline-block;width:12px;height:12px;background:{COLORS['high']};border:1px solid {COLORS['high']};"></span>
      High ({HIGH_Z}–{SEVERE_Z}σ) &nbsp;
      <span style="display:inline-block;width:12px;height:12px;background:{COLORS['elev']};border:1px solid {COLORS['elev']};"></span>
      Elevated ({ELEV_Z}–{HIGH_Z}σ)
    </div>
    """
    m.get_root().html.add_child(folium.Element(legend))
    folium.LayerControl(collapsed=False).add_to(m)
    return m

# ------------------ MAIN ------------------
def main():
    print("Initializing Earth Engine…")
    ee_init_headless()
    aoi = ee.Geometry.Rectangle(AOI_BBOX)
    start_iso, end_iso = str(START), str(END)
    print(f"AOI: {AOI_BBOX} | Window: {start_iso} → {end_iso}")

    # Day & night LST means
    lst_day_img   = lst_day_mean(aoi, start_iso, end_iso)
    lst_night_img = lst_night_mean(aoi, start_iso, end_iso)

    # Sample grid for clustering
    fc = lst_day_img.sample(region=aoi, scale=SCALE_M, geometries=True)
    feats = fc.limit(MAX_POINTS).getInfo().get("features", [])
    rows = []
    for f in feats:
        geom = f.get("geometry", {})
        if geom.get("type") != "Point": continue
        lon, lat = geom["coordinates"]
        v = f.get("properties", {}).get("lst_day_c", None)
        if v is None or not math.isfinite(v): continue
        rows.append({"lat": float(lat), "lon": float(lon), "lst_c": float(v)})

    if not rows:
        raise SystemExit("No samples. Try increasing DAYS_BACK or MAX_POINTS.")

    # z-scores & pick hotspots
    lst_vals = [r["lst_c"] for r in rows]
    lst_z = zscores(lst_vals)
    pcts  = [p_rank(lst_z, v) for v in lst_z]
    hotspots = []
    for r, z, pr in zip(rows, lst_z, pcts):
        if (z >= Z_THRESHOLD) or (pr >= PCTL_THRESHOLD):
            hotspots.append({"lat": r["lat"], "lon": r["lon"], "lst_c": r["lst_c"], "lst_z": z, "percentile": pr})
    hotspots.sort(key=lambda x: x["lst_z"], reverse=True)

    # Cluster & envelopes
    clusters = ensure_clusters(hotspots)
    for hp, cid in zip(hotspots, clusters):
        hp["_cid"] = cid
    metric_crs = utm_crs_from_bbox(AOI_BBOX)
    envelopes_by_cid = build_concave_envelopes(hotspots, clusters, metric_crs, alpha_m=ALPHA_M, min_pts=MIN_ENVELOPE_POINTS)
    if not envelopes_by_cid:
        envelopes_by_cid = build_concave_envelopes(hotspots, [0]*len(hotspots), metric_crs, alpha_m=ALPHA_M, min_pts=3)

    # Union per cluster & areas
    cluster_union = {}
    cluster_area_km2 = {}
    for cid, polys in envelopes_by_cid.items():
        if not polys: continue
        polys_proj = gpd.GeoSeries(polys, crs="EPSG:4326").to_crs(metric_crs)
        union_geom = unary_union(list(polys_proj.values))
        area_km2 = float(union_geom.area / 1e6)
        if area_km2 <= 0: continue
        union_wgs = gpd.GeoSeries([union_geom], crs=metric_crs).to_crs(epsg=4326).iloc[0]
        cluster_union[cid] = union_wgs
        cluster_area_km2[cid] = area_km2

    # Top-3 by area
    top_cids = sorted(cluster_area_km2.keys(), key=lambda c: cluster_area_km2[c], reverse=True)[:3]
    selected = [(cid, cluster_union[cid]) for cid in top_cids]

    # ---- Ancillary datasets ----
    wc = worldcover_map(year=2021)
    ndvi_img = sentinel2_ndvi_recent(aoi, months_back=6)
    pop_img  = population_image(aoi)
    child_img, elder_img = worldpop_children_elderly(aoi)  # may be None

    # ERA5 (air temp & dewpoint for heat index proxy)
    try:
        era5 = ee.ImageCollection("ECMWF/ERA5/DAILY").filterDate(start_iso, end_iso).filterBounds(aoi)
        t2m = era5.select("mean_2m_air_temperature").mean().subtract(273.15).rename("t2m_c").clip(aoi)
        td2m = era5.select("mean_2m_dewpoint_temperature").mean().subtract(273.15).rename("td2m_c").clip(aoi)
    except Exception:
        t2m = None; td2m = None

    # Season windows
    (pre_s, pre_e), (mon_s, mon_e), (post_s, post_e) = season_bands_today()
    lst_pre  = lst_day_mean(aoi, pre_s,  pre_e)
    lst_mon  = lst_day_mean(aoi, mon_s,  mon_e)
    lst_post = lst_day_mean(aoi, post_s, post_e)

    # Extreme hot periods (use 8-day MOD11A2; count # composites above 90th pct over AOI)
    coll_8d = (ee.ImageCollection("MODIS/061/MOD11A2")
               .filterBounds(aoi).filterDate(start_iso, end_iso)
               .select("LST_Day_1km").map(lambda img: img.updateMask(img.gt(0))))
    # 90th percentile over AOI
    try:
        pct90 = coll_8d.reduce(ee.Reducer.percentile([90])).multiply(0.02).subtract(273.15).rename("p90")
    except Exception:
        pct90 = None

    # OSM: buildings, sensitive sites, water
    aoi_poly = aoi_polygon_wgs84()
    try:
        buildings = osm_geoms_from_polygon(aoi_poly, {"building": True})
    except Exception:
        buildings = gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")
    try:
        sensitive = osm_geoms_from_polygon(aoi_poly, {"amenity": ["school","clinic","hospital","doctors"],
                                                      "social_facility": ["nursing_home","assisted_living"]})
    except Exception:
        sensitive = gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")
    try:
        water = osm_geoms_from_polygon(aoi_poly, {"natural": ["water"], "waterway": ["river","canal"], "landuse": ["reservoir"]})
    except Exception:
        water = gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")

    # Print summaries
    print("\n================= Top UHI clusters (area-wise) =================")
    print("(z (σ): standardized vs city typical; 0 ≈ typical)")

    # Helpers for class fractions
    def frac_worldcover(mask_vals, poly):
        if wc is None: return None
        geom = ee.Geometry(poly.__geo_interface__)
        # allow list or int
        mask = None
        if isinstance(mask_vals, (list, tuple)):
            mask = wc.remap(mask_vals, [1]*len(mask_vals), 0).rename("m")
        else:
            mask = wc.eq(mask_vals).rename("m")
        return fraction_of_mask(mask, geom, scale=20)

    # Loop over selected clusters
    for rank, (cid, poly) in enumerate(selected, start=1):
        # Geometry conversions
        poly_series = gpd.GeoSeries([poly], crs="EPSG:4326").to_crs(utm_crs_from_bbox(AOI_BBOX))
        area_km2 = float(poly_series.area.iloc[0] / 1e6)
        geom = ee.Geometry(poly.__geo_interface__)

        # Population & vulnerability
        pop_sum = reduce_sum(pop_img, geom, scale=100) if pop_img is not None else None
        child_pct = None; elder_pct = None
        if (child_img is not None) and (elder_img is not None):
            child_mean = reduce_mean(child_img, geom, scale=100)
            elder_mean = reduce_mean(elder_img, geom, scale=100)
            child_pct = None if child_mean is None else (100.0*child_mean)
            elder_pct = None if elder_mean is None else (100.0*elder_mean)

        # Impervious & canopy; NDVI mean
        imperv_pct = None
        tree_pct = None
        if wc is not None:
            imperv_pct = None if (frac_worldcover(50, poly) is None) else (100.0 * frac_worldcover(50, poly))
            tree_pct   = None if (frac_worldcover(10, poly) is None) else (100.0 * frac_worldcover(10, poly))
        ndvi_mean = reduce_mean(ndvi_img, geom, scale=20) if ndvi_img is not None else None

        # Buildings: roof area, cool-roof potential, height/density proxy
        large_roof_threshold_m2 = 500.0
        b_in = gpd.GeoDataFrame(geometry=[], crs="EPSG:4326")
        try:
            if buildings is not None and not buildings.empty:
                try:
                    idx = buildings.sindex
                    cand = buildings.iloc[list(idx.intersection(poly.bounds))]
                    b_in = cand[cand.geometry.intersects(poly)].copy()
                except Exception:
                    b_in = buildings[buildings.geometry.intersects(poly)].copy()
        except Exception:
            pass
        roof_area_m2 = 0.0
        large_roofs_m2 = 0.0
        large_roofs_n  = 0
        mean_levels = None
        if b_in is not None and not b_in.empty:
            b_proj = b_in.to_crs(utm_crs_from_bbox(AOI_BBOX))
            areas = b_proj.geometry.area.fillna(0.0)
            roof_area_m2 = float(areas.sum())
            large_mask = areas >= large_roof_threshold_m2
            large_roofs_m2 = float(areas[large_mask].sum())
            large_roofs_n  = int(large_mask.sum())
            # height/levels proxy
            lvl = []
            for _, r in b_in.iterrows():
                lv = r.get("building:levels") or r.get("levels") or r.get("height")
                try:
                    if isinstance(lv, str) and "m" in lv: lv = lv.replace("m","").strip()
                    lvf = float(lv)
                    # if height in meters, roughly convert to levels (3 m/level)
                    if lvf > 40: lvf = lvf / 3.0
                    lvl.append(lvf)
                except Exception:
                    continue
            if lvl: mean_levels = sum(lvl)/len(lvl)

        # Informal housing proxy: roof m² per person (lower → denser/informal)
        informal_proxy = None
        if (pop_sum is not None) and pop_sum > 0 and roof_area_m2 > 0:
            m2_per_person = roof_area_m2 / pop_sum
            informal_proxy = ("likely informal/very dense" if m2_per_person < 8.0
                              else "medium density" if m2_per_person < 20.0
                              else "lower density")
        # Sensitive sites inside
        sens = count_sensitive_inside(sensitive, poly)

        # Water distance
        dist_water_m = None
        if water is not None and not water.empty:
            try:
                w_proj = water.to_crs(utm_crs_from_bbox(AOI_BBOX))
                p_proj = poly_series.iloc[0]
                dist_water_m = float(w_proj.distance(p_proj).min())
            except Exception:
                dist_water_m = None

        # Day/Night means & delta
        day_c   = reduce_mean(lst_day_img,   geom, scale=1000)
        night_c = reduce_mean(lst_night_img, geom, scale=1000)
        dn_delta = None if (day_c is None or night_c is None) else (day_c - night_c)

        # Seasonality (pre/monsoon/post)
        pre_c  = reduce_mean(lst_pre,  geom, scale=1000)
        mon_c  = reduce_mean(lst_mon,  geom, scale=1000)
        post_c = reduce_mean(lst_post, geom, scale=1000)

        # Extreme hot periods (8-day composites above AOI 90th pct)
        extreme_cnt = None
        if pct90 is not None:
            try:
                # Count composites whose mean over poly > p90 over AOI
                # Build list of images in period
                imgs = coll_8d.toList(coll_8d.size())
                n = int(coll_8d.size().getInfo())
                cnt = 0
                thr_c = reduce_mean(pct90, ee.Geometry(aoi), scale=1000)
                for k in range(n):
                    im = ee.Image(imgs.get(k)).multiply(0.02).subtract(273.15).rename("c")
                    mval = reduce_mean(im, geom, scale=1000)
                    if (mval is not None) and (thr_c is not None) and (mval > thr_c):
                        cnt += 1
                extreme_cnt = cnt
            except Exception:
                extreme_cnt = None

        # Heat index proxy (ERA5)
        hi_c = None
        if (t2m is not None) and (td2m is not None):
            t_mean = reduce_mean(t2m, geom, scale=9000)
            td_mean= reduce_mean(td2m, geom, scale=9000)
            if (t_mean is not None) and (td_mean is not None):
                # RH from T & Td (Magnus)
                T = t_mean
                Td = td_mean
                es = 6.1094 * math.exp(17.625*Td/(243.04+Td))
                e  = 6.1094 * math.exp(17.625*T /(243.04+T))
                rh = max(0.0, min(100.0, 100.0*es/e)) if e > 0 else None
                hi_c = heat_index_c_from_t_rh(T, rh) if rh is not None else None

        # Print (plain language)
        print(f"\nHot zone #{rank} (cluster {cid})")
        print(f"• Area: ~{area_km2:.2f} km²")
        print(f"• People living inside: {(f'{int(pop_sum):,}' if pop_sum is not None else 'n/a')}")
        if (child_pct is not None) or (elder_pct is not None):
            ch = f"{child_pct:.1f}%" if child_pct is not None else "n/a"
            el = f"{elder_pct:.1f}%" if elder_pct is not None else "n/a"
            print(f"• Vulnerable groups: children {ch}, elderly {el}")
        else:
            print(f"• Vulnerable groups: n/a")
        if informal_proxy is not None:
            print(f"• Density / informal proxy: {informal_proxy}")
        else:
            print(f"• Density / informal proxy: n/a")

        imp = f"{imperv_pct:.1f}%" if imperv_pct is not None else "n/a"
        trp = f"{tree_pct:.1f}%" if tree_pct is not None else "n/a"
        ndv = f"{ndvi_mean:.2f}" if ndvi_mean is not None else "n/a"
        print(f"• Surfaces: impervious {imp} | tree canopy {trp} | mean NDVI {ndv}")

        print(f"• Roof area total: {int(roof_area_m2):,} m² | large roofs: {int(large_roofs_n)} bldgs / {int(large_roofs_m2):,} m² (cool-roof potential)")
        if mean_levels is not None:
            dens = (roof_area_m2 / (area_km2*1e6)) if area_km2>0 else None
            dens_txt = f"{dens*100:.1f}% footprint cover" if dens is not None else "n/a"
            print(f"• Building height/density: mean levels ≈ {mean_levels:.1f} | {dens_txt}")
        else:
            print(f"• Building height/density: n/a")

        if dist_water_m is not None:
            print(f"• Nearest water: ~{dist_water_m:.0f} m → blue-corridor greening potential")
        else:
            print(f"• Nearest water: n/a")

        day_txt = f"{day_c:.1f} °C" if day_c is not None else "n/a"
        night_txt = f"{night_c:.1f} °C" if night_c is not None else "n/a"
        dnd_txt = (f"{dn_delta:+.1f} °C (day − night)" if dn_delta is not None else "n/a")
        print(f"• Day vs Night: day {day_txt} | night {night_txt} | Δ {dnd_txt}")

        seas_txt = []
        seas_txt.append(f"pre-monsoon {pre_c:.1f} °C" if pre_c is not None else "pre-monsoon n/a")
        seas_txt.append(f"monsoon {mon_c:.1f} °C" if mon_c is not None else "monsoon n/a")
        seas_txt.append(f"post-monsoon {post_c:.1f} °C" if post_c is not None else "post-monsoon n/a")
        print("• Seasonality: " + " | ".join(seas_txt))

        if extreme_cnt is not None:
            print(f"• Extreme hot periods (8-day composites above local 90th pct): {extreme_cnt}")
        else:
            print(f"• Extreme hot periods: n/a")

        if hi_c is not None:
            print(f"• Heat index (air temp + humidity proxy): ~{hi_c:.1f} °C")
        else:
            print(f"• Heat index: n/a")

        # Sensitive sites
        print(f"• Sensitive sites: schools:{sens.get('schools',0)}, clinics:{sens.get('clinics',0)}, "
              f"hospitals:{sens.get('hospitals',0)}, elder homes:{sens.get('elder_homes',0)}")

    # Map (Top-3 only)
    m = build_map(AOI_BBOX, hotspots, selected)
    os.makedirs(os.path.dirname(OUT_HTML), exist_ok=True)
    m.save(OUT_HTML)
    print(f"\n✅ Saved UHI map to: {OUT_HTML}\nOpen in your browser to explore.")

if __name__ == "__main__":
    main()


Initializing Earth Engine…
AOI: [90.32, 23.7, 90.52, 23.86] | Window: 2025-07-30 → 2025-09-28


  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  this_pause = _get_overpass_pause(overpass_endpoint)
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  this_pause = _get_overpass_pause(overpass_endpoint)
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  this_pause = _get_overpass_pause(overpass_endpoint)
  overpass_settings = _make_overpass_settings()
  yield _overpass_request(data={"data": query_str})
  this_pause = _get_overpass_pause(overpass_endpoint)



(z (σ): standardized vs city typical; 0 ≈ typical)

Hot zone #1 (cluster 1)
• Area: ~31.10 km²
• People living inside: n/a
• Vulnerable groups: children 23.3%, elderly 1.4%
• Density / informal proxy: n/a
• Surfaces: impervious n/a | tree canopy n/a | mean NDVI n/a
• Roof area total: 0 m² | large roofs: 0 bldgs / 0 m² (cool-roof potential)
• Building height/density: n/a
• Nearest water: ~0 m → blue-corridor greening potential
• Day vs Night: day 33.1 °C | night n/a | Δ n/a
• Seasonality: pre-monsoon 31.9 °C | monsoon 33.1 °C | post-monsoon n/a
• Extreme hot periods (8-day composites above local 90th pct): 1
• Heat index: n/a
• Sensitive sites: schools:319, clinics:78, hospitals:95, elder homes:1

Hot zone #2 (cluster 0)
• Area: ~4.83 km²
• People living inside: n/a
• Vulnerable groups: children 23.1%, elderly 1.6%
• Density / informal proxy: n/a
• Surfaces: impervious n/a | tree canopy n/a | mean NDVI n/a
• Roof area total: 0 m² | large roofs: 0 bldgs / 0 m² (cool-roof potential)
• Bu

# Satellite data this script uses (and what for)

* **MODIS LST (Land Surface Temperature)**
  **IDs:** `MODIS/061/MOD11A2` (8-day, 1 km) — bands `LST_Day_1km`, `LST_Night_1km`
  **Used for:**

  * **Daytime & nighttime LST means** (last `DAYS_BACK` days) and **day–night delta** inside each hot zone.
  * **Seasonality** (pre-monsoon / monsoon / post-monsoon) via mean daytime LST in those windows.
  * **Extreme hot periods**: count of 8-day composites with polygon-mean LST above the **AOI 90th percentile**.

* **Sentinel-2 SR (MSI)**
  **ID:** `COPERNICUS/S2_SR`
  **Used for:** **NDVI** (from bands **B8** NIR and **B4** red) to report **mean NDVI** inside each hot zone.

* **ESA WorldCover**
  **IDs:** `ESA/WorldCover/v200` (fallback `v100`) — band `Map`
  **Used for:** **Impervious surface %** (class **50**, built-up) and **Tree canopy %** (class **10**) within each polygon.

* **WorldPop (Population)**
  **ID:** `WorldPop/GP/100m/pop` (fallbacks: `JRC/GHSL/P2019/POP_GLOBE_R2019A`, `CIESIN/GPWv411/GPW_Population_Count`)
  **Used for:** **Population inside** each hot zone (sum of residents).

* **WorldPop age/sex (if available)**
  **IDs:** `WorldPop/GP/100m/pop_age_sex*`
  **Used for:** **Children %** and **Elderly %** (vulnerability overlay) inside each polygon.

> **Non-satellite context (for completeness):**
> **OpenStreetMap** is used for **buildings/roof area**, **sensitive sites** (schools/clinics/hospitals/elder homes), **distance to water**, and a simple **density/informal housing proxy** (roof m² per person).


# Prompt Design

You are an urban planner tasked with analyzing candidate sites, pollution/heat clusters, and urban heat island (UHI) zones.
Your role is to generate actionable, clear, and detailed urban planning guidelines for decision-making.
Write your response in structured bullet points so it is easy to read.

📥 Input (context provided to you will look like this):

Candidate sites with details:

Coordinates (Lat, Lon: {latitude}, {longitude})

Water access: {water proximity, wetness history}

Soil: {pH, clay %, sand %, SOC g/kg, notes}

Terrain: {HAND proxy, slope, low-lying risk}

Heat: {temperature ranges, seasonality}

Urban form: {building coverage, road density}

Pollution/Cluster analysis:

Cluster ID: {cluster_id}

Area: {size km²}

Current pollution/heat level: {z-score, relative to city typical}

Seasonality: {better/worse in monsoon/dry season, Δ value}

Sensitive sites: {schools, clinics, hospitals, elder homes}

Industrial/point-source features: {list of sources}

Urban Heat Island (UHI) hot zones:

Hot zone ID: {zone_id}

Area: {size km²}

Vulnerable groups: {children %, elderly %}

Surfaces: {impervious %, tree canopy %, NDVI}

Roofs: {total m², large roofs potential m²}

Building density: {footprint cover %, height levels}

Nearest water: {m distance}

Temperature: {day °C, night °C, seasonality, extremes}

Sensitive sites: {schools, hospitals, clinics, elder homes}

📤 Expected Output (LLM should generate):

For each site/cluster/zone, provide:

1. Suitability Assessment

Strengths (what makes this site suitable for development/greenery)

Weaknesses (risks, vulnerabilities, missing data)

Key environmental and social concerns

2. Urban Planning Recommendations

Land-use suggestions (e.g., micro-park, housing, water retention, blue-green corridor)

Infrastructure needs (roads, drainage, soil remediation, water treatment)

Heat and pollution mitigation measures (e.g., tree planting, cool roofs, water buffers)

Social considerations (protecting schools, clinics, vulnerable populations)

3. Decision Guidelines

Is this site/cluster recommended for development, conservation, or monitoring?

Priority level (High/Medium/Low)

Trade-offs (e.g., risk of flooding vs. community need)

📌 Style Instructions:

Always act as an expert urban planner.

Write in clear, structured bullet points.

Provide detailed, evidence-based reasoning.

Highlight practical actions that local governments/NGOs could implement.