In [None]:

import geopandas as gpd
import pandas as pd
from shapely.geometry import box
import numpy as np
from pathlib import Path
import logging

logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s', level=logging.INFO)

# Project root: step up one level when the kernel was started inside "notebooks/".
BASE = Path.cwd()
if BASE.name == 'notebooks':
    BASE = BASE.parent

# Input / output locations, all resolved relative to the project root.
PROCESSED_DIR = BASE.joinpath("data", "processed")
GEO_DIR = BASE.joinpath("geo")
OUTPUTS = BASE.joinpath("outputs")
MAPS = OUTPUTS.joinpath("maps")
# Only the output folders are created here; the others are expected to exist.
OUTPUTS.mkdir(parents=True, exist_ok=True)
MAPS.mkdir(parents=True, exist_ok=True)

# Load the daily per-grid-cell table, parsing the 'date' column as datetimes.
daily_csv = PROCESSED_DIR.joinpath("delhi_daily_grid.csv")
df = pd.read_csv(daily_csv, parse_dates=['date'])
logging.info("Loaded daily grid rows=%d", df.shape[0])

# --- helper functions ---
def compute_rh(temp_c, dewpoint_c):
    """Relative humidity (%) from air temperature and dew point.

    Both inputs are taken in degrees Celsius and may be scalars or
    array-likes that support NumPy arithmetic (e.g. pandas Series).
    The same exponential vapour-pressure approximation is evaluated at the
    dew point and at the air temperature; their ratio, as a percentage, is
    clipped to the range [0, 100].
    """
    def _vapour_pressure(celsius):
        # Exponential approximation of vapour pressure at `celsius`.
        return 6.11 * np.exp((17.27 * celsius) / (237.3 + celsius))

    saturation = _vapour_pressure(temp_c)
    actual = _vapour_pressure(dewpoint_c)
    percent = 100.0 * (actual / saturation)
    return np.clip(percent, 0, 100)

def heat_index_celsius(T, RH):
    """Heat index in °C from air temperature (°C) and relative humidity (%).

    Follows the US National Weather Service procedure:

    1. Compute the simple Steadman-consistent estimate and average it with
       the temperature.
    2. Only where that value is >= 80 °F, switch to the Rothfusz regression,
       including the low-humidity and high-humidity adjustments.

    The Rothfusz regression on its own is not valid for cool conditions
    (for 10 °C / 50 % RH it yields roughly 37 °C), so it must not be applied
    unconditionally.

    Parameters
    ----------
    T : float, numpy array or pandas Series
        Air temperature in degrees Celsius.
    RH : float, numpy array or pandas Series
        Relative humidity in percent.

    Returns
    -------
    float, numpy array or pandas Series
        Heat index in degrees Celsius. A Series is returned when either input
        is a Series, a float when both inputs are scalars, and an array
        otherwise. NaN inputs produce NaN outputs.
    """
    # Keep pandas label-alignment semantics when both inputs are Series.
    if isinstance(T, pd.Series) and isinstance(RH, pd.Series):
        T, RH = T.align(RH)
    series_like = next((s for s in (T, RH) if isinstance(s, pd.Series)), None)

    T_F = np.asarray(T, dtype=float) * 9 / 5 + 32
    rh = np.asarray(RH, dtype=float)

    # Step 1: simple formula, averaged with the temperature itself.
    simple = 0.5 * (T_F + 61.0 + (T_F - 68.0) * 1.2 + rh * 0.094)
    simple = (simple + T_F) / 2.0

    # Step 2: Rothfusz regression (only meaningful for warm conditions).
    rothfusz = (-42.379
                + 2.04901523 * T_F
                + 10.14333127 * rh
                - 0.22475541 * T_F * rh
                - 0.00683783 * T_F * T_F
                - 0.05481717 * rh * rh
                + 0.00122874 * T_F * T_F * rh
                + 0.00085282 * T_F * rh * rh
                - 0.00000199 * T_F * T_F * rh * rh)

    # Low-humidity adjustment: RH < 13 % and 80 °F <= T <= 112 °F.
    # The clip only silences sqrt-of-negative warnings outside that window;
    # inside it the argument is already non-negative.
    dry = (rh < 13) & (T_F >= 80) & (T_F <= 112)
    dry_adj = ((13 - rh) / 4) * np.sqrt(np.clip((17 - np.abs(T_F - 95.0)) / 17, 0, None))
    rothfusz = np.where(dry, rothfusz - dry_adj, rothfusz)

    # High-humidity adjustment: RH > 85 % and 80 °F <= T <= 87 °F.
    humid = (rh > 85) & (T_F >= 80) & (T_F <= 87)
    humid_adj = ((rh - 85) / 10) * ((87 - T_F) / 5)
    rothfusz = np.where(humid, rothfusz + humid_adj, rothfusz)

    HI_F = np.where(simple >= 80, rothfusz, simple)
    hi_c = (HI_F - 32) * 5 / 9

    if series_like is not None:
        return pd.Series(hi_c, index=series_like.index)
    if hi_c.ndim == 0:
        return float(hi_c)
    return hi_c

# --- derive RH and heat index for each grid/day ---
# An existing heat_index column is used as-is; otherwise it is computed from
# temp_mean and dewpoint_mean, and the cell fails loudly if those are missing.
if 'heat_index' in df.columns:
    logging.info("heat_index column already present")
elif {'temp_mean', 'dewpoint_mean'}.issubset(df.columns):
    df['RH'] = compute_rh(df['temp_mean'], df['dewpoint_mean'])
    df['heat_index'] = heat_index_celsius(df['temp_mean'], df['RH'])
    logging.info("Computed RH and heat_index from temp_mean/dewpoint_mean")
else:
    raise KeyError("Missing columns to compute heat_index (need temp_mean and dewpoint_mean)")

# --- average heat index per grid cell ---
# One row per (lat_bin, lon_bin) pair with the mean heat index over all rows.
cell_stats = (
    df.groupby(['lat_bin', 'lon_bin'])['heat_index']
    .mean()
    .rename('mean_hi')
    .reset_index()
)
logging.info("Aggregated to %d grid cells", cell_stats.shape[0])


In [None]:

# Build polygons around each (lat_bin, lon_bin) center
if cell_stats.empty:
    # Fail with a clear message instead of an obscure GeoDataFrame/to_file error.
    raise ValueError("cell_stats is empty; no grid cells to build polygons for")

unique_lats = sorted(cell_stats['lat_bin'].unique())
unique_lons = sorted(cell_stats['lon_bin'].unique())
# Cell size = median spacing between distinct bin centers; falls back to 0.25
# when there is only a single distinct latitude / longitude.
lat_step = np.median(np.diff(unique_lats)) if len(unique_lats) > 1 else 0.25
lon_step = np.median(np.diff(unique_lons)) if len(unique_lons) > 1 else 0.25
lat_half = lat_step / 2
lon_half = lon_step / 2

# One rectangle per cell, centered on the bin center; cell_id is the row index
# of cell_stats. itertuples avoids the per-row Series construction of iterrows.
cells = [
    {
        'geometry': box(row.lon_bin - lon_half, row.lat_bin - lat_half,
                        row.lon_bin + lon_half, row.lat_bin + lat_half),
        'cell_id': row.Index,
        'mean_hi': row.mean_hi,
    }
    for row in cell_stats.itertuples()
]

grid = gpd.GeoDataFrame(cells, crs="EPSG:4326")

# GEO_DIR is not created anywhere else in this notebook, so make sure it
# exists before writing into it.
GEO_DIR.mkdir(parents=True, exist_ok=True)
grid_path = GEO_DIR / "dwarka_grid_risk.geojson"
grid.to_file(grid_path, driver="GeoJSON")
logging.info("Saved grid geojson to %s", grid_path)


In [None]:

# Quick Folium map preview
import folium
from branca.colormap import linear

# Center the map on the mean of the cell centers. The centers are derived from
# each cell's bounding box rather than `.centroid`, which warns on a
# geographic CRS (EPSG:4326); for rectangular cells the two coincide.
cell_bounds = grid.geometry.bounds
center_lat = ((cell_bounds['miny'] + cell_bounds['maxy']) / 2).mean()
center_lon = ((cell_bounds['minx'] + cell_bounds['maxx']) / 2).mean()
colormap = linear.YlOrRd_09.scale(grid['mean_hi'].min(), grid['mean_hi'].max())

m = folium.Map(location=[center_lat, center_lon], zoom_start=11, tiles="cartodbpositron")
# Color each cell by mean_hi, joining data rows to features on cell_id.
folium.Choropleth(
    geo_data=grid,
    data=grid,
    columns=['cell_id','mean_hi'],
    key_on='feature.properties.cell_id',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Mean Heat Index'
).add_to(m)
# NOTE(review): Choropleth with legend_name presumably renders its own legend,
# so this continuous colormap may show up as a second one — confirm it is wanted.
colormap.caption = 'Mean Heat Index'
colormap.add_to(m)

MAPS.mkdir(parents=True, exist_ok=True)
map_path = MAPS / "dwarka_grid_risk_map.html"
# Pass a plain string: older branca releases only open str/bytes targets and
# treat anything else as an already-open file object.
m.save(str(map_path))
logging.info("Saved folium map at %s", map_path)
