# Geospatial Heatmap Data Visualization

Configure paths and column names:

In [1]:
import os

def _abs_path(*parts:str) -> str:
    """Join `parts` into one path and make it absolute against the current working directory."""
    return os.path.abspath(os.path.join(*parts))


# input files, addressed relative to the directory the notebook runs in
NFHS_DATA_PATH          = _abs_path("..", "subphenotype", "data", "clean.csv")
SUBPHENOTYPES_DATA_PATH = _abs_path("..", "subphenotype", "out", "subphenotypes.csv")
GENETICS_DATA_PATH      = _abs_path("..", "genetic", "data", "state_scores.csv")
GEOJSON_PATH            = _abs_path("..", "subphenotype", "data", "assets", "districts.geojson")

# CSV column headers
STATE_COL          = "State/UT"
DISTRICT_COL       = "District"
PRIORITY_SCORE_COL = "GLP1_Focused_Priority_Score"
GENETIC_RISK_COL   = "Priority Score"
AFFLUENCE_COL      = "Population in Highest Wealth Quintile (%)"
INSURANCE_COL      = "Households with Health Insurance / Financing Scheme Coverage (%)"

# property names carried by each GeoJSON feature
DISTRICT_GJ  = "district"
STATE_GJ     = "st_nm"

# output location
# NOTE(review): no cell visible in this notebook reads HTML_PATH; the export
# cell builds its own path ending in "heatmap.html" — confirm which is intended.
HTML_PATH = _abs_path(".", "out", "map.html")

Construct district name alias mapping:

In [2]:
# Spelling variants to reconcile between the CSVs and the GeoJSON. Both sides
# are lower-case because the map is consulted after names have been normalized;
# a name that is not listed is used as-is.
ALIASES = dict(
    aizawl="aizawal",
    belgaum="belagavi",
    aravali="aravalli",
)

Construct CSV geo-identifier $\to$ priority score lookup:

In [3]:
import pandas as pd


def norm(s:str) -> str:
    """Normalize a place name into a case- and padding-insensitive matching key.

    Args:
        s: The raw name. Non-string values are converted with `str()`.

    Returns:
        The stripped, lower-cased text of `s`, or "" when `s` is missing.
        Both `None` and float NaN count as missing; NaN is what pandas puts in
        an empty CSV cell, and without this check it would become the literal
        key "nan".
    """
    if s is None: return ""

    # NaN is the only float that compares unequal to itself
    if isinstance(s, float) and s != s: return ""

    return str(s).strip().lower()


# load the subphenotype scores and attach normalized join keys;
# district names additionally pass through ALIASES
df_subphenotype = pd.read_csv(SUBPHENOTYPES_DATA_PATH).assign(
    district_key=lambda frame: frame[DISTRICT_COL].map(norm).map(lambda name: ALIASES.get(name, name)),
    state_key=lambda frame: frame[STATE_COL].map(norm),
)

# (district_key, state_key) -> priority score, skipping rows that have no score;
# if a key pair repeats, the last row wins
subphenotype_scored = df_subphenotype.dropna(subset=[PRIORITY_SCORE_COL])
subphenotype_lookup = dict(zip(
    zip(subphenotype_scored["district_key"], subphenotype_scored["state_key"]),
    subphenotype_scored[PRIORITY_SCORE_COL],
))

Construct genetic data lookup:

In [4]:
# load the genetic scores and attach the normalized state join key
df_genetic = pd.read_csv(GENETICS_DATA_PATH).assign(
    state_key=lambda frame: frame[STATE_COL].map(norm),
)

# state_key -> genetic risk score (keyed by state only, no district component),
# skipping rows that have no score; if a state repeats, the last row wins
genetic_scored = df_genetic.dropna(subset=[GENETIC_RISK_COL])
genetic_lookup = dict(zip(genetic_scored["state_key"], genetic_scored[GENETIC_RISK_COL]))

Construct affluence/adoption data lookup:

In [5]:
import numpy as np


def z_norm(series:pd.Series) -> pd.Series:
    """Standardize a numeric series after replacing missing entries with its mean.

    Args:
        series: Values to standardize; NaN entries are allowed.

    Returns:
        A series on the same index holding (value - mean) / std, where std is
        the population standard deviation (ddof=0) of the mean-filled values.
        When that std is zero or NaN (constant, all-missing or empty input),
        a series of zeros on the same index is returned instead.
    """
    center = series.mean(skipna=True)
    filled = series.copy().fillna(center)
    spread = filled.std(skipna=True, ddof=0)

    usable = not (pd.isna(spread) or spread == 0)
    if usable:
        return (filled - center) / spread

    # no spread to scale by: every entry sits at the mean
    return pd.Series(np.zeros(len(filled)), index=filled.index)


# keep only the identifier columns and the two indicators that feed the score
df_adoption = pd.read_csv(NFHS_DATA_PATH)[[STATE_COL, DISTRICT_COL, AFFLUENCE_COL, INSURANCE_COL]]

# put both indicators on a common scale before weighting them
affluence_norm = z_norm(df_adoption[AFFLUENCE_COL])
insurance_norm = z_norm(df_adoption[INSURANCE_COL])

# weighted blend: 0.7 on affluence, 0.3 on insurance coverage
df_adoption = df_adoption.assign(**{"Adoption Score": 0.7*affluence_norm + 0.3*insurance_norm})

# normalized join keys; district names additionally pass through ALIASES
df_adoption = df_adoption.assign(
    district_key=df_adoption[DISTRICT_COL].map(norm).map(lambda name: ALIASES.get(name, name)),
    state_key=df_adoption[STATE_COL].map(norm),
)

# (district_key, state_key) -> raw insurance coverage, skipping rows without a value
insured_rows = df_adoption.dropna(subset=[INSURANCE_COL])
insurance_lookup = dict(zip(
    zip(insured_rows["district_key"], insured_rows["state_key"]),
    insured_rows[INSURANCE_COL],
))

# (district_key, state_key) -> blended adoption score, skipping rows without a value
adoption_rows = df_adoption.dropna(subset=["Adoption Score"])
adoption_lookup = dict(zip(
    zip(adoption_rows["district_key"], adoption_rows["state_key"]),
    adoption_rows["Adoption Score"],
))

Match the subphenotype-based priority, genetic risk, adoption, and insurance-coverage scores from the CSVs to each district, and add them to the GeoJSON features:

In [6]:
import json

def _as_score(value):
    """Convert a looked-up score to `float`, or return `None` when it is missing.

    Missing means `None` (no lookup entry) or NaN. The check is deliberately
    not a truthiness test: a score of 0 is a real value and must be kept,
    otherwise the district would be drawn as "no data".
    """
    if value is None: return None

    number = float(value)

    # NaN is the only float that compares unequal to itself
    return None if number != number else number


# load .geojson
with open(GEOJSON_PATH, "r", encoding="utf-8") as f:
    geo = json.load(f)

for feat in geo["features"]:
    # "properties" can be absent or an explicit null; treat both as empty
    props = feat.get("properties") or {}

    # same normalization as the CSV-side keys so the lookups can match
    d = norm(props.get(DISTRICT_GJ, ""))
    s = norm(props.get(STATE_GJ, ""))

    # district-level scores are keyed by (district, state); genetic risk by state only
    subphenotype = subphenotype_lookup.get((d, s))
    genetic      = genetic_lookup.get(s)
    adoption     = adoption_lookup.get((d, s))
    insurance    = insurance_lookup.get((d, s))

    props["glp1_score"]      = _as_score(subphenotype)
    props["genetic_risk"]    = _as_score(genetic)
    props["adoption_score"]  = _as_score(adoption)
    props["insurance_score"] = _as_score(insurance)

    # write back in case a fresh dict was created above
    feat["properties"] = props

Draw heatmap:

In [7]:
import folium
from folium.features import GeoJsonTooltip
from branca.colormap import linear
import numpy as np

# style returned by feature_style for features that have no value:
# zero fill opacity with a thin grey outline
DEFAULT_STYLE = dict(
    fillOpacity=0,
    weight=0.2,
    fillColor="#cccccc",
    color="#555555",
)


def minmax_from_geo(geo, key):
    """Return the (min, max) of property `key` across the features of `geo`.

    Features whose value is `None` (or absent) or a float NaN are ignored;
    the remaining values are converted with `float()`. When nothing remains,
    the fallback range (0.0, 1.0) is returned.
    """
    present = []

    for feature in geo["features"]:
        raw = feature["properties"].get(key)

        if raw is None:
            continue
        if isinstance(raw, float) and np.isnan(raw):
            continue

        present.append(float(raw))

    if not present:
        return (0.0, 1.0)

    return (min(present), max(present))


def feature_style(feature, *, cmap, fill_opacity=0.5, weight=0.3, border="#333"):
    """Build the style dict for one GeoJSON feature from its "__value__" property.

    Args:
        feature: A feature dict; the value is read from feature["properties"]["__value__"].
        cmap: Callable mapping a float to a fill colour.
        fill_opacity: Fill opacity for features that have a value.
        weight: Outline width for features that have a value.
        border: Outline colour for features that have a value.

    Returns:
        DEFAULT_STYLE when the value is `None` or a float NaN; otherwise a new
        dict with keys "fillOpacity", "weight", "fillColor" and "color".
    """
    value = feature["properties"].get("__value__")
    missing = value is None or (isinstance(value, float) and np.isnan(value))

    if missing:
        return DEFAULT_STYLE

    return dict(fillOpacity=fill_opacity, weight=weight, fillColor=cmap(float(value)), color=border)


# one entry per map layer:
# (name shown in the layer control, feature property to colour by, label for legend and tooltip)
layers = [
    ("GLP1 Priority by District",          "glp1_score",     "GLP1 Score"),
    ("Genetic Risk by State",              "genetic_risk",   "Genetic Risk"),
    ("Adoption Priority",                  "adoption_score", "Adoption Score"),
    ("Household with Insurance Coverage",  "insurance_score", "Insurance Score"),
]

m = folium.Map(location=[22.9734, 78.6569], zoom_start=5, tiles="cartodbpositron")

for layer_name, prop_key, label in layers:
    # colour scale spanning the observed range of this layer's property
    lo, hi = minmax_from_geo(geo, prop_key)
    cmap = linear.YlOrRd_09.scale(lo, hi)
    cmap.caption = label

    # per-layer copy of the features with the layer's value mirrored into
    # "__value__", which is the property feature_style reads
    layer_geo = {
        "type": geo["type"],
        "features": [
            {**feature, "properties": {**feature["properties"], "__value__": feature["properties"].get(prop_key)}}
            for feature in geo["features"]
        ],
    }

    tooltip = GeoJsonTooltip(
        fields=[DISTRICT_GJ, STATE_GJ, prop_key],
        aliases=["District", "State/UT", label],
        sticky=True,
        labels=True,
    )

    overlay = folium.GeoJson(
        data=layer_geo,
        name=layer_name,
        # cm=cmap binds this iteration's colormap; a plain closure would see
        # whichever colormap the loop assigned last
        style_function=lambda feat, cm=cmap: feature_style(feat, cmap=cm, fill_opacity=0.5, weight=0.3),
        tooltip=tooltip,
    )
    overlay.add_to(m)

    cmap.add_to(m)

folium.LayerControl().add_to(m)

<folium.map.LayerControl at 0x7fee12a46900>

Export heatmap to interactive HTML:

In [8]:
# target file, resolved against the current working directory
OUT_PATH = os.path.abspath(os.path.join(".", "out", "heatmap.html"))
OUT_DIR  = os.path.dirname(OUT_PATH)

# the directory must exist before the map is written into it
os.makedirs(OUT_DIR, exist_ok=True)
m.save(OUT_PATH)

print(f"Saved: {OUT_PATH}")

Saved: /home/aarush/Projects/NN_Hackathon/visualization/out/heatmap.html
