In [10]:
import pandas as pd
import pandas as pd
import plotly.express as px
import json
from district_to_state import district_to_state
import dash
from dash import dcc, html, Input, Output, State
import dash_bootstrap_components as dbc
from shapely.geometry import shape
from dash import dcc, html

In [11]:

# Load the forecast table that every later cell reads as `df`.
df = pd.read_csv('../data/merged/district_forecast_2025_2030_all_fields.csv')

# Ensure Year is an integer column.
df['Year'] = df['Year'].astype(int)

# Read the district boundaries. The encoding is pinned to UTF-8 (the encoding
# GeoJSON is specified to use) so that non-ASCII district names are decoded the
# same way on every platform instead of depending on the locale default.
with open('../data/merged/map.geojson', 'r', encoding='utf-8') as f:
    geojson = json.load(f)


In [3]:

# Bring district names into one canonical form (trimmed, lowercase) in the data ...
df['District'] = df['District'].str.strip().str.lower()

# Show which property keys the GeoJSON features carry.
first_feature_props = geojson['features'][0]['properties']
print(first_feature_props.keys())

# ... and in the GeoJSON, overwriting the DISTRICT property in place.
for feature in geojson['features']:
    props = feature['properties']
    props['DISTRICT'] = props['DISTRICT'].strip().lower()

# District -> state lookup with normalised keys. The hand-written entries are
# merged last, so they take precedence over the imported mapping.
manual_state_overrides = {
    'andaman islands': 'Andaman & Nicobar Island',
    'barabanki': 'Uttar Pradesh',
    'leh (ladakh)': 'Jammu and Kashmir',
    'mumbai (suburban)': 'Maharashtra',
    'ri bhoi': 'Meghalaya',
    'sant ravidas nagar bhadohi': 'Uttar Pradesh',
    'senapati': 'Manipur',
    'south  twenty four parganas': 'West Bengal',
}
district_to_state_lower = {
    **{name.strip().lower(): state for name, state in district_to_state.items()},
    **manual_state_overrides,
}
df['State'] = df['District'].map(district_to_state_lower)

# Diagnostic: districts for which the lookup produced no state.
unmatched = df.loc[df['State'].isna(), 'District'].unique()
if len(unmatched) == 0:
    print("✅ All districts matched to states successfully.")
else:
    print("⚠️ Districts not matched to any state:")
    print(unmatched)

# Diagnostic: districts present in the data but absent from the GeoJSON.
geojson_districts = {feat['properties']['DISTRICT'] for feat in geojson['features']}
df_districts = set(df['District'])
unmatched_geojson = df_districts.difference(geojson_districts)
if not unmatched_geojson:
    print("\n✅ All districts found in GeoJSON.")
else:
    print("\n⚠️ Districts in data not found in GeoJSON:")
    print(unmatched_geojson)

# Colour limits: 5th and 95th percentile of Risk_Factor across the whole table.
risk_values = df['Risk_Factor']
q_low = risk_values.quantile(0.05)
q_high = risk_values.quantile(0.95)

# Animated choropleth: one frame per Year, rows joined to GeoJSON features
# through the DISTRICT property.
choropleth_options = dict(
    geojson=geojson,
    locations='District',
    featureidkey='properties.DISTRICT',
    color='Risk_Factor',
    color_continuous_scale='YlOrRd',
    range_color=(q_low, q_high),
    map_style='carto-positron',
    zoom=4,
    center=dict(lat=22.9734, lon=78.6569),
    opacity=0.7,
    hover_name='District',
    hover_data=['State', 'Risk_Factor', 'Year'],
    animation_frame='Year',
)
fig = px.choropleth_map(df, **choropleth_options)

fig.update_layout(
    title='Predictive Risk Mapping of Poverty/Malnutrition (2025-2030)',
    margin=dict(r=0, t=30, l=0, b=0),
)

# Save the map as HTML.
fig.write_html('risk_map.html')
print("\n✅ risk_map.html generated successfully.")


dict_keys(['FID', 'ST_NM', 'ST_CEN_CD', 'DT_CEN_CD', 'DISTRICT'])
✅ All districts matched to states successfully.

✅ All districts found in GeoJSON.

✅ risk_map.html generated successfully.


In [14]:

def _canon(name):
    """Return `name` with surrounding whitespace removed and lowercased."""
    return name.strip().lower()


# Normalise the data's district names and show the GeoJSON property keys.
df['District'] = df['District'].str.strip().str.lower()
print(geojson['features'][0]['properties'].keys())

# District -> state lookup keyed by normalised name; the explicit pairs below
# are written afterwards and therefore override any imported entry.
district_to_state_lower = dict(
    (_canon(raw_name), state) for raw_name, state in district_to_state.items()
)
for district_name, state_name in (
    ('andaman islands', 'Andaman & Nicobar Island'),
    ('barabanki', 'Uttar Pradesh'),
    ('leh (ladakh)', 'Jammu and Kashmir'),
    ('mumbai (suburban)', 'Maharashtra'),
    ('ri bhoi', 'Meghalaya'),
    ('sant ravidas nagar bhadohi', 'Uttar Pradesh'),
    ('senapati', 'Manipur'),
    ('south  twenty four parganas', 'West Bengal'),
):
    district_to_state_lower[district_name] = state_name
df['State'] = df['District'].map(district_to_state_lower)

# Normalise the DISTRICT property of every GeoJSON feature in place.
for feature in geojson['features']:
    feature['properties']['DISTRICT'] = _canon(feature['properties']['DISTRICT'])

# Debug: report districts that did not resolve to a state.
state_missing = df['State'].isna()
unmatched = df[state_missing]['District'].unique()
if len(unmatched):
    print("⚠️ Districts not matched to any state:")
    print(unmatched)
else:
    print("✅ All districts matched to states successfully.")

# Debug: report data districts that have no GeoJSON feature.
geojson_districts = {feat['properties']['DISTRICT'] for feat in geojson['features']}
df_districts = set(df['District'])
unmatched_geojson = {name for name in df_districts if name not in geojson_districts}
if len(unmatched_geojson) > 0:
    print("\n⚠️ Districts in data not found in GeoJSON:")
    print(unmatched_geojson)
else:
    print("\n✅ All districts found in GeoJSON.")

# Centroid lookup: district name -> {"lat": ..., "lon": ...}, used to re-centre
# the map on a clicked district.
# Districts whose geometry is missing, empty or cannot be parsed are left OUT of
# the dict, so `centroids.get(name, default)` falls back to the caller's default
# instead of returning None coordinates.
centroids = {}
centroid_failures = []
for feature in geojson["features"]:
    district = feature["properties"]["DISTRICT"]
    try:
        geom = shape(feature["geometry"])
        if geom.is_empty:
            raise ValueError("empty geometry")
        centroid = geom.centroid
        centroids[district] = {"lat": centroid.y, "lon": centroid.x}
    except Exception as exc:
        # `Exception` rather than a bare `except:` so KeyboardInterrupt and
        # SystemExit still propagate; the failure is recorded, not hidden.
        centroid_failures.append((district, exc))

if centroid_failures:
    print(f"⚠️ No centroid for {len(centroid_failures)} district(s):")
    for district, exc in centroid_failures:
        print(f"   {district}: {exc!r}")

# Colour scale limits: 5th and 95th percentile of Risk_Factor over all rows.
risk_values = df['Risk_Factor']
q_low = risk_values.quantile(0.05)
q_high = risk_values.quantile(0.95)

# Dash application styled with the Bootstrap theme.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.title = "Interactive Risk Map"

# Page components, listed in the order they appear on the page.
available_years = sorted(df['Year'].unique())

page_heading = html.H2(
    "Predictive Risk Mapping of Poverty/Malnutrition (2025–2030)",
    className="text-center my-3",
)
year_dropdown = dcc.Dropdown(
    id='year-dropdown',
    options=[{'label': str(year), 'value': year} for year in available_years],
    value=available_years[0],  # start on the earliest year
    clearable=False,
    className="mb-2",
)
reset_button = dbc.Button(
    "Reset View",
    id='reset-btn',
    color='secondary',
    size='sm',
    className='mb-2',
)
map_graph = dcc.Graph(id='choropleth-map', config={'displayModeBar': False})
click_status = html.Div(id='clicked-district', className='text-center my-2 text-primary')

# Layout
app.layout = dbc.Container(
    [page_heading, year_dropdown, reset_button, map_graph, click_status],
    fluid=True,
)

# Shared view settings for the maps drawn by the callback.
INDIA_CENTER = {'lat': 22.9734, 'lon': 78.6569}
INDIA_ZOOM = 4
DISTRICT_ZOOM = 6


def _build_map(frame, color, color_scale, color_range, center=None, zoom=INDIA_ZOOM):
    """Build the district choropleth used by every branch of the callback.

    Args:
        frame: DataFrame with at least 'District', 'State', 'Risk_Factor' and
            the column named by `color`.
        color: Name of the column that drives the fill colour.
        color_scale: Value passed as `color_continuous_scale`.
        color_range: (low, high) pair passed as `range_color`.
        center: {'lat', 'lon'} dict for the view centre; the all-India centre
            is used when omitted.
        zoom: Map zoom level.

    Returns:
        The figure, with all layout margins set to zero.
    """
    fig = px.choropleth_map(
        frame,
        geojson=geojson,
        locations='District',
        featureidkey='properties.DISTRICT',
        color=color,
        color_continuous_scale=color_scale,
        range_color=color_range,
        map_style='carto-positron',
        zoom=zoom,
        center=center or INDIA_CENTER,
        opacity=0.9,
        hover_name='District',
        hover_data=['State', 'Risk_Factor']
    )
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
    return fig


def _district_center(district):
    """Return the stored centroid of `district`, or the all-India centre.

    The fallback also applies when an entry exists but its 'lat' or 'lon' is
    None, so the map is never asked to centre on missing coordinates.
    """
    center = centroids.get(district)
    if not center or center.get('lat') is None or center.get('lon') is None:
        return INDIA_CENTER
    return center


# Callback
@app.callback(
    Output('choropleth-map', 'figure'),
    Output('clicked-district', 'children'),
    Input('choropleth-map', 'clickData'),
    Input('year-dropdown', 'value'),
    Input('reset-btn', 'n_clicks'),
    State('choropleth-map', 'figure')
)
def update_map(clickData, selected_year, reset_clicks, current_fig):
    """Redraw the map after a district click, a year change or a reset.

    Args:
        clickData: Click payload of the graph; the clicked district is read
            from clickData['points'][0]['location'].
        selected_year: Year chosen in the dropdown; rows are filtered to it.
        reset_clicks: Click count of the reset button (only its firing matters).
        current_fig: Figure currently shown; returned unchanged when the click
            payload cannot be read.

    Returns:
        (figure, status text) for the graph and the message line below it.
    """
    ctx = dash.callback_context
    triggered_by = ctx.triggered[0]['prop_id'].split('.')[0] if ctx.triggered else None
    dff = df[df['Year'] == selected_year]

    # A click on the map: highlight the clicked district and zoom onto it.
    if triggered_by == "choropleth-map" and clickData:
        try:
            zoom_district = clickData['points'][0]['location']
        except (KeyError, IndexError, TypeError):
            return current_fig, "⚠️ Couldn't extract district"

        if zoom_district:
            # Vectorised 0/1 flag: 1 for the clicked district, 0 for the rest.
            highlight_df = dff.assign(
                Highlight=dff['District'].eq(zoom_district).astype(int)
            )
            fig = _build_map(
                highlight_df,
                color='Highlight',
                color_scale=[[0, "lightgray"], [1, "red"]],
                color_range=(0, 1),
                center=_district_center(zoom_district),
                zoom=DISTRICT_ZOOM,
            )
            return fig, f"🔍 Focused on: {zoom_district.title()}"

    # Reset, year change and first load all draw the same all-India risk map;
    # only the status text differs.
    fig = _build_map(
        dff,
        color='Risk_Factor',
        color_scale='YlOrRd',
        color_range=(q_low, q_high),
    )
    if triggered_by == "reset-btn":
        return fig, "Reset to full India view"
    return fig, "Click a district to focus"

# Run app
if __name__ == '__main__':
    app.run(debug=True)


dict_keys(['FID', 'ST_NM', 'ST_CEN_CD', 'DT_CEN_CD', 'DISTRICT'])
✅ All districts matched to states successfully.

✅ All districts found in GeoJSON.


In [17]:

# Put the data's district names in the same trimmed/lowercase form as the GeoJSON
# 'district' property built below. The operation is idempotent, so it is a no-op
# when an earlier cell already normalised the column, and it stops this cell
# from silently depending on that earlier in-place mutation of `df`.
df['District'] = df['District'].str.strip().str.lower()

# Add a normalised lowercase 'district' property next to the GeoJSON 'DISTRICT'.
for feature in geojson['features']:
    feature['properties']['district'] = feature['properties']['DISTRICT'].strip().lower()

# District -> state lookup with normalised keys; the manual entries are applied
# last and therefore override the imported mapping.
district_to_state_lower = {k.strip().lower(): v for k, v in district_to_state.items()}
district_to_state_lower.update({
    'andaman islands': 'Andaman & Nicobar Island',
    'barabanki': 'Uttar Pradesh',
    'leh (ladakh)': 'Jammu and Kashmir',
    'mumbai (suburban)': 'Maharashtra',
    'ri bhoi': 'Meghalaya',
    'sant ravidas nagar bhadohi': 'Uttar Pradesh',
    'senapati': 'Manipur',
    'south  twenty four parganas': 'West Bengal',
})

# Map State column
df['State'] = df['District'].map(district_to_state_lower)

# Convert Year to native int
df['Year'] = df['Year'].astype(int)

# Debug check: districts that did not resolve to a state.
unmatched = df[df['State'].isna()]['District'].unique()
if len(unmatched) > 0:
    print("⚠️ Districts not matched to any state:")
    print(unmatched)
else:
    print("✅ All districts matched to states successfully.")

# Debug check: data districts without a matching GeoJSON 'district' property.
geojson_districts = set(f['properties']['district'] for f in geojson['features'])
df_districts = set(df['District'])
unmatched_geojson = df_districts - geojson_districts
if unmatched_geojson:
    print("\n⚠️ Districts in data not found in GeoJSON:")
    print(unmatched_geojson)
else:
    print("\n✅ All districts found in GeoJSON.")

# Values used to configure the dashboard controls (plain Python numbers).
risk_min = float(df['Risk_Factor'].min())
risk_max = float(df['Risk_Factor'].max())
years = sorted([int(y) for y in df['Year'].unique()])
states = sorted(df['State'].dropna().unique())

# Colour range: 5th and 95th percentile of Risk_Factor.
q_low = df['Risk_Factor'].quantile(0.05)
q_high = df['Risk_Factor'].quantile(0.95)

def _slider_mark_position(value, lower, upper):
    """Return the slider position for a mark labelled with `value`.

    The value is rounded to two decimals (matching its label) and then clamped
    into [lower, upper]: rounding the minimum down or the maximum up would put
    the mark outside the slider's range, where it cannot sit on the track.
    Whole numbers are returned as int so the key serialises as "1", not "1.0".
    NOTE(review): the int normalisation follows the integer-key convention used
    in Dash's slider examples - confirm it is still needed for the installed version.
    """
    position = min(max(round(value, 2), lower), upper)
    return int(position) if float(position).is_integer() else float(position)


# Create Dash app
app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("Predictive Risk Mapping Dashboard (2025-2030)"),

    # Year selector: one stop per available year, starting at the earliest.
    dcc.Slider(
        id='year-slider',
        min=min(years),
        max=max(years),
        step=1,
        value=min(years),
        marks={year: str(year) for year in years}
    ),

    # Risk filter: starts at the full range, with marks at the quartiles.
    dcc.RangeSlider(
        id='risk-slider',
        min=risk_min,
        max=risk_max,
        step=0.01,
        value=[risk_min, risk_max],
        marks={_slider_mark_position(val, risk_min, risk_max): str(round(val, 2))
               for val in df['Risk_Factor'].quantile([0, 0.25, 0.5, 0.75, 1]).tolist()}
    ),

    # Optional multi-select state filter; nothing selected means no state filter.
    dcc.Dropdown(
        id='state-dropdown',
        options=[{'label': s, 'value': s} for s in states],
        multi=True,
        placeholder="Filter by State"
    ),

    dcc.Graph(id='choropleth')
])

# Callback for map update
@app.callback(
    Output('choropleth', 'figure'),
    [Input('year-slider', 'value'),
     Input('risk-slider', 'value'),
     Input('state-dropdown', 'value')]
)
def update_map(selected_year, risk_range, selected_states):
    """Redraw the choropleth for the current year, risk range and state filter.

    Args:
        selected_year: Year chosen on the year slider.
        risk_range: Two-element sequence [low, high] from the risk slider;
            both ends are inclusive.
        selected_states: States picked in the dropdown; when empty or None,
            rows are not filtered by state.

    Returns:
        The choropleth figure for the rows that pass all filters.
    """
    risk_low = risk_range[0]
    risk_high = risk_range[1]

    # Build one boolean mask for all filters, then select the rows once.
    keep = (df['Year'] == selected_year) & df['Risk_Factor'].between(risk_low, risk_high)
    if selected_states:
        keep = keep & df['State'].isin(selected_states)
    dff = df[keep]

    map_options = dict(
        geojson=geojson,
        locations='District',
        featureidkey='properties.district',
        color='Risk_Factor',
        color_continuous_scale='YlOrRd',
        map_style='carto-positron',
        zoom=4,
        range_color=(q_low, q_high),
        center=dict(lat=22.9734, lon=78.6569),
        opacity=0.9,
        hover_name='District',
        hover_data=['State', 'Risk_Factor'],
    )
    fig = px.choropleth_map(dff, **map_options)

    fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
    return fig

# Run server
if __name__ == '__main__':
    app.run(debug=False)


✅ All districts matched to states successfully.

✅ All districts found in GeoJSON.


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# One histogram (30 bins, KDE overlay) of Risk_Factor per year in `years`,
# each written to its own PNG file.
for year in years:
    # Explicit figure/axes instead of the implicit pyplot "current figure",
    # so each plot can be saved and released individually.
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.histplot(df.loc[df['Year'] == year, 'Risk_Factor'], bins=30, kde=True, ax=ax)
    ax.set_title(f'Distribution of District Risk Scores ({year})')
    ax.set_xlabel('Risk Score')
    ax.set_ylabel('Number of Districts')
    fig.tight_layout()
    fig.savefig(f'district_risk_distribution_{year}.png')
    plt.show()      # render the figure in the notebook before it is released
    plt.close(fig)  # free the figure so open figures do not pile up across the loop
