# 09. Population-Normalized Heatmaps

This notebook compares Aadhaar enrollment/update activities with the total state population to calculate the **Service Penetration Ratio** (Activity per 1000 people).

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import requests
import os

# Plot defaults shared by every matplotlib/seaborn figure in this notebook.
sns.set(style="whitegrid")
plt.rcParams.update({'figure.figsize': (15, 10)})

# Later cells save charts into this folder, so create it up front
# (exist_ok makes re-running the cell harmless).
os.makedirs(name='../../visualizations', exist_ok=True)
print("Environment ready.")

## 1. Load Aadhaar Data

In [None]:
# Processed geographic dataset; the following steps read its 'state',
# 'total_enrollments' and 'total_updates' columns.
df = pd.read_csv(filepath_or_buffer='../../processed_data/geographic_data.csv')

# Cleaning state names to match population data and GeoJSON
# Older or split spellings mapped to the name used as a key in the population table.
_STATE_ALIASES = {
    'Orissa': 'Odisha',
    'Pondicherry': 'Puducherry',
    'Uttaranchal': 'Uttarakhand',
    'Dadra and Nagar Haveli': 'Dadra and Nagar Haveli and Daman and Diu',
    'Daman and Diu': 'Dadra and Nagar Haveli and Daman and Diu',
}


def clean_state(state):
    """Normalize a raw state/UT name to its canonical spelling.

    Steps applied to non-missing values:
      1. every '&' becomes the word 'and' (with or without surrounding spaces);
      2. leading/trailing whitespace is removed and internal runs of
         whitespace collapse to a single space;
      3. known legacy or pre-merger names are replaced via ``_STATE_ALIASES``.

    Parameters
    ----------
    state : str or missing value
        Raw name as found in the dataset.

    Returns
    -------
    str or missing value
        The normalized name. Missing values (None/NaN) are returned unchanged
        so that callers can still detect and drop them.
    """
    if pd.isna(state):
        return state
    # Pad the replacement with spaces so 'A&B' and 'A & B' both end up as
    # 'A and B' once split()/join() has collapsed the whitespace.
    s = ' '.join(state.replace('&', ' and ').split())
    return _STATE_ALIASES.get(s, s)

# Tag every record with its normalized state name, then roll the two activity
# counters up to one row per state/UT and add their combined total.
df['state_clean'] = df['state'].apply(clean_state)
activity_columns = ['total_enrollments', 'total_updates']
state_activity = (
    df.groupby('state_clean')[activity_columns]
    .sum()
    .reset_index()
    .assign(total_activity=lambda totals: totals['total_enrollments'] + totals['total_updates'])
)

print(f"Processed Aadhaar activity for {len(state_activity)} states/UTs.")

## 2. Incorporate Population Data (2024 Projections)
Source: UIDAI/Census Projections 2024 (Figures in millions)

In [None]:
# Projected population per state/UT in millions; keys must match the values
# in state_activity['state_clean'] exactly for the lookup below to succeed.
pop_data = {
    'Uttar Pradesh': 241.1, 'Bihar': 136.0, 'Maharashtra': 127.1, 'West Bengal': 99.8,
    'Madhya Pradesh': 88.4, 'Rajasthan': 82.9, 'Tamil Nadu': 77.5, 'Gujarat': 74.4,
    'Karnataka': 69.5, 'Andhra Pradesh': 53.3, 'Odisha': 47.0, 'Jharkhand': 41.3,
    'Telangana': 38.4, 'Kerala': 36.0, 'Assam': 36.0, 'Punjab': 31.0, 'Haryana': 30.5,
    'Chhattisgarh': 30.5, 'Delhi': 20.4, 'Jammu and Kashmir': 13.8, 'Uttarakhand': 11.8,
    'Himachal Pradesh': 7.5, 'Tripura': 4.1, 'Meghalaya': 3.4, 'Manipur': 3.3,
    'Nagaland': 2.1, 'Goa': 1.6, 'Arunachal Pradesh': 1.5, 'Puducherry': 1.6,
    'Dadra and Nagar Haveli and Daman and Diu': 0.99, 'Chandigarh': 1.2,
    'Mizoram': 1.2, 'Sikkim': 0.69, 'Andaman and Nicobar Islands': 0.40,
    'Ladakh': 0.30, 'Lakshadweep': 0.07
}

# Names absent from pop_data come back as NaN from .map().
state_activity['population_millions'] = state_activity['state_clean'].map(pop_data)

# Report the names that found no population figure before dropping them, so a
# spelling mismatch is visible instead of silently shrinking the analysis.
unmapped_states = state_activity.loc[
    state_activity['population_millions'].isna(), 'state_clean'
].tolist()
if unmapped_states:
    print(
        f"WARNING: no population figure for {len(unmapped_states)} state name(s); "
        f"their activity is excluded from the ratio: {unmapped_states}"
    )

# .copy() gives later cells an independent frame to add columns to.
state_activity = state_activity.dropna(subset=['population_millions']).copy()

print(f"Population data mapped for {len(state_activity)} of {len(pop_data)} states/UTs.")

## 3. Calculate Ratio: Activity per 1000 People
Formula: `(Total Activity / (Population in Millions * 1,000,000)) * 1000`

In [None]:
# Turn "millions" into a head-count, divide activity by it, and scale the
# per-person rate up to a per-1000-people rate.
population_headcount = state_activity['population_millions'] * 1000000
state_activity['activity_per_1000'] = (state_activity['total_activity'] / population_headcount) * 1000

# Highest penetration first; later cells rely on this frame under the same name.
state_activity = state_activity.sort_values(by='activity_per_1000', ascending=False)

print("Top 5 States by Service Penetration (per 1000 people):")
top_states = state_activity[['state_clean', 'activity_per_1000']].head(5)
print(top_states)

## 4. National Heatmap: Activity per 1000 People

In [None]:
print("Fetching India GeoJSON...")
india_geojson_url = "https://raw.githubusercontent.com/geohacker/india/master/state/india_state.geojson"
# Bound the wait and fail on HTTP errors, so a bad download is reported as a
# network/HTTP problem rather than as a JSON decoding error or a hung cell.
response = requests.get(india_geojson_url, timeout=30)
response.raise_for_status()
india_geojson = response.json()

# The choropleth joins on properties.NAME_1 (see featureidkey below). A state
# whose cleaned name has no matching feature is simply not drawn, so list
# those names up front instead of leaving unexplained blank regions.
geojson_names = {
    feature.get('properties', {}).get('NAME_1')
    for feature in india_geojson.get('features', [])
}
missing_from_map = sorted(set(state_activity['state_clean']) - geojson_names)
if missing_from_map:
    print(
        f"WARNING: {len(missing_from_map)} state name(s) have no matching GeoJSON feature "
        f"and will not appear on the map: {missing_from_map}"
    )

fig = px.choropleth(
    state_activity,
    geojson=india_geojson,
    featureidkey='properties.NAME_1',
    locations='state_clean',
    color='activity_per_1000',
    color_continuous_scale="OrRd",
    title='Aadhaar Service Penetration: Activity per 1000 People (by State)',
    labels={'activity_per_1000': 'Activity per 1000 People'}
)

# Zoom to the plotted states and hide the default world base map.
fig.update_geos(fitbounds="locations", visible=False)
# NOTE(review): static export needs plotly's image-export backend (kaleido)
# installed in the kernel environment - confirm it is listed in the project deps.
fig.write_image('../../visualizations/09_national_penetration_heatmap.png')
fig.show()

## 5. State-wise Comparison (Bar Plot)

In [None]:
# Horizontal bar per state/UT, drawn in the row order of state_activity.
# A dedicated figure/axes pair keeps this chart independent of pyplot's
# implicit "current figure" and of the plotly `fig` created earlier.
fig_bar, ax = plt.subplots()
sns.barplot(data=state_activity, x='activity_per_1000', y='state_clean', palette='Spectral', ax=ax)
ax.set_title('Aadhaar Service Penetration Rate by State (per 1000 people)')
ax.set_xlabel('Records per 1000 population')
ax.set_ylabel('State')

# Long labels such as 'Dadra and Nagar Haveli and Daman and Diu' overflow the
# default left margin; a tight layout and tight bounding box keep them in the PNG.
fig_bar.tight_layout()
fig_bar.savefig('../../visualizations/09_state_penetration_bar_chart.png', bbox_inches='tight')
plt.show()