# 03_pressure_index

In [None]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the three raw monthly feeds and stack them into one frame.
# Assumes the combined frame exposes 'state', 'district', 'month' and 'total'
# columns (they are used as group keys / value below) — confirm against the CSVs.
source_files = (
    'district_monthly_enrollment.csv',
    'district_monthly_biometric.csv',
    'district_monthly_demographic.csv',
)
df_all = pd.concat([pd.read_csv(path) for path in source_files])

# One row per (state, district, month) holding the summed 'total' of all feeds.
# NOTE(review): rows come out ordered by the group keys, so the later growth
# calculation relies on 'month' values sorting chronologically — confirm.
monthly_totals = df_all.groupby(['state', 'district', 'month'])['total'].sum()
district_monthly = monthly_totals.reset_index()

# Per-district metrics: overall volume and the standard deviation of the
# monthly totals (NaN for a district with a single month of data).
per_district_totals = district_monthly.groupby(['state', 'district'])['total']
metrics = per_district_totals.agg(total_volume='sum', volatility='std').reset_index()

# Growth Rate (Avg Monthly Growth)
def get_growth(x):
    """Return the mean row-over-row fractional change of ``x['total']``.

    Args:
        x: Rows belonging to one group, with a numeric ``total`` column.
            Changes are computed between consecutive rows in the order
            given, so the rows should already be in chronological order.

    Returns:
        0 when fewer than two rows are present. Otherwise the mean of the
        finite row-over-row changes; NaN when no finite change exists
        (e.g. every previous total is zero).
    """
    if len(x) < 2:
        return 0
    changes = x['total'].pct_change()
    # A step up from a zero total yields +/-inf, and a single inf makes the
    # whole mean infinite. Treat such steps as undefined, exactly like the
    # NaN produced by a 0 -> 0 step, which mean() already skips.
    finite_changes = changes.replace([np.inf, -np.inf], np.nan)
    return finite_changes.mean()

# Compute one growth figure per district, then attach it to the metrics table.
district_keys = ['state', 'district']
growth_by_district = district_monthly.groupby(district_keys).apply(get_growth)
growth = growth_by_district.reset_index(name='monthly_growth_rate')

metrics = metrics.merge(growth, on=district_keys)
# Districts whose growth could not be computed (NaN) are treated as flat.
filled_growth = metrics['monthly_growth_rate'].fillna(0)
metrics['monthly_growth_rate'] = filled_growth


In [None]:
# Normalization: rescale each metric column independently with MinMaxScaler.
from sklearn.preprocessing import MinMaxScaler

cols = ['total_volume', 'monthly_growth_rate', 'volatility']
norm_cols = ['norm_' + c for c in cols]

scaler = MinMaxScaler()
# Missing metric values are treated as 0 before scaling.
raw_metrics = metrics[cols].fillna(0)
metrics[norm_cols] = scaler.fit_transform(raw_metrics)

# Pressure Index: weighted blend of the normalized metrics
# (50% volume, 30% growth, 20% volatility).
volume_term = 0.5 * metrics['norm_total_volume']
growth_term = 0.3 * metrics['norm_monthly_growth_rate']
volatility_term = 0.2 * metrics['norm_volatility']
metrics['pressure_index'] = volume_term + growth_term + volatility_term

# Classification
def classify_pressure(x):
    """Return the stress tier label for a pressure index value.

    Thresholds are inclusive lower bounds, checked from highest to lowest:
    0.75 -> 'Critical Infrastructure Stress', 0.55 -> 'High Stress',
    0.35 -> 'Moderate Stress'. Anything that meets none of them is 'Stable'.
    """
    tier_floors = (
        (0.75, 'Critical Infrastructure Stress'),
        (0.55, 'High Stress'),
        (0.35, 'Moderate Stress'),
    )
    for floor, label in tier_floors:
        if x >= floor:
            return label
    return 'Stable'

# Label every district with the tier matching its pressure index.
metrics['pressure_tier'] = metrics['pressure_index'].map(classify_pressure)

# Output: identifiers, the index and its tier, then the raw (un-normalized)
# metrics; numeric values are rounded to 3 decimals in the CSV only.
output_columns = [
    'state',
    'district',
    'pressure_index',
    'pressure_tier',
    'total_volume',
    'monthly_growth_rate',
    'volatility',
]
out_03 = metrics[output_columns]
rounded_output = out_03.round(3)
rounded_output.to_csv('03_output.csv', index=False)
print('Saved 03_output.csv')