In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os
from dateutil.relativedelta import relativedelta

MASTER_FILE = "./outputs/master_monthly.csv"
HOTSPOT_LIST_FILE = "./outputs/tables/top25_mean_assi.csv"
OUTPUT_TBL_DIR = "./outputs/tables"
OUTPUT_FIG_DIR = "./outputs/figures"

# Ensure dirs
os.makedirs(OUTPUT_TBL_DIR, exist_ok=True)
os.makedirs(OUTPUT_FIG_DIR, exist_ok=True)

# Data Loading and Hotspot Identification
Load the master dataset. Identify the top 20 hotspots either from the pre-calculated top 25 list or by computing the mean ASSI directly.

In [None]:
df = pd.read_csv(MASTER_FILE)
df['date_obj'] = pd.to_datetime(df['month_year'], format='%Y-%m')


if os.path.exists(HOTSPOT_LIST_FILE):
    hotspots = pd.read_csv(HOTSPOT_LIST_FILE).head(20)
else:
    print("Hotspot list missing. Computing directly from master...")
    agg = df.groupby(['state', 'district', 'pincode'])['assi'].mean().reset_index()
    hotspots = agg.nlargest(20, 'assi')

print(f" identified {len(hotspots)} hotspots for forecasting.")

 identified 20 hotspots for forecasting.


# Forecasting Logic
For each hotspot, calculate a forecast based on the mean of the last 3 months. Generate predictions for the next 3 months.

In [3]:
forecast_results = []

# 3. Forecast Loop
for _, row in hotspots.iterrows():
    s, d, p = row['state'], row['district'], row['pincode']
    
    # Get history
    mask = (df['state'] == s) & (df['district'] == d) & (df['pincode'] == p)
    history = df[mask].sort_values('date_obj')
    
    if len(history) < 1: continue
        
    # Baseline: Mean of last 3 months
    last_3 = history.tail(3)
    baseline_ma = last_3['assi'].mean()
    
    # Generate next 3 months
    last_date = history['date_obj'].max()
    
    for i in range(1, 4):
        next_date = last_date + relativedelta(months=i)
        fc_month_str = next_date.strftime('%Y-%m')
        
        forecast_results.append({
            'state': s,
            'district': d,
            'pincode': p,
            'forecast_month': fc_month_str,
            'forecast_assi': baseline_ma,
            'method': '3-Month MA Baseline'
        })

df_forecast = pd.DataFrame(forecast_results)
out_csv = os.path.join(OUTPUT_TBL_DIR, "forecast_top_hotspots_3m.csv")
df_forecast.to_csv(out_csv, index=False)
print(f"Saved forecast table: {out_csv}")

Saved forecast table: ./outputs/tables/forecast_top_hotspots_3m.csv


# Plotting and Ranking
Visualize the historical data and forecasts for the top 5 hotspots. Generate a ranking table for the predicted top stress locations for the upcoming month.

In [None]:
# 4. Plot Top 5
top_5 = hotspots.head(5)

for i, row in top_5.iterrows():
    s, d, p = row['state'], row['district'], row['pincode']
    

    mask_hist = (df['state'] == s) & (df['district'] == d) & (df['pincode'] == p)
    history = df[mask_hist].sort_values('date_obj')
    

    mask_fc = (df_forecast['state'] == s) & (df_forecast['district'] == d) & (df_forecast['pincode'] == p)
    fc_data = df_forecast[mask_fc].copy()
    fc_data['date_obj'] = pd.to_datetime(fc_data['forecast_month'], format='%Y-%m')
    

    plt.figure(figsize=(10, 5))
    plt.plot(history['date_obj'], history['assi'], label='Historical', marker='o')
    plt.plot(fc_data['date_obj'], fc_data['forecast_assi'], label='Forecast', linestyle='--', marker='x', color='red')
    
    plt.title(f"ASSI Forecast: {d} - {p}")
    plt.ylabel("Aadhaar Service Stress Index")
    plt.xlabel("Date")
    plt.legend()
    plt.grid(True, alpha=0.3)
    
    out_fig = os.path.join(OUTPUT_FIG_DIR, f"forecast_hotspot_{i+1}.png")
    plt.savefig(out_fig)
    plt.close()
    print(f"Saved plot: {out_fig}")

# 5. Top 10 Predicted Rankings (Next Month)
if not df_forecast.empty:
    next_month = df_forecast['forecast_month'].min()
    next_month_data = df_forecast[df_forecast['forecast_month'] == next_month]
    
    top_10_pred = next_month_data.nlargest(10, 'forecast_assi')
    out_rank = os.path.join(OUTPUT_TBL_DIR, "top10_predicted_hotspots_next_month.csv")
    top_10_pred.to_csv(out_rank, index=False)
    print("Saved predicted rankings.")
    print(top_10_pred[['district', 'pincode', 'forecast_assi']])

Saved plot: ./outputs/figures/forecast_hotspot_1.png
Saved plot: ./outputs/figures/forecast_hotspot_2.png
Saved plot: ./outputs/figures/forecast_hotspot_3.png
Saved plot: ./outputs/figures/forecast_hotspot_4.png
Saved plot: ./outputs/figures/forecast_hotspot_5.png
Saved predicted rankings.
          district  pincode  forecast_assi
18  Dinajpur Uttar   733210         1356.5
