# 07. Insight Generation

In [5]:
import pandas as pd
import os
import sys
import plotly.express as px

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from src.metrics.stress_index import calculate_ausi
from src.analytics.migration_analysis import calculate_migration_score
from src.models.clustering import cluster_districts

# Load the merged master table and convert its 'date' column to datetimes.
processed_path = os.path.join("..", "data", "processed", "merged_master_table.csv")
df = pd.read_csv(processed_path).assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

# Step 1 - stress index (the 'ausi_score' column used below is presumably
# produced here; confirm against src.metrics.stress_index).
df_ausi = calculate_ausi(df)

# Step 2 - migration score, computed before clustering.
df_scored = calculate_migration_score(df_ausi)

# Rank (state, district) pairs by their mean AUSI score and keep the ten highest.
top_stress_districts = (
    df_scored
    .groupby(['state', 'district'])['ausi_score']
    .mean()
    .sort_values(ascending=False)
    .head(10)
)
print("Top 10 High Stress Districts:", top_stress_districts, sep="\n")

# Step 3 - clustering recommendations (now driven by migration flux for
# better targeting), followed by a count of districts per recommendation label.
recommendations = cluster_districts(df_scored)
print(
    "\nResource Allocation Summary:",
    recommendations['recommendation'].value_counts(),
    sep="\n",
)

Top 10 High Stress Districts:
state              district                     
Chhattisgarh       ManendragarhChirmiriBharatpur    100.000000
                   Narayanpur                        91.892588
Jammu and Kashmir  Shupiyan                          90.945490
Chhattisgarh       Kawardha                          84.952801
                   Kabeerdham                        78.408492
Manipur            Thoubal                           73.879165
                   Bishnupur                         69.629267
Chhattisgarh       Mungeli                           65.110347
                   Mahasamund                        63.410757
Telangana          Medchal?malkajgiri                63.128649
Name: ausi_score, dtype: float64

Resource Allocation Summary:
recommendation
Monitor                696
Mobile Van Required    333
New Center Required    103
Name: count, dtype: int64


### 2. Migration Insights
Identifying districts with a high ratio of demographic updates to new enrolments (potential migrant hotspots).

In [None]:
# Top 10 potential migration hubs.
# Rationale: updates per enrolment - a high update volume relative to new
# enrolments is taken as a sign of population movement.
# 'migration_flux' is already available on df_scored from the scoring step.
top_mig = (
    df_scored
    .groupby(['state', 'district'])['migration_flux']
    .mean()
    .sort_values(ascending=False)
    .head(10)
)

print("Top 10 Potential Migration Hubs (Flux Score):", top_mig, sep="\n")

### 3. Strategic Recommendations
Based on the analysis, here are the districts requiring immediate intervention (New Seva Centers).

In [7]:
# Keep only the rows whose recommendation label is 'New Center Required'.
new_centers_needed = recommendations.loc[
    recommendations['recommendation'] == 'New Center Required'
]

# Show the ten rows with the highest AUSI score, limited to the key columns.
display(
    new_centers_needed[['state', 'district', 'ausi_score', 'total_updates']]
    .sort_values('ausi_score', ascending=False)
    .head(10)
)

Unnamed: 0,state,district,ausi_score,total_updates
577,Maharashtra,Parbhani,39.127752,272046.0
591,Maharashtra,Yavatmal,33.910956,472849.0
550,Maharashtra,Chandrapur,32.371609,295757.0
992,Uttar Pradesh,Kushinagar,26.982992,265788.0
565,Maharashtra,Latur,26.198296,339156.0
549,Maharashtra,Buldhana,25.577718,304112.0
543,Maharashtra,Amravati,24.045697,404373.0
118,Bihar,Araria,23.13327,273335.0
136,Bihar,Katihar,23.125264,346189.0
545,Maharashtra,Beed,22.611873,382673.0


### 4. Export Policy Report
Saving the actionable insights for the stakeholders.

In [8]:
# Destination folder for the exported report tables.
output_dir = os.path.join("..", "reports", "tables")
# exist_ok=True makes directory creation idempotent and removes the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

# Save New Center Recommendations
new_centers_needed.to_csv(os.path.join(output_dir, "new_centers_required.csv"), index=False)
print("Saved New Center Recommendations.")

# Save Migration Hubs (Formatted)
# NOTE: top_mig is defined in the "Migration Insights" cell; that cell must be
# run before this one (it carries no execution count in the saved notebook).
# reset_index turns the (state, district) index into columns and names the
# value column 'migration_score'.
top_mig_df = top_mig.reset_index(name='migration_score')
top_mig_df.to_csv(os.path.join(output_dir, "migration_hubs.csv"), index=False)
print("Saved Migration Hubs Report.")

Saved New Center Recommendations.
Saved Migration Hubs Report.


## Conclusion
The analysis pipeline is now complete. We have:
1.  **Ingested and Profiled** the raw Aadhaar data.
2.  **Cleaned and Merged** into a master dataset.
3.  **Analyzed Trends** like seasonality and anomalies.
4.  **Predicted** biometric demand.
5.  **Generated Actionable Insights** for resource allocation.

**Next Step:** Launch the interactive **Streamlit Dashboard** to visualize these metrics in real time.
Run the command: `streamlit run dashboard/app.py`