# 07. Advanced Insights & Relationship Mapping

This notebook focuses on extracting meaningful relationships between different Aadhaar metrics to identify strategic trends and service efficiency.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Every figure produced below is written to this folder, so create it up front
# (exist_ok makes re-running the cell harmless).
os.makedirs('../../visualizations', exist_ok=True)

# Shared look for all charts: seaborn's whitegrid theme on a wide default canvas.
sns.set(style="whitegrid")
plt.rcParams.update({'figure.figsize': (14, 8)})

print("Advanced analysis environment ready.")

## 1. Metric Correlation Matrix
Understanding which activities (Enrollments vs. Updates) across different age groups move together.

In [None]:
# Loading the most comprehensive dataset available (Pincode level)
df = pd.read_csv('../../processed_data/pincode_data.csv')

# Correlate only the numeric columns. `pincode` is an identifier rather than a
# measurement, so it is excluded. errors='ignore' keeps the cell working when
# the CSV parser reads pincode as text, in which case select_dtypes has already
# removed it and a plain drop() would raise KeyError.
corr_matrix = (
    df.select_dtypes(include=['number'])
    .drop(columns=['pincode'], errors='ignore')
    .corr()
)

# Explicit figure/axes so the heatmap, title and saved file all refer to the
# same canvas instead of relying on pyplot's implicit "current figure".
fig, ax = plt.subplots()
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5, ax=ax)
ax.set_title('Correlation Matrix of Aadhaar Activities')

# bbox_inches='tight' expands the saved area to include tick labels that fall
# outside the default margins, so the PNG is not clipped.
fig.savefig('../../visualizations/07_metric_correlation_heatmap.png', bbox_inches='tight')
plt.show()

## 2. Demographic vs Biometric Service Index
Does a state favor one type of update? A ratio > 1 means more demographic updates; a ratio < 1 means more biometric updates.

In [None]:
# Per-row totals for each update type, summed over both age columns.
df['total_demo'] = df['demo_age_5_17'] + df['demo_age_17_']
df['total_bio'] = df['bio_age_5_17'] + df['bio_age_17_']

# Row-level index kept for downstream use; the +1 guards against division by
# zero where a row has no biometric updates.
df['update_type_index'] = df['total_demo'] / (df['total_bio'] + 1)

# State-level index = (total demographic updates) / (total biometric updates).
# Averaging the row-level ratios instead would let a few low-volume rows with
# almost no biometric activity push a state's value above 1 even when the state
# as a whole performs more biometric updates, which would contradict the
# "> 1 means more demographic" reading of the chart.
state_totals = df.groupby('state')[['total_demo', 'total_bio']].sum()
# The ratio is undefined for a state with zero biometric updates; leave it out.
state_totals = state_totals[state_totals['total_bio'] > 0]
state_index = (
    (state_totals['total_demo'] / state_totals['total_bio'])
    .rename('update_type_index')
    .sort_values()
)

fig, ax = plt.subplots()
state_index.plot(kind='barh', color='teal', ax=ax)
# Reference line at 1: equal demographic and biometric volume.
ax.axvline(1, color='red', linestyle='--', label='Balanced Ratio')
ax.set_title('Demographic-to-Biometric Update Index by State')
ax.set_xlabel('Ratio (Demo / Bio)')
ax.legend()
# bbox_inches='tight' keeps long state names on the y-axis from being clipped
# in the saved PNG.
fig.savefig('../../visualizations/07_demo_vs_bio_index_by_state.png', bbox_inches='tight')
plt.show()

print("States with ratio < 1 indicate high biometric update demand (needs technical hardware focus).")
print("States with ratio > 1 indicate high demographic update demand (data correction focus).")

## 3. Growth vs. Saturation Mapping
Comparing child enrollments for ages 0–5 (new growth) with total demographic and biometric updates across the 5–17 and 17+ age groups (maturity).

In [None]:
# Combined update volume per row. Relies on the `total_demo` and `total_bio`
# columns created in the section 2 cell, so that cell must run first.
df['total_updates'] = df['total_demo'] + df['total_bio']

# Aggregate to state for clarity.
# `nunique` counts distinct pincodes, so `num_pincodes` stays accurate even if
# a pincode appears on more than one row ('count' would tally rows instead).
state_pivot = df.groupby('state').agg({
    'age_0_5': 'sum',
    'total_updates': 'sum',
    'pincode': 'nunique'
}).rename(columns={'pincode': 'num_pincodes'})

fig, ax = plt.subplots()
sns.scatterplot(
    data=state_pivot,
    x='age_0_5',
    y='total_updates',
    size='num_pincodes',
    hue='num_pincodes',
    palette='viridis',
    sizes=(100, 1000),
    ax=ax,
)

# Label every bubble with its state name (the index of state_pivot).
for point in state_pivot.itertuples():
    ax.annotate(point.Index, (point.age_0_5, point.total_updates), fontsize=9, alpha=0.7)

ax.set_title('Market Maturity: New Child Enrollments vs. System Updates by State')
ax.set_xlabel('Child Enrollments (0-5 Years)')
ax.set_ylabel('Total Citizen Updates')
# bbox_inches='tight' keeps annotations and the legend inside the saved PNG.
fig.savefig('../../visualizations/07_growth_saturation_bubble_plot.png', bbox_inches='tight')
plt.show()