# 06. Pincode Micro-Analysis - Analysis

This notebook performs hyper-local analysis on Aadhaar data at the pincode level to identify micro-trends and outliers.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Plot styling: apply seaborn's whitegrid theme, then set the default figure
# size used by every chart in this notebook.
sns.set_theme(style="whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

# Figures are saved to this folder; create it up front so savefig cannot fail
# on a missing directory.
VIZ_DIR = '../../visualizations'
os.makedirs(VIZ_DIR, exist_ok=True)
print("Libraries imported.")

## 1. Load Processed Data

In [None]:
# Read the processed pincode-level table; the cells below all operate on `data`.
PINCODE_CSV = '../../processed_data/pincode_data.csv'
data = pd.read_csv(filepath_or_buffer=PINCODE_CSV)
print("Data loaded.")

## 2. Pincode Volume Distribution

In [None]:
# Columns that make up a row's total activity: the age_* columns plus the
# demo_* and bio_* columns (per the plot title, enrollments + updates).
activity_columns = [
    'age_0_5', 'age_5_17', 'age_18_greater',
    'demo_age_5_17', 'demo_age_17_',
    'bio_age_5_17', 'bio_age_17_',
]

# Accumulate left to right with `+`, so a missing value in any column makes
# that row's total missing as well.
running_total = data[activity_columns[0]]
for column in activity_columns[1:]:
    running_total = running_total + data[column]
data['total_activity'] = running_total

# Histogram (50 bins) with a KDE overlay, drawn on an explicit figure/axes
# pair and written to disk before being shown.
fig, ax = plt.subplots()
sns.histplot(data['total_activity'], bins=50, kde=True, color='purple', ax=ax)
ax.set_title('Distribution of Activity (Enrollments + Updates) across Pincodes')
ax.set_xlabel('Total Activity Count')
fig.savefig('../../visualizations/06_pincode_activity_distribution.png')
plt.show()

## 3. Identifying Under-performing Pincodes
Pincodes whose total activity is very low relative to the average activity of pincodes in the same state.

In [None]:
# Mean total_activity of the state each row belongs to, broadcast back onto
# every row so it aligns with `data` for element-wise division.
# 'total_activity' is already a single selected column, so no numeric_only
# filter needs to be forwarded to the aggregation.
state_avg = data.groupby('state')['total_activity'].transform('mean')

# Ratio below 1 means the row is below its state's average activity.
# A row whose state mean is 0 yields NaN (0/0) and is ranked after all
# valid ratios below.
data['relative_performance'] = data['total_activity'] / state_avg

# nsmallest selects the 15 lowest ratios without sorting the whole frame and
# breaks ties by original row order, so the listing is stable when many rows
# share the same ratio (e.g. zero activity).
bottom_pincodes = data.nsmallest(15, 'relative_performance')
print("Top 15 Under-performing Pincodes (Relative to State Average):")
print(bottom_pincodes[['pincode', 'state', 'district', 'total_activity', 'relative_performance']])