# 06. Pincode Micro-Analysis - Preprocessing

This notebook prepares the data for hyperlocal analysis at the pincode level.

In [None]:
import pandas as pd
import sys
import os

# Put the directory two levels above the working directory on the import
# path; this must happen before the `utils` import below can resolve.
project_root = os.path.abspath('../../')
sys.path.append(project_root)

from utils.data_loader import (
    load_enrollment_data,
    load_demographic_data,
    load_biometric_data,
)

print("Libraries imported successfully.")

## 1. Load Datasets

In [None]:
# Load the three source datasets in one step. Each loader receives the same
# relative path ('../../') -- presumably the project root; verify against
# utils.data_loader.
enr_df, demo_df, bio_df = (
    load_enrollment_data('../../'),
    load_demographic_data('../../'),
    load_biometric_data('../../'),
)

print("Data loaded.")

## 2. Pincode Level Aggregation
We aggregate all metrics at the most granular level, the pincode: numeric columns are summed for each unique (pincode, state, district) combination.

In [None]:
# Columns that together identify one output row.
key_cols = ['pincode', 'state', 'district']
print("Aggregating data... (Optimized)")

# Sum every numeric column per key combination, once for each dataset.
# NOTE(review): with pandas' default groupby settings, rows whose key
# contains NaN are excluded from the result -- confirm that is intended.
pin_enr, pin_demo, pin_bio = (
    frame.groupby(key_cols).sum(numeric_only=True).reset_index()
    for frame in (enr_df, demo_df, bio_df)
)

# Outer-join the three aggregates so a key present in any dataset is kept,
# then replace the gaps left by the join with 0.
# NOTE(review): merge suffixes are only applied to column names that exist
# on both sides of a given merge, so a metric name shared by all three
# frames would leave the biometric copy unsuffixed -- confirm that metric
# names are distinct across the datasets.
pin_data = (
    pin_enr
    .merge(pin_demo, on=key_cols, how='outer', suffixes=('_enr', '_demo'))
    .merge(pin_bio, on=key_cols, how='outer', suffixes=('', '_bio'))
    .fillna(0)
)

print("Pincode level aggregation complete.")

## 3. Save Processed Data

In [None]:
# Write the merged pincode table as CSV, creating the output directory
# first if it does not exist yet. The row index is not written.
output_dir = '../../processed_data'
os.makedirs(output_dir, exist_ok=True)
pin_data.to_csv(f'{output_dir}/pincode_data.csv', index=False)
print("Processed data saved to processed_data/pincode_data.csv")