# 03. Update Behavior - Preprocessing

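This notebook prepares the data for analyzing update behavior. It aggregates enrollment, demographic-update, and biometric-update counts by state, district, and pincode, then computes the total number of updates and the update-to-enrollment ratio for each location. The result is saved to `processed_data/update_behavior_data.csv`.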

In [None]:
import pandas as pd
import sys
import os

# Make the directory two levels above the working directory importable for the
# `utils` import that follows. The membership check keeps the cell idempotent:
# re-running it does not append a duplicate sys.path entry each time.
PROJECT_ROOT = os.path.abspath('../../')
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
# Project-local loader functions. This import relies on the sys.path entry
# added above (the `utils` package is presumably located two directories up
# from this notebook -- confirm against the repository layout).
from utils.data_loader import load_enrollment_data, load_demographic_data, load_biometric_data

# Reached only if every import above succeeded.
print("Libraries imported successfully.")

## 1. Load Datasets

In [None]:
# Every loader receives the same relative base path, so spell it once.
data_root = '../../'

enr_df = load_enrollment_data(data_root)    # enrollment records
demo_df = load_demographic_data(data_root)  # demographic-update records
bio_df = load_biometric_data(data_root)     # biometric-update records

print("Data loaded.")

## 2. Calculate Update Metrics
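We aggregate each dataset by state, district, and pincode, then calculate the update-to-enrollment ratio as $\text{total updates} / (\text{total enrollments} + 1)$. The $+1$ in the denominator avoids division by zero for locations that have updates but no recorded enrollments.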

In [None]:
# Columns that identify one location; every dataset is grouped and merged on them.
key_cols = ['state', 'district', 'pincode']


def _aggregate_totals(df, keys, value_cols, total_col):
    """Sum `value_cols` per `keys` group and add their row-wise total.

    Only the requested columns are aggregated, so unrelated numeric columns
    in `df` are not summed needlessly.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing `keys` and `value_cols`.
    keys : list of str
        Grouping columns. Rows with a missing key are dropped by pandas'
        default `groupby(dropna=True)`.
    value_cols : list of str
        Count columns to sum within each group.
    total_col : str
        Name of the new column holding the sum of `value_cols`.

    Returns
    -------
    pandas.DataFrame
        One row per key combination with `keys`, `value_cols` and `total_col`.

    Raises
    ------
    KeyError
        If a key or value column is missing (or a value column is
        non-numeric and therefore dropped by `numeric_only=True`).
    """
    grouped = df.groupby(keys, as_index=False)[value_cols].sum(numeric_only=True)
    grouped[total_col] = grouped[value_cols].sum(axis=1)
    return grouped


print("Aggregating data... (Optimized for speed)")

enr_agg = _aggregate_totals(
    enr_df, key_cols, ['age_0_5', 'age_5_17', 'age_18_greater'], 'total_enrollments'
)
demo_agg = _aggregate_totals(
    demo_df, key_cols, ['demo_age_5_17', 'demo_age_17_'], 'total_demo_updates'
)
bio_agg = _aggregate_totals(
    bio_df, key_cols, ['bio_age_5_17', 'bio_age_17_'], 'total_bio_updates'
)

# Outer merges keep locations that appear in only some of the datasets.
update_data = pd.merge(
    enr_agg[key_cols + ['total_enrollments']],
    demo_agg[key_cols + ['total_demo_updates']],
    on=key_cols,
    how='outer',
)
update_data = pd.merge(
    update_data,
    bio_agg[key_cols + ['total_bio_updates']],
    on=key_cols,
    how='outer',
)

# A location missing from a dataset has NaN for that dataset's total; treat it
# as zero activity. Only the metric columns are filled so that key columns can
# never be silently overwritten with 0.
metric_cols = ['total_enrollments', 'total_demo_updates', 'total_bio_updates']
update_data[metric_cols] = update_data[metric_cols].fillna(0)

# Each aggregate has unique keys, so the merged frame must as well.
assert not update_data.duplicated(subset=key_cols).any(), "duplicate location keys after merge"

# Composite metrics
update_data['total_updates'] = update_data['total_demo_updates'] + update_data['total_bio_updates']
# The +1 in the denominator avoids division by zero for locations with no
# enrollments; note that it slightly understates the ratio everywhere.
update_data['update_to_enrollment_ratio'] = update_data['total_updates'] / (update_data['total_enrollments'] + 1)

print("Update behavior metrics calculated.")

## 3. Save Processed Data

In [None]:
# Single source of truth for the output location; the directory is derived
# from it and created if it does not exist yet.
output_csv = '../../processed_data/update_behavior_data.csv'
os.makedirs(os.path.dirname(output_csv), exist_ok=True)

# Write the metrics without the positional index column.
update_data.to_csv(output_csv, index=False)
print("Processed data saved to processed_data/update_behavior_data.csv")