In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the three cleaned input tables from the processed-data folder.
enrol = pd.read_csv("../data/processed/enrolment_clean.csv")
demo = pd.read_csv("../data/processed/demographic_clean.csv")
bio = pd.read_csv("../data/processed/biometric_clean.csv")

# Quick sanity check: one (rows, columns) tuple per line, in load order.
print(enrol.shape, demo.shape, bio.shape, sep="\n")


(500000, 7)
(500000, 6)
(500000, 6)


In [3]:
# Row-wise total: element-wise sum of the three age-bucket columns.
# A missing value in any bucket leaves the row total missing.
enrol['total_enrolment'] = (
    enrol['age_0_5']
    .add(enrol['age_5_17'])
    .add(enrol['age_18_greater'])
)


In [4]:
# Row-over-row percentage change of total_enrolment within each state.
# NOTE(review): pct_change follows the current row order of `enrol`; this is
# only a period-over-period growth if rows are already ordered by date within
# each state -- confirm against the upstream cleaning step.
enrol['monthly_growth'] = (
    enrol
    .groupby('state')['total_enrolment']
    .pct_change()
    # A previous value of 0 makes pct_change return +/-inf. Convert those to
    # NaN so the later 'mean' aggregation and fillna(0) treat them as missing
    # instead of propagating infinity into downstream features.
    .replace([np.inf, -np.inf], np.nan)
)


In [5]:
# Share of each age bucket in the row's total enrolment.
enrol['child_ratio'] = enrol['age_0_5'].div(enrol['total_enrolment'])
enrol['youth_ratio'] = enrol['age_5_17'].div(enrol['total_enrolment'])
enrol['adult_ratio'] = enrol['age_18_greater'].div(enrol['total_enrolment'])


In [9]:
# Collapse the demographic table to one row per (state, date), summing the
# update-pressure column.
# NOTE(review): 'demo_update_pressure' is not created in any visible cell;
# presumably it is already present in the loaded CSV -- confirm.
demo_state = demo.groupby(
    ['state', 'date'], as_index=False
)['demo_update_pressure'].sum()


In [10]:
# Collapse the biometric table to one row per (state, date), summing the
# update-pressure column.
# NOTE(review): 'biometric_update_pressure' is not created in any visible
# cell; presumably it is already present in the loaded CSV -- confirm.
bio_state = bio.groupby(
    ['state', 'date'], as_index=False
)['biometric_update_pressure'].sum()


In [11]:
# One row per (state, date): enrolment volume is summed, while growth and the
# age-share ratios are averaged across the rows in each group.
enrol_state = enrol.groupby(['state', 'date'], as_index=False).agg(
    total_enrolment=('total_enrolment', 'sum'),
    monthly_growth=('monthly_growth', 'mean'),
    child_ratio=('child_ratio', 'mean'),
    youth_ratio=('youth_ratio', 'mean'),
    adult_ratio=('adult_ratio', 'mean'),
)


In [12]:
# Attach demographic and biometric pressure to the enrolment aggregates.
# Left joins keep every (state, date) row of enrol_state; keys without a
# match in the other tables get missing pressure values.
features = pd.merge(
    pd.merge(enrol_state, demo_state, how='left', on=['state', 'date']),
    bio_state,
    how='left',
    on=['state', 'date'],
)


In [13]:
# Update pressure relative to enrolment volume for each (state, date) row.
# A zero total_enrolment is mapped to NaN before dividing so the ratio comes
# out missing instead of +/-inf; the downstream fillna(0) only handles NaN,
# so an infinite ratio would otherwise make the risk score infinite.
features['demo_pressure_ratio'] = (
    features['demo_update_pressure']
    / features['total_enrolment'].replace(0, np.nan)
)

features['biometric_pressure_ratio'] = (
    features['biometric_update_pressure']
    / features['total_enrolment'].replace(0, np.nan)
)


In [14]:
# Weights of the three components of the composite risk score (sum to 1.0).
GROWTH_WEIGHT = 0.4
DEMO_PRESSURE_WEIGHT = 0.3
BIOMETRIC_PRESSURE_WEIGHT = 0.3

# Weighted sum of absolute growth and the two pressure ratios.
# Missing components contribute 0. +/-inf is converted to NaN first because
# fillna(0) alone leaves infinities in place, and a single infinite component
# would make the whole score infinite.
features['risk_score'] = (
    features['monthly_growth']
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
    .abs() * GROWTH_WEIGHT
    + features['demo_pressure_ratio']
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0) * DEMO_PRESSURE_WEIGHT
    + features['biometric_pressure_ratio']
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0) * BIOMETRIC_PRESSURE_WEIGHT
)


In [15]:
# Persist the engineered feature table (without the index) for later stages.
features.to_csv("../data/processed/feature_dataset.csv", index=False)

# Completion marker shown in the cell output.
print("✅ FEATURE ENGINEERING COMPLETED SAFELY")


✅ FEATURE ENGINEERING COMPLETED SAFELY
