# Comparative and Trend Analysis

Objective:
- Compare Aadhaar enrolments with demographic and biometric updates
- Identify state-wise and year-wise differences
- Extract cross-dataset trends and operational insights


In [None]:
import pandas as pd
import matplotlib.pyplot as plt


In [None]:
# Load the three cleaned datasets (enrolments, demographic updates,
# biometric updates) from the processed-data folder, in that order.
enrol_df, demo_df, bio_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/processed/enrolment_cleaned.csv",
        "../data/processed/demographic_update_cleaned.csv",
        "../data/processed/biometric_update_cleaned.csv",
    )
)


In [None]:
# Add a per-row total to each dataset by summing its age-band columns.

# Enrolments: three age-band columns.
enrol_df["total_enrolment"] = (
    enrol_df["age_0_5"]
    .add(enrol_df["age_5_17"])
    .add(enrol_df["age_18_greater"])
)

# Demographic updates: two age-band columns.
demo_df["total_updates"] = demo_df["demo_age_5_17"].add(
    demo_df["demo_age_17_"]
)

# Biometric updates: two age-band columns.
bio_df["total_biometric_updates"] = bio_df["bio_age_5_17"].add(
    bio_df["bio_age_17_"]
)


In [None]:
# State-level totals: sum each dataset's per-row total within every state.
# Each result is a Series indexed by state.
state_enrol = enrol_df["total_enrolment"].groupby(enrol_df["state"]).sum()
state_demo = demo_df["total_updates"].groupby(demo_df["state"]).sum()
state_bio = (
    bio_df["total_biometric_updates"].groupby(bio_df["state"]).sum()
)


In [None]:
# Place the three state-level Series side by side, aligned on the state
# index; `keys` supplies the column label for each Series.
state_comparison = pd.concat(
    [state_enrol, state_demo, state_bio],
    axis=1,
    keys=[
        "total_enrolments",
        "total_demographic_updates",
        "total_biometric_updates",
    ],
)

# Preview the first rows of the combined table.
state_comparison.head()


In [None]:
# Ratio of all updates (demographic + biometric) to enrolments, per state.
state_comparison["update_to_enrolment_ratio"] = (
    state_comparison["total_demographic_updates"]
    .add(state_comparison["total_biometric_updates"])
    .div(state_comparison["total_enrolments"])
)

# Show the ten states with the highest ratio.
state_comparison.sort_values(
    by="update_to_enrolment_ratio", ascending=False
).head(10)


In [None]:
# Pick the five states with the largest enrolment totals.
top_states = state_comparison.sort_values(
    "total_enrolments", ascending=False
).head(5)

# Plot only the three count columns. Any other column on state_comparison
# (e.g. "update_to_enrolment_ratio" once the ratio cell has been run) is a
# dimensionless ratio, not a count: on a shared "Count" axis it would be an
# invisible bar with a misleading legend entry.
count_columns = [
    "total_enrolments",
    "total_demographic_updates",
    "total_biometric_updates",
]
top_states[count_columns].plot(kind="bar")
plt.title("Enrolment vs Updates (Top 5 States)")
plt.xlabel("State")
plt.ylabel("Count")
plt.tight_layout()
plt.show()


In [None]:
# Year-level totals: sum each dataset's per-row total within every year and
# keep the result ordered by year.
year_enrol = (
    enrol_df["total_enrolment"].groupby(enrol_df["year"]).sum().sort_index()
)
year_demo = (
    demo_df["total_updates"].groupby(demo_df["year"]).sum().sort_index()
)
year_bio = (
    bio_df["total_biometric_updates"]
    .groupby(bio_df["year"])
    .sum()
    .sort_index()
)


In [None]:
# Place the three yearly Series side by side, aligned on the year index;
# `keys` supplies the column label for each Series.
yearly_trends = pd.concat(
    [year_enrol, year_demo, year_bio],
    axis=1,
    keys=["enrolments", "demographic_updates", "biometric_updates"],
)

# Display the full year-by-year table.
yearly_trends


In [None]:
import matplotlib.pyplot as plt

# Line chart of the yearly totals, one line per column of yearly_trends.
# No explicit plt.figure() call: DataFrame.plot creates its own figure when
# no `ax` is passed, so a figure opened beforehand would stay empty and be
# rendered as a stray blank plot.
yearly_trends.plot(kind="line", marker="o")
plt.title("Year-wise Aadhaar Enrolment vs Update Trends")
plt.xlabel("Year")
plt.ylabel("Count")
plt.tight_layout()
plt.show()


In [None]:
from pathlib import Path

# Save the state-level comparison table as CSV.
# The output folder is created first because to_csv does not create missing
# parent directories and would otherwise raise on a fresh checkout.
summary_path = Path("../outputs/tables/state_level_comparative_summary.csv")
summary_path.parent.mkdir(parents=True, exist_ok=True)
state_comparison.to_csv(summary_path)


### Comparative Insights

- Demographic and biometric updates consistently exceed new enrolments
- States with mature Aadhaar coverage exhibit higher update-to-enrolment ratios (demographic plus biometric updates divided by enrolments)
- Enrolment volumes stabilise over time, while updates remain persistently high
- Update trends act as early indicators of population mobility and policy interventions
