# 🔄 Notebook 02: Life Event & Temporal Analysis

## AADHAAR INTELLIGENCE SYSTEM - LENS 1

---

### Objective
Analyze temporal patterns in Aadhaar services using **real UIDAI datasets**:
- Monthly/Weekly enrollment trends
- Age-wise update patterns
- Demographic vs Biometric update correlations
- Seasonal demand patterns

### Data Sources
1. **Enrolment Data** - age_0_5, age_5_17, age_18_greater
2. **Demographic Updates** - demo_age_5_17, demo_age_18_greater
3. **Biometric Updates** - bio_age_5_17, bio_age_18_greater

### Methods
- Time series decomposition
- Correlation analysis
- Trend visualization

### Key Insight
> "Identify seasonal patterns and age-wise service demand"

In [None]:
# ============================================
# CELL 1: Import Libraries
# ============================================

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from collections import Counter, defaultdict
import os
import glob
import warnings
warnings.filterwarnings('ignore')

# Visualization
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Confirm the environment is ready and stamp the run with the current local time.
analysis_started_at = datetime.now()
print("‚úÖ Libraries imported successfully")
print(f"üìÖ Analysis Date: {analysis_started_at:%Y-%m-%d %H:%M}")

In [None]:
# ============================================
# CELL 2: Load Real UIDAI Datasets
# ============================================

# Raw CSVs are read from DATA_DIR; every generated artefact is written under OUTPUT_DIR.
DATA_DIR, OUTPUT_DIR = '../data/', '../outputs/'

# Create the chart folder up front so later write_html calls have somewhere to write.
_charts_dir = f"{OUTPUT_DIR}/charts"
os.makedirs(_charts_dir, exist_ok=True)

def load_all_csvs(folder_path):
    """Load every CSV file found under a folder into one DataFrame.

    The folder is searched recursively, so CSV files in nested
    sub-folders are included as well.

    Args:
        folder_path: Directory to search for ``*.csv`` files.

    Returns:
        A DataFrame holding the rows of all files, concatenated in sorted
        path order under a fresh 0..n-1 index, or ``None`` when no CSV
        file is found.
    """
    pattern = os.path.join(folder_path, "**/*.csv")
    # glob yields matches in filesystem-dependent order; sorting keeps the
    # concatenated row order reproducible between runs and machines.
    csv_paths = sorted(glob.glob(pattern, recursive=True))
    if not csv_paths:
        return None
    frames = [pd.read_csv(csv_path) for csv_path in csv_paths]
    return pd.concat(frames, ignore_index=True)

print("üìä LOADING REAL UIDAI DATASETS...")
print("="*60)

# Load all 3 datasets; each one lives in its own sub-folder of DATA_DIR
df_enrolment = load_all_csvs(f"{DATA_DIR}enrolment/")
df_demographic = load_all_csvs(f"{DATA_DIR}demographic/")
df_biometric = load_all_csvs(f"{DATA_DIR}biometric/")

# load_all_csvs returns None when a folder holds no CSV files. Stop here with
# an explicit message rather than failing below with a TypeError on len(None).
missing_datasets = [
    folder
    for folder, frame in (
        ('enrolment', df_enrolment),
        ('demographic', df_demographic),
        ('biometric', df_biometric),
    )
    if frame is None
]
if missing_datasets:
    raise FileNotFoundError(
        f"No CSV files found under {DATA_DIR} for: {', '.join(missing_datasets)}"
    )

print(f"‚úÖ Enrolment Records: {len(df_enrolment):,}")
print(f"‚úÖ Demographic Records: {len(df_demographic):,}")
print(f"‚úÖ Biometric Records: {len(df_biometric):,}")

# Convert dates: all three datasets are parsed as day-month-year strings
for frame in (df_enrolment, df_demographic, df_biometric):
    frame['date'] = pd.to_datetime(frame['date'], format='%d-%m-%Y')

# Calculate totals: one enrolment count per record across the three age bands
df_enrolment['total_enrolments'] = df_enrolment['age_0_5'] + df_enrolment['age_5_17'] + df_enrolment['age_18_greater']

print(f"\nüìÖ Date Range: {df_enrolment['date'].min().date()} to {df_enrolment['date'].max().date()}")

In [None]:
# ============================================
# CELL 3: Monthly Trend Analysis
# ============================================

print("\nüìà MONTHLY ENROLLMENT TREND ANALYSIS")
print("="*60)

# Tag every record with the calendar month (a pandas Period) of its date
df_enrolment['year_month'] = df_enrolment['date'].dt.to_period('M')

# Sum each enrolment measure within every month
monthly_sum_cols = ['age_0_5', 'age_5_17', 'age_18_greater', 'total_enrolments']
monthly_enrolments = (
    df_enrolment
    .groupby('year_month')
    .agg({col: 'sum' for col in monthly_sum_cols})
    .reset_index()
)

# Replace the Period values with their string form for display and plotting
monthly_enrolments['year_month'] = monthly_enrolments['year_month'].astype(str)

# Headline figures: number of months, monthly average, and the busiest month
monthly_totals = monthly_enrolments['total_enrolments']
peak_month_label = monthly_enrolments.loc[monthly_totals.idxmax(), 'year_month']

print(f"\nüìä Monthly Summary:")
print(f"   Total Months: {len(monthly_enrolments)}")
print(f"   Avg Monthly Enrolments: {monthly_totals.mean():,.0f}")
print(f"   Peak Month: {peak_month_label}")
print(f"   Peak Enrolments: {monthly_totals.max():,}")

display(monthly_enrolments)

In [None]:
# ============================================
# CELL 4: Monthly Trend Visualization
# ============================================

# Two vertically stacked panels sharing the month axis:
# row 1 shows the overall total, row 2 the per-age-group bars.
fig_trend = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.1,
    subplot_titles=('Total Monthly Enrollments', 'Enrollment by Age Group'),
)

month_labels = monthly_enrolments['year_month']

# Row 1: total enrollments as a line with markers
total_line = go.Scatter(
    x=month_labels,
    y=monthly_enrolments['total_enrolments'],
    mode='lines+markers',
    name='Total',
    line=dict(color='#1B998B', width=3),
    marker=dict(size=8),
)
fig_trend.add_trace(total_line, row=1, col=1)

# Row 2: one bar series per age group (stacked through the layout's barmode)
age_group_bars = (
    ('age_0_5', '0-5 years', '#FF6B35'),
    ('age_5_17', '5-17 years', '#004E89'),
    ('age_18_greater', '18+ years', '#D62828'),
)
for column, legend_name, bar_color in age_group_bars:
    fig_trend.add_trace(
        go.Bar(
            x=month_labels,
            y=monthly_enrolments[column],
            name=legend_name,
            marker_color=bar_color,
        ),
        row=2, col=1
    )

trend_title = dict(
    text='<b>AADHAAR ENROLLMENT TRENDS</b><br><sup>Monthly Analysis by Age Group</sup>',
    x=0.5,
)
fig_trend.update_layout(
    title=trend_title,
    template='plotly_white',
    barmode='stack',
    showlegend=True,
    height=700,
)

# Persist the figure as a standalone interactive HTML file
fig_trend.write_html(f"{OUTPUT_DIR}/charts/02_monthly_trends.html")
print("üìä Monthly trend chart saved!")

In [None]:
# ============================================
# CELL 5: Day-of-Week Analysis
# ============================================

print("\nüìÖ DAY-OF-WEEK ENROLLMENT PATTERN")
print("="*60)

# Weekday number (used for ordering) and weekday name (used for labels)
enrol_dates = df_enrolment['date']
df_enrolment['day_of_week'] = enrol_dates.dt.dayofweek
df_enrolment['day_name'] = enrol_dates.dt.day_name()

# Sum every enrolment measure per weekday, ordered by weekday number
weekday_sum_cols = ['total_enrolments', 'age_0_5', 'age_5_17', 'age_18_greater']
daily_pattern = (
    df_enrolment
    .groupby(['day_of_week', 'day_name'])
    .agg({col: 'sum' for col in weekday_sum_cols})
    .reset_index()
    .sort_values('day_of_week')
)

print("\nüìä Enrollments by Day of Week:")
display(daily_pattern)

# Bar chart: one coloured bar per weekday
fig_daily = px.bar(
    daily_pattern,
    x='day_name',
    y='total_enrolments',
    color='day_name',
    color_discrete_sequence=px.colors.qualitative.Bold,
    title='<b>ENROLLMENTS BY DAY OF WEEK</b>',
)

# The x axis already names each day, so the legend is switched off
fig_daily.update_layout(
    template='plotly_white',
    showlegend=False,
    xaxis_title='Day of Week',
    yaxis_title='Total Enrollments',
)

fig_daily.write_html(f"{OUTPUT_DIR}/charts/02_daily_pattern.html")
print("üìä Daily pattern chart saved!")

In [None]:
# ============================================
# CELL 6: Age Group Comparison Across Services
# ============================================

print("\nüë∂ AGE GROUP ANALYSIS ACROSS SERVICES")
print("="*60)

# Enrolment totals per age band
enrol_totals = {
    label: df_enrolment[column].sum()
    for label, column in (
        ('0-5 years', 'age_0_5'),
        ('5-17 years', 'age_5_17'),
        ('18+ years', 'age_18_greater'),
    )
}

# Demographic updates: the 5-17 column is read by name; the adult column is
# taken as the first other column whose name contains '17'. A missing column
# contributes 0.
demo_cols = [c for c in df_demographic.columns if 'demo_age' in c]
if 'demo_age_5_17' in df_demographic.columns:
    demo_5_17 = df_demographic['demo_age_5_17'].sum()
else:
    demo_5_17 = 0
demo_18_col = [c for c in df_demographic.columns if '17' in c and c != 'demo_age_5_17']
if demo_18_col:
    demo_18 = df_demographic[demo_18_col[0]].sum()
else:
    demo_18 = 0

# Biometric updates: same lookup strategy as for the demographic data
if 'bio_age_5_17' in df_biometric.columns:
    bio_5_17 = df_biometric['bio_age_5_17'].sum()
else:
    bio_5_17 = 0
bio_18_col = [c for c in df_biometric.columns if '17' in c and c != 'bio_age_5_17']
if bio_18_col:
    bio_18 = df_biometric[bio_18_col[0]].sum()
else:
    bio_18 = 0

# Long-format table: one row per (service, age group) pair
comparison_rows = [
    ('Enrolment', '0-5', enrol_totals['0-5 years']),
    ('Enrolment', '5-17', enrol_totals['5-17 years']),
    ('Enrolment', '18+', enrol_totals['18+ years']),
    ('Demographic', '5-17', demo_5_17),
    ('Demographic', '18+', demo_18),
    ('Biometric', '5-17', bio_5_17),
    ('Biometric', '18+', bio_18),
]
services, age_groups, counts = zip(*comparison_rows)
age_comparison = pd.DataFrame({
    'Service': list(services),
    'Age Group': list(age_groups),
    'Count': list(counts),
})

print("\nüìä Service-wise Age Group Distribution:")
display(age_comparison)

# Grouped bars: services on the x axis, one bar per age group
age_group_colors = {'0-5': '#1B998B', '5-17': '#F77F00', '18+': '#D62828'}
fig_age = px.bar(
    age_comparison,
    x='Service',
    y='Count',
    color='Age Group',
    color_discrete_map=age_group_colors,
    barmode='group',
    title='<b>AGE GROUP DISTRIBUTION BY SERVICE TYPE</b>',
)

fig_age.update_layout(height=500, template='plotly_white')
fig_age.write_html(f"{OUTPUT_DIR}/charts/02_age_comparison.html")
print("üìä Age comparison chart saved!")

In [None]:
# ============================================
# CELL 7: State-wise Temporal Analysis
# ============================================

print("\nüìç STATE-WISE TEMPORAL ANALYSIS")
print("="*60)

# Monthly enrolment totals per state. Relies on the 'year_month' column that
# the monthly trend analysis adds to df_enrolment.
state_monthly = df_enrolment.groupby(['state', 'year_month']).agg({
    'total_enrolments': 'sum'
}).reset_index()

# Top 10 states by total enrollment
top_states = df_enrolment.groupby('state')['total_enrolments'].sum().nlargest(10).index.tolist()

# Take an explicit copy of the filtered rows: assigning a column on a
# boolean-filtered slice is chained assignment (SettingWithCopyWarning, which
# the global warnings filter would otherwise hide).
state_monthly_top = state_monthly[state_monthly['state'].isin(top_states)].copy()
state_monthly_top['year_month'] = state_monthly_top['year_month'].astype(str)

# Create heatmap: states as rows, months as columns; missing state/month
# combinations are shown as 0
pivot_data = state_monthly_top.pivot(index='state', columns='year_month', values='total_enrolments').fillna(0)

fig_heatmap = px.imshow(
    pivot_data.values,
    x=pivot_data.columns,
    y=pivot_data.index,
    color_continuous_scale='Viridis',
    aspect='auto',
    title='<b>STATE-WISE MONTHLY ENROLLMENT HEATMAP</b><br><sup>Top 10 States by Total Enrollments</sup>'
)

fig_heatmap.update_layout(
    xaxis_title='Month',
    yaxis_title='State',
    height=500,
    template='plotly_white'
)

fig_heatmap.write_html(f"{OUTPUT_DIR}/charts/02_state_heatmap.html")
print("üìä State heatmap saved!")

In [None]:
# ============================================
# CELL 8: Enrollment vs Updates Correlation
# ============================================

print("\nüîó ENROLLMENT vs UPDATES CORRELATION")
print("="*60)

# Aggregate by pincode for correlation
enrol_by_pin = df_enrolment.groupby('pincode')['total_enrolments'].sum().reset_index()

# Demographic total per record = row-wise sum of every 'demo_age*' column
demo_total_col = df_demographic.filter(like='demo_age').sum(axis=1)
df_demographic['total_demo'] = demo_total_col
demo_by_pin = df_demographic.groupby('pincode')['total_demo'].sum().reset_index()

# Biometric total per record = row-wise sum of every 'bio_age*' column
bio_total_col = df_biometric.filter(like='bio_age').sum(axis=1)
df_biometric['total_bio'] = bio_total_col
bio_by_pin = df_biometric.groupby('pincode')['total_bio'].sum().reset_index()

# Merge: inner joins keep only pincodes present in all three datasets
correlation_data = enrol_by_pin.merge(demo_by_pin, on='pincode', how='inner')
correlation_data = correlation_data.merge(bio_by_pin, on='pincode', how='inner')

# Calculate pairwise correlations between the three per-pincode totals
corr_enrol_demo = correlation_data['total_enrolments'].corr(correlation_data['total_demo'])
corr_enrol_bio = correlation_data['total_enrolments'].corr(correlation_data['total_bio'])
corr_demo_bio = correlation_data['total_demo'].corr(correlation_data['total_bio'])

print(f"\nüìä Correlation Analysis:")
print(f"   Enrolment ‚Üî Demographic: {corr_enrol_demo:.3f}")
print(f"   Enrolment ‚Üî Biometric: {corr_enrol_bio:.3f}")
print(f"   Demographic ‚Üî Biometric: {corr_demo_bio:.3f}")

# Scatter plot on at most 5000 pincodes. The sample is seeded so the saved
# chart is identical between runs; the correlations above always use every
# pincode.
scatter_sample = correlation_data.sample(
    min(5000, len(correlation_data)),
    random_state=42,
)
fig_corr = px.scatter(
    scatter_sample,
    x='total_enrolments',
    y='total_demo',
    size='total_bio',
    title=f'<b>ENROLLMENT vs UPDATES CORRELATION</b><br><sup>Correlation: {corr_enrol_demo:.2f}</sup>',
    labels={'total_enrolments': 'Total Enrollments', 'total_demo': 'Demographic Updates'},
    opacity=0.5
)

fig_corr.update_layout(template='plotly_white')
fig_corr.write_html(f"{OUTPUT_DIR}/charts/02_correlation.html")
print("üìä Correlation chart saved!")

In [None]:
# ============================================
# CELL 9: District-Level Event Patterns
# ============================================

print("\nüèôÔ∏è DISTRICT-LEVEL EVENT PATTERNS")
print("="*60)

# Enrolment totals per (state, district) pair
district_sum_cols = ['total_enrolments', 'age_0_5', 'age_5_17', 'age_18_greater']
district_events = (
    df_enrolment
    .groupby(['state', 'district'])
    .agg({col: 'sum' for col in district_sum_cols})
    .reset_index()
)

# Share of each age band in the district's total, as a percentage
for pct_col, count_col in (
    ('pct_0_5', 'age_0_5'),
    ('pct_5_17', 'age_5_17'),
    ('pct_18_plus', 'age_18_greater'),
):
    district_events[pct_col] = district_events[count_col] / district_events['total_enrolments'] * 100

# The 20 districts with the highest total enrollment
top_districts = district_events.nlargest(20, 'total_enrolments')

print(f"\nüèÜ Top 10 Districts by Enrollment:")
for _, district_row in top_districts.head(10).iterrows():
    # State names are cut to 20 characters to keep the listing compact
    print(f"   {district_row['district']}, {district_row['state'][:20]}: {district_row['total_enrolments']:,.0f}")

# Horizontal stacked bars: one bar per district, one segment per age band
fig_district = go.Figure()

age_band_traces = [
    ('pct_0_5', '0-5 Years', '#2ecc71'),
    ('pct_5_17', '5-17 Years', '#3498db'),
    ('pct_18_plus', '18+ Years', '#9b59b6'),
]
for age_col, name, color in age_band_traces:
    band_bar = go.Bar(
        x=top_districts[age_col],
        y=top_districts['district'],
        orientation='h',
        name=name,
        marker_color=color,
    )
    fig_district.add_trace(band_bar)

fig_district.update_layout(
    title='<b>TOP 20 DISTRICTS: AGE DISTRIBUTION</b>',
    barmode='stack',
    template='plotly_white',
    xaxis_title='Percentage',
    yaxis_title='District',
    height=600,
)

fig_district.write_html(f"{OUTPUT_DIR}/charts/02_district_events.html")
print("üìä District event patterns saved!")

In [None]:
# ============================================
# CELL 10: Seasonal Patterns Analysis
# ============================================

print("\nüìÖ SEASONAL PATTERNS ANALYSIS")
print("="*60)

# Create seasonal categories
def get_season(month):
    """Return the season label for a calendar month number.

    Dec-Feb is 'Winter', Mar-May 'Summer' and Jun-Sep 'Monsoon'.
    Every other value (including Oct and Nov) is labelled 'Autumn'.
    """
    season_months = (
        ('Winter', (12, 1, 2)),
        ('Summer', (3, 4, 5)),
        ('Monsoon', (6, 7, 8, 9)),
    )
    for season_name, months in season_months:
        if month in months:
            return season_name
    # Fallback for anything not listed above
    return 'Autumn'

# Label every record with its season. The month is taken from the parsed
# 'date' column: no 'month' column is created anywhere in this notebook, so
# reading df_enrolment['month'] fails with KeyError unless the raw CSVs
# happen to provide one.
df_enrolment['season'] = df_enrolment['date'].dt.month.apply(get_season)

# Seasonal aggregation
seasonal_data = df_enrolment.groupby('season').agg({
    'total_enrolments': 'sum',
    'age_0_5': 'sum',
    'age_5_17': 'sum',
    'age_18_greater': 'sum'
}).reset_index()

# Order seasons properly (calendar order instead of alphabetical)
season_order = ['Winter', 'Summer', 'Monsoon', 'Autumn']
seasonal_data['season'] = pd.Categorical(seasonal_data['season'], categories=season_order, ordered=True)
seasonal_data = seasonal_data.sort_values('season')

print("\nüìä Seasonal Enrollment Patterns:")
for _, row in seasonal_data.iterrows():
    print(f"   {row['season']}: {row['total_enrolments']:,.0f}")

# Radar chart for seasonal patterns.
# The angular labels come from the seasons actually present in the data, so
# r and theta stay aligned even when the date range does not cover all four
# seasons. The first point is repeated to draw a closed polygon.
radar_theta = seasonal_data['season'].astype(str).tolist()
radar_r = (seasonal_data['total_enrolments'] / 1000).tolist()
radar_theta += radar_theta[:1]
radar_r += radar_r[:1]

fig_radar = go.Figure()

fig_radar.add_trace(go.Scatterpolar(
    r=radar_r,
    theta=radar_theta,
    fill='toself',
    name='Enrollments (K)'
))

fig_radar.update_layout(
    polar=dict(radialaxis=dict(visible=True)),
    title='<b>SEASONAL ENROLLMENT PATTERNS</b>',
    template='plotly_white'
)

fig_radar.write_html(f"{OUTPUT_DIR}/charts/02_seasonal_patterns.html")
print("üìä Seasonal pattern chart saved!")

In [None]:
# ============================================
# CELL 11: Save Analysis Results
# ============================================

print("\nüíæ SAVING TEMPORAL ANALYSIS RESULTS")
print("="*60)

# Each export below writes a DataFrame built in an earlier cell. The names
# used here are the ones those cells define (monthly_enrolments,
# daily_pattern, pivot_data, age_comparison, ...).

# 1. Save monthly trends
monthly_enrolments.to_csv(f"{OUTPUT_DIR}/02_monthly_trends.csv", index=False)
print("‚úÖ Monthly trends saved")

# 2. Save day-of-week patterns
daily_pattern.to_csv(f"{OUTPUT_DIR}/02_day_of_week_patterns.csv", index=False)
print("‚úÖ Day-of-week patterns saved")

# 3. Save state temporal analysis (index kept: it holds the state names)
pivot_data.to_csv(f"{OUTPUT_DIR}/02_state_monthly_heatmap.csv")
print("‚úÖ State monthly heatmap saved")

# 4. Save age group comparison
age_comparison.to_csv(f"{OUTPUT_DIR}/02_age_group_trends.csv", index=False)
print("‚úÖ Age group trends saved")

# 5. Save correlation data
correlation_data.to_csv(f"{OUTPUT_DIR}/02_correlation_analysis.csv", index=False)
print("‚úÖ Correlation analysis saved")

# 6. Save district events
district_events.to_csv(f"{OUTPUT_DIR}/02_district_events.csv", index=False)
print("‚úÖ District event patterns saved")

# 7. Save seasonal patterns
seasonal_data.to_csv(f"{OUTPUT_DIR}/02_seasonal_patterns.csv", index=False)
print("‚úÖ Seasonal patterns saved")

print("\n" + "="*60)
print("üìÅ All outputs saved to:", OUTPUT_DIR)

In [None]:
# ============================================
# CELL 12: Summary & Key Findings
# ============================================

print("\n" + "="*70)
print("üìä LIFE EVENTS & TEMPORAL ANALYSIS - SUMMARY")
print("="*70)

# Compute each column total once instead of re-summing inside the f-string
enrol_total = df_enrolment['total_enrolments'].sum()
age_0_5_total = df_enrolment['age_0_5'].sum()
age_5_17_total = df_enrolment['age_5_17'].sum()
age_18_total = df_enrolment['age_18_greater'].sum()

# The chart count below is 7: the notebook writes seven HTML files into the
# charts folder (monthly trends, daily pattern, age comparison, state
# heatmap, correlation, district events, seasonal patterns).
print(f"""
üéØ KEY FINDINGS:
================

üìÖ TEMPORAL PATTERNS:
   ‚Ä¢ Total Records Analyzed: {len(df_enrolment):,}
   ‚Ä¢ Date Range: {df_enrolment['date'].min()} to {df_enrolment['date'].max()}
   ‚Ä¢ Unique States: {df_enrolment['state'].nunique()}
   ‚Ä¢ Unique Districts: {df_enrolment['district'].nunique()}

üë∂ AGE GROUP DISTRIBUTION:
   ‚Ä¢ 0-5 Years: {age_0_5_total:,.0f} ({age_0_5_total/enrol_total*100:.1f}%)
   ‚Ä¢ 5-17 Years: {age_5_17_total:,.0f} ({age_5_17_total/enrol_total*100:.1f}%)
   ‚Ä¢ 18+ Years: {age_18_total:,.0f} ({age_18_total/enrol_total*100:.1f}%)

üìà CORRELATIONS:
   ‚Ä¢ Enrolment ‚Üî Demographic Updates: {corr_enrol_demo:.3f}
   ‚Ä¢ Enrolment ‚Üî Biometric Updates: {corr_enrol_bio:.3f}
   ‚Ä¢ Demographic ‚Üî Biometric: {corr_demo_bio:.3f}

üìÅ OUTPUTS GENERATED:
   ‚Ä¢ 7 CSV files with analysis results
   ‚Ä¢ 7 Interactive HTML charts
""")

print("="*70)
print("‚úÖ TEMPORAL ANALYSIS COMPLETE!")
print("="*70)