In [1]:
import pandas as pd
import os

In [2]:
def load_and_merge_csvs(folder_path):
    """Read every ``.csv`` file in ``folder_path`` and stack them into one DataFrame.

    Parameters
    ----------
    folder_path : str or path-like
        Directory scanned (non-recursively) for entries whose name ends in ".csv".

    Returns
    -------
    pandas.DataFrame
        Rows of all files concatenated in file-name order, with a fresh
        0..n-1 index (``ignore_index=True``).

    Raises
    ------
    ValueError
        If the folder contains no ".csv" files.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the merged
    # row order reproducible across machines and kernel restarts.
    files = sorted(
        os.path.join(folder_path, f)
        for f in os.listdir(folder_path)
        if f.endswith(".csv")
    )

    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate" when the folder has nothing to load.
    if not files:
        raise ValueError(f"No CSV files found in {folder_path!r}")

    df_list = [pd.read_csv(f) for f in files]
    return pd.concat(df_list, ignore_index=True)



# Loading Enrolment Data :-

In [3]:
# Combine all CSV files found in the raw enrolment folder into one DataFrame.
enrolment_df = load_and_merge_csvs("../data/raw/enrolment/")

In [4]:
# Preview the first five rows of the merged enrolment data.
enrolment_df.head()

Unnamed: 0,date,state,district,pincode,age_0_5,age_5_17,age_18_greater
0,31-12-2025,Karnataka,Bidar,585330,2,3,0
1,31-12-2025,Karnataka,Bidar,585402,6,0,0
2,31-12-2025,Karnataka,Bidar,585413,1,0,0
3,31-12-2025,Karnataka,Bidar,585418,1,2,0
4,31-12-2025,Karnataka,Bidar,585421,4,3,0


Checking the size of the data:

In [5]:
# (rows, columns) of the merged enrolment data.
enrolment_df.shape

(1006029, 7)

Checking names of columns

In [6]:
# Column labels of the merged enrolment data.
enrolment_df.columns

Index(['date', 'state', 'district', 'pincode', 'age_0_5', 'age_5_17',
       'age_18_greater'],
      dtype='object')

# Loading Biometric Data :-

In [7]:
# Combine all CSV files found in the raw biometric folder into one DataFrame.
biometric_df = load_and_merge_csvs("../data/raw/biometric/")

In [8]:
# Preview the first five rows of the merged biometric data.
biometric_df.head()

Unnamed: 0,date,state,district,pincode,bio_age_5_17,bio_age_17_
0,01-03-2025,Haryana,Mahendragarh,123029,280,577
1,01-03-2025,Bihar,Madhepura,852121,144,369
2,01-03-2025,Jammu and Kashmir,Punch,185101,643,1091
3,01-03-2025,Bihar,Bhojpur,802158,256,980
4,01-03-2025,Tamil Nadu,Madurai,625514,271,815


In [9]:
# (rows, columns) of the merged biometric data.
biometric_df.shape


(1861108, 6)

In [10]:
# Column labels of the merged biometric data.
biometric_df.columns


Index(['date', 'state', 'district', 'pincode', 'bio_age_5_17', 'bio_age_17_'], dtype='object')

# Loading Demographic Data:-

In [11]:
# Combine all CSV files found in the raw demographic folder into one DataFrame.
demographic_df = load_and_merge_csvs("../data/raw/demographic/")


In [12]:
# Preview the first five rows of the merged demographic data.
demographic_df.head()


Unnamed: 0,date,state,district,pincode,demo_age_5_17,demo_age_17_
0,01-03-2025,Uttar Pradesh,Gorakhpur,273213,49,529
1,01-03-2025,Andhra Pradesh,Chittoor,517132,22,375
2,01-03-2025,Gujarat,Rajkot,360006,65,765
3,01-03-2025,Andhra Pradesh,Srikakulam,532484,24,314
4,01-03-2025,Rajasthan,Udaipur,313801,45,785


In [13]:
# (rows, columns) of the merged demographic data.
demographic_df.shape

(2071700, 6)

# Saving the merged CSVs

In [14]:
# Persist each merged DataFrame as an interim CSV (index column omitted).

# DataFrame.to_csv does not create missing parent folders, so make sure the
# interim directory exists before writing (no-op when it is already there).
os.makedirs("../data/processed/interim", exist_ok=True)

biometric_df.to_csv(
    "../data/processed/interim/biometric_raw_merged.csv",
    index=False
)

enrolment_df.to_csv(
    "../data/processed/interim/enrolment_raw_merged.csv",
    index=False
)

demographic_df.to_csv(
    "../data/processed/interim/demographic_raw_merged.csv",
    index=False
)