In [5]:
import pandas as pd
import numpy as np
import random

# The script draws from both NumPy's global RNG and the standard-library
# `random` module, so each is seeded to make the generated data repeatable.
_SEED = 42
np.random.seed(_SEED)
random.seed(_SEED)

# Number of synthetic mother records to generate.
N = 5000

# Each state is listed next to its sampling weight so the two sequences cannot
# drift out of alignment. The weights total 1.15 rather than 1.0; they are
# passed to `random.choices`, which treats weights as relative.
_STATE_WEIGHT_PAIRS = [
    ('Abia', 0.02),
    ('Adamawa', 0.03),
    ('Akwa_Ibom', 0.03),
    ('Anambra', 0.03),
    ('Bauchi', 0.04),
    ('Bayelsa', 0.01),
    ('Benue', 0.04),
    ('Borno', 0.04),
    ('Cross_River', 0.02),
    ('Delta', 0.03),
    ('Ebonyi', 0.02),
    ('Edo', 0.02),
    ('Ekiti', 0.01),
    ('Enugu', 0.02),
    ('FCT', 0.01),
    ('Gombe', 0.02),
    ('Imo', 0.03),
    ('Jigawa', 0.04),
    ('Kaduna', 0.05),
    ('Kano', 0.08),
    ('Katsina', 0.06),
    ('Kebbi', 0.03),
    ('Kogi', 0.02),
    ('Kwara', 0.02),
    ('Lagos', 0.06),
    ('Nasarawa', 0.02),
    ('Niger', 0.04),
    ('Ogun', 0.03),
    ('Ondo', 0.02),
    ('Osun', 0.02),
    ('Oyo', 0.05),
    ('Plateau', 0.03),
    ('Rivers', 0.04),
    ('Sokoto', 0.04),
    ('Taraba', 0.02),
    ('Yobe', 0.03),
    ('Zamfara', 0.03),
]
states = [state_name for state_name, _ in _STATE_WEIGHT_PAIRS]
state_weights = [state_weight for _, state_weight in _STATE_WEIGHT_PAIRS]

# --- Value sets for the demographic columns ---
rural_urban = ['Rural', 'Urban']
education_levels = ['None', 'Primary', 'Secondary', 'Tertiary']
marital_statuses = ['Married', 'Single', 'Widowed', 'Divorced']
employment_statuses = ['Informal', 'Unemployed', 'Formal', 'Student']

# --- Value sets for the pregnancy-experience columns ---
antenatal_qualities = ['Poor', 'Fair', 'Good']
cravings_list = [
    'Yam', 'Pepper_Soup', 'Egusi_Soup', 'Plantain', 'Dates', 'Fruits', 'None',
    'Coke', 'Salad', 'Suya', 'Moi-Moi', 'Bole', 'Jollof_Rice', 'Oat',
]
aversions_list = [
    'Fish', 'Meat', 'Oil', 'None', 'Eggs', 'Groundnut', 'Fufu',
]
moods = ['Happy', 'Anxious', 'Sad', 'Calm', 'Excited']
symptoms_list = ['Nausea', 'Back_Pain', 'Fatigue', 'Swelling']

# --- Value sets for the delivery / postpartum columns ---
delivery_methods = ['Vaginal', 'C-Section', 'Assisted']
delivery_places = ['Hospital', 'Home', 'TBA']
complications_list = ['None', 'Hemorrhage', 'Infection', 'Eclampsia']
support_list = ['Partner', 'Mother', 'Nurse', 'Doula', 'TBA', 'Sister']
baby_healths = ['Healthy', 'Low_Weight', 'Other_Issues']
postnatal_care = ['Yes', 'No', 'Unknown']

# Demographic table: one row per mother, identified by a zero-padded id
# of the form 'M0001'.
_demo_ids = ['M{:04d}'.format(_idx) for _idx in range(1, N + 1)]

# Ages are normal(mean=25, sd=5) draws clamped to the 15-49 range, then
# converted to integers (the fractional part is discarded).
_raw_ages = np.random.normal(25, 5, N)
_ages = np.clip(_raw_ages, 15, 49).astype(int)

# Keys are inserted in the order the columns should appear in the CSV. The
# categorical columns all draw from the shared `random` stream, so the order
# of these statements also determines which values each column receives.
demographics = {}
demographics['mother_id'] = _demo_ids
demographics['age'] = _ages
demographics['state'] = random.choices(states, weights=state_weights, k=N)
demographics['rural_urban'] = random.choices(rural_urban, weights=[0.5, 0.5], k=N)
demographics['education_level'] = random.choices(
    education_levels, weights=[0.15, 0.15, 0.4, 0.3], k=N
)
demographics['marital_status'] = random.choices(
    marital_statuses, weights=[0.8, 0.15, 0.03, 0.02], k=N
)
demographics['employment_status'] = random.choices(
    employment_statuses, weights=[0.5, 0.2, 0.2, 0.1], k=N
)

demographics_df = pd.DataFrame(demographics)

def _sample_joined(options, min_items, max_items):
    """Return a comma-joined random selection of distinct entries from *options*.

    A count is drawn uniformly from ``min_items``..``max_items`` (inclusive)
    and that many entries are sampled without replacement. If the placeholder
    ``'None'`` is drawn together with a real entry it is dropped, so a row can
    never read e.g. ``'None,Yam'``. A zero-item draw yields an empty string.
    """
    # Draw the count first, then the sample, so the `random` stream is
    # consumed in the same order as an inline
    # `random.sample(options, k=random.randint(...))` expression.
    count = random.randint(min_items, max_items)
    picked = random.sample(options, k=count)
    if len(picked) > 1:
        # Sampling is without replacement, so at most one 'None' is removed
        # and at least one real entry always remains.
        picked = [item for item in picked if item != 'None']
    return ','.join(picked)


# Pregnancy-experience table: one row per mother, same ids as the demographic table.
pregnancy = {
    'mother_id': [f'M{i:04d}' for i in range(1, N + 1)],
    # Poisson draw with mean 4 for rural mothers and 3 for urban ones, with a
    # floor of 1 so every record has at least one pregnancy.
    'num_pregnancies': [
        max(1, int(np.random.poisson(4 if area == 'Rural' else 3)))
        for area in demographics['rural_urban']
    ],
    # 95% of rows get a week in 37-42, 5% a week in 34-36. Both candidate
    # weeks are drawn for every row before one is chosen.
    'weeks_at_delivery': [
        random.choices(
            [random.randint(37, 42), random.randint(34, 36)],
            weights=[0.95, 0.05],
        )[0]
        for _ in range(N)
    ],
    # Visit counts 0-8, weighted towards 3-5 visits; drawn in a single call.
    'antenatal_visits': random.choices(
        range(9),
        weights=[0.1, 0.1, 0.1, 0.15, 0.2, 0.15, 0.1, 0.05, 0.05],
        k=N,
    ),
    'antenatal_quality': random.choices(antenatal_qualities, weights=[0.2, 0.4, 0.4], k=N),
    # One or two items each; 'None' is never combined with a real item.
    'cravings': [_sample_joined(cravings_list, 1, 2) for _ in range(N)],
    'aversions': [_sample_joined(aversions_list, 1, 2) for _ in range(N)],
    'mood': random.choices(moods, weights=[0.2, 0.2, 0.2, 0.2, 0.2], k=N),
    # Zero to three symptoms; a zero draw leaves the cell as an empty string.
    'symptoms': [_sample_joined(symptoms_list, 0, 3) for _ in range(N)],
}
pregnancy_df = pd.DataFrame(pregnancy)

# Delivery / postpartum table: one row per mother, same ids as the other tables.
delivery = {
    'mother_id': [f'M{i:04d}' for i in range(1, N + 1)],
    'delivery_method': random.choices(delivery_methods, weights=[0.8, 0.15, 0.05], k=N),
    'place_of_delivery': random.choices(delivery_places, weights=[0.5, 0.4, 0.1], k=N),
    'birth_complications': random.choices(
        complications_list, weights=[0.8, 0.1, 0.05, 0.05], k=N
    ),
    # One or two distinct supporters per mother, comma-joined.
    'support_system': [
        ','.join(random.sample(support_list, k=random.randint(1, 2)))
        for _ in range(N)
    ],
    'postpartum_depression': random.choices(['Yes', 'No'], weights=[0.15, 0.85], k=N),
    # normal(mean=3.5, sd=0.5) clamped to 2.0-4.5 kg and rounded to one decimal.
    'baby_weight_kg': np.random.normal(3.5, 0.5, N).clip(2.0, 4.5).round(1),
    # Placeholder that reserves the column position; it is filled in below,
    # after the remaining columns have taken their random draws.
    'baby_health': [],
    'postnatal_care_access': random.choices(postnatal_care, weights=[0.6, 0.3, 0.1], k=N),
}

# Babies under 2.5 kg are always labelled 'Low_Weight'; the rest receive a
# weighted random label. Only the rows at or above 2.5 kg consume a random draw.
# NOTE(review): the weighted draw can still return 'Low_Weight' (5%) for a baby
# weighing 2.5 kg or more -- confirm whether that is intended.
delivery['baby_health'] = [
    'Low_Weight' if weight < 2.5
    else random.choices(baby_healths, weights=[0.9, 0.05, 0.05])[0]
    for weight in delivery['baby_weight_kg']
]

delivery_df = pd.DataFrame(delivery)

# Write each table to its own CSV in the working directory, without the
# pandas index column, then report the file names that were written.
_csv_outputs = [
    (demographics_df, 'nigeria_maternal_demographics_5000.csv'),
    (pregnancy_df, 'nigeria_pregnancy_experience_5000.csv'),
    (delivery_df, 'nigeria_delivery_postpartum_5000.csv'),
]
for _frame, _csv_name in _csv_outputs:
    _frame.to_csv(_csv_name, index=False)

print("CSV files generated: " + ", ".join(_written for _, _written in _csv_outputs))


CSV files generated: nigeria_maternal_demographics_5000.csv, nigeria_pregnancy_experience_5000.csv, nigeria_delivery_postpartum_5000.csv
