In [3]:
import pandas as pd
import numpy as np

# Read the original cross-sectional dataset (path is relative to the
# notebook's working directory).
df = pd.read_csv("../Enhanced_Kurigram_Dataset.csv")

# Columns that are copied unchanged into every simulated visit.
static_cols = [
    'Age',
    'Previous_Complications',
    'Preexisting_Diabetes',
]

# Columns that receive a per-visit value in the simulation.
# NOTE(review): this list is not referenced anywhere in the visible code; the
# simulation loop names each of these columns explicitly instead.
dynamic_cols = [
    'Systolic_BP',
    'Diastolic',
    'BS',
    'Body_Temp',
    'BMI',
    'Heart_Rate',
    'Gestational_Diabetes',
    'Mental_Health',
    'Risk_Level',
]

# Accumulates one dict per simulated visit; turned into a DataFrame later.
synthetic_visits = list()

# Simulate a fixed schedule of visits for each mother (one source row per
# mother) and append one record per visit to `synthetic_visits`.
#
# All random draws come from a locally seeded generator so that re-running the
# cell reproduces exactly the same synthetic dataset. Change RANDOM_SEED to
# obtain a different realisation.
RANDOM_SEED = 42
N_VISITS = 9               # visits simulated per mother
GD_REDRAW_FROM_VISIT = 5   # first visit at which Gestational_Diabetes is re-drawn

rng = np.random.default_rng(RANDOM_SEED)

for _, base in df.iterrows():
    # The baseline row is only read, never modified, so no copy is needed.
    # Static features are identical across a mother's visits: read them once.
    static_values = {col: base[col] for col in static_cols}

    for visit in range(1, N_VISITS + 1):
        # Start from the static features, then add the visit number, so the
        # resulting column order is: static columns, 'Visit', dynamic columns.
        visit_data = dict(static_values)
        visit_data['Visit'] = visit

        # Continuous measurements: baseline plus a random offset. Each offset
        # is drawn independently per visit (it is not accumulated from the
        # previous visit); where a drift is present its mean grows linearly
        # with the visit number.
        # NOTE(review): units are not established by this code — confirm them
        # against the source dataset before relying on the magnitudes below.
        visit_data['Systolic_BP'] = base['Systolic_BP'] + rng.normal(visit * 1.5, 3)  # mean drift 1.5/visit, SD=3
        visit_data['Diastolic'] = base['Diastolic'] + rng.normal(visit * 1.0, 2)      # mean drift 1.0/visit, SD=2
        # NOTE(review): zero-mean noise with SD=5 is not bounded below, so the
        # simulated BS can be negative for low baselines — confirm the intended
        # scale of this noise.
        visit_data['BS'] = base['BS'] + rng.normal(0, 5)                              # no drift, SD=5
        visit_data['Body_Temp'] = base['Body_Temp'] + rng.normal(0, 0.2)              # no drift, SD=0.2
        # The per-visit rate is re-drawn at every visit, so BMI trends upward
        # but is not guaranteed to increase from one visit to the next.
        visit_data['BMI'] = base['BMI'] + visit * rng.uniform(0.2, 0.4)               # 0.2-0.4 per visit
        visit_data['Heart_Rate'] = base['Heart_Rate'] + rng.normal(visit * 1.0, 2)    # mean drift 1.0/visit, SD=2

        # Gestational diabetes: baseline value for the early visits, then an
        # independent draw (30% positive) at every later visit.
        # NOTE(review): the later draws ignore both the baseline and the
        # previous visit, so a positive status can revert and flip between
        # visits — confirm this is intended.
        if visit < GD_REDRAW_FROM_VISIT:
            visit_data['Gestational_Diabetes'] = base['Gestational_Diabetes']
        else:
            visit_data['Gestational_Diabetes'] = rng.choice([0, 1], p=[0.7, 0.3])

        # Mental health: keep the baseline value with probability 0.8,
        # otherwise use 1 - baseline.
        # NOTE(review): `1 - x` is a flip only if the column is coded 0/1 —
        # verify against the source dataset.
        if rng.random() > 0.2:
            visit_data['Mental_Health'] = base['Mental_Health']
        else:
            visit_data['Mental_Health'] = 1 - base['Mental_Health']

        # Risk_Level is carried over unchanged from the baseline row for every
        # visit (left for the RL agent to learn; can be simulated later).
        visit_data['Risk_Level'] = base['Risk_Level']

        synthetic_visits.append(visit_data)

# Final DataFrame: one row per simulated visit, built from the list of
# per-visit dicts collected above.
df_long = pd.DataFrame(synthetic_visits)

# Save to file in the current working directory; index=False keeps the
# DataFrame's row index out of the CSV.
df_long.to_csv("Synthetic_Longitudinal_ANC.csv", index=False)

# Preview
df_long.head(18)  # First 18 rows: with 9 visits per mother, all visits of the first two mothers


Unnamed: 0,Age,Previous_Complications,Preexisting_Diabetes,Visit,Systolic_BP,Diastolic,BS,Body_Temp,BMI,Heart_Rate,Gestational_Diabetes,Mental_Health,Risk_Level
0,22.0,1.0,1.0,1,97.032731,59.19481,18.212615,100.024295,18.368989,80.001625,0.0,1.0,0.0
1,22.0,1.0,1.0,2,94.255636,61.227159,6.494514,99.92679,18.550995,83.548431,0.0,1.0,0.0
2,22.0,1.0,1.0,3,91.008979,61.069317,20.050136,99.82606,18.72301,84.148081,0.0,1.0,0.0
3,22.0,1.0,1.0,4,102.280027,63.447907,11.43866,99.831661,19.216178,80.65897,0.0,1.0,0.0
4,22.0,1.0,1.0,5,96.936846,65.546385,-0.997395,99.922681,19.149522,84.973351,0.0,1.0,0.0
5,22.0,1.0,1.0,6,99.874412,65.774166,17.014759,100.061916,19.493585,84.150946,0.0,1.0,0.0
6,22.0,1.0,1.0,7,104.46248,64.821581,6.025812,99.85609,20.47211,86.16551,0.0,1.0,0.0
7,22.0,1.0,1.0,8,101.93565,69.181805,16.196737,100.061669,19.87076,86.276834,0.0,0.0,0.0
8,22.0,1.0,1.0,9,99.934785,70.484104,5.943976,100.094727,21.481387,89.590473,0.0,1.0,0.0
9,22.0,0.0,0.0,1,113.544528,72.025198,8.432318,97.884848,20.678479,75.193427,0.0,0.0,1.0
