In [12]:
# --- Multi-week synthetic data generation (same-directory setup) ---

# 1) Imports + (optional) reload to ensure latest module code is used
import importlib, sys
from pathlib import Path
import pandas as pd

# Make the notebook's working directory importable so that guardian_synth_notebook.py,
# when it sits beside this notebook, can be found. (Usually not needed, but harmless.)
# The membership check keeps the cell idempotent: re-running it no longer appends
# a duplicate entry to sys.path each time.
notebook_dir = str(Path(".").resolve())
if notebook_dir not in sys.path:
    sys.path.append(notebook_dir)

import guardian_synth_notebook as gsm
# Reload so that edits made to guardian_synth_notebook.py since the first import
# are picked up without restarting the kernel.
importlib.reload(gsm)

# 2) Locations: the source CSV is read from, and the outputs are written to,
#    the same directory as this notebook.
INPUT  = "ai_behavior_profile_dataset_updated.csv"
OUTDIR = "."

# Sanity checks: report whether the input file is present (and where it resolves to),
# then make sure the output directory exists before anything is written into it.
input_file = Path(INPUT)
print("Input exists? ", input_file.exists(), " -> ", input_file.resolve())

output_folder = Path(OUTDIR)
output_folder.mkdir(parents=True, exist_ok=True)

# 3) Run the multi-week generator (Weeks 4–7, 100 rows each, sequential patientId).
#    The options are collected in one mapping and unpacked into the call, so the
#    configuration can be read (or printed) separately from the invocation.
#    NOTE(review): the meaning of each option is defined by guardian_synth_notebook;
#    the grouping comments below are inferred from the option names — confirm there.
generator_options = {
    # where to read from / write to
    "input_path": INPUT,
    "output_dir": OUTDIR,
    # which weeks to produce and how many rows per week
    "weeks": [4, 5, 6, 7],
    "rows_per_week": 100,
    # column names used in the dataset
    "start_col": 'observationStart',
    "end_col": 'observationEnd',
    "symptom_col": 'entitiesExtracted.symptoms',
    "procedures_col": 'entitiesExtracted.procedures',
    "medications_col": 'entitiesExtracted.medications',
    "patient_id_col": 'patientId',
    # fixed seed for the generator
    "seed": 888,
    # presumably observation length bounds (hours) and start hour-of-day bounds
    "hours_min": 23.0,
    "hours_max": 24.0,
    "start_hour_min": 7,
    "start_hour_max": 10,
    # presumably caps on generated procedures / medications per row
    "max_procs": 3,
    "max_meds": 2,
    "one_med_per_symptom": True,
    # patient id formatting (the preview output shows ids such as P0251)
    "patient_id_prefix": 'P',
    "patient_id_width": 4,
}
results = gsm.generate_week_series(**generator_options)

# 4) Check that each weekly CSV exists on disk, then show the first rows of Week 6.
out_root = Path(OUTDIR)
for week in (4, 5, 6, 7):
    csv_path = out_root / f"guardian_monitor_week{week}.csv"
    print(f"Week {week} saved:", csv_path.exists(), "->", csv_path.resolve())

# Only the id, the extracted-entity columns and the observation window are previewed.
preview_columns = [
    'patientId',
    'entitiesExtracted.symptoms',
    'entitiesExtracted.procedures',
    'entitiesExtracted.medications',
    'observationStart',
    'observationEnd',
]
print("\nWeek 6 preview:")
week6_preview = results[6][preview_columns].head()
display(week6_preview)

# 5) Verify durations are 23–24h
# Duration of each Week 6 observation in hours: (end - start) as seconds / 3600.
week6 = results[6]
dur_hours = (
    pd.to_datetime(week6['observationEnd']) - pd.to_datetime(week6['observationStart'])
).dt.total_seconds() / 3600
print("\nWeek 6 duration (hours):")
print(dur_hours.describe())

# Actually enforce the range instead of only printing summary statistics.
# The bounds mirror hours_min=23.0 / hours_max=24.0 passed to the generator.
# A one-second tolerance is allowed because the timestamps appear to be stored
# at whole-second resolution (TODO confirm in guardian_synth_notebook).
# NaN durations (missing or unparsable timestamps) fail `between` and are
# therefore reported as out of range as well.
tolerance_hours = 1 / 3600
out_of_range = dur_hours[~dur_hours.between(23.0 - tolerance_hours, 24.0 + tolerance_hours)]
assert out_of_range.empty, (
    f"{len(out_of_range)} Week 6 rows have durations outside 23–24h: "
    f"{out_of_range.head().to_dict()}"
)


Input exists?  True  ->  /Users/harshadamarla/Documents/T2 Docs/Team Project - A/Synthetic DataGenerator/ai_behavior_profile_dataset_updated.csv
Week 4 saved: True -> /Users/harshadamarla/Documents/T2 Docs/Team Project - A/Synthetic DataGenerator/guardian_monitor_week4.csv
Week 5 saved: True -> /Users/harshadamarla/Documents/T2 Docs/Team Project - A/Synthetic DataGenerator/guardian_monitor_week5.csv
Week 6 saved: True -> /Users/harshadamarla/Documents/T2 Docs/Team Project - A/Synthetic DataGenerator/guardian_monitor_week6.csv
Week 7 saved: True -> /Users/harshadamarla/Documents/T2 Docs/Team Project - A/Synthetic DataGenerator/guardian_monitor_week7.csv

Week 6 preview:


Unnamed: 0,patientId,entitiesExtracted.symptoms,entitiesExtracted.procedures,entitiesExtracted.medications,observationStart,observationEnd
0,P0251,"['headache', 'nausea']","Blood pressure check, Electrolytes (if persist...","Paracetamol, Ondansetron",2025-08-13 08:20:36,2025-08-14 07:45:23
1,P0252,"['fatigue', 'dizziness']",,,2025-08-14 07:23:40,2025-08-15 07:15:40
2,P0253,"['nausea', 'headache']","Electrolytes (if persistent), Blood pressure c...","Ondansetron, Ibuprofen",2025-08-05 07:56:00,2025-08-06 07:28:50
3,P0254,"['fatigue', 'dizziness']",,,2025-08-21 07:22:13,2025-08-22 06:22:38
4,P0255,"['headache', 'fatigue']",Neurological exam,Ibuprofen,2025-08-14 08:41:47,2025-08-15 07:48:39



Week 6 duration (hours):
count    100.000000
mean      23.486211
std        0.290689
min       23.004167
25%       23.242917
50%       23.466806
75%       23.706736
max       23.999444
dtype: float64


In [9]:
# Shell escape: run nbconvert to export SyntheticData.ipynb as a script
# (the captured output reports the result being written to SyntheticData.py).
!jupyter nbconvert --to script SyntheticData.ipynb


[NbConvertApp] Converting notebook SyntheticData.ipynb to script
[NbConvertApp] Writing 2223 bytes to SyntheticData.py
