In [1]:
import json
import os
import warnings
from collections import defaultdict

import pandas as pd

# Paths (relative, so they resolve against the notebook's working directory)
RAW_DATA_DIR = "data/raw/"  # folder the input CSV files are read from
NOTES_DIR = "data/notes/"  # folder the generated per-patient notes are written to

In [2]:
# Create the notes directory up front; this is a no-op when it is already there
os.makedirs(NOTES_DIR, exist_ok=True)

# Read every raw CSV into its own DataFrame; the file order below matches the
# order of the unpacking targets on the left-hand side
patients, conditions, encounters, medications, observations = (
    pd.read_csv(os.path.join(RAW_DATA_DIR, csv_name))
    for csv_name in (
        "patients.csv",
        "conditions.csv",
        "encounters.csv",
        "medications.csv",
        "observations.csv",
    )
)

In [3]:
# Build patient-related maps
def group_by_patient(df, column="DESCRIPTION"):
    """Collect the values of ``column`` for each patient, in row order.

    Args:
        df: DataFrame with a ``PATIENT`` column identifying the patient each
            row belongs to.
        column: Name of the column whose values are gathered.

    Returns:
        A ``defaultdict(list)`` mapping each ``PATIENT`` value to the list of
        its non-null ``column`` values, each converted with ``str``. Looking up
        a patient without entries yields an empty list. If ``column`` is not
        present in ``df`` the mapping is empty and a warning is emitted.

    Raises:
        KeyError: If ``df`` has rows but no ``PATIENT`` column.
    """
    patient_map = defaultdict(list)
    if df.empty:
        return patient_map

    patient_ids = df["PATIENT"]

    # A missing column used to fall back to "" for every row; "" is not null,
    # so each patient collected a list of empty strings and the caller's
    # `"...".join(values) or "<fallback>"` produced bare separators instead of
    # the fallback text. Treat a missing column as "no data" and say so.
    if column not in df.columns:
        warnings.warn(
            f"group_by_patient: column {column!r} not found "
            f"(available: {list(df.columns)}); returning no entries",
            stacklevel=2,
        )
        return patient_map

    # Walk the two columns directly instead of materialising a Series per row.
    for patient_id, desc in zip(patient_ids, df[column]):
        if pd.notna(desc):
            patient_map[patient_id].append(str(desc))
    return patient_map


# Per-patient lookup tables: PATIENT value -> list of text values from one column.
# NOTE(review): confirm that encounters.csv really has a column named "REASON";
# a missing column does not raise inside group_by_patient.
encounter_map = group_by_patient(encounters, column="REASON")
observation_map = group_by_patient(observations)
medication_map = group_by_patient(medications, column="DESCRIPTION")
condition_map = group_by_patient(conditions, column="DESCRIPTION")

In [5]:
# Generate clinical notes
REFERENCE_YEAR = 2025  # fixed rather than "today" so ages are the same on every run


def _join_or_default(values, separator, default, limit=None):
    """Join the non-blank ``values`` with ``separator``; return ``default`` if none remain.

    Blank entries are dropped first, otherwise a list such as ``["", ""]`` would
    join to a bare separator and the ``default`` text would never be used.
    ``limit`` caps how many entries are joined (``None`` means all of them).
    """
    kept = [text for text in values if text.strip()]
    return separator.join(kept[:limit]) or default


def _format_note(name, age, gender, reasons, conditions_text, obs_text, meds_text):
    """Render one clinical note as flush-left text ending in a newline.

    The note is assembled from a list of lines instead of an indented
    triple-quoted string so that the indentation of this source code does not
    end up inside the note itself.
    """
    return "\n".join(
        [
            f"Patient: {name}, {age}-year-old {gender}",
            "Chief Complaint:",
            reasons,
            "",
            "Medical History:",
            conditions_text,
            "",
            "Clinical Findings:",
            obs_text,
            "",
            "Medications:",
            meds_text,
            "",
            "Plan:",
            "Follow up as necessary.",
            "",
        ]
    )


for _, patient in patients.iterrows():
    patient_id = patient["Id"]
    gender = str(patient["GENDER"]).capitalize()
    # Only the leading four characters of BIRTHDATE (the year) are used, so the
    # age is a year difference and ignores month and day.
    birth_year = int(str(patient["BIRTHDATE"])[:4])
    age = REFERENCE_YEAR - birth_year
    # Pseudonymous display name built from the first six characters of the id
    name = f"Patient-{patient_id[:6]}"

    conditions_text = _join_or_default(
        condition_map[patient_id], ", ", "No known conditions."
    )
    meds_text = _join_or_default(
        medication_map[patient_id], ", ", "No medications recorded."
    )
    # Keep the note short: at most five observations and two encounter reasons
    obs_text = _join_or_default(
        observation_map[patient_id], "; ", "No significant lab results.", limit=5
    )
    reasons = _join_or_default(
        encounter_map[patient_id], ". ", "Routine checkup.", limit=2
    )

    note = _format_note(
        name, age, gender, reasons, conditions_text, obs_text, meds_text
    )

    # Save .json (one file per patient, named after the patient id); the
    # explicit encoding keeps the result independent of the platform default
    json_path = os.path.join(NOTES_DIR, f"{patient_id}.json")
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(
            {
                "patient_id": patient_id,
                "name": name,
                "age": age,
                "gender": gender,
                "note": note,
            },
            f,
            indent=2,
        )

print(f"✅ Generated {len(patients)} clinical notes in '{NOTES_DIR}'")

✅ Generated 140 clinical notes in 'data/notes/'
