In [1]:
import os
import pandas as pd
import json

In [74]:
# Directory holding the cleaned HIS CSV exports. The location is specific to one
# machine, so it can be overridden with the LLAMA3_DATA_DIR environment variable;
# the original hard-coded path is kept as the default.
data_path = os.environ.get(
    "LLAMA3_DATA_DIR",
    "C:/Users/reema.alhenaki/Desktop/llama3_Data/data/cleaned",
)


In [75]:
def _read_cleaned_csv(csv_name):
    """Load one CSV file located directly under ``data_path`` into a DataFrame."""
    return pd.read_csv(os.path.join(data_path, csv_name))


# One DataFrame per HIS export; the files are read in this order.
patient_df = _read_cleaned_csv("HIS_Patient.csv")
vitals_df = _read_cleaned_csv("HIS_PatientVitalSigns.csv")
appointments_df = _read_cleaned_csv("HIS_Appointment.csv")
docOrders_df = _read_cleaned_csv("HIS_DoctorOrder.csv")

In [76]:
def _text_or_default(value, default):
    """Return ``value`` as stripped text, or ``default`` when it is null or blank."""
    if pd.isnull(value):
        return default
    text = str(value).strip()
    return text if text else default


def summarize_patient_info(row):
    """Build a one-paragraph English summary of a single patient record.

    Parameters
    ----------
    row : pandas.Series or dict-like
        Patient record. The fields read are ``FirstName``, ``MiddleName``,
        ``LastName``, ``Gender``, ``DateofBirth``, ``NationalityID``,
        ``FirstVisit`` and ``RegisteredDoctor``. Any of them may be absent or
        null; a placeholder is used instead of raising or printing "nan".

    Returns
    -------
    str
        Two sentences: demographics, then first visit and registered doctor.
    """
    # Name: join only the parts that are actually present so a missing middle
    # name does not show up as the literal text "nan".
    name_parts = [
        _text_or_default(row.get(field), "")
        for field in ("FirstName", "MiddleName", "LastName")
    ]
    full_name = " ".join(part for part in name_parts if part) or "Unknown"

    # Gender: code 1 is reported as male and any other recorded code as female
    # (as before); a missing code is no longer silently reported as female.
    gender_code = row.get("Gender")
    if pd.isnull(gender_code):
        gender = "unknown-gender"
    else:
        gender = "male" if gender_code == 1 else "female"

    # DOB and Age: unparseable or missing dates coerce to a null value.
    dob = pd.to_datetime(row.get("DateofBirth"), errors='coerce')
    today = pd.Timestamp.now()
    if pd.notnull(dob):
        # Subtract one year when this year's birthday has not happened yet.
        age = today.year - dob.year - ((today.month, today.day) < (dob.month, dob.day))
    else:
        age = "unknown"

    # Nationality mapping (code -> adjective used in the sentence)
    nationality_map = {
        "SAU": "Saudi",
        "EGY": "Egyptian",
        "IND": "Indian",
        # Add more as needed
    }
    nationality_adj = nationality_map.get(row.get("NationalityID", "UNK"), "Unknown")

    first_visit = _text_or_default(row.get("FirstVisit"), "an unknown date")
    registered_doctor = _text_or_default(row.get("RegisteredDoctor"), "unknown")

    # Final summary
    summary = (
        f"{full_name} is a {age}-year-old {nationality_adj} {gender} patient. "
        f"The first recorded visit was on {first_visit} and the assigned doctor was with the ID: {registered_doctor}."
    )
    return summary


In [77]:
def _vital_or_na(row, column):
    """Return the value under ``column`` in ``row``, or 'N/A' when the column
    is absent or the value is null."""
    reading = row.get(column)
    return 'N/A' if pd.isnull(reading) else reading


def summarize_vital_signs(vitals_df):
    """Summarize vital signs for the patient.

    Parameters
    ----------
    vitals_df : pandas.DataFrame
        One row per vital-signs observation. Columns read: ``CreatedOn``,
        ``TemperatureCelcius``, ``PulseBeatPerMinute``,
        ``RespirationBeatPerMinute``, ``BloodPressureHigher``,
        ``BloodPressureLower``, ``SAO2``, ``PainScore``, ``PainLocation`` and
        ``PainCharacter``. Missing columns and null cells are reported as
        'N/A' (or omitted, for the two optional pain descriptions).

    Returns
    -------
    list of str
        One sentence group per row, in row order; empty for an empty frame.
    """
    summaries = []
    for _, row in vitals_df.iterrows():
        obs_date = pd.to_datetime(row.get('CreatedOn', ''), errors='coerce')
        if pd.isnull(obs_date):
            obs_date = 'an unknown date'
        else:
            obs_date = obs_date.date()

        summary = f"On {obs_date}, the vital signs were: "
        summary += f"Temperature {_vital_or_na(row, 'TemperatureCelcius')}°C, "
        summary += f"Pulse {_vital_or_na(row, 'PulseBeatPerMinute')} bpm, "
        summary += f"Respiration {_vital_or_na(row, 'RespirationBeatPerMinute')} breaths/min, "
        summary += (
            f"Blood Pressure {_vital_or_na(row, 'BloodPressureHigher')}"
            f"/{_vital_or_na(row, 'BloodPressureLower')} mmHg, "
        )
        summary += f"Oxygen Saturation {_vital_or_na(row, 'SAO2')}%, "
        summary += f"Pain Score {_vital_or_na(row, 'PainScore')}. "

        # Read each optional field once: the value is None when the column is
        # absent, so the null check also covers a missing column instead of
        # falling through to a failing row[...] lookup.
        pain_location = row.get('PainLocation')
        if pd.notnull(pain_location) and str(pain_location).strip():
            summary += f"Pain located at {pain_location}. "
        pain_character = row.get('PainCharacter')
        if pd.notnull(pain_character) and str(pain_character).strip():
            summary += f"Pain described as {pain_character}. "

        summaries.append(summary.strip())
    return summaries


In [78]:
def summarize_appointments(appointments_df):
    """Summarize each appointment row as a short English description.

    Parameters
    ----------
    appointments_df : pandas.DataFrame
        One row per appointment. Columns read: ``AppointmentDate``,
        ``VisitType``, ``ClinicID``, ``DoctorID``, ``Notes`` and ``IsVirtual``.
        Missing columns and null cells are tolerated.

    Returns
    -------
    list of str
        One summary per row, in row order; empty for an empty frame.
    """
    # Numeric visit-type codes -> label; unmapped codes fall back to "Type <code>".
    visit_type_map = {
        1: "Consultation",
        2: "Follow-up",
        3: "Emergency",
        4: "Routine Checkup",
        5: "Walk-in",
        6: "Inpatient",
        7: "Telemedicine"
    }

    def _field_or_na(row, column):
        """Value under ``column``, or 'N/A' when the column is absent or null."""
        value = row.get(column)
        return 'N/A' if pd.isnull(value) else value

    summaries = []
    for _, row in appointments_df.iterrows():
        app_date = pd.to_datetime(row.get('AppointmentDate', ''), errors='coerce')
        if pd.isnull(app_date):
            app_date_str = 'an unknown date'
        else:
            app_date_str = app_date.date()

        visit_type_num = row.get('VisitType')
        if pd.isnull(visit_type_num):
            visit_type_desc = "an unspecified visit type"
        else:
            visit_type_desc = visit_type_map.get(visit_type_num, f"Type {visit_type_num}")

        summary = f"The appointment was on {app_date_str} for {visit_type_desc}."
        summary += (
            f" It took place at Clinic ID {_field_or_na(row, 'ClinicID')}"
            f" with Doctor ID {_field_or_na(row, 'DoctorID')}."
        )

        # Notes may be null or non-string (e.g. parsed as a number), so convert
        # to text before checking for blank content.
        notes = row.get('Notes', '')
        if pd.notnull(notes) and str(notes).strip():
            summary += f" Notes: {notes}."

        # NaN is truthy in Python, so a missing flag must be excluded explicitly
        # or every appointment without a recorded value would be called virtual.
        # NOTE(review): assumes the flag is boolean/numeric; a text column such
        # as "False" would still be truthy here. Confirm against the CSV.
        is_virtual = row.get('IsVirtual', False)
        if pd.notnull(is_virtual) and bool(is_virtual):
            summary += " PS. This was a virtual appointment."

        summaries.append(summary.strip())
    return summaries


In [79]:
def summarize_doctor_notes(doctor_notes_df):
    """Summarize doctor's notes for the patient.

    Parameters
    ----------
    doctor_notes_df : pandas.DataFrame
        One row per doctor order. Columns read: ``ActualOrderDate``,
        ``OrderNotes`` and ``NursingNotes``. Missing columns and null cells
        are tolerated.

    Returns
    -------
    list of str
        One summary per row, in row order; empty for an empty frame.
    """
    def _has_text(value):
        """True when ``value`` is non-null and not blank once converted to text."""
        return bool(pd.notnull(value)) and str(value).strip() != ""

    summaries = []
    for _, row in doctor_notes_df.iterrows():
        note_date = pd.to_datetime(row.get('ActualOrderDate', ''), errors='coerce')
        if pd.isnull(note_date):
            note_date = 'an unknown date'
        else:
            note_date = note_date.date()

        order_note = row.get('OrderNotes', '')
        nurse_note = row.get('NursingNotes', '')

        # A missing or blank order note is shown as N/A rather than "nan".
        if not _has_text(order_note):
            order_note = 'N/A'

        summary = f"Doctor's note on {note_date}: {order_note}. "
        # _has_text converts to str first, so non-string notes (e.g. numbers)
        # no longer fail on .strip().
        if _has_text(nurse_note):
            summary += f"Nursing note: {nurse_note}. "

        summaries.append(summary.strip())
    return summaries


In [80]:
def generate_patient_summary(patient_df, vitals_df, appointments_df, doctor_notes_df):
    """Assemble one summary profile per row of ``patient_df``.

    For every patient row, the rows of the three related tables whose
    ``PatientID`` equals that patient's are selected and passed to the
    matching ``summarize_*`` function.

    Returns a list of dicts with the keys ``PatientID``, ``BasicInfo``,
    ``VitalsSummary``, ``AppointmentsSummary`` and ``DoctorNotesSummary``.
    """
    def _rows_for(table, pid):
        # Boolean-mask selection of the rows belonging to one patient.
        return table[table['PatientID'] == pid]

    profiles = []
    for _, info_row in patient_df.iterrows():
        pid = info_row['PatientID']

        # Slice the related tables first, then summarise each slice.
        vitals_rows = _rows_for(vitals_df, pid)
        appointment_rows = _rows_for(appointments_df, pid)
        note_rows = _rows_for(doctor_notes_df, pid)

        basic = summarize_patient_info(info_row)
        vitals = summarize_vital_signs(vitals_rows)
        visits = summarize_appointments(appointment_rows)
        notes = summarize_doctor_notes(note_rows)

        profiles.append(
            {
                "PatientID": pid,
                "BasicInfo": basic,
                "VitalsSummary": vitals,
                "AppointmentsSummary": visits,
                "DoctorNotesSummary": notes,
            }
        )

    return profiles


In [82]:
def _records_without_id(table, pid):
    """Return the rows of ``table`` for patient ``pid`` as a list of dicts,
    with the redundant "PatientID" key removed from each record."""
    records = table[table["PatientID"] == pid].to_dict(orient="records")
    for record in records:
        record.pop("PatientID", None)
    return records


# Patient IDs to export. No earlier cell in this notebook defines them, so they
# are derived from the patient table to let the cell run on a fresh kernel.
# .tolist() converts NumPy scalars to plain Python values, which json.dump needs
# for the "PatientID" field below.
patient_ids = patient_df["PatientID"].dropna().unique().tolist()

all_patients_data = []

for pid in patient_ids:
    # Extract per-patient records from every table
    patient_info = _records_without_id(patient_df, pid)
    vital_signs = _records_without_id(vitals_df, pid)
    appointments = _records_without_id(appointments_df, pid)
    doctor_orders = _records_without_id(docOrders_df, pid)

    # Summaries
    if patient_info:
        basic_info_summary = summarize_patient_info(pd.Series(patient_info[0]))
    else:
        # An ID with no row in the patient table has nothing to summarise;
        # use a placeholder instead of summarising an empty Series.
        basic_info_summary = "No patient information available."
    vitals_summary = summarize_vital_signs(pd.DataFrame(vital_signs))
    appointments_summary = summarize_appointments(pd.DataFrame(appointments))
    doctor_notes_summary = summarize_doctor_notes(pd.DataFrame(doctor_orders))

    full_summary = {
        "BasicInfoSummary": basic_info_summary,
        "VitalsSummary": vitals_summary,
        "AppointmentsSummary": appointments_summary,
        "DoctorNotesSummary": doctor_notes_summary
    }

    patient_data = {
        "PatientID": pid,
        "PatientInfo": patient_info[0] if patient_info else {},
        "VitalSigns": vital_signs,
        "Appointments": appointments,
        "DoctorOrders": doctor_orders,
        "Summary": full_summary
    }

    all_patients_data.append(patient_data)


# Save to JSON
# NOTE(review): this absolute path sits under a different user directory than
# data_path ("/Users/reema/..." vs "C:/Users/reema.alhenaki/...") — confirm the
# intended location.
output_path = "/Users/reema/Desktop/llama3_Data/data/json/patient_summaries.json"
# Create the target folder if needed so open() does not fail on a missing directory.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# NOTE: json.dump's default allow_nan=True writes NaN cells as the bare token
# NaN, which strict JSON parsers reject.
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(all_patients_data, f, indent=2, ensure_ascii=False)

print(f"✅ Summaries generated and saved to {output_path}")


✅ Summaries generated and saved to /Users/reema/Desktop/llama3_Data/data/patient_summaries.json


  dob = pd.to_datetime(row["DateofBirth"], errors='coerce')
  dob = pd.to_datetime(row["DateofBirth"], errors='coerce')
  dob = pd.to_datetime(row["DateofBirth"], errors='coerce')
  dob = pd.to_datetime(row["DateofBirth"], errors='coerce')
  dob = pd.to_datetime(row["DateofBirth"], errors='coerce')
  dob = pd.to_datetime(row["DateofBirth"], errors='coerce')
  dob = pd.to_datetime(row["DateofBirth"], errors='coerce')
  dob = pd.to_datetime(row["DateofBirth"], errors='coerce')
  dob = pd.to_datetime(row["DateofBirth"], errors='coerce')
  dob = pd.to_datetime(row["DateofBirth"], errors='coerce')
