# Processing

In [1]:
# import libraries

import math
import random

import pandas as pd

In [2]:
# folder that holds the health records CSV files (relative path)

data_folder = "../../data/health_records"

In [3]:
# load electronic health records data: one CSV file per record type,
# each read from `data_folder` into its own DataFrame

def _read_table(name : str) -> pd.DataFrame:
    '''
    Reads `<data_folder>/<name>.csv` into a DataFrame
    '''
    return pd.read_csv(f"{data_folder}/{name}.csv")

patients = _read_table('patients')

allergies = _read_table('allergies')
careplans = _read_table('careplans')
conditions = _read_table('conditions')
devices = _read_table('devices')
encounters = _read_table('encounters')
imagings = _read_table('imagings')
immunizations = _read_table('immunizations')
medications = _read_table('medications')
observations = _read_table('observations')
procedures = _read_table('procedures')

In [4]:
def clean_nans(obj : dict | list | float) -> dict | list | str:
    '''
    Replaces all NaN values with an empty string
    '''
    if isinstance(obj, dict):
        return {k: clean_nans(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [clean_nans(item) for item in obj]
    elif isinstance(obj, float) and math.isnan(obj):
        return ''
    else:
        return obj

In [5]:
def _patient_values(table : pd.DataFrame, patient_id : str, columns : str | list, key : str = 'PATIENT') -> list:
    '''
    Returns the values of `columns` for the rows of `table` whose `key` column equals `patient_id`

    A single column name gives a flat list of values; a list of column
    names gives one inner list per matching row.
    '''
    selected = table.loc[table[key] == patient_id, columns]
    if isinstance(selected, pd.Series):
        return selected.tolist()
    return selected.values.tolist()


def get_health_records(patient_id : str) -> dict:
    '''
    Gather all health records for a given patient

    Parameters:
        patient_id: value matched against `patients['Id']` and against the
            `PATIENT` column of every other table.

    Returns:
        A dict with, in this order:
          - 'patient': dict of Id, BIRTHDATE, MARITAL, RACE, ETHNICITY, GENDER
            taken from the first matching row of `patients`
          - 'allergies', 'careplans', 'conditions', 'devices', 'encounters',
            'immunizations', 'medications', 'procedures': lists of DESCRIPTION values
          - 'imagings': list of [BODYSITE_DESCRIPTION, MODALITY_DESCRIPTION]
          - 'observations': list of [DESCRIPTION, VALUE, UNITS]
        NaN values are replaced with '' by `clean_nans`.

    Raises:
        IndexError: if no row of `patients` has the given Id.
    '''
    matches = patients.loc[
        patients['Id'] == patient_id,
        ['Id', 'BIRTHDATE', 'MARITAL', 'RACE', 'ETHNICITY', 'GENDER'],
    ].to_dict(orient='records')
    if not matches:
        # same exception type as indexing the empty list would raise,
        # but with a message that says which Id was not found
        raise IndexError(f'no patient with Id {patient_id!r} found in patients')
    health_record = {'patient': matches[0]}

    # (result key, source table, column(s) to keep), in output order
    sources = (
        ('allergies', allergies, 'DESCRIPTION'),
        ('careplans', careplans, 'DESCRIPTION'),
        ('conditions', conditions, 'DESCRIPTION'),
        ('devices', devices, 'DESCRIPTION'),
        ('encounters', encounters, 'DESCRIPTION'),
        ('imagings', imagings, ['BODYSITE_DESCRIPTION', 'MODALITY_DESCRIPTION']),
        ('immunizations', immunizations, 'DESCRIPTION'),
        ('medications', medications, 'DESCRIPTION'),
        ('observations', observations, ['DESCRIPTION', 'VALUE', 'UNITS']),
        ('procedures', procedures, 'DESCRIPTION'),
    )
    for name, table, columns in sources:
        health_record[name] = _patient_values(table, patient_id, columns)
    return clean_nans(health_record)

In [6]:
def display_health_records(health_records : dict):
    '''
    Prints a patient's health records as a plain-text report

    The report opens with the patient's demographic fields and continues
    with one section per record type; every section is closed by a rule of
    100 dashes. `health_records` is expected to have the layout produced by
    `get_health_records`. Nothing is returned.
    '''
    rule = '-' * 100

    def as_item(entry):
        # plain description entries
        return f' - {entry}'

    def as_imaging(entry):
        # entry is indexed as [body site, modality]
        return f' - {entry[0]} ({entry[1]})'

    def as_observation(entry):
        # entry is indexed as [description, value, units]
        return f' - {entry[0]}: {entry[1]} {entry[2]}'

    print(rule)
    print('Patient:')
    # (printed label, key inside health_records['patient'])
    for label, field in (
        ('ID', 'Id'),
        ('BIRTHDATE', 'BIRTHDATE'),
        ('MARITAL', 'MARITAL'),
        ('RACE', 'RACE'),
        ('ETHNICITY', 'ETHNICITY'),
        ('GENDER', 'GENDER'),
    ):
        print(f" - {label}: {health_records['patient'][field]}")
    print(rule)

    # (section heading, key inside health_records, line formatter)
    sections = (
        ('Allergies:', 'allergies', as_item),
        ('Careplans:', 'careplans', as_item),
        ('Conditions:', 'conditions', as_item),
        ('Devices:', 'devices', as_item),
        ('Encounters:', 'encounters', as_item),
        ('Imagings:', 'imagings', as_imaging),
        ('Immunizations:', 'immunizations', as_item),
        ('Medications:', 'medications', as_item),
        ('Observations:', 'observations', as_observation),
        ('Procedures:', 'procedures', as_item),
    )
    for heading, key, render in sections:
        print(heading)
        for entry in health_records[key]:
            print(render(entry))
        print(rule)

In [19]:
# pick one patient Id at random, gather that patient's records and print them

patient_ids = patients['Id'].values
patient_id = random.choice(patient_ids)
health_records = get_health_records(patient_id)
display_health_records(health_records)

----------------------------------------------------------------------------------------------------
Patient:
 - ID: 4440ff11-69ec-440b-a2bd-dc1c14105e8e
 - BIRTHDATE: 2001-11-20
 - MARITAL: 
 - RACE: white
 - ETHNICITY: hispanic
 - GENDER: F
----------------------------------------------------------------------------------------------------
Allergies:
 - Allergy to mould
 - House dust mite allergy
 - Dander (animal) allergy
 - Allergy to grass pollen
 - Allergy to tree pollen
 - Allergy to eggs
 - Allergy to wheat
 - Allergy to peanuts
----------------------------------------------------------------------------------------------------
Careplans:
 - Self-care interventions (procedure)
 - Skin condition care
 - Allergic disorder monitoring
----------------------------------------------------------------------------------------------------
Conditions:
 - Atopic dermatitis
 - Perennial allergic rhinitis
 - Acute viral pharyngitis (disorder)
 - Acute allergic reaction
---------------------