In [1]:
import pandas as pd
import numpy as np
import pickle
from faker import Faker
import df_helper as dfh

df_helper = dfh.df_helper()

# Initialize Faker
fake = Faker()

admission_types = [
    'URGENT', 'ELECTIVE', 'EW EMER.', 'DIRECT EMER.', 'EU OBSERVATION',
    'OBSERVATION ADMIT', 'DIRECT OBSERVATION',
    'AMBULATORY OBSERVATION', 'SURGICAL SAME DAY ADMISSION'
]
admission_locations = [
    'TRANSFER FROM HOSPITAL', 'TRANSFER FROM SKILLED NURSING FACILITY',
    'INTERNAL TRANSFER TO OR FROM PSYCH', 'PHYSICIAN REFERRAL',
    'EMERGENCY ROOM', 'PACU', 'PROCEDURE SITE',
    'WALK-IN/SELF REFERRAL', 'INFORMATION NOT AVAILABLE',
    'CLINIC REFERRAL'
]
insurance_types = [
    'Medicaid', 'Medicare', 'Other'
]
languages = [
    'ENGLISH', '?'
]
marital_statuses = [
    'SINGLE', 'MARRIED', 'DIVORCED', 'WIDOWED'
]
races = [
    'BLACK/CAPE VERDEAN', 'HISPANIC/LATINO - PUERTO RICAN', 'WHITE',
    'UNKNOWN', 'OTHER', 'BLACK/AFRICAN AMERICAN',
    'HISPANIC/LATINO - SALVADORAN', 'UNABLE TO OBTAIN',
    'WHITE - OTHER EUROPEAN', 'PORTUGUESE', 'HISPANIC/LATINO - CUBAN',
    'PATIENT DECLINED TO ANSWER', 'WHITE - BRAZILIAN',
    'HISPANIC OR LATINO'
]

drugs = [
    'Midodrine', 'Multivitamins W/minerals', 'Sodium Chloride',
    'Nephrocaps', 'Nicotine Polacrilex', 'OLANZapine', 'Ondansetron',
    'OxyCODONE (Immediate Release)'
]

diagnoses_types = [
    'Urinary tract infection, site not specified',
    'Acute respiratory failure',
    'Asthma, unspecified type, unspecified',
    'Solitary pulmonary nodule',
    'Streptococcus infection in conditions classified elsewhere and of unspecified site, streptococcus, group D [Enterococcus]',
    'Dysphagia, oropharyngeal phase', 'Esophageal reflux', 'Hypoxemia',
    'Methicillin resistant pneumonia due to Staphylococcus aureus',
    'Attention deficit disorder with hyperactivity'
]

def generate_patients(n):
    patients = []
    for i in range(n):
        gender = np.random.choice(['M', 'F'])
        patients.append({
            'patient_id': i,
            'name': fake.name_male() if gender == "M" else fake.name_female(),
            'age': np.random.randint(1, 90),
            'gender': gender,
            'insurance': np.random.choice(insurance_types),
            'language': np.random.choice(languages),
            'maritalStatus': np.random.choice(marital_statuses),
            'race': np.random.choice(races),
        })
    return pd.DataFrame(patients)

def generate_staff(n):
    staff = []
    for i in range(n):
        staff.append({
            'staff_id': i,
            'name': fake.name(),
            'role': np.random.choice(['Physician', 'Nurse', 'Admin']),
            'shift_start': fake.time(),
            'shift_end': fake.time()
        })
    return pd.DataFrame(staff)

def generate_admissions(patients:pd.DataFrame):
    admissions = []
    for i in range(len(patients)):
        admissions.append({
            'adm_id': i,
            'patient_id': patients.iloc[i].patient_id,
            'admit_time': fake.date_time(),
            'discharge_time': None,
            'type': np.random.choice(admission_types),
            'location': np.random.choice(admission_locations),
            
        })
    return pd.DataFrame(admissions)

def generate_omr(admissions):
    table = []
    for i in range(len(admissions)):
        omr = {
            'omr_id': i,
            'adm_id': admissions.iloc[i].adm_id,
            'weight': np.random.randint(60, 120),
            ##'height': np.random.randint(150, 220)/ 10.0, #not used in modeling
            'bp_systolic': np.random.randint(80, 220),
            'bp_diastolic': np.random.randint(60, 120),
        }
        table.append(omr)
    return pd.DataFrame(table)

def generate_prescriptions(admissions):
    prescriptions = []
    for i in range(len(admissions)):
        prescriptions.append({
            'prescription_id': i,
            'adm_id': admissions.iloc[i].adm_id,
            'drug': np.random.choice(drugs),
        })
    return pd.DataFrame(prescriptions)

def generate_diagnoses(admissions:pd.DataFrame):
    diag = []
    for i in range(len(admissions)):
        diag.append({
            'diagnosis_id': i,
            'adm_id': admissions.iloc[i].adm_id,
            'diagnosis': np.random.choice(diagnoses_types),
        })
    return pd.DataFrame(diag)

def generate_rooms(n):
    rooms = []
    beds = []
    bed_taken = 0
    room_id = 0
    bed_id = 0
    for i in range(n):
        if i % 2 == 0:#create new room
            rooms.append({
                'room_id': room_id,
                'capacity': 1,
            })
            beds.append({
                "bed_id": bed_id,
                "room": room_id,
                "adm_id": None
            })
            room_id += 1
        else:
            room = np.random.choice(rooms)#add bed to existing room
            room["capacity"] += 1
            beds.append({
                "bed_id": bed_id,
                "room": room["room_id"],
                "adm_id": None
            })
        bed_id += 1

    return pd.DataFrame(rooms), pd.DataFrame(beds)

def assign_beds(admissions, beds):
    for i in range(len(admissions)):
        adm = admissions.iloc[i].adm_id
        bed = beds[beds.adm_id.isna()].sample(1).iloc[0,0]  #choose random empty bed
        beds.loc[bed, "adm_id"] = adm.astype(int)
        print(bed)

In [2]:
staff = generate_staff(10)
patients = generate_patients(10)
admissions = generate_admissions(patients)
admissions.discharge_time = pd.to_datetime(admissions.discharge_time)
omr = generate_omr(admissions)
prescriptions = generate_prescriptions(admissions)
diagnoses = generate_diagnoses(admissions)
rooms, beds = generate_rooms(20)
assign_beds(admissions, beds)

19
9
10
11
14
18
12
1
2
3


In [3]:
staff.head()

Unnamed: 0,staff_id,name,role,shift_start,shift_end
0,0,Henry Moore,Nurse,13:16:56,20:26:04
1,1,Shannon Reed,Nurse,00:26:27,09:57:44
2,2,Monica White,Nurse,07:02:21,21:15:45
3,3,Lisa May,Admin,19:46:15,07:12:00
4,4,Madeline Harrison,Physician,23:23:21,23:03:24


In [4]:
patients.head()

Unnamed: 0,patient_id,name,age,gender,insurance,language,maritalStatus,race
0,0,Sandra Mcmahon,74,F,Medicare,?,WIDOWED,HISPANIC OR LATINO
1,1,Charles Long,15,M,Medicare,ENGLISH,WIDOWED,HISPANIC/LATINO - SALVADORAN
2,2,Lisa Campos,25,F,Medicaid,ENGLISH,MARRIED,BLACK/CAPE VERDEAN
3,3,Joanna King,54,F,Medicaid,ENGLISH,MARRIED,WHITE
4,4,Jason Perez,22,M,Other,ENGLISH,DIVORCED,UNABLE TO OBTAIN


In [5]:
admissions

Unnamed: 0,adm_id,patient_id,admit_time,discharge_time,type,location
0,0,0,1986-01-10 11:34:47.067091,NaT,AMBULATORY OBSERVATION,PACU
1,1,1,1991-06-04 03:39:28.181177,NaT,DIRECT OBSERVATION,EMERGENCY ROOM
2,2,2,2021-12-07 05:29:40.856196,NaT,SURGICAL SAME DAY ADMISSION,PROCEDURE SITE
3,3,3,2002-02-28 23:01:00.993720,NaT,DIRECT OBSERVATION,CLINIC REFERRAL
4,4,4,2021-10-19 05:48:57.125391,NaT,ELECTIVE,PACU
5,5,5,1989-06-02 10:21:13.925163,NaT,AMBULATORY OBSERVATION,INFORMATION NOT AVAILABLE
6,6,6,2016-10-20 08:28:02.479717,NaT,DIRECT EMER.,TRANSFER FROM SKILLED NURSING FACILITY
7,7,7,1994-06-09 04:35:18.482680,NaT,EW EMER.,PHYSICIAN REFERRAL
8,8,8,2007-07-14 02:05:08.647848,NaT,EW EMER.,PACU
9,9,9,1996-03-04 20:06:20.788843,NaT,ELECTIVE,EMERGENCY ROOM


In [6]:
admissions.iloc[0,0]

0

In [7]:
omr.head()

Unnamed: 0,omr_id,adm_id,weight,bp_systolic,bp_diastolic
0,0,0,70,147,86
1,1,1,118,185,118
2,2,2,86,154,88
3,3,3,98,105,100
4,4,4,84,175,60


In [8]:
prescriptions.head()

Unnamed: 0,prescription_id,adm_id,drug
0,0,0,Nephrocaps
1,1,1,Midodrine
2,2,2,Nephrocaps
3,3,3,Ondansetron
4,4,4,OxyCODONE (Immediate Release)


In [9]:
diagnoses.head()

Unnamed: 0,diagnosis_id,adm_id,diagnosis
0,0,0,Methicillin resistant pneumonia due to Staphyl...
1,1,1,Streptococcus infection in conditions classifi...
2,2,2,Streptococcus infection in conditions classifi...
3,3,3,"Dysphagia, oropharyngeal phase"
4,4,4,Methicillin resistant pneumonia due to Staphyl...


In [10]:
rooms

Unnamed: 0,room_id,capacity
0,0,3
1,1,3
2,2,2
3,3,2
4,4,2
5,5,3
6,6,1
7,7,2
8,8,1
9,9,1


In [11]:
beds

Unnamed: 0,bed_id,room,adm_id
0,0,0,
1,1,0,7.0
2,2,1,8.0
3,3,0,9.0
4,4,2,
5,5,1,
6,6,3,
7,7,3,
8,8,4,
9,9,2,1.0


In [12]:
with open('app/data/staff.csv', 'w') as f:
    staff.to_csv(f, index=False)
with open('app/data/patients.csv', 'w') as f:
    patients.to_csv(f, index=False)
with open('app/data/admissions.csv', 'w') as f:
    admissions.to_csv(f, index=False)
with open('app/data/omr.csv', 'w') as f:
    omr.to_csv(f, index=False)
with open('app/data/prescriptions.csv', 'w') as f:
    prescriptions.to_csv(f, index=False)
with open("app/data/diagnoses.csv", "w") as f:
    diagnoses.to_csv(f, index=False)
with open("app/data/rooms.csv", "w") as f:
    rooms.to_csv(f, index=False)
with open("app/data/beds.csv", "w") as f:
    beds.to_csv(f, index=False)

In [13]:
with open("app/data/admission_types.pkl", "wb") as f:
    pickle.dump(admission_types, f)
with open("app/data/admission_locations.pkl", "wb") as f:
    pickle.dump(admission_locations, f)
with open("app/data/insurance_types.pkl", "wb") as f:
    pickle.dump(insurance_types, f)
with open("app/data/languages.pkl", "wb") as f:
    pickle.dump(languages, f)
with open("app/data/marital_statuses.pkl", "wb") as f:
    pickle.dump(marital_statuses, f)
with open("app/data/races.pkl", "wb") as f:
    pickle.dump(races, f)
with open("app/data/drugs.pkl", "wb") as f:
    pickle.dump(drugs, f)
with open("app/data/diagnoses_types.pkl", "wb") as f:
    pickle.dump(diagnoses_types, f)

In [14]:
patient_db = pd.merge(patients, admissions, how="inner", on="patient_id")
patient_db = pd.merge(patient_db, omr, how="inner", on="adm_id")
patient_db = pd.merge(patient_db, prescriptions, how="inner", on="adm_id")
patient_db = pd.merge(patient_db, diagnoses, how="inner", on="adm_id")

df_helper.drop_columns(
    patient_db,[
        'language',
        'admit_time', 
        'discharge_time',
        'omr_id',
        'prescription_id', 
        'diagnosis_id',
])
patient_db

Unnamed: 0,patient_id,name,age,gender,insurance,maritalStatus,race,adm_id,type,location,weight,bp_systolic,bp_diastolic,drug,diagnosis
0,0,Sandra Mcmahon,74,F,Medicare,WIDOWED,HISPANIC OR LATINO,0,AMBULATORY OBSERVATION,PACU,70,147,86,Nephrocaps,Methicillin resistant pneumonia due to Staphyl...
1,1,Charles Long,15,M,Medicare,WIDOWED,HISPANIC/LATINO - SALVADORAN,1,DIRECT OBSERVATION,EMERGENCY ROOM,118,185,118,Midodrine,Streptococcus infection in conditions classifi...
2,2,Lisa Campos,25,F,Medicaid,MARRIED,BLACK/CAPE VERDEAN,2,SURGICAL SAME DAY ADMISSION,PROCEDURE SITE,86,154,88,Nephrocaps,Streptococcus infection in conditions classifi...
3,3,Joanna King,54,F,Medicaid,MARRIED,WHITE,3,DIRECT OBSERVATION,CLINIC REFERRAL,98,105,100,Ondansetron,"Dysphagia, oropharyngeal phase"
4,4,Jason Perez,22,M,Other,DIVORCED,UNABLE TO OBTAIN,4,ELECTIVE,PACU,84,175,60,OxyCODONE (Immediate Release),Methicillin resistant pneumonia due to Staphyl...
5,5,Megan Weber,1,F,Medicare,MARRIED,BLACK/AFRICAN AMERICAN,5,AMBULATORY OBSERVATION,INFORMATION NOT AVAILABLE,76,141,75,Sodium Chloride,Solitary pulmonary nodule
6,6,Adam Farmer,15,M,Medicaid,SINGLE,BLACK/CAPE VERDEAN,6,DIRECT EMER.,TRANSFER FROM SKILLED NURSING FACILITY,79,128,108,Nicotine Polacrilex,"Asthma, unspecified type, unspecified"
7,7,Alexis Harris,24,F,Medicaid,SINGLE,WHITE - OTHER EUROPEAN,7,EW EMER.,PHYSICIAN REFERRAL,105,139,94,OxyCODONE (Immediate Release),Streptococcus infection in conditions classifi...
8,8,Robert Peters,61,M,Other,MARRIED,BLACK/CAPE VERDEAN,8,EW EMER.,PACU,115,153,108,Multivitamins W/minerals,"Asthma, unspecified type, unspecified"
9,9,Kayla Peters,37,F,Medicaid,SINGLE,PATIENT DECLINED TO ANSWER,9,ELECTIVE,EMERGENCY ROOM,114,165,99,OLANZapine,Streptococcus infection in conditions classifi...


In [15]:
with open("app/data/patient_db.csv", "w") as f:
    patient_db.to_csv(f, index=False)