In [1]:
import pandas as pd
import numpy as np
import pickle
from faker import Faker
import df_helper as dfh

df_helper = dfh.df_helper()

# Initialize Faker
fake = Faker()

admission_types = [
    'URGENT', 'ELECTIVE', 'EW EMER.', 'DIRECT EMER.', 'EU OBSERVATION',
    'OBSERVATION ADMIT', 'DIRECT OBSERVATION',
    'AMBULATORY OBSERVATION', 'SURGICAL SAME DAY ADMISSION'
]
admission_locations = [
    'TRANSFER FROM HOSPITAL', 'TRANSFER FROM SKILLED NURSING FACILITY',
    'INTERNAL TRANSFER TO OR FROM PSYCH', 'PHYSICIAN REFERRAL',
    'EMERGENCY ROOM', 'PACU', 'PROCEDURE SITE',
    'WALK-IN/SELF REFERRAL', 'INFORMATION NOT AVAILABLE',
    'CLINIC REFERRAL'
]
insurance_types = [
    'Medicaid', 'Medicare', 'Other'
]
languages = [
    'ENGLISH', '?'
]
marital_statuses = [
    'SINGLE', 'MARRIED', 'DIVORCED', 'WIDOWED'
]
races = [
    'BLACK/CAPE VERDEAN', 'HISPANIC/LATINO - PUERTO RICAN', 'WHITE',
    'UNKNOWN', 'OTHER', 'BLACK/AFRICAN AMERICAN',
    'HISPANIC/LATINO - SALVADORAN', 'UNABLE TO OBTAIN',
    'WHITE - OTHER EUROPEAN', 'PORTUGUESE', 'HISPANIC/LATINO - CUBAN',
    'PATIENT DECLINED TO ANSWER', 'WHITE - BRAZILIAN',
    'HISPANIC OR LATINO'
]

drugs = [
    'Midodrine', 'Multivitamins W/minerals', 'Sodium Chloride',
    'Nephrocaps', 'Nicotine Polacrilex', 'OLANZapine', 'Ondansetron',
    'OxyCODONE (Immediate Release)'
]

diagnoses_types = [
    'Urinary tract infection, site not specified',
    'Acute respiratory failure',
    'Asthma, unspecified type, unspecified',
    'Solitary pulmonary nodule',
    'Streptococcus infection in conditions classified elsewhere and of unspecified site, streptococcus, group D [Enterococcus]',
    'Dysphagia, oropharyngeal phase', 'Esophageal reflux', 'Hypoxemia',
    'Methicillin resistant pneumonia due to Staphylococcus aureus',
    'Attention deficit disorder with hyperactivity'
]

# List of sample medical supplies and equipment categories
inventory_categories = [
    'Medical Equipment', 'Pharmaceuticals', 'Surgical Tools',
    'Diagnostic Tools', 'PPE', 'Cleaning Supplies'
]

# List of sample item names
inventory_items = {
    'Medical Equipment': ['X-Ray Machine', 'Ultrasound Machine', 'MRI Scanner', 'ECG Monitor'],
    'Pharmaceuticals': ['Aspirin', 'Ibuprofen', 'Amoxicillin', 'Paracetamol'],
    'Surgical Tools': ['Scalpel', 'Forceps', 'Sutures', 'Hemostat'],
    'Diagnostic Tools': ['Thermometer', 'Blood Pressure Monitor', 'Pulse Oximeter'],
    'PPE': ['Face Masks', 'Gloves', 'Gowns', 'Face Shields'],
    'Cleaning Supplies': ['Disinfectant', 'Sanitizer', 'Alcohol Swabs', 'Wipes']
}

# Function to generate a random inventory item
def generate_inventory_item():
    category = np.random.choice(inventory_categories)
    item_name = np.random.choice(inventory_items[category])
    quantity = np.random.randint(10, 500)  # Random quantity of items
    price_per_unit = round(np.random.uniform(5, 5000), 2)  # Random price per unit
    expiry_date = fake.date_between(start_date='today', end_date='+3y')  # Random expiry date within 3 years
    return {
        'Category': category,
        'Item Name': item_name,
        'Quantity': quantity,
        'Price per Unit ($)': price_per_unit,
        'Total Value ($)': round(quantity * price_per_unit, 2),
        'Expiry Date': expiry_date
    }

# Generate hospital inventory data
def generate_hospital_inventory(num_items):
    inventory = []
    for _ in range(num_items):
        inventory.append(generate_inventory_item())
    return pd.DataFrame(inventory)

def generate_patients(n):
    patients = []
    for i in range(n):
        gender = np.random.choice(['M', 'F'])
        patients.append({
            'patient_id': i,
            'name': fake.name_male() if gender == "M" else fake.name_female(),
            'age': np.random.randint(1, 90),
            'gender': gender,
            'insurance': np.random.choice(insurance_types),
            'language': np.random.choice(languages),
            'maritalStatus': np.random.choice(marital_statuses),
            'race': np.random.choice(races),
        })
    return pd.DataFrame(patients)

def generate_staff(n):
    staff = []
    for i in range(n):
        staff.append({
            'staff_id': i,
            'staff_name': fake.name(),
            'role': np.random.choice(['Physician', 'Nurse', 'Admin']),
            'shift_start': fake.time(),
            'shift_end': fake.time()
        })
    return pd.DataFrame(staff)

def generate_admissions(patients:pd.DataFrame):
    admissions = []
    for i in range(len(patients)):
        admissions.append({
            'adm_id': i,
            'patient_id': patients.iloc[i].patient_id,
            'admit_time': fake.date_time(),
            'discharge_time': None,
            'type': np.random.choice(admission_types),
            'location': np.random.choice(admission_locations),
            
        })
    return pd.DataFrame(admissions)

def generate_omr(admissions):
    table = []
    for i in range(len(admissions)):
        omr = {
            'omr_id': i,
            'adm_id': admissions.iloc[i].adm_id,
            'weight': np.random.randint(60, 120),
            ##'height': np.random.randint(150, 220)/ 10.0, #not used in modeling
            'bp_systolic': np.random.randint(80, 220),
            'bp_diastolic': np.random.randint(60, 120),
        }
        table.append(omr)
    return pd.DataFrame(table)

def generate_prescriptions(admissions):
    prescriptions = []
    for i in range(len(admissions)):
        prescriptions.append({
            'prescription_id': i,
            'adm_id': admissions.iloc[i].adm_id,
            'drug': np.random.choice(drugs),
        })
    return pd.DataFrame(prescriptions)

def generate_diagnoses(admissions:pd.DataFrame):
    diag = []
    for i in range(len(admissions)):
        diag.append({
            'diagnosis_id': i,
            'adm_id': admissions.iloc[i].adm_id,
            'diagnosis': np.random.choice(diagnoses_types),
        })
    return pd.DataFrame(diag)

def generate_rooms(n):
    rooms = []
    beds = []
    bed_taken = 0
    room_id = 0
    bed_id = 0
    for i in range(n):
        if i % 2 == 0:#create new room
            rooms.append({
                'room_id': room_id,
                'capacity': 1,
            })
            beds.append({
                "bed_id": bed_id,
                "room": room_id,
                "adm_id": None
            })
            room_id += 1
        else:
            room = np.random.choice(rooms)#add bed to existing room
            room["capacity"] += 1
            beds.append({
                "bed_id": bed_id,
                "room": room["room_id"],
                "adm_id": None
            })
        bed_id += 1

    return pd.DataFrame(rooms), pd.DataFrame(beds)

def assign_beds(admissions, beds):
    for i in range(len(admissions)):
        adm = admissions.iloc[i].adm_id
        bed = beds[beds.adm_id.isna()].sample(1).iloc[0,0]  #choose random empty bed
        beds.loc[bed, "adm_id"] = adm.astype(int)
        print(bed)

In [2]:
hospital_inventory = generate_hospital_inventory(50)
staff = generate_staff(20)
patients = generate_patients(30)
admissions = generate_admissions(patients)
admissions.discharge_time = pd.to_datetime(admissions.discharge_time)
omr = generate_omr(admissions)
prescriptions = generate_prescriptions(admissions)
diagnoses = generate_diagnoses(admissions)
rooms, beds = generate_rooms(50)
assign_beds(admissions, beds)

18
4
20
48
24
11
19
3
13
0
8
10
29
14
49
42
6
39
9
27
41
7
45
5
1
17
35
32
12
26


In [3]:
hospital_inventory.head()

Unnamed: 0,Category,Item Name,Quantity,Price per Unit ($),Total Value ($),Expiry Date
0,Diagnostic Tools,Pulse Oximeter,23,848.34,19511.82,2026-08-11
1,Diagnostic Tools,Pulse Oximeter,384,2288.86,878922.24,2025-01-19
2,Medical Equipment,MRI Scanner,430,3585.18,1541627.4,2026-07-27
3,Medical Equipment,MRI Scanner,85,974.52,82834.2,2026-08-13
4,PPE,Gloves,362,3892.39,1409045.18,2025-06-04


In [4]:
staff.head()

Unnamed: 0,staff_id,staff_name,role,shift_start,shift_end
0,0,Cathy Anderson,Admin,05:15:29,22:01:47
1,1,Tiffany Strickland,Admin,03:40:41,15:06:58
2,2,Melanie Miller,Physician,00:30:45,05:04:32
3,3,Christopher Greene,Physician,05:04:56,22:43:50
4,4,Justin Reeves,Physician,01:56:10,02:25:59


In [5]:
patients.head()

Unnamed: 0,patient_id,name,age,gender,insurance,language,maritalStatus,race
0,0,John Rodriguez,79,M,Other,ENGLISH,SINGLE,UNKNOWN
1,1,Stephanie Franklin,1,F,Medicare,ENGLISH,DIVORCED,HISPANIC/LATINO - PUERTO RICAN
2,2,Michael Santos,38,M,Medicare,ENGLISH,DIVORCED,HISPANIC/LATINO - CUBAN
3,3,Charles Hamilton,88,M,Medicaid,ENGLISH,MARRIED,BLACK/AFRICAN AMERICAN
4,4,Joseph Smith,74,M,Medicare,ENGLISH,WIDOWED,HISPANIC OR LATINO


In [6]:
admissions

Unnamed: 0,adm_id,patient_id,admit_time,discharge_time,type,location
0,0,0,1993-02-21 23:22:13.308502,NaT,EU OBSERVATION,WALK-IN/SELF REFERRAL
1,1,1,1979-03-12 13:54:23.673450,NaT,URGENT,TRANSFER FROM SKILLED NURSING FACILITY
2,2,2,1986-06-29 01:23:12.099928,NaT,EW EMER.,PROCEDURE SITE
3,3,3,2016-01-02 15:29:14.504039,NaT,URGENT,PHYSICIAN REFERRAL
4,4,4,2011-12-24 18:11:30.717533,NaT,EW EMER.,INTERNAL TRANSFER TO OR FROM PSYCH
5,5,5,1973-04-06 05:45:29.855749,NaT,ELECTIVE,CLINIC REFERRAL
6,6,6,1974-02-21 17:43:43.440703,NaT,SURGICAL SAME DAY ADMISSION,EMERGENCY ROOM
7,7,7,1975-07-26 19:25:10.995304,NaT,AMBULATORY OBSERVATION,CLINIC REFERRAL
8,8,8,2023-07-21 01:32:11.878057,NaT,AMBULATORY OBSERVATION,EMERGENCY ROOM
9,9,9,1975-06-13 18:34:17.660378,NaT,URGENT,CLINIC REFERRAL


In [7]:
admissions.iloc[0,0]

0

In [8]:
omr.head()

Unnamed: 0,omr_id,adm_id,weight,bp_systolic,bp_diastolic
0,0,0,84,177,78
1,1,1,100,210,119
2,2,2,67,190,63
3,3,3,109,123,118
4,4,4,108,116,112


In [9]:
prescriptions.head()

Unnamed: 0,prescription_id,adm_id,drug
0,0,0,Ondansetron
1,1,1,Midodrine
2,2,2,Ondansetron
3,3,3,OLANZapine
4,4,4,Midodrine


In [10]:
diagnoses.head()

Unnamed: 0,diagnosis_id,adm_id,diagnosis
0,0,0,Acute respiratory failure
1,1,1,"Asthma, unspecified type, unspecified"
2,2,2,Esophageal reflux
3,3,3,"Dysphagia, oropharyngeal phase"
4,4,4,Solitary pulmonary nodule


In [11]:
rooms

Unnamed: 0,room_id,capacity
0,0,5
1,1,4
2,2,4
3,3,2
4,4,3
5,5,2
6,6,1
7,7,3
8,8,2
9,9,1


In [12]:
beds

Unnamed: 0,bed_id,room,adm_id
0,0,0,9.0
1,1,0,24.0
2,2,1,
3,3,0,7.0
4,4,2,1.0
5,5,1,23.0
6,6,3,16.0
7,7,0,21.0
8,8,4,10.0
9,9,2,18.0


In [13]:
with open('app/data/staff.csv', 'w') as f:
    staff.to_csv(f, index=False)
with open('app/data/patients.csv', 'w') as f:
    patients.to_csv(f, index=False)
with open('app/data/admissions.csv', 'w') as f:
    admissions.to_csv(f, index=False)
with open('app/data/omr.csv', 'w') as f:
    omr.to_csv(f, index=False)
with open('app/data/prescriptions.csv', 'w') as f:
    prescriptions.to_csv(f, index=False)
with open("app/data/diagnoses.csv", "w") as f:
    diagnoses.to_csv(f, index=False)
with open("app/data/rooms.csv", "w") as f:
    rooms.to_csv(f, index=False)
with open("app/data/beds.csv", "w") as f:
    beds.to_csv(f, index=False)

In [14]:
with open("app/data/admission_types.pkl", "wb") as f:
    pickle.dump(admission_types, f)
with open("app/data/admission_locations.pkl", "wb") as f:
    pickle.dump(admission_locations, f)
with open("app/data/insurance_types.pkl", "wb") as f:
    pickle.dump(insurance_types, f)
with open("app/data/languages.pkl", "wb") as f:
    pickle.dump(languages, f)
with open("app/data/marital_statuses.pkl", "wb") as f:
    pickle.dump(marital_statuses, f)
with open("app/data/races.pkl", "wb") as f:
    pickle.dump(races, f)
with open("app/data/drugs.pkl", "wb") as f:
    pickle.dump(drugs, f)
with open("app/data/diagnoses_types.pkl", "wb") as f:
    pickle.dump(diagnoses_types, f)

In [15]:
patient_db = pd.merge(patients, admissions, how="inner", on="patient_id")
patient_db = pd.merge(patient_db, omr, how="inner", on="adm_id")
patient_db = pd.merge(patient_db, prescriptions, how="inner", on="adm_id")
patient_db = pd.merge(patient_db, diagnoses, how="inner", on="adm_id")

df_helper.drop_columns(
    patient_db,[
        'language',
        'admit_time', 
        'discharge_time',
        'omr_id',
        'prescription_id', 
        'diagnosis_id',
])
patient_db

Unnamed: 0,patient_id,name,age,gender,insurance,maritalStatus,race,adm_id,type,location,weight,bp_systolic,bp_diastolic,drug,diagnosis
0,0,John Rodriguez,79,M,Other,SINGLE,UNKNOWN,0,EU OBSERVATION,WALK-IN/SELF REFERRAL,84,177,78,Ondansetron,Acute respiratory failure
1,1,Stephanie Franklin,1,F,Medicare,DIVORCED,HISPANIC/LATINO - PUERTO RICAN,1,URGENT,TRANSFER FROM SKILLED NURSING FACILITY,100,210,119,Midodrine,"Asthma, unspecified type, unspecified"
2,2,Michael Santos,38,M,Medicare,DIVORCED,HISPANIC/LATINO - CUBAN,2,EW EMER.,PROCEDURE SITE,67,190,63,Ondansetron,Esophageal reflux
3,3,Charles Hamilton,88,M,Medicaid,MARRIED,BLACK/AFRICAN AMERICAN,3,URGENT,PHYSICIAN REFERRAL,109,123,118,OLANZapine,"Dysphagia, oropharyngeal phase"
4,4,Joseph Smith,74,M,Medicare,WIDOWED,HISPANIC OR LATINO,4,EW EMER.,INTERNAL TRANSFER TO OR FROM PSYCH,108,116,112,Midodrine,Solitary pulmonary nodule
5,5,Janet Buck,12,F,Medicare,WIDOWED,BLACK/CAPE VERDEAN,5,ELECTIVE,CLINIC REFERRAL,101,187,91,OLANZapine,Hypoxemia
6,6,Darius Murphy,18,M,Other,MARRIED,WHITE,6,SURGICAL SAME DAY ADMISSION,EMERGENCY ROOM,72,91,97,Ondansetron,"Asthma, unspecified type, unspecified"
7,7,Steven Parker,82,M,Medicaid,MARRIED,WHITE,7,AMBULATORY OBSERVATION,CLINIC REFERRAL,89,209,71,OLANZapine,"Asthma, unspecified type, unspecified"
8,8,Christopher Ali,50,M,Other,SINGLE,PORTUGUESE,8,AMBULATORY OBSERVATION,EMERGENCY ROOM,74,104,73,Nicotine Polacrilex,"Dysphagia, oropharyngeal phase"
9,9,Michael Lee,6,M,Medicaid,WIDOWED,UNKNOWN,9,URGENT,CLINIC REFERRAL,71,209,118,OLANZapine,Streptococcus infection in conditions classifi...


In [17]:
with open("app/data/patient_db.csv", "w") as f:
    patient_db.to_csv(f, index=False)
with open("app/data/hospital_inventory.csv", "w") as f:
    hospital_inventory.to_csv(f, index=False)