In [1]:
import pandas as pd
import numpy as np
import pickle
from faker import Faker
import df_helper as dfh

df_helper = dfh.df_helper()

# Initialize Faker
fake = Faker()

admission_types = [
    'URGENT', 'ELECTIVE', 'EW EMER.', 'DIRECT EMER.', 'EU OBSERVATION',
    'OBSERVATION ADMIT', 'DIRECT OBSERVATION',
    'AMBULATORY OBSERVATION', 'SURGICAL SAME DAY ADMISSION'
]
admission_locations = [
    'TRANSFER FROM HOSPITAL', 'TRANSFER FROM SKILLED NURSING FACILITY',
    'INTERNAL TRANSFER TO OR FROM PSYCH', 'PHYSICIAN REFERRAL',
    'EMERGENCY ROOM', 'PACU', 'PROCEDURE SITE',
    'WALK-IN/SELF REFERRAL', 'INFORMATION NOT AVAILABLE',
    'CLINIC REFERRAL'
]
insurance_types = [
    'Medicaid', 'Medicare', 'Other'
]
languages = [
    'ENGLISH', '?'
]
marital_statuses = [
    'SINGLE', 'MARRIED', 'DIVORCED', 'WIDOWED'
]
races = [
    'BLACK/CAPE VERDEAN', 'HISPANIC/LATINO - PUERTO RICAN', 'WHITE',
    'UNKNOWN', 'OTHER', 'BLACK/AFRICAN AMERICAN',
    'HISPANIC/LATINO - SALVADORAN', 'UNABLE TO OBTAIN',
    'WHITE - OTHER EUROPEAN', 'PORTUGUESE', 'HISPANIC/LATINO - CUBAN',
    'PATIENT DECLINED TO ANSWER', 'WHITE - BRAZILIAN',
    'HISPANIC OR LATINO'
]

drugs = [
    'Midodrine', 'Multivitamins W/minerals', 'Sodium Chloride',
    'Nephrocaps', 'Nicotine Polacrilex', 'OLANZapine', 'Ondansetron',
    'OxyCODONE (Immediate Release)'
]

diagnoses_types = [
    'Urinary tract infection, site not specified',
    'Acute respiratory failure',
    'Asthma, unspecified type, unspecified',
    'Solitary pulmonary nodule',
    'Streptococcus infection in conditions classified elsewhere and of unspecified site, streptococcus, group D [Enterococcus]',
    'Dysphagia, oropharyngeal phase', 'Esophageal reflux', 'Hypoxemia',
    'Methicillin resistant pneumonia due to Staphylococcus aureus',
    'Attention deficit disorder with hyperactivity'
]

# List of sample medical supplies and equipment categories
inventory_categories = [
    'Medical Equipment', 'Pharmaceuticals', 'Surgical Tools',
    'Diagnostic Tools', 'PPE', 'Cleaning Supplies'
]

# List of sample item names
inventory_items = {
    'Medical Equipment': ['X-Ray Machine', 'Ultrasound Machine', 'MRI Scanner', 'ECG Monitor'],
    'Pharmaceuticals': ['Aspirin', 'Ibuprofen', 'Amoxicillin', 'Paracetamol'],
    'Surgical Tools': ['Scalpel', 'Forceps', 'Sutures', 'Hemostat'],
    'Diagnostic Tools': ['Thermometer', 'Blood Pressure Monitor', 'Pulse Oximeter'],
    'PPE': ['Face Masks', 'Gloves', 'Gowns', 'Face Shields'],
    'Cleaning Supplies': ['Disinfectant', 'Sanitizer', 'Alcohol Swabs', 'Wipes']
}

# Function to generate a random inventory item
def generate_inventory_item():
    category = np.random.choice(inventory_categories)
    item_name = np.random.choice(inventory_items[category])
    quantity = np.random.randint(10, 500)  # Random quantity of items
    price_per_unit = round(np.random.uniform(5, 5000), 2)  # Random price per unit
    expiry_date = fake.date_between(start_date='today', end_date='+3y')  # Random expiry date within 3 years
    return {
        'category': category,
        'item_name': item_name,
        'quantity': quantity,
        'price_per_unit($)': price_per_unit,
        'total_value($)': round(quantity * price_per_unit, 2),
        'expiry_date': expiry_date
    }

# Generate hospital inventory data
def generate_hospital_inventory(num_items):
    inventory = []
    for _ in range(num_items):
        inventory.append(generate_inventory_item())
    return pd.DataFrame(inventory)

def generate_patients(n):
    patients = []
    for i in range(n):
        gender = np.random.choice(['M', 'F'])
        patients.append({
            'patient_id': i,
            'name': fake.name_male() if gender == "M" else fake.name_female(),
            'age': np.random.randint(1, 90),
            'gender': gender,
            'insurance': np.random.choice(insurance_types),
            'language': np.random.choice(languages),
            'maritalStatus': np.random.choice(marital_statuses),
            'race': np.random.choice(races),
        })
    return pd.DataFrame(patients)

def generate_staff(n):
    staff = []
    for i in range(n):
        staff.append({
            'staff_id': i,
            'staff_name': fake.name(),
            'role': np.random.choice(['Physician', 'Nurse', 'Admin']),
            'shift_start': fake.time(),
            'shift_end': fake.time()
           
        })
    return pd.DataFrame(staff)

def generate_admissions(patients:pd.DataFrame):
    admissions = []
    for i in range(len(patients)):
        admissions.append({
            'adm_id': i,
            'patient_id': patients.iloc[i].patient_id,
            'admit_time': fake.date_time(),
            'discharge_time': None,
            'type': np.random.choice(admission_types),
            'location': np.random.choice(admission_locations),
            
        })
    return pd.DataFrame(admissions)

def generate_omr(admissions):
    table = []
    for i in range(len(admissions)):
        omr = {
            'omr_id': i,
            'adm_id': admissions.iloc[i].adm_id,
            'weight': np.random.randint(60, 120),
            ##'height': np.random.randint(150, 220)/ 10.0, #not used in modeling
            'bp_systolic': np.random.randint(80, 220),
            'bp_diastolic': np.random.randint(60, 120),
        }
        table.append(omr)
    return pd.DataFrame(table)

def generate_prescriptions(admissions):
    prescriptions = []
    for i in range(len(admissions)):
        prescriptions.append({
            'prescription_id': i,
            'adm_id': admissions.iloc[i].adm_id,
            'drug': np.random.choice(drugs),
        })
    return pd.DataFrame(prescriptions)

def generate_diagnoses(admissions:pd.DataFrame):
    diag = []
    for i in range(len(admissions)):
        diag.append({
            'diagnosis_id': i,
            'adm_id': admissions.iloc[i].adm_id,
            'diagnosis': np.random.choice(diagnoses_types),
        })
    return pd.DataFrame(diag)

def generate_rooms(n):
    rooms = []
    beds = []
    bed_taken = 0
    room_id = 0
    bed_id = 0
    for i in range(n):
        if i % 2 == 0:#create new room
            rooms.append({
                'room_id': room_id,
                'capacity': 1,
            })
            beds.append({
                "bed_id": bed_id,
                "room": room_id,
                "adm_id": None
            })
            room_id += 1
        else:
            room = np.random.choice(rooms)#add bed to existing room
            room["capacity"] += 1
            beds.append({
                "bed_id": bed_id,
                "room": room["room_id"],
                "adm_id": None
            })
        bed_id += 1

    return pd.DataFrame(rooms), pd.DataFrame(beds)

def assign_beds(admissions, beds):
    for i in range(len(admissions)):
        adm = admissions.iloc[i].adm_id
        bed = beds[beds.adm_id.isna()].sample(1).iloc[0,0]  #choose random empty bed
        beds.loc[bed, "adm_id"] = adm.astype(int)
        print(bed)
        
def match_doctors_nurses_to_patients(patients, staffs):
    doctors = staffs[staffs["role"] == "Physician"]
    nurses = staffs[staffs["role"] == "Nurse"]
    
    doctor_list = doctors[["staff_id", "staff_name"]].to_dict(orient="records")
    nurse_list = nurses[["staff_id", "staff_name"]].to_dict(orient="records")
    
    if len(doctor_list) == 0 or len(nurse_list) == 0:
        raise HTTPException(status_code=400, detail="Not enough doctors or nurses available for matching")
    
    # Shuffle the lists
    np.random.shuffle(doctor_list)
    np.random.shuffle(nurse_list)
    
    matched_assignments = []
    num_doctors = len(doctor_list)
    num_nurses = len(nurse_list)
    
    for i, patient in patients.iterrows():
        # Randomly pick a doctor and nurse
        assigned_doctor = np.random.choice(doctor_list)
        assigned_nurse = np.random.choice(nurse_list)
        
        matched_assignments.append({
            "patient_id": patient["patient_id"],
            "patient_name": patient["name"],
            "assigned_doctor": assigned_doctor["staff_name"],
            "assigned_nurse": assigned_nurse["staff_name"]
        })
    
    return matched_assignments


In [2]:
hospital_inventory = generate_hospital_inventory(50)
staffs = generate_staff(20)
patients = generate_patients(30)
admissions = generate_admissions(patients)
admissions.discharge_time = pd.to_datetime(admissions.discharge_time)
omr = generate_omr(admissions)
prescriptions = generate_prescriptions(admissions)
diagnoses = generate_diagnoses(admissions)
rooms, beds = generate_rooms(50)
assign_beds(admissions, beds)
match_doctors_nurses_to_patients(patients, staffs)

44
15
20
19
17
48
30
6
45
13
8
16
43
33
25
2
39
4
46
11
34
32
27
36
14
0
35
9
28
1


[{'patient_id': 0,
  'patient_name': 'Nicolas Nichols',
  'assigned_doctor': 'Jesse Bright',
  'assigned_nurse': 'Meghan Garrett'},
 {'patient_id': 1,
  'patient_name': 'Monica Phillips',
  'assigned_doctor': 'Victor Parker',
  'assigned_nurse': 'Jennifer Dunlap'},
 {'patient_id': 2,
  'patient_name': 'Brian Carrillo',
  'assigned_doctor': 'Valerie Reyes',
  'assigned_nurse': 'Jennifer Dunlap'},
 {'patient_id': 3,
  'patient_name': 'Mary Robinson',
  'assigned_doctor': 'Katie Wright',
  'assigned_nurse': 'Jeremy Fitzgerald'},
 {'patient_id': 4,
  'patient_name': 'Jeffrey Chung',
  'assigned_doctor': 'Victor Parker',
  'assigned_nurse': 'Alexander Moses'},
 {'patient_id': 5,
  'patient_name': 'Jacqueline Moore',
  'assigned_doctor': 'Jesse Bright',
  'assigned_nurse': 'Jared Melton DVM'},
 {'patient_id': 6,
  'patient_name': 'Eric Jordan',
  'assigned_doctor': 'Valerie Reyes',
  'assigned_nurse': 'Jennifer Dunlap'},
 {'patient_id': 7,
  'patient_name': 'Sandy Weaver',
  'assigned_doctor

In [3]:
match_dnp = pd.DataFrame(match_doctors_nurses_to_patients(patients, staffs))

# Create a dictionary to store assigned patients by doctor and nurse
assigned_patients_dict = {}

# Iterate over the matched assignments to populate the dictionary
for _, row in match_dnp.iterrows():
    doctor_name = row['assigned_doctor']
    nurse_name = row['assigned_nurse']
    patient_name = row['patient_name']
    
    if doctor_name not in assigned_patients_dict:
        assigned_patients_dict[doctor_name] = []
    if nurse_name not in assigned_patients_dict:
        assigned_patients_dict[nurse_name] = []
        
    assigned_patients_dict[doctor_name].append(patient_name)
    assigned_patients_dict[nurse_name].append(patient_name)

# Update the 'staffs' DataFrame with assigned patients
staffs['assigned_patients'] = staffs['staff_name'].map(assigned_patients_dict)
staffs.head()

Unnamed: 0,staff_id,staff_name,role,shift_start,shift_end,assigned_patients
0,0,Jesse Bright,Physician,08:53:50,12:44:36,"[Eric Jordan, Kyle Cherry, Laura Taylor]"
1,1,Alexander Moses,Nurse,07:24:26,02:58:01,"[Nicolas Nichols, Nancy Morgan, Jamie Villarre..."
2,2,Peter Smith,Admin,15:53:11,00:38:16,
3,3,William Banks,Nurse,02:47:10,13:54:18,"[Andrew Oliver, Edward Blair, Kyle Cherry]"
4,4,Valerie Reyes,Physician,14:30:48,14:09:21,"[Monica Phillips, Mary Robinson, Samantha Walk..."


In [4]:
hospital_inventory.head()

Unnamed: 0,category,item_name,quantity,price_per_unit($),total_value($),expiry_date
0,Medical Equipment,ECG Monitor,332,1616.28,536604.96,2026-12-02
1,Medical Equipment,ECG Monitor,139,1022.39,142112.21,2025-01-17
2,PPE,Face Masks,383,2045.97,783606.51,2025-10-03
3,Diagnostic Tools,Blood Pressure Monitor,479,3156.72,1512068.88,2024-11-18
4,Cleaning Supplies,Wipes,314,3334.43,1047011.02,2027-03-25


In [5]:
staffs.head()

Unnamed: 0,staff_id,staff_name,role,shift_start,shift_end,assigned_patients
0,0,Jesse Bright,Physician,08:53:50,12:44:36,"[Eric Jordan, Kyle Cherry, Laura Taylor]"
1,1,Alexander Moses,Nurse,07:24:26,02:58:01,"[Nicolas Nichols, Nancy Morgan, Jamie Villarre..."
2,2,Peter Smith,Admin,15:53:11,00:38:16,
3,3,William Banks,Nurse,02:47:10,13:54:18,"[Andrew Oliver, Edward Blair, Kyle Cherry]"
4,4,Valerie Reyes,Physician,14:30:48,14:09:21,"[Monica Phillips, Mary Robinson, Samantha Walk..."


In [6]:
patients.head()

Unnamed: 0,patient_id,name,age,gender,insurance,language,maritalStatus,race
0,0,Nicolas Nichols,88,M,Medicare,ENGLISH,WIDOWED,UNKNOWN
1,1,Monica Phillips,73,F,Medicaid,ENGLISH,DIVORCED,UNABLE TO OBTAIN
2,2,Brian Carrillo,44,M,Other,ENGLISH,MARRIED,WHITE - OTHER EUROPEAN
3,3,Mary Robinson,66,F,Other,ENGLISH,DIVORCED,WHITE - OTHER EUROPEAN
4,4,Jeffrey Chung,78,M,Medicare,ENGLISH,DIVORCED,HISPANIC/LATINO - CUBAN


In [7]:
admissions

Unnamed: 0,adm_id,patient_id,admit_time,discharge_time,type,location
0,0,0,1979-01-04 13:09:51.468943,NaT,URGENT,CLINIC REFERRAL
1,1,1,1995-08-02 14:35:45.809428,NaT,URGENT,PACU
2,2,2,1989-04-14 05:09:30.955822,NaT,AMBULATORY OBSERVATION,TRANSFER FROM SKILLED NURSING FACILITY
3,3,3,1981-01-25 02:02:29.045430,NaT,EW EMER.,EMERGENCY ROOM
4,4,4,1993-08-04 13:02:13.666809,NaT,AMBULATORY OBSERVATION,CLINIC REFERRAL
5,5,5,1974-11-06 01:33:57.607265,NaT,URGENT,TRANSFER FROM HOSPITAL
6,6,6,2008-03-22 16:56:39.534661,NaT,EW EMER.,CLINIC REFERRAL
7,7,7,2007-04-23 15:56:21.114056,NaT,AMBULATORY OBSERVATION,PHYSICIAN REFERRAL
8,8,8,1973-11-03 01:08:51.921077,NaT,DIRECT EMER.,PACU
9,9,9,2012-08-02 01:01:29.235952,NaT,AMBULATORY OBSERVATION,INFORMATION NOT AVAILABLE


In [8]:
admissions.iloc[0,0]

0

In [9]:
omr.head()

Unnamed: 0,omr_id,adm_id,weight,bp_systolic,bp_diastolic
0,0,0,113,118,70
1,1,1,109,135,92
2,2,2,65,107,72
3,3,3,114,145,83
4,4,4,98,88,75


In [10]:
prescriptions.head()

Unnamed: 0,prescription_id,adm_id,drug
0,0,0,Nephrocaps
1,1,1,Ondansetron
2,2,2,Multivitamins W/minerals
3,3,3,Midodrine
4,4,4,Multivitamins W/minerals


In [11]:
diagnoses.head()

Unnamed: 0,diagnosis_id,adm_id,diagnosis
0,0,0,Streptococcus infection in conditions classifi...
1,1,1,"Dysphagia, oropharyngeal phase"
2,2,2,Solitary pulmonary nodule
3,3,3,Solitary pulmonary nodule
4,4,4,Acute respiratory failure


In [12]:
rooms

Unnamed: 0,room_id,capacity
0,0,4
1,1,4
2,2,2
3,3,3
4,4,3
5,5,3
6,6,2
7,7,3
8,8,1
9,9,1


In [13]:
beds

Unnamed: 0,bed_id,room,adm_id
0,0,0,25.0
1,1,0,29.0
2,2,1,15.0
3,3,0,
4,4,2,17.0
5,5,2,
6,6,3,7.0
7,7,3,
8,8,4,10.0
9,9,0,27.0


In [14]:
with open('app/data/staff.csv', 'w') as f:
    staffs.to_csv(f, index=False)
with open('app/data/patients.csv', 'w') as f:
    patients.to_csv(f, index=False)
with open('app/data/admissions.csv', 'w') as f:
    admissions.to_csv(f, index=False)
with open('app/data/omr.csv', 'w') as f:
    omr.to_csv(f, index=False)
with open('app/data/prescriptions.csv', 'w') as f:
    prescriptions.to_csv(f, index=False)
with open("app/data/diagnoses.csv", "w") as f:
    diagnoses.to_csv(f, index=False)
with open("app/data/rooms.csv", "w") as f:
    rooms.to_csv(f, index=False)
with open("app/data/beds.csv", "w") as f:
    beds.to_csv(f, index=False)

In [15]:
with open("app/data/admission_types.pkl", "wb") as f:
    pickle.dump(admission_types, f)
with open("app/data/admission_locations.pkl", "wb") as f:
    pickle.dump(admission_locations, f)
with open("app/data/insurance_types.pkl", "wb") as f:
    pickle.dump(insurance_types, f)
with open("app/data/languages.pkl", "wb") as f:
    pickle.dump(languages, f)
with open("app/data/marital_statuses.pkl", "wb") as f:
    pickle.dump(marital_statuses, f)
with open("app/data/races.pkl", "wb") as f:
    pickle.dump(races, f)
with open("app/data/drugs.pkl", "wb") as f:
    pickle.dump(drugs, f)
with open("app/data/diagnoses_types.pkl", "wb") as f:
    pickle.dump(diagnoses_types, f)

In [16]:
patient_db = pd.merge(patients, admissions, how="inner", on="patient_id")
patient_db = pd.merge(patient_db, omr, how="inner", on="adm_id")
patient_db = pd.merge(patient_db, prescriptions, how="inner", on="adm_id")
patient_db = pd.merge(patient_db, diagnoses, how="inner", on="adm_id")

df_helper.drop_columns(
    patient_db,[
        'language',
        'admit_time', 
        'discharge_time',
        'omr_id',
        'prescription_id', 
        'diagnosis_id',
])
patient_db

Unnamed: 0,patient_id,name,age,gender,insurance,maritalStatus,race,adm_id,type,location,weight,bp_systolic,bp_diastolic,drug,diagnosis
0,0,Nicolas Nichols,88,M,Medicare,WIDOWED,UNKNOWN,0,URGENT,CLINIC REFERRAL,113,118,70,Nephrocaps,Streptococcus infection in conditions classifi...
1,1,Monica Phillips,73,F,Medicaid,DIVORCED,UNABLE TO OBTAIN,1,URGENT,PACU,109,135,92,Ondansetron,"Dysphagia, oropharyngeal phase"
2,2,Brian Carrillo,44,M,Other,MARRIED,WHITE - OTHER EUROPEAN,2,AMBULATORY OBSERVATION,TRANSFER FROM SKILLED NURSING FACILITY,65,107,72,Multivitamins W/minerals,Solitary pulmonary nodule
3,3,Mary Robinson,66,F,Other,DIVORCED,WHITE - OTHER EUROPEAN,3,EW EMER.,EMERGENCY ROOM,114,145,83,Midodrine,Solitary pulmonary nodule
4,4,Jeffrey Chung,78,M,Medicare,DIVORCED,HISPANIC/LATINO - CUBAN,4,AMBULATORY OBSERVATION,CLINIC REFERRAL,98,88,75,Multivitamins W/minerals,Acute respiratory failure
5,5,Jacqueline Moore,52,F,Medicaid,DIVORCED,BLACK/AFRICAN AMERICAN,5,URGENT,TRANSFER FROM HOSPITAL,108,161,84,Ondansetron,"Urinary tract infection, site not specified"
6,6,Eric Jordan,25,M,Other,WIDOWED,WHITE - BRAZILIAN,6,EW EMER.,CLINIC REFERRAL,88,191,61,Nicotine Polacrilex,Solitary pulmonary nodule
7,7,Sandy Weaver,86,F,Other,SINGLE,PATIENT DECLINED TO ANSWER,7,AMBULATORY OBSERVATION,PHYSICIAN REFERRAL,117,161,82,OxyCODONE (Immediate Release),Esophageal reflux
8,8,Tammy Watson,89,F,Other,WIDOWED,HISPANIC OR LATINO,8,DIRECT EMER.,PACU,111,108,91,Nephrocaps,"Asthma, unspecified type, unspecified"
9,9,Nancy Morgan,28,F,Medicare,MARRIED,PORTUGUESE,9,AMBULATORY OBSERVATION,INFORMATION NOT AVAILABLE,108,97,106,Sodium Chloride,Solitary pulmonary nodule


In [17]:
with open("app/data/patient_db.csv", "w") as f:
    patient_db.to_csv(f, index=False)
with open("app/data/hospital_inventory.csv", "w") as f:
    hospital_inventory.to_csv(f, index=False)