In [4]:
import pandas as pd
from sqlalchemy import create_engine

### AHA Data

In [5]:
# core + capabilities
keep = [
    'ID',        # hospital_id
    'MNAME',     # name
    'MLOCADDR',  # address
    'MLOCCITY',  # city
    'MSTATE',    # state
    'MLOCZIP',   # zip
    'LAT',       # latitude
    'LONG',      # longitude
    'EMDEPHOS',  # ED present?
    'TRAUMHOS',  # trauma center?
    'TRAUML90',  # trauma level
    'HOSPBD',    # total beds
    'YEAR',      # year 
    # new capability columns:
    'CTSCNHOS',  # CT scanners
    'MRIHOS',    # MRI units
    'PETCTHOS',  # PET/CT units
    'ULTSNHOS',  # ultrasound units
    'BRNBD',     # burn care beds
    'MSICBD',    # med/surg ICU beds
    'NICBD',     # neonatal ICU beds
    'PEDICBD'    # pediatric ICU beds
]

In [6]:
# Load the CSV file into a pandas DataFrame
aha_df = pd.read_csv('data/raw/albert_aha.csv', usecols=keep, encoding='latin1')

# Only keep year 2023
aha_df = aha_df[aha_df['YEAR'] == 2023]

# Rename columns to best-practice snake_case
aha_df = aha_df.rename(columns={
    'ID':                 'hospital_id',
    'MNAME':              'name',
    'MLOCADDR':           'address',
    'MLOCCITY':           'city',
    'MSTATE':             'state',
    'MLOCZIP':            'zip_code',
    'LAT':                'latitude',
    'LONG':               'longitude',
    'EMDEPHOS':           'has_ed',
    'TRAUMHOS':           'is_trauma_center',
    'TRAUML90':           'trauma_level',
    'HOSPBD':             'total_beds',
    'YEAR':               'year',
    'CTSCNHOS':           'has_ct',
    'MRIHOS':             'has_mri',
    'PETCTHOS':           'has_pet_ct',
    'ULTSNHOS':           'has_ultrasound',
    'BRNBD':              'burn_care_beds',
    'MSICBD':             'icu_med_surg_beds',
    'NICBD':              'icu_neonatal_beds',
    'PEDICBD':            'icu_pediatric_beds'
})

display(aha_df.head())

# display 10 random rows
display(aha_df.sample(10))

print(aha_df.shape)

  aha_df = pd.read_csv('data/raw/albert_aha.csv', usecols=keep, encoding='latin1')


Unnamed: 0,hospital_id,name,year,address,city,icu_med_surg_beds,icu_neonatal_beds,icu_pediatric_beds,burn_care_beds,total_beds,...,is_trauma_center,trauma_level,has_ct,has_mri,has_pet_ct,has_ultrasound,state,latitude,longitude,zip_code
2,6030010,Kwajalein Hospital,2023,U S Army Kwajalein Atoll,Kwajalein Atoll,,,,,14.0,...,,,,,,,MH,8.7167,167.7333,96555
5,6040001,Wilma N. Vazquez Medical Center,2023,"KM 39 1/2 Road 2, Call Box 7001",Vega Baja,0.0,0.0,0.0,0.0,68.0,...,0.0,,0.0,0.0,0.0,0.0,PR,18.4533,-66.4069,00694
8,6040002,Hospital San Francisco,2023,371 Avenida De Diego,San Juan,,,,,133.0,...,,,,,,,PR,18.3986,-66.0389,00923-1711
11,6040004,HIMA San Pablo Caguas,2023,Avenida Munoz Marin,Caguas,,,,,415.0,...,,,,,,,PR,18.2381,-66.0372,00726
14,6040005,Hospital Buen Samaritano,2023,Carr #2 Km 141-1 Ave Severiano Cuevas,Aguadilla,,,,,145.0,...,,,,,,,PR,18.4553,-67.1319,00603


Unnamed: 0,hospital_id,name,year,address,city,icu_med_surg_beds,icu_neonatal_beds,icu_pediatric_beds,burn_care_beds,total_beds,...,is_trauma_center,trauma_level,has_ct,has_mri,has_pet_ct,has_ultrasound,state,latitude,longitude,zip_code
13040,6730705,Ascension St. John Nowata,2023,237 South Locust Street,Nowata,,,,,14.0,...,,,,,,,OK,36.6965,-95.6301,74048-3660
4873,6390298,South Florida State Hospital,2023,800 East Cypress Drive,Hollywood,,,,,355.0,...,,,,,,,FL,26.0014,-80.2498,33025-4543
5234,6391103,AdventHealth Tampa,2023,3100 East Fletcher Avenue,Tampa,60.0,28.0,5.0,0.0,626.0,...,0.0,,1.0,1.0,1.0,1.0,FL,28.0697,-82.422,33613-4688
14342,6741219,Carrus Behavioral Hospital,2023,"1724 West U.S. Highway 82, Suite 200",Sherman,0.0,0.0,0.0,0.0,28.0,...,0.0,,0.0,0.0,0.0,0.0,TX,33.6521,-96.7003,75092-7037
13272,6740077,Premier Specialty Hospital of El Paso,2023,"2311 North Oregon Street, 5th Floor",El Paso,,,,,32.0,...,,,,,,,TX,31.7739,-106.502,79902-3216
12081,6710520,Jefferson Regional,2023,1600 West 40th Avenue,Pine Bluff,19.0,8.0,0.0,0.0,258.0,...,1.0,3.0,1.0,1.0,1.0,1.0,AR,34.1881,-92.0177,71603-6301
2133,6230590,Doylestown Health,2023,595 West State Street,Doylestown,32.0,0.0,0.0,0.0,247.0,...,0.0,,1.0,1.0,0.0,1.0,PA,40.3056,-75.1465,18901-2597
2948,6340233,Sheltering Arms Institute,2023,"13700 St. Francis Boulevard, Suite 400",Midlothian,,,,,28.0,...,,,,,,,VA,37.4666,-77.6586,23114-3222
14089,6741018,Methodist Dallas Medical Center,2023,1441 North Beckley Avenue,Dallas,36.0,50.0,0.0,0.0,375.0,...,1.0,1.0,1.0,1.0,0.0,1.0,TX,32.7604,-96.8258,75203-1201
295,6110390,MaineHealth Stephens Hospital,2023,181 Main Street,Norway,0.0,0.0,0.0,0.0,25.0,...,0.0,,1.0,1.0,0.0,1.0,ME,44.2099,-70.5323,04268-5664


(6166, 21)


In [7]:
# get number of row with no NaN values
print(aha_df.isna().sum())

hospital_id              0
name                     0
year                     0
address                  0
city                     0
icu_med_surg_beds     2430
icu_neonatal_beds     2430
icu_pediatric_beds    2430
burn_care_beds        2430
total_beds               0
has_ed                2430
is_trauma_center      2430
trauma_level          4630
has_ct                2430
has_mri               2430
has_pet_ct            2430
has_ultrasound        2430
state                    0
latitude                 0
longitude                0
zip_code                 0
dtype: int64


In [8]:
print(aha_df.columns)

Index(['hospital_id', 'name', 'year', 'address', 'city', 'icu_med_surg_beds',
       'icu_neonatal_beds', 'icu_pediatric_beds', 'burn_care_beds',
       'total_beds', 'has_ed', 'is_trauma_center', 'trauma_level', 'has_ct',
       'has_mri', 'has_pet_ct', 'has_ultrasound', 'state', 'latitude',
       'longitude', 'zip_code'],
      dtype='object')


In [9]:
# display the rows where city is pasadena
display(aha_df[aha_df['city'] == 'Pasadena'])

Unnamed: 0,hospital_id,name,year,address,city,icu_med_surg_beds,icu_neonatal_beds,icu_pediatric_beds,burn_care_beds,total_beds,...,is_trauma_center,trauma_level,has_ct,has_mri,has_pet_ct,has_ultrasound,state,latitude,longitude,zip_code
13435,6740196,Surgery Specialty Hospitals of America,2023,4301B Vista Road,Pasadena,0.0,0.0,0.0,0.0,10.0,...,0.0,,1.0,1.0,0.0,1.0,TX,29.6591,-95.1779,77504
13735,6740402,St. Luke's Health - Patients Medical Center,2023,4600 East Sam Houston Parkway South,Pasadena,8.0,0.0,0.0,0.0,61.0,...,0.0,,1.0,1.0,0.0,1.0,TX,29.6414,-95.1621,77505-3948
14069,6741002,Oceans Behavioral Hospital of Pasadena,2023,4001 Preston Drive,Pasadena,,,,,22.0,...,,,,,,,TX,29.6509,-95.17,77505-2069
14736,6742778,HCA Houston Healthcare Southeast,2023,4000 Spencer Highway,Pasadena,14.0,14.0,0.0,0.0,278.0,...,1.0,3.0,1.0,1.0,0.0,1.0,TX,29.6612,-95.1838,77504-1202
17972,6932350,Huntington Health,2023,100 West California Boulevard,Pasadena,24.0,27.0,0.0,0.0,366.0,...,1.0,2.0,1.0,1.0,0.0,1.0,CA,34.1336,-118.153,91105-3097
17975,6932360,Las Encinas Hospital,2023,2900 East Del Mar Boulevard,Pasadena,,,,,118.0,...,,,,,,,CA,34.1417,-118.091,91107-4399


In [10]:
# Generating synthetic data for hospital resource load
import numpy as np

# 1. Ensure reproducibility
np.random.seed(42)

# 2. Specify which bed types to simulate load for
bed_cols = [
    'total_beds',
    'icu_med_surg_beds',
    'icu_neonatal_beds',
    'icu_pediatric_beds',
    'burn_care_beds'
]

# 3. Generate synthetic occupancy data
for col in bed_cols:
    # fill na with 0
    aha_df[col] = aha_df[col].fillna(0)
    # generate random percentage between 20% and 95%
    pct = np.clip(np.random.normal(loc=0.6, scale=0.15, size=len(aha_df)), 0.2, 0.95)
    # Create a new column with the calculated load
    load = (aha_df[col] * pct).round()
    aha_df[f'{col}_load'] = load.astype(int)

In [11]:
# show column where icu_neonatal_beds_load is greater than 0
display(aha_df[aha_df['icu_neonatal_beds_load'] > 0])

Unnamed: 0,hospital_id,name,year,address,city,icu_med_surg_beds,icu_neonatal_beds,icu_pediatric_beds,burn_care_beds,total_beds,...,has_ultrasound,state,latitude,longitude,zip_code,total_beds_load,icu_med_surg_beds_load,icu_neonatal_beds_load,icu_pediatric_beds_load,burn_care_beds_load
32,6040012,Hospital Menonita Ponce,2023,506 Carr Road,Coto Laurel,9.0,4.0,0.0,0.0,135.0,...,1.0,PR,18.0536,-66.5629,00780,72,5,2,0,0
101,6040250,Hospital De Damas,2023,2213 Ponce Bypass,Ponce,14.0,7.0,0.0,0.0,201.0,...,1.0,PR,17.9970,-66.6180,00717,89,8,2,0,0
119,6040345,Hospital De La Concepcion,2023,"Carr 2, Km 173, Bo Cain Alto",San German,3.0,12.0,0.0,0.0,217.0,...,1.0,PR,18.0926,-67.0404,00683-3920,137,3,7,0,0
235,6110050,Northern Light Eastern Maine Medical Center,2023,489 State Street,Bangor,26.0,29.0,6.0,0.0,361.0,...,1.0,ME,44.8074,-68.7526,04401-6674,222,13,24,3,0
373,6120170,Dartmouth-Hitchcock Medical Center,2023,1 Medical Center Drive,Lebanon,38.0,30.0,8.0,0.0,438.0,...,1.0,NH,43.6760,-72.2730,03756-1000,301,21,22,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18407,6940010,Alaska Native Medical Center,2023,4315 Diplomacy Drive,Anchorage,18.0,12.0,0.0,0.0,189.0,...,0.0,AK,61.1827,-149.8010,99508-5926,84,10,8,0,0
18413,6940020,Providence Alaska Medical Center,2023,3200 Providence Drive,Anchorage,28.0,66.0,9.0,0.0,401.0,...,1.0,AK,61.1884,-149.8190,99508-4615,190,20,32,4,0
18428,6940058,Fairbanks Memorial Hospital,2023,1650 Cowles Street,Fairbanks,7.0,7.0,0.0,0.0,217.0,...,1.0,AK,64.8312,-147.7400,99701-5998,105,4,4,0,0
18524,6950330,Kaiser Permanente Medical Center,2023,3288 Moanalua Road,Honolulu,15.0,20.0,2.0,0.0,215.0,...,1.0,HI,21.3633,-157.9000,96819-1469,152,10,6,1,0


In [13]:
# push to MySQL
engine = create_engine("mysql+pymysql://root:pass@localhost:3306/hospitals")
aha_df.to_sql('aha_hospitals', engine, if_exists='replace', index=False)

6166