In [1]:
import pandas as pd
from sqlalchemy import create_engine

### AHA Data

In [2]:
# core + capabilities
keep = [
    'ID',        # hospital_id
    'MNAME',     # name
    'MLOCADDR',  # address
    'MLOCCITY',  # city
    'MSTATE',    # state
    'MLOCZIP',   # zip
    'LAT',       # latitude
    'LONG',      # longitude
    'EMDEPHOS',  # ED present?
    'TRAUMHOS',  # trauma center?
    'TRAUML90',  # trauma level
    'HOSPBD',    # total beds
    'YEAR',      # year 
    # new capability columns:
    'CTSCNHOS',  # CT scanners
    'MSCTHOS',   # multislice CT <64
    'MSCTGHOS',  # multislice CT ≥64
    'MRIHOS',    # MRI units
    'PETCTHOS',  # PET/CT units
    'SPECTHOS',  # SPECT units
    'ULTSNHOS',  # ultrasound units
    'BRNBD',     # burn care beds
    'MSICBD',    # med/surg ICU beds
    'NICBD',     # neonatal ICU beds
    'PEDICBD'    # pediatric ICU beds
]

In [3]:
# Load the CSV file into a pandas DataFrame
aha_df = pd.read_csv('data/raw/albert_aha.csv', usecols=keep, encoding='latin1')

# Only keep year 2023
aha_df = aha_df[aha_df['YEAR'] == 2023]

# Rename columns to best-practice snake_case
aha_df = aha_df.rename(columns={
    'ID':                 'hospital_id',
    'MNAME':              'name',
    'MLOCADDR':           'address',
    'MLOCCITY':           'city',
    'MSTATE':             'state',
    'MLOCZIP':            'zip_code',
    'LAT':                'latitude',
    'LONG':               'longitude',
    'EMDEPHOS':           'has_ed',
    'TRAUMHOS':           'is_trauma_center',
    'TRAUML90':           'trauma_level',
    'HOSPBD':             'total_beds',
    'YEAR':               'year',
    'CTSCNHOS':           'ct_scanners',
    'MSCTHOS':            'ct_multislice_lt64',
    'MSCTGHOS':           'ct_multislice_gte64',
    'MRIHOS':             'mri_units',
    'PETCTHOS':           'pet_ct_units',
    'SPECTHOS':           'spect_units',
    'ULTSNHOS':           'ultrasound_units',
    'BRNBD':              'burn_care_beds',
    'MSICBD':             'icu_med_surg_beds',
    'NICBD':              'icu_neonatal_beds',
    'PEDICBD':            'icu_pediatric_beds'
})

display(aha_df.head())

# display 10 random rows
display(aha_df.sample(10))

print(aha_df.shape)

  aha_df = pd.read_csv('data/raw/albert_aha.csv', usecols=keep, encoding='latin1')


Unnamed: 0,hospital_id,name,year,address,city,icu_med_surg_beds,icu_neonatal_beds,icu_pediatric_beds,burn_care_beds,total_beds,...,mri_units,ct_multislice_lt64,ct_multislice_gte64,pet_ct_units,spect_units,ultrasound_units,state,latitude,longitude,zip_code
2,6030010,Kwajalein Hospital,2023,U S Army Kwajalein Atoll,Kwajalein Atoll,,,,,14.0,...,,,,,,,MH,8.7167,167.7333,96555
5,6040001,Wilma N. Vazquez Medical Center,2023,"KM 39 1/2 Road 2, Call Box 7001",Vega Baja,0.0,0.0,0.0,0.0,68.0,...,0.0,0.0,0.0,0.0,0.0,0.0,PR,18.4533,-66.4069,00694
8,6040002,Hospital San Francisco,2023,371 Avenida De Diego,San Juan,,,,,133.0,...,,,,,,,PR,18.3986,-66.0389,00923-1711
11,6040004,HIMA San Pablo Caguas,2023,Avenida Munoz Marin,Caguas,,,,,415.0,...,,,,,,,PR,18.2381,-66.0372,00726
14,6040005,Hospital Buen Samaritano,2023,Carr #2 Km 141-1 Ave Severiano Cuevas,Aguadilla,,,,,145.0,...,,,,,,,PR,18.4553,-67.1319,00603


Unnamed: 0,hospital_id,name,year,address,city,icu_med_surg_beds,icu_neonatal_beds,icu_pediatric_beds,burn_care_beds,total_beds,...,mri_units,ct_multislice_lt64,ct_multislice_gte64,pet_ct_units,spect_units,ultrasound_units,state,latitude,longitude,zip_code
17535,6930590,City of Hope's Helford Clinical Research Hospital,2023,1500 East Duarte Road,Duarte,17.0,0.0,1.0,0.0,232.0,...,1.0,0.0,1.0,1.0,1.0,1.0,CA,34.1288,-117.973,91010-3012
14861,6743360,Rolling Plains Memorial Hospital,2023,200 East Arizona Street,Sweetwater,6.0,0.0,0.0,0.0,57.0,...,0.0,1.0,0.0,0.0,0.0,1.0,TX,32.4537,-100.398,79556-7199
16085,6860260,U. S. Public Health Service Phoenix Indian Med...,2023,4212 North 16th Street,Phoenix,,,,,127.0,...,,,,,,,AZ,33.4969,-112.049,85016-5389
4894,6390380,Mayo Clinic Hospital in Florida,2023,4500 San Pablo Road South,Jacksonville,28.0,0.0,0.0,0.0,307.0,...,1.0,1.0,1.0,1.0,1.0,1.0,FL,30.2628,-81.4415,32224-1865
10622,6631160,Mercy Hospital South,2023,10010 Kennerly Road,Saint Louis,16.0,0.0,0.0,0.0,536.0,...,1.0,1.0,1.0,1.0,1.0,1.0,MO,38.5071,-90.3802,63128-2106
5422,6410054,Kindred Hospital-Dayton,2023,707 South Edwin C Moses Boulevard,Dayton,,,,,67.0,...,,,,,,,OH,39.7466,-84.1979,45417-3462
15644,6840750,Prowers Medical Center,2023,401 Kendall Drive,Lamar,,,,,25.0,...,,,,,,,CO,38.0698,-102.61,81052-3993
16958,6920005,Asante Three Rivers Medical Center,2023,500 SW Ramsey Avenue,Grants Pass,12.0,0.0,0.0,0.0,123.0,...,1.0,1.0,1.0,0.0,1.0,1.0,OR,42.4212,-123.343,97527-5554
12278,6720133,Specialty Rehabilitation Hospital of Coushatta,2023,1110 Ringgold Avenue Suite B,Coushatta,,,,,12.0,...,,,,,,,LA,32.0288,-93.3406,71019-9073
9350,6540390,St. Dominic-Jackson Memorial Hospital,2023,969 Lakeland Drive,Jackson,39.0,23.0,0.0,0.0,660.0,...,1.0,1.0,1.0,1.0,1.0,1.0,MS,32.3322,-90.1648,39216-4606


(6166, 24)


In [48]:
# get number of row with no NaN values
print(aha_df.isna().sum())

ID             0
MNAME          0
YEAR           0
MLOCADDR       0
MLOCCITY       0
MSICBD      2430
NICBD       2430
PEDICBD     2430
BRNBD       2430
HOSPBD         0
EMDEPHOS    2430
TRAUMHOS    2430
TRAUML90    4630
CTSCNHOS    2430
MRIHOS      2430
MSCTHOS     2430
MSCTGHOS    2430
PETCTHOS    2430
SPECTHOS    2430
ULTSNHOS    2430
MSTATE         0
LAT            0
LONG           0
MLOCZIP        0
dtype: int64


In [7]:
# display the rows where city is pasadena
display(aha_df[aha_df['city'] == 'Pasadena'])

Unnamed: 0,hospital_id,name,year,address,city,icu_med_surg_beds,icu_neonatal_beds,icu_pediatric_beds,burn_care_beds,total_beds,...,mri_units,ct_multislice_lt64,ct_multislice_gte64,pet_ct_units,spect_units,ultrasound_units,state,latitude,longitude,zip_code
13435,6740196,Surgery Specialty Hospitals of America,2023,4301B Vista Road,Pasadena,0.0,0.0,0.0,0.0,10.0,...,1.0,0.0,0.0,0.0,0.0,1.0,TX,29.6591,-95.1779,77504
13735,6740402,St. Luke's Health - Patients Medical Center,2023,4600 East Sam Houston Parkway South,Pasadena,8.0,0.0,0.0,0.0,61.0,...,1.0,1.0,1.0,0.0,0.0,1.0,TX,29.6414,-95.1621,77505-3948
14069,6741002,Oceans Behavioral Hospital of Pasadena,2023,4001 Preston Drive,Pasadena,,,,,22.0,...,,,,,,,TX,29.6509,-95.17,77505-2069
14736,6742778,HCA Houston Healthcare Southeast,2023,4000 Spencer Highway,Pasadena,14.0,14.0,0.0,0.0,278.0,...,1.0,1.0,1.0,0.0,1.0,1.0,TX,29.6612,-95.1838,77504-1202
17972,6932350,Huntington Health,2023,100 West California Boulevard,Pasadena,24.0,27.0,0.0,0.0,366.0,...,1.0,1.0,1.0,0.0,1.0,1.0,CA,34.1336,-118.153,91105-3097
17975,6932360,Las Encinas Hospital,2023,2900 East Del Mar Boulevard,Pasadena,,,,,118.0,...,,,,,,,CA,34.1417,-118.091,91107-4399


In [None]:
# push to MySQL
engine = create_engine("mysql+pymysql://root:pass@localhost:3306/hospitals")
aha_df.to_sql('AHA_Hospitals', engine, if_exists='replace', index=False)