In [32]:
import pandas as pd
from sqlalchemy import create_engine

### AHA Data

In [46]:
# core + capabilities
keep = [
    'ID',        # hospital_id
    'MNAME',     # name
    'MLOCADDR',  # address
    'MLOCCITY',  # city
    'MSTATE',    # state
    'MLOCZIP',   # zip
    'LAT',       # latitude
    'LONG',      # longitude
    'EMDEPHOS',  # ED present?
    'TRAUMHOS',  # trauma center?
    'TRAUML90',  # trauma level
    'HOSPBD',    # total beds
    'YEAR',      # year 
    # new capability columns:
    'CTSCNHOS',  # CT scanners
    'MSCTHOS',   # multislice CT <64
    'MSCTGHOS',  # multislice CT ≥64
    'MRIHOS',    # MRI units
    'PETCTHOS',  # PET/CT units
    'SPECTHOS',  # SPECT units
    'ULTSNHOS',  # ultrasound units
    'BRNBD',     # burn care beds
    'MSICBD',    # med/surg ICU beds
    'NICBD',     # neonatal ICU beds
    'PEDICBD'    # pediatric ICU beds
]

In [47]:
# Load the CSV file into a pandas DataFrame
aha_df = pd.read_csv('data/raw/albert_aha.csv', usecols=keep, encoding='latin1')

# Only keep year 2023
aha_df = aha_df[aha_df['YEAR'] == 2023]
display(aha_df.head())

# display 10 random rows
display(aha_df.sample(10))

print(aha_df.shape)

  aha_df = pd.read_csv('data/raw/albert_aha.csv', usecols=keep, encoding='latin1')


Unnamed: 0,ID,MNAME,YEAR,MLOCADDR,MLOCCITY,MSICBD,NICBD,PEDICBD,BRNBD,HOSPBD,...,MRIHOS,MSCTHOS,MSCTGHOS,PETCTHOS,SPECTHOS,ULTSNHOS,MSTATE,LAT,LONG,MLOCZIP
2,6030010,Kwajalein Hospital,2023,U S Army Kwajalein Atoll,Kwajalein Atoll,,,,,14.0,...,,,,,,,MH,8.7167,167.7333,96555
5,6040001,Wilma N. Vazquez Medical Center,2023,"KM 39 1/2 Road 2, Call Box 7001",Vega Baja,0.0,0.0,0.0,0.0,68.0,...,0.0,0.0,0.0,0.0,0.0,0.0,PR,18.4533,-66.4069,00694
8,6040002,Hospital San Francisco,2023,371 Avenida De Diego,San Juan,,,,,133.0,...,,,,,,,PR,18.3986,-66.0389,00923-1711
11,6040004,HIMA San Pablo Caguas,2023,Avenida Munoz Marin,Caguas,,,,,415.0,...,,,,,,,PR,18.2381,-66.0372,00726
14,6040005,Hospital Buen Samaritano,2023,Carr #2 Km 141-1 Ave Severiano Cuevas,Aguadilla,,,,,145.0,...,,,,,,,PR,18.4553,-67.1319,00603


Unnamed: 0,ID,MNAME,YEAR,MLOCADDR,MLOCCITY,MSICBD,NICBD,PEDICBD,BRNBD,HOSPBD,...,MRIHOS,MSCTHOS,MSCTGHOS,PETCTHOS,SPECTHOS,ULTSNHOS,MSTATE,LAT,LONG,MLOCZIP
7973,6451900,Tomah VA Medical Center,2023,500 East Veterans Street,Tomah,,,,,71.0,...,,,,,,,WI,44.0036,-90.4934,54660-3105
12341,6720192,Avala,2023,67252 Industry Lane,Covington,,,,,21.0,...,,,,,,,LA,30.4058,-90.083,70433-8704
5977,6412345,CMH Regional Health System,2023,610 West Main Street,Wilmington,,,,,165.0,...,,,,,,,OH,39.4436,-83.84,45177-2125
17844,6931758,Greater Los Angeles HCS,2023,11301 Wilshire Boulevard,Los Angeles,,,,,422.0,...,,,,,,,CA,34.0527,-118.453,90073-1003
5583,6410505,The Woods at Parkside,2023,349 Olde Ridenour Road,Gahanna,,,,,50.0,...,,,,,,,OH,40.0271,-82.8818,43230-2528
13390,6740152,Sunrise Canyon Hospital,2023,1950 Aspen Ave,Lubbock,0.0,0.0,0.0,0.0,30.0,...,0.0,0.0,0.0,0.0,0.0,0.0,TX,33.5734,-101.808,79404-1211
17081,6920418,Legacy Good Samaritan Medical Center,2023,1015 NW 22nd Avenue,Portland,28.0,0.0,0.0,0.0,236.0,...,1.0,1.0,1.0,1.0,1.0,1.0,OR,45.5303,-122.698,97210-3099
2875,6340023,Spotsylvania Regional Medical Center,2023,4600 Spotsylvania Parkway,Fredericksburg,12.0,4.0,0.0,0.0,137.0,...,1.0,0.0,1.0,0.0,0.0,1.0,VA,38.2182,-77.4961,22408-7762
6159,6420225,Franciscan Health Crown Point,2023,1201 South Main Street,Crown Point,,,,,254.0,...,,,,,,,IN,41.3958,-87.3676,46307-8483
5,6040001,Wilma N. Vazquez Medical Center,2023,"KM 39 1/2 Road 2, Call Box 7001",Vega Baja,0.0,0.0,0.0,0.0,68.0,...,0.0,0.0,0.0,0.0,0.0,0.0,PR,18.4533,-66.4069,00694


(6166, 24)


In [48]:
# get number of row with no NaN values
print(aha_df.isna().sum())

ID             0
MNAME          0
YEAR           0
MLOCADDR       0
MLOCCITY       0
MSICBD      2430
NICBD       2430
PEDICBD     2430
BRNBD       2430
HOSPBD         0
EMDEPHOS    2430
TRAUMHOS    2430
TRAUML90    4630
CTSCNHOS    2430
MRIHOS      2430
MSCTHOS     2430
MSCTGHOS    2430
PETCTHOS    2430
SPECTHOS    2430
ULTSNHOS    2430
MSTATE         0
LAT            0
LONG           0
MLOCZIP        0
dtype: int64


In [None]:
# push to MySQL
engine = create_engine("mysql+pymysql://root:pass@localhost:3306/hospitals")
aha_df.to_sql('AHA_Hospitals', engine, if_exists='replace', index=False)