In [1]:
import pandas as pd
import numpy as np

import wget
import os, datetime
import shutil

import pycountry_convert as pc

In [2]:
# create dir
def createDir(currDir):
    """Create ``currDir`` as an empty directory, replacing any existing one.

    If ``currDir`` already exists as a directory it is removed together with
    its contents and then created again. Missing parent directories are
    created as well.

    Args:
        currDir: path of the directory to (re)create.

    Returns:
        None. The outcome is reported on stdout; ``OSError`` is caught and
        reported rather than raised, so callers never see an exception from
        the filesystem calls made here.
    """
    if os.path.isdir(currDir):
        try:
            # No ignore_errors here: with that flag set rmtree swallows every
            # error, so the failure message below could never be printed.
            shutil.rmtree(currDir)
        except OSError:
            print("Deletion of the directory %s failed" % currDir)

    try:
        # makedirs (unlike mkdir) also creates missing parent directories.
        # exist_ok is deliberately left off: if the deletion above failed and
        # the old directory is still there, that is reported as a failure
        # instead of being presented as a freshly created directory.
        os.makedirs(currDir)
    except OSError:
        print("Creation of the directory %s failed" % currDir)
    else:
        print("Successfully created the directory %s " % currDir)

In [3]:
# download files
def downloadFiles(urls, currDir):
    """Download every URL in ``urls`` via ``wget.download``.

    Args:
        urls: iterable of URLs, fetched one after another in iteration order.
        currDir: forwarded to ``wget.download`` as its second argument
            (the output location).

    Returns:
        None. A confirmation line is printed after the last download call
        has returned; any exception raised by ``wget.download`` propagates.
    """
    for source_url in urls:
        wget.download(source_url, currDir)
    print("Successfully downloaded files")

In [4]:
def transformFiles(currDir, fileName="Definitive_Healthcare:_USA_Hospital_Beds.csv"):
    """Load the hospital-beds CSV from ``currDir`` and return a trimmed table.

    Steps, in order:
      1. Read ``fileName`` from ``currDir``.
      2. Rename the coordinate columns ``X`` -> ``Long_`` and ``Y`` -> ``Lat_``.
      3. Drop the columns listed in ``unused_columns``.
      4. Replace every missing value with 0. This applies to all remaining
         columns, text columns included.
      5. Cast ``FIPS`` and the three bed-count columns to ``int``.
      6. Keep only rows whose ``Lat_`` is non-zero. Because of step 4 this
         also removes rows whose latitude was missing in the file.

    Args:
        currDir: directory containing the downloaded CSV.
        fileName: name of the CSV inside ``currDir``; defaults to the name
            this function previously had hard-coded.

    Returns:
        pandas.DataFrame with the transformed rows. Its shape is also
        printed to stdout.

    Raises:
        Whatever ``pd.read_csv`` raises for a missing/unreadable file, and
        ``KeyError`` from ``drop``/``astype`` if an expected column is absent.
    """
    unused_columns = ['HOSPITAL_TYPE', 'HQ_ADDRESS', 'HQ_ADDRESS1',
                      'HQ_CITY', 'HQ_STATE', 'HQ_ZIP_CODE',
                      'Potential_Increase_In_Bed_Capac', 'STATE_FIPS',
                      'CNTY_FIPS', 'AVG_VENTILATOR_USAGE', 'ADULT_ICU_BEDS',
                      'PEDI_ICU_BEDS', 'BED_UTILIZATION']
    integer_columns = ['FIPS', 'NUM_LICENSED_BEDS', 'NUM_STAFFED_BEDS', 'NUM_ICU_BEDS']

    # Datasets loaded to DataFrame
    df_hospitals = (
        pd.read_csv(os.path.join(currDir, fileName))
        .rename(columns={"X": "Long_", "Y": "Lat_"})
        .drop(columns=unused_columns)
        # Replace Null values. fillna is the direct way to do this; the
        # earlier replace(np.nan, 0, regex=True) carried a regex flag that has
        # no meaning for a NaN target.
        .fillna(0)
        # The fill must come first: a column that still holds NaN cannot be
        # cast to a plain integer dtype.
        .astype({column: int for column in integer_columns})
    )

    # Rows with a zero latitude (including formerly-missing ones) are discarded.
    df_hospitals = df_hospitals[df_hospitals['Lat_'] != 0]
    print("Table Shape: ", df_hospitals.shape)

    return df_hospitals

In [5]:
# Save to csv file
def saveFiletoCSV(usa_full_table, currDir):
    """Write ``usa_full_table`` to ``hospital_usa_county_wise.csv`` in ``currDir``.

    Args:
        usa_full_table: DataFrame to persist; its index is not written.
        currDir: directory that receives the CSV file.

    Returns:
        None. Prints the target directory once the file has been written.
    """
    destination = currDir + '/hospital_usa_county_wise.csv'
    usa_full_table.to_csv(destination, index=False)
    print("File Saved at %s" % currDir)

In [6]:
# Working directory for this dataset. The path is relative, so it resolves
# against the directory the notebook kernel is running in.
currDir = "../../DataStore/Hospital-data-US"

# URLs of the files to download.
# Dataset landing page:
# https://coronavirus-resources.esri.com/datasets/1044bb19da8d4dbfb6a96eb1b4ebf629_0/data?geometry=10.019%2C-16.820%2C-34.981%2C72.123
urls = ['https://opendata.arcgis.com/datasets/1044bb19da8d4dbfb6a96eb1b4ebf629_0.csv']

# Run the pipeline in order: recreate the directory (createDir removes an
# existing one first), download the raw CSV into it, transform it, and write
# the result back into the same directory.
createDir(currDir)
downloadFiles(urls, currDir)
usa_hospitals = transformFiles(currDir)
saveFiletoCSV(usa_hospitals, currDir)

Successfully created the directory ../../DataStore/Hospital-data-US 
Successfully downloaded files
Table Shape:  (6605, 10)
File Saved at ../../DataStore/Hospital-data-US


In [7]:
# Column labels of the table returned by transformFiles.
usa_hospitals.columns

Index(['Long_', 'Lat_', 'FID', 'HOSPITAL_NAME', 'COUNTY_NAME', 'STATE_NAME',
       'FIPS', 'NUM_LICENSED_BEDS', 'NUM_STAFFED_BEDS', 'NUM_ICU_BEDS'],
      dtype='object')

In [8]:
# Preview the first rows of the transformed table.
usa_hospitals.head()

Unnamed: 0,Long_,Lat_,FID,HOSPITAL_NAME,COUNTY_NAME,STATE_NAME,FIPS,NUM_LICENSED_BEDS,NUM_STAFFED_BEDS,NUM_ICU_BEDS
0,-112.066157,33.495498,1,Phoenix VA Health Care System (AKA Carl T Hayd...,Maricopa,Arizona,4013,62,62,0
1,-110.965885,32.181263,2,Southern Arizona VA Health Care System,Pima,Arizona,4019,295,295,2
2,-119.779742,36.773323,3,VA Central California Health Care System,Fresno,California,6019,54,54,2
3,-72.95761,41.2844,4,VA Connecticut Healthcare System - West Haven ...,New Haven,Connecticut,9009,216,216,1
4,-75.606533,39.740206,5,Wilmington VA Medical Center,New Castle,Delaware,10003,62,62,0


In [9]:
# Per-column dtypes of the transformed table.
usa_hospitals.dtypes

Long_                float64
Lat_                 float64
FID                    int64
HOSPITAL_NAME         object
COUNTY_NAME           object
STATE_NAME            object
FIPS                   int64
NUM_LICENSED_BEDS      int64
NUM_STAFFED_BEDS       int64
NUM_ICU_BEDS           int64
dtype: object