# Data interchange project COVID-19
Author: Alla Topp

In [1]:
import pandas as pd

In [2]:
# Fetch the New York Times state-level COVID-19 table (date, state, fips, cases, deaths)
# directly from the newspaper's public GitHub repository.
nyt_states_url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv'
new_york_data = pd.read_csv(nyt_states_url)

In [3]:
# Preview the first five rows to confirm the CSV parsed into the expected columns.
new_york_data.head(5)

Unnamed: 0,date,state,fips,cases,deaths
0,2020-01-21,Washington,53,1,0
1,2020-01-22,Washington,53,1,0
2,2020-01-23,Washington,53,1,0
3,2020-01-24,Illinois,17,1,0
4,2020-01-24,Washington,53,1,0


In [4]:
# Number of (date, state) rows in the NYT table.
len(new_york_data.index)

23774

In [5]:
# Inspect column types as loaded; 'date' is still a plain string (object) at this point.
new_york_data.dtypes

date      object
state     object
fips       int64
cases      int64
deaths     int64
dtype: object

In [6]:
# Convert the date strings to datetime64 so they can be joined against the hospital data.
# errors='coerce' turns any unparseable value into NaT instead of raising, so the null
# check in the following cell is what would reveal bad dates.
new_york_data['date'] = pd.to_datetime(new_york_data['date'], errors='coerce')

In [7]:
# Per-column count of nulls in the NYT table (this also catches any NaT produced by the
# coerced date conversion above).
missing_values_count = new_york_data.isna().sum()
missing_values_count

date      0
state     0
fips      0
cases     0
deaths    0
dtype: int64

In [8]:
# Re-check types: 'date' should now be datetime64[ns].
new_york_data.dtypes

date      datetime64[ns]
state             object
fips               int64
cases              int64
deaths             int64
dtype: object

## Estimated Inpatient Beds Occupied by State Timeseries

In [9]:
# Load the healthdata.gov time series of inpatient beds occupied by all patients.
# thousands=',' makes pandas strip comma thousands separators so counts parse as integers.
inpatient_all_url = 'https://healthdata.gov/api/views/jjp9-htie/rows.csv?accessType=DOWNLOAD'
patient_all = pd.read_csv(inpatient_all_url, thousands=',')
patient_all.head()

Unnamed: 0,state,collection_date,Inpatient Beds Occupied Estimated,Count LL,Count UL,Percentage of Inpatient Beds Occupied Estimated,Percentage LL,Percentage UL,Total Inpatient Beds,Total LL,Total UL,geocoded_state
0,CW,2021/04/07,511114,510672,511556,73.09,72.92,73.25,699640,699209,700071,
1,CW,2021/04/08,512656,512335,512977,73.4,73.31,73.49,698757,698556,698958,
2,CW,2021/04/09,509288,508963,509613,72.95,72.85,73.05,698442,698180,698704,
3,CW,2021/04/10,492483,492187,492780,70.68,70.61,70.76,697065,696884,697246,
4,CW,2021/04/11,480867,480258,481477,69.36,69.12,69.61,693586,692953,694218,


In [10]:
# Number of (state, collection_date) rows in the all-patient inpatient table.
len(patient_all.index)

1612

In [11]:
# Inspect column types as loaded; the count columns should already be integers thanks to
# thousands=',' in read_csv, while collection_date is still a string.
patient_all.dtypes

state                                               object
collection_date                                     object
Inpatient Beds Occupied Estimated                    int64
Count LL                                             int64
Count UL                                             int64
Percentage of Inpatient Beds Occupied Estimated    float64
Percentage LL                                      float64
Percentage UL                                      float64
Total Inpatient Beds                                 int64
Total LL                                             int64
Total UL                                             int64
geocoded_state                                      object
dtype: object

In [12]:
# NOTE(review): dead code kept for reference only. This manual numeric conversion is not
# needed because read_csv(..., thousands=',') already yields int64 for these columns
# (see the dtypes output above). Consider deleting this cell.
#cols = ['Inpatient Beds Occupied Estimated', 'Count LL', 'Count UL', 'Total Inpatient Beds', 'Total LL', 'Total UL']
#patient_all[cols] = patient_all[cols].apply(pd.to_numeric, errors='ignore', axis=1)

In [13]:
# Convert collection_date strings (e.g. 2021/04/07) to datetime64 so they can be joined on.
# errors='coerce' turns unparseable values into NaT rather than raising; the missing-value
# check further down would surface them.
patient_all['collection_date'] = pd.to_datetime(patient_all['collection_date'], errors='coerce')

In [14]:
# Re-check types: collection_date should now be datetime64[ns].
patient_all.dtypes

state                                                      object
collection_date                                    datetime64[ns]
Inpatient Beds Occupied Estimated                           int64
Count LL                                                    int64
Count UL                                                    int64
Percentage of Inpatient Beds Occupied Estimated           float64
Percentage LL                                             float64
Percentage UL                                             float64
Total Inpatient Beds                                        int64
Total LL                                                    int64
Total UL                                                    int64
geocoded_state                                             object
dtype: object

In [15]:
# Per-column count of nulls in the all-patient inpatient table (includes any NaT left by
# the coerced date conversion).
missing_values_all = patient_all.isna().sum()
missing_values_all

state                                               0
collection_date                                     0
Inpatient Beds Occupied Estimated                   0
Count LL                                            0
Count UL                                            0
Percentage of Inpatient Beds Occupied Estimated     0
Percentage LL                                       0
Percentage UL                                       0
Total Inpatient Beds                                0
Total LL                                            0
Total UL                                            0
geocoded_state                                     31
dtype: int64

## Estimated Inpatient Beds Occupied by COVID-19 Patients by State Timeseries

In [16]:
# Load the healthdata.gov time series of inpatient beds occupied by COVID-19 patients only.
# thousands=',' makes pandas strip comma thousands separators so counts parse as integers.
inpatient_covid_url = 'https://healthdata.gov/api/views/py8k-j5rq/rows.csv?accessType=DOWNLOAD'
beds_covid = pd.read_csv(inpatient_covid_url, thousands=',')
beds_covid.head()

Unnamed: 0,state,collection_date,Inpatient Beds Occupied by COVID-19 Patients Estimated,Count LL,Count UL,Percentage of Inpatient Beds Occupied by COVID-19 Patients Estimated,Percentage LL,Percentage UL,Total Inpatient Beds,Total LL,Total UL,geocoded_state
0,CW,2021/04/07,42978,42960,42996,6.17,6.13,6.21,699640,699209,700071,
1,CW,2021/04/08,42975,42956,42994,6.18,6.15,6.2,698757,698556,698958,
2,CW,2021/04/09,42881,42864,42898,6.17,6.14,6.19,698442,698180,698704,
3,CW,2021/04/10,42421,42394,42448,6.11,6.08,6.15,697065,696884,697246,
4,CW,2021/04/11,43220,43184,43256,6.26,6.2,6.32,693586,692953,694218,


In [17]:
# Number of (state, collection_date) rows in the COVID-patient inpatient table.
len(beds_covid.index)

1612

In [18]:
# Convert collection_date strings (e.g. 2021/04/07) to datetime64 so they can be joined on.
# errors='coerce' turns unparseable values into NaT rather than raising.
beds_covid['collection_date'] = pd.to_datetime(beds_covid['collection_date'], errors='coerce')
# Unlike the NYT and all-patient frames, this frame has no missing-value check elsewhere in
# the notebook, so show how many dates failed to parse (expected: 0). A NaT here would
# otherwise flow silently into the merge keys.
beds_covid['collection_date'].isnull().sum()

In [19]:
# Confirm collection_date is now datetime64[ns] and the count columns are integers.
beds_covid.dtypes

state                                                                           object
collection_date                                                         datetime64[ns]
Inpatient Beds Occupied by COVID-19 Patients Estimated                           int64
Count LL                                                                         int64
Count UL                                                                         int64
Percentage of Inpatient Beds Occupied by COVID-19 Patients Estimated           float64
Percentage LL                                                                  float64
Percentage UL                                                                  float64
Total Inpatient Beds                                                             int64
Total LL                                                                         int64
Total UL                                                                         int64
geocoded_state                                                                  object
dtype: object

## Estimated ICU Beds Occupied by State Timeseries 

In [20]:
# Load the healthdata.gov time series of staffed adult ICU beds occupied.
# thousands=',' makes pandas strip comma thousands separators so counts parse as integers.
icu_occupancy_url = 'https://healthdata.gov/api/views/7ctx-gtb7/rows.csv?accessType=DOWNLOAD'
icu_beds = pd.read_csv(icu_occupancy_url, thousands=',')
icu_beds.head()

Unnamed: 0,state,collection_date,Staffed Adult ICU Beds Occupied Estimated,Count LL,Count UL,Percentage of Staffed Adult ICU Beds Occupied Estimated,Percentage LL,Percentage UL,Total Staffed Adult ICU Beds,Total LL,Total UL,geocoded_state
0,CW,2021/04/07,60140,60091,60189,71.53,71.34,71.72,84075,84011,84139,
1,CW,2021/04/08,60062,60048,60075,71.73,71.68,71.78,83728,83712,83744,
2,CW,2021/04/09,59977,59960,59994,71.61,71.55,71.67,83754,83734,83773,
3,CW,2021/04/10,58667,58628,58706,70.22,70.09,70.34,83552,83523,83582,
4,CW,2021/04/11,57316,57236,57396,68.7,68.4,69.0,83432,83335,83529,


In [21]:
# Number of (state, collection_date) rows in the ICU table.
len(icu_beds.index)

1612

In [22]:
# Convert collection_date strings (e.g. 2021/04/07) to datetime64 so they can be joined on.
# errors='coerce' turns unparseable values into NaT rather than raising.
icu_beds['collection_date'] = pd.to_datetime(icu_beds['collection_date'], errors='coerce')
# This frame has no missing-value check elsewhere in the notebook, so show how many dates
# failed to parse (expected: 0). A NaT here would otherwise flow silently into the merge keys.
icu_beds['collection_date'].isnull().sum()

In [23]:
# Confirm collection_date is now datetime64[ns] and the count columns are integers.
icu_beds.dtypes

state                                                              object
collection_date                                            datetime64[ns]
Staffed Adult ICU Beds Occupied Estimated                           int64
Count LL                                                            int64
Count UL                                                            int64
Percentage of Staffed Adult ICU Beds Occupied Estimated           float64
Percentage LL                                                     float64
Percentage UL                                                     float64
Total Staffed Adult ICU Beds                                        int64
Total LL                                                            int64
Total UL                                                            int64
geocoded_state                                                     object
dtype: object

# Merging data sets

### First, merging 3 healthdata.gov datasets

In [24]:
# Join the all-patient and COVID-patient inpatient tables on (state, collection_date).
# Both frames share the column names Count LL/UL, Percentage LL/UL, Total Inpatient Beds,
# Total LL/UL and geocoded_state, so pandas appends its default suffixes: _x for the
# all-patient columns and _y for the COVID-patient columns. Later cells rename by those
# suffixed names, so the suffixes are deliberately left at their defaults.
# how='inner' is the default, spelled out: only keys present in both frames are kept.
# validate='one_to_one' makes the merge raise MergeError if either side repeats a
# (state, collection_date) key, instead of silently multiplying rows.
mrd = pd.merge(
    patient_all,
    beds_covid,
    how='inner',
    on=['state', 'collection_date'],
    validate='one_to_one',
)
mrd.head()

Unnamed: 0,state,collection_date,Inpatient Beds Occupied Estimated,Count LL_x,Count UL_x,Percentage of Inpatient Beds Occupied Estimated,Percentage LL_x,Percentage UL_x,Total Inpatient Beds_x,Total LL_x,...,Inpatient Beds Occupied by COVID-19 Patients Estimated,Count LL_y,Count UL_y,Percentage of Inpatient Beds Occupied by COVID-19 Patients Estimated,Percentage LL_y,Percentage UL_y,Total Inpatient Beds_y,Total LL_y,Total UL_y,geocoded_state_y
0,CW,2021-04-07,511114,510672,511556,73.09,72.92,73.25,699640,699209,...,42978,42960,42996,6.17,6.13,6.21,699640,699209,700071,
1,CW,2021-04-08,512656,512335,512977,73.4,73.31,73.49,698757,698556,...,42975,42956,42994,6.18,6.15,6.2,698757,698556,698958,
2,CW,2021-04-09,509288,508963,509613,72.95,72.85,73.05,698442,698180,...,42881,42864,42898,6.17,6.14,6.19,698442,698180,698704,
3,CW,2021-04-10,492483,492187,492780,70.68,70.61,70.76,697065,696884,...,42421,42394,42448,6.11,6.08,6.15,697065,696884,697246,
4,CW,2021-04-11,480867,480258,481477,69.36,69.12,69.61,693586,692953,...,43220,43184,43256,6.26,6.2,6.32,693586,692953,694218,


In [25]:
# Add the ICU table to the combined inpatient table on (state, collection_date).
# The inpatient LL/UL columns already carry _x/_y suffixes, so the ICU frame's
# Count/Percentage/Total LL/UL and geocoded_state columns come through unsuffixed.
# how='inner' is the default, spelled out: only keys present in both frames are kept.
# validate='one_to_one' makes the merge raise MergeError if either side repeats a
# (state, collection_date) key, instead of silently multiplying rows.
all_beds = pd.merge(
    mrd,
    icu_beds,
    how='inner',
    on=['state', 'collection_date'],
    validate='one_to_one',
)
all_beds

Unnamed: 0,state,collection_date,Inpatient Beds Occupied Estimated,Count LL_x,Count UL_x,Percentage of Inpatient Beds Occupied Estimated,Percentage LL_x,Percentage UL_x,Total Inpatient Beds_x,Total LL_x,...,Staffed Adult ICU Beds Occupied Estimated,Count LL,Count UL,Percentage of Staffed Adult ICU Beds Occupied Estimated,Percentage LL,Percentage UL,Total Staffed Adult ICU Beds,Total LL,Total UL,geocoded_state
0,CW,2021-04-07,511114,510672,511556,73.09,72.92,73.25,699640,699209,...,60140,60091,60189,71.53,71.34,71.72,84075,84011,84139,
1,CW,2021-04-08,512656,512335,512977,73.40,73.31,73.49,698757,698556,...,60062,60048,60075,71.73,71.68,71.78,83728,83712,83744,
2,CW,2021-04-09,509288,508963,509613,72.95,72.85,73.05,698442,698180,...,59977,59960,59994,71.61,71.55,71.67,83754,83734,83773,
3,CW,2021-04-10,492483,492187,492780,70.68,70.61,70.76,697065,696884,...,58667,58628,58706,70.22,70.09,70.34,83552,83523,83582,
4,CW,2021-04-11,480867,480258,481477,69.36,69.12,69.61,693586,692953,...,57316,57236,57396,68.70,68.40,69.00,83432,83335,83529,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1607,TX,2021-05-07,44343,43062,45624,75.42,68.69,82.14,58798,57282,...,5475,5301,5649,79.21,71.63,86.79,6912,6716,7108,POINT (-99.317341 31.447238)
1608,WA,2021-05-07,8927,8270,9584,74.22,56.86,91.58,12027,11199,...,935,853,1017,74.98,53.80,96.16,1247,1140,1354,POINT (-120.592492 47.411715)
1609,WI,2021-05-07,7847,7787,7907,65.33,63.64,67.01,12012,11926,...,908,898,918,56.89,54.57,59.22,1596,1578,1614,POINT (-89.732969 44.639954)
1610,WV,2021-05-07,3887,3887,3887,77.93,77.93,77.93,4988,4988,...,454,454,454,74.79,74.79,74.79,607,607,607,POINT (-80.613729 38.642579)


In [26]:
# Align the join key with the NYT table, which calls its date column 'date'.
all_beds.rename({'collection_date': 'date'}, axis='columns', inplace=True)
all_beds.head()

Unnamed: 0,state,date,Inpatient Beds Occupied Estimated,Count LL_x,Count UL_x,Percentage of Inpatient Beds Occupied Estimated,Percentage LL_x,Percentage UL_x,Total Inpatient Beds_x,Total LL_x,...,Staffed Adult ICU Beds Occupied Estimated,Count LL,Count UL,Percentage of Staffed Adult ICU Beds Occupied Estimated,Percentage LL,Percentage UL,Total Staffed Adult ICU Beds,Total LL,Total UL,geocoded_state
0,CW,2021-04-07,511114,510672,511556,73.09,72.92,73.25,699640,699209,...,60140,60091,60189,71.53,71.34,71.72,84075,84011,84139,
1,CW,2021-04-08,512656,512335,512977,73.4,73.31,73.49,698757,698556,...,60062,60048,60075,71.73,71.68,71.78,83728,83712,83744,
2,CW,2021-04-09,509288,508963,509613,72.95,72.85,73.05,698442,698180,...,59977,59960,59994,71.61,71.55,71.67,83754,83734,83773,
3,CW,2021-04-10,492483,492187,492780,70.68,70.61,70.76,697065,696884,...,58667,58628,58706,70.22,70.09,70.34,83552,83523,83582,
4,CW,2021-04-11,480867,480258,481477,69.36,69.12,69.61,693586,692953,...,57316,57236,57396,68.7,68.4,69.0,83432,83335,83529,


### Remapping the NYT state column from full US state names to two-letter codes

In [27]:
# Lookup from full state / territory name (as spelled in the NYT table) to its
# two-letter code (as used in the healthdata.gov tables). Insertion order is alphabetical
# by name.
us_state_abbrev = dict([
    ('Alabama', 'AL'), ('Alaska', 'AK'), ('American Samoa', 'AS'), ('Arizona', 'AZ'),
    ('Arkansas', 'AR'),
    ('California', 'CA'), ('Colorado', 'CO'), ('Connecticut', 'CT'),
    ('Delaware', 'DE'), ('District of Columbia', 'DC'),
    ('Florida', 'FL'),
    ('Georgia', 'GA'), ('Guam', 'GU'),
    ('Hawaii', 'HI'),
    ('Idaho', 'ID'), ('Illinois', 'IL'), ('Indiana', 'IN'), ('Iowa', 'IA'),
    ('Kansas', 'KS'), ('Kentucky', 'KY'),
    ('Louisiana', 'LA'),
    ('Maine', 'ME'), ('Maryland', 'MD'), ('Massachusetts', 'MA'), ('Michigan', 'MI'),
    ('Minnesota', 'MN'), ('Mississippi', 'MS'), ('Missouri', 'MO'), ('Montana', 'MT'),
    ('Nebraska', 'NE'), ('Nevada', 'NV'), ('New Hampshire', 'NH'), ('New Jersey', 'NJ'),
    ('New Mexico', 'NM'), ('New York', 'NY'), ('North Carolina', 'NC'),
    ('North Dakota', 'ND'), ('Northern Mariana Islands', 'MP'),
    ('Ohio', 'OH'), ('Oklahoma', 'OK'), ('Oregon', 'OR'),
    ('Pennsylvania', 'PA'), ('Puerto Rico', 'PR'),
    ('Rhode Island', 'RI'),
    ('South Carolina', 'SC'), ('South Dakota', 'SD'),
    ('Tennessee', 'TN'), ('Texas', 'TX'),
    ('Utah', 'UT'),
    ('Vermont', 'VT'), ('Virgin Islands', 'VI'), ('Virginia', 'VA'),
    ('Washington', 'WA'), ('West Virginia', 'WV'), ('Wisconsin', 'WI'), ('Wyoming', 'WY'),
])

In [28]:
# Translate full state names to two-letter codes so 'state' matches the healthdata.gov
# frames. Falling back to the original value (instead of the NaN that Series.map(dict)
# gives for a missing key) makes this cell idempotent: re-running it on already-abbreviated
# codes is a no-op rather than wiping the column. Names absent from the lookup are kept
# as-is; they still will not match any two-letter code in the later merge.
new_york_data['state'] = new_york_data['state'].map(
    lambda name: us_state_abbrev.get(name, name)
)

In [29]:
# Eyeball the remapped 'state' column.
# NOTE(review): print() gives the plain-text repr of the whole frame; a bare
# `new_york_data.head()` would use the notebook's rich table display instead.
print(new_york_data)

            date state  fips   cases  deaths
0     2020-01-21    WA    53       1       0
1     2020-01-22    WA    53       1       0
2     2020-01-23    WA    53       1       0
3     2020-01-24    IL    17       1       0
4     2020-01-24    WA    53       1       0
...          ...   ...   ...     ...     ...
23769 2021-05-08    VA    51  666111   10885
23770 2021-05-08    WA    53  415873    5609
23771 2021-05-08    WV    54  156001    2726
23772 2021-05-08    WI    55  666042    7652
23773 2021-05-08    WY    56   58623     710

[23774 rows x 5 columns]


### Final merge

In [30]:
# Attach the hospital-capacity columns to the NYT case/death counts.
# Inner join on (state, date): only pairs present in both tables survive, so healthdata
# rows whose state code has no NYT counterpart (e.g. 'CW') and NYT dates outside the
# hospital data's range are dropped.
final_dataset = new_york_data.merge(all_beds, how='inner', on=['state', 'date'])
final_dataset

Unnamed: 0,date,state,fips,cases,deaths,Inpatient Beds Occupied Estimated,Count LL_x,Count UL_x,Percentage of Inpatient Beds Occupied Estimated,Percentage LL_x,...,Staffed Adult ICU Beds Occupied Estimated,Count LL,Count UL,Percentage of Staffed Adult ICU Beds Occupied Estimated,Percentage LL,Percentage UL,Total Staffed Adult ICU Beds,Total LL,Total UL,geocoded_state
0,2021-04-07,AL,1,517452,10652,10285,10285,10285,73.95,73.95,...,1332,1332,1332,84.09,84.09,84.09,1584,1584,1584,POINT (-86.844516 32.756889)
1,2021-04-07,AK,2,63785,299,886,886,886,59.54,59.54,...,90,90,90,63.83,63.83,63.83,141,141,141,POINT (-151.631889 63.631126)
2,2021-04-07,AZ,4,846241,17025,9891,9889,9893,69.79,69.68,...,1062,1062,1062,49.05,49.05,49.05,2165,2165,2165,POINT (-111.664616 34.293239)
3,2021-04-07,AR,5,331505,5660,5370,5370,5370,69.70,69.70,...,690,690,690,68.18,68.18,68.18,1012,1012,1012,POINT (-92.439162 34.899825)
4,2021-04-07,CA,6,3687635,59975,43622,43622,43622,73.89,73.89,...,4798,4798,4798,69.87,69.87,69.87,6867,6860,6874,POINT (-119.662127 37.211164)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1576,2021-05-07,VA,51,665332,10874,11663,11432,11894,72.82,68.26,...,1287,1260,1314,67.67,62.94,72.39,1902,1864,1940,POINT (-78.666382 37.510861)
1577,2021-05-07,WA,53,414479,5609,8927,8270,9584,74.22,56.86,...,935,853,1017,74.98,53.80,96.16,1247,1140,1354,POINT (-120.592492 47.411715)
1578,2021-05-07,WV,54,155680,2719,3887,3887,3887,77.93,77.93,...,454,454,454,74.79,74.79,74.79,607,607,607,POINT (-80.613729 38.642579)
1579,2021-05-07,WI,55,665422,7625,7847,7787,7907,65.33,63.64,...,908,898,918,56.89,54.57,59.22,1596,1578,1614,POINT (-89.732969 44.639954)


## Changing LL and UL column names 

In [31]:
# Columns suffixed _x came from the all-patient inpatient table; give their LL/UL
# columns self-describing names in a single rename pass.
inpatient_limit_names = {
    'Count LL_x': 'Count LL_inpatient beds occupied',
    'Count UL_x': 'Count UL_inpatient beds occupied',
    'Percentage LL_x': 'Percentage LL_inpatient beds occupied',
    'Percentage UL_x': 'Percentage UL_inpatient beds occupied',
    'Total LL_x': 'Total LL_inpatient beds occupied',
    'Total UL_x': 'Total UL_inpatient beds occupied',
}
final_dataset.rename(columns=inpatient_limit_names, inplace=True)

In [32]:
# Columns suffixed _y came from the COVID-patient inpatient table; give their LL/UL
# columns self-describing names in a single rename pass.
covid_limit_names = {
    'Count LL_y': 'Count LL_occupied by COVID-19 patients',
    'Count UL_y': 'Count UL_occupied by COVID-19 patients',
    'Percentage LL_y': 'Percentage LL_occupied by COVID-19 patients',
    'Percentage UL_y': 'Percentage UL_occupied by COVID-19 patients',
    'Total LL_y': 'Total LL_occupied by COVID-19 patients',
    'Total UL_y': 'Total UL_occupied by COVID-19 patients',
}
final_dataset.rename(columns=covid_limit_names, inplace=True)

In [33]:
# The unsuffixed LL/UL columns came from the ICU table (it was merged last, after the
# inpatient columns had already been suffixed); label them as ICU in a single rename pass.
icu_limit_names = {
    'Count LL': 'Count LL_ICU Beds Occupied',
    'Count UL': 'Count UL_ICU Beds Occupied',
    'Percentage LL': 'Percentage LL_ICU Beds Occupied',
    'Percentage UL': 'Percentage UL_ICU Beds Occupied',
    'Total LL': 'Total LL_ICU Beds Occupied',
    'Total UL': 'Total UL_ICU Beds Occupied',
}
final_dataset.rename(columns=icu_limit_names, inplace=True)

## Final dataset column names 

In [34]:
# List every column of the final dataset to verify the renames took effect.
# NOTE(review): 'Total Inpatient Beds_x'/'_y' and 'geocoded_state_x'/'_y' keep their merge
# suffixes because the rename cells above do not cover them.
list(final_dataset.columns)

['date',
 'state',
 'fips',
 'cases',
 'deaths',
 'Inpatient Beds Occupied Estimated',
 'Count LL_inpatient beds occupied',
 'Count UL_inpatient beds occupied',
 'Percentage of Inpatient Beds Occupied Estimated',
 'Percentage LL_inpatient beds occupied',
 'Percentage UL_inpatient beds occupied',
 'Total Inpatient Beds_x',
 'Total LL_inpatient beds occupied',
 'Total UL_inpatient beds occupied',
 'geocoded_state_x',
 'Inpatient Beds Occupied by COVID-19 Patients Estimated',
 'Count LL_occupied by COVID-19 patients',
 'Count UL_occupied by COVID-19 patients',
 'Percentage of Inpatient Beds Occupied by COVID-19 Patients Estimated',
 'Percentage LL_occupied by COVID-19 patients',
 'Percentage UL_occupied by COVID-19 patients',
 'Total Inpatient Beds_y',
 'Total LL_occupied by COVID-19 patients',
 'Total UL_occupied by COVID-19 patients',
 'geocoded_state_y',
 'Staffed Adult ICU Beds Occupied Estimated',
 'Count LL_ICU Beds Occupied',
 'Count UL_ICU Beds Occupied',
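 'Percentage of Staffed Adult ICU Beds Occupied Estimated',
 'Percentage LL_ICU Beds Occupied',
 'Percentage UL_ICU Beds Occupied',
 'Total Staffed Adult ICU Beds',
 'Total LL_ICU Beds Occupied',
 'Total UL_ICU Beds Occupied',
 'geocoded_state']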

## Checking the final dataset

In [35]:
# Display the merged, renamed frame as a last visual check before export.
final_dataset

Unnamed: 0,date,state,fips,cases,deaths,Inpatient Beds Occupied Estimated,Count LL_inpatient beds occupied,Count UL_inpatient beds occupied,Percentage of Inpatient Beds Occupied Estimated,Percentage LL_inpatient beds occupied,...,Staffed Adult ICU Beds Occupied Estimated,Count LL_ICU Beds Occupied,Count UL_ICU Beds Occupied,Percentage of Staffed Adult ICU Beds Occupied Estimated,Percentage LL_ICU Beds Occupied,Percentage UL_ICU Beds Occupied,Total Staffed Adult ICU Beds,Total LL_ICU Beds Occupied,Total UL_ICU Beds Occupied,geocoded_state
0,2021-04-07,AL,1,517452,10652,10285,10285,10285,73.95,73.95,...,1332,1332,1332,84.09,84.09,84.09,1584,1584,1584,POINT (-86.844516 32.756889)
1,2021-04-07,AK,2,63785,299,886,886,886,59.54,59.54,...,90,90,90,63.83,63.83,63.83,141,141,141,POINT (-151.631889 63.631126)
2,2021-04-07,AZ,4,846241,17025,9891,9889,9893,69.79,69.68,...,1062,1062,1062,49.05,49.05,49.05,2165,2165,2165,POINT (-111.664616 34.293239)
3,2021-04-07,AR,5,331505,5660,5370,5370,5370,69.70,69.70,...,690,690,690,68.18,68.18,68.18,1012,1012,1012,POINT (-92.439162 34.899825)
4,2021-04-07,CA,6,3687635,59975,43622,43622,43622,73.89,73.89,...,4798,4798,4798,69.87,69.87,69.87,6867,6860,6874,POINT (-119.662127 37.211164)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1576,2021-05-07,VA,51,665332,10874,11663,11432,11894,72.82,68.26,...,1287,1260,1314,67.67,62.94,72.39,1902,1864,1940,POINT (-78.666382 37.510861)
1577,2021-05-07,WA,53,414479,5609,8927,8270,9584,74.22,56.86,...,935,853,1017,74.98,53.80,96.16,1247,1140,1354,POINT (-120.592492 47.411715)
1578,2021-05-07,WV,54,155680,2719,3887,3887,3887,77.93,77.93,...,454,454,454,74.79,74.79,74.79,607,607,607,POINT (-80.613729 38.642579)
1579,2021-05-07,WI,55,665422,7625,7847,7787,7907,65.33,63.64,...,908,898,918,56.89,54.57,59.22,1596,1578,1614,POINT (-89.732969 44.639954)


### Export of the files 

In [36]:
# Write the merged table to a CSV in the working directory; index=False omits the
# integer row index from the file.
final_dataset.to_csv(path_or_buf='final_covid.csv', index=False)
#final_dataset.to_json('file.json', orient = 'split', compression = 'infer', index = 'true') 
# https://towardsdatascience.com/automatically-update-data-sources-in-python-e424dbea68d0

### Reference:
* Data Import (https://www.datacamp.com/community/tutorials/importing-data-into-pandas)
* Dict map (https://gist.github.com/rogerallen/1583593)
* Pandas Indexing (https://www.shanelynn.ie/merge-join-dataframes-python-pandas-index-1/)
* Remap (https://www.geeksforgeeks.org/using-dictionary-to-remap-values-in-pandas-dataframe-columns/)
* NYT data (https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv)
* Healthdata.gov data (https://healthdata.gov/dataset/covid-19-estimated-patient-impact-and-hospital-capacity-state)