# Data interchange project COVID-19
Author: Alla Topp

In [1]:
import pandas as pd

In [2]:
# Load the New York Times state-level COVID-19 time series straight from GitHub.
# Despite the variable name, the frame holds rows for every reported state
# (columns: date, state, fips, cases, deaths), not just New York.
new_york_data = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv')

In [3]:
new_york_data.head(5)

Unnamed: 0,date,state,fips,cases,deaths
0,2020-01-21,Washington,53,1,0
1,2020-01-22,Washington,53,1,0
2,2020-01-23,Washington,53,1,0
3,2020-01-24,Illinois,17,1,0
4,2020-01-24,Washington,53,1,0


In [4]:
len(new_york_data)

23059

In [5]:
new_york_data.dtypes

date      object
state     object
fips       int64
cases      int64
deaths     int64
dtype: object

In [6]:
# Parse the date strings into datetime64 so 'date' can serve as a merge key later on.
# errors='coerce' turns any unparseable value into NaT instead of raising.
new_york_data['date'] = pd.to_datetime(new_york_data['date'], errors='coerce')

In [7]:
# checking if there are any missing values in this data set
missing_values_count = new_york_data.isnull().sum()
missing_values_count  

date      0
state     0
fips      0
cases     0
deaths    0
dtype: int64

In [8]:
new_york_data.dtypes

date      datetime64[ns]
state             object
fips               int64
cases              int64
deaths             int64
dtype: object

## Estimated Inpatient Beds Occupied by State Timeseries

In [10]:
# Import the healthdata.gov time series of inpatient beds occupied by all patients.
# thousands=',' lets pandas parse comma-grouped figures as numbers.

patient_all = pd.read_csv('https://healthdata.gov/api/views/jjp9-htie/rows.csv?accessType=DOWNLOAD', thousands = ',')
patient_all.head()

Unnamed: 0,state,collection_date,Inpatient Beds Occupied Estimated,Count LL,Count UL,Percentage of Inpatient Beds Occupied Estimated,Percentage LL,Percentage UL,Total Inpatient Beds,Total LL,Total UL,geocoded_state
0,CW,2021/03/24,509974,509803,510145,72.61,72.53,72.69,702371,702160,702582,
1,CW,2021/03/25,509613,509459,509766,72.5,72.43,72.58,702910,702673,703148,
2,CW,2021/03/26,506032,505756,506307,72.14,72.03,72.24,701516,701329,701702,
3,CW,2021/03/27,487746,487613,487879,69.78,69.73,69.84,698961,698798,699124,
4,CW,2021/03/28,472836,472649,473023,68.02,67.94,68.1,695181,694996,695366,


In [11]:
len(patient_all)

1612

In [12]:
patient_all.dtypes

state                                               object
collection_date                                     object
Inpatient Beds Occupied Estimated                    int64
Count LL                                             int64
Count UL                                             int64
Percentage of Inpatient Beds Occupied Estimated    float64
Percentage LL                                      float64
Percentage UL                                      float64
Total Inpatient Beds                                 int64
Total LL                                             int64
Total UL                                             int64
geocoded_state                                      object
dtype: object

In [13]:
#cols = ['Inpatient Beds Occupied Estimated', 'Count LL', 'Count UL', 'Total Inpatient Beds', 'Total LL', 'Total UL']
#patient_all[cols] = patient_all[cols].apply(pd.to_numeric, errors='ignore', axis=1)

In [14]:
# Parse collection_date (strings such as 2021/03/24) into datetime64;
# errors='coerce' turns any unparseable value into NaT instead of raising.
patient_all['collection_date'] = pd.to_datetime(patient_all['collection_date'], errors='coerce')

In [15]:
patient_all.dtypes

state                                                      object
collection_date                                    datetime64[ns]
Inpatient Beds Occupied Estimated                           int64
Count LL                                                    int64
Count UL                                                    int64
Percentage of Inpatient Beds Occupied Estimated           float64
Percentage LL                                             float64
Percentage UL                                             float64
Total Inpatient Beds                                        int64
Total LL                                                    int64
Total UL                                                    int64
geocoded_state                                             object
dtype: object

In [16]:
# checking if there are any missing values in this data set
missing_values_all = patient_all.isnull().sum()
missing_values_all 

state                                               0
collection_date                                     0
Inpatient Beds Occupied Estimated                   0
Count LL                                            0
Count UL                                            0
Percentage of Inpatient Beds Occupied Estimated     0
Percentage LL                                       0
Percentage UL                                       0
Total Inpatient Beds                                0
Total LL                                            0
Total UL                                            0
geocoded_state                                     31
dtype: int64

## Estimated Inpatient Beds Occupied by COVID-19 Patients by State Timeseries

In [17]:
# import of data where hospital beds are occupied only by COVID patients
beds_covid = pd.read_csv('https://healthdata.gov/api/views/py8k-j5rq/rows.csv?accessType=DOWNLOAD', thousands = ',')
beds_covid.head()

Unnamed: 0,state,collection_date,Inpatient Beds Occupied by COVID-19 Patients Estimated,Count LL,Count UL,Percentage of Inpatient Beds Occupied by COVID-19 Patients Estimated,Percentage LL,Percentage UL,Total Inpatient Beds,Total LL,Total UL,geocoded_state
0,CW,2021/03/24,39403,39375,39431,5.64,5.61,5.67,702371,702160,702582,
1,CW,2021/03/25,39380,39329,39432,5.63,5.57,5.69,702910,702673,703148,
2,CW,2021/03/26,39421,39400,39442,5.65,5.63,5.67,701516,701329,701702,
3,CW,2021/03/27,38843,38754,38932,5.59,5.48,5.69,698961,698798,699124,
4,CW,2021/03/28,39128,39108,39147,5.66,5.63,5.68,695181,694996,695366,


In [18]:
len(beds_covid)

1612

In [19]:
# Parse collection_date (strings such as 2021/03/24) into datetime64;
# errors='coerce' turns any unparseable value into NaT instead of raising.
beds_covid['collection_date'] = pd.to_datetime(beds_covid['collection_date'], errors='coerce')

In [20]:
beds_covid.dtypes

state                                                                           object
collection_date                                                         datetime64[ns]
Inpatient Beds Occupied by COVID-19 Patients Estimated                           int64
Count LL                                                                         int64
Count UL                                                                         int64
Percentage of Inpatient Beds Occupied by COVID-19 Patients Estimated           float64
Percentage LL                                                                  float64
Percentage UL                                                                  float64
Total Inpatient Beds                                                             int64
Total LL                                                                         int64
Total UL                                                                         int64
geocoded_state                             

## Estimated ICU Beds Occupied by State Timeseries 

In [21]:
# Import the healthdata.gov time series of staffed adult ICU beds occupied, by state.
# thousands=',' lets pandas parse comma-grouped figures as numbers.

icu_beds = pd.read_csv('https://healthdata.gov/api/views/7ctx-gtb7/rows.csv?accessType=DOWNLOAD', thousands = ',')
icu_beds.head()

Unnamed: 0,state,collection_date,Staffed Adult ICU Beds Occupied Estimated,Count LL,Count UL,Percentage of Staffed Adult ICU Beds Occupied Estimated,Percentage LL,Percentage UL,Total Staffed Adult ICU Beds,Total LL,Total UL,geocoded_state
0,CW,2021/03/24,59394,59378,59410,70.54,70.48,70.6,84199,84180,84218,
1,CW,2021/03/25,59337,59298,59376,70.63,70.51,70.76,84008,83978,84037,
2,CW,2021/03/26,59171,59160,59182,70.51,70.46,70.56,83919,83902,83936,
3,CW,2021/03/27,57814,57791,57837,69.05,68.98,69.13,83723,83705,83741,
4,CW,2021/03/28,56174,56155,56192,67.24,67.17,67.3,83546,83530,83562,


In [22]:
len(icu_beds)

1612

In [23]:
# Parse collection_date (strings such as 2021/03/24) into datetime64;
# errors='coerce' turns any unparseable value into NaT instead of raising.
icu_beds['collection_date'] = pd.to_datetime(icu_beds['collection_date'], errors='coerce')

In [24]:
icu_beds.dtypes

state                                                              object
collection_date                                            datetime64[ns]
Staffed Adult ICU Beds Occupied Estimated                           int64
Count LL                                                            int64
Count UL                                                            int64
Percentage of Staffed Adult ICU Beds Occupied Estimated           float64
Percentage LL                                                     float64
Percentage UL                                                     float64
Total Staffed Adult ICU Beds                                        int64
Total LL                                                            int64
Total UL                                                            int64
geocoded_state                                                     object
dtype: object

# Merging data sets

### First, merging 3 healthdata.gov datasets

In [25]:
# Merge the all-patients and COVID-patients datasets on (state, collection_date).
# pd.merge defaults to an inner join. Columns that exist in both frames
# (Count LL, Total Inpatient Beds, geocoded_state, ...) come out with the _x
# suffix for patient_all and the _y suffix for beds_covid.
mrd = pd.merge(patient_all, beds_covid, on = ['state', 'collection_date'])
mrd.head()

Unnamed: 0,state,collection_date,Inpatient Beds Occupied Estimated,Count LL_x,Count UL_x,Percentage of Inpatient Beds Occupied Estimated,Percentage LL_x,Percentage UL_x,Total Inpatient Beds_x,Total LL_x,...,Inpatient Beds Occupied by COVID-19 Patients Estimated,Count LL_y,Count UL_y,Percentage of Inpatient Beds Occupied by COVID-19 Patients Estimated,Percentage LL_y,Percentage UL_y,Total Inpatient Beds_y,Total LL_y,Total UL_y,geocoded_state_y
0,CW,2021-03-24,509974,509803,510145,72.61,72.53,72.69,702371,702160,...,39403,39375,39431,5.64,5.61,5.67,702371,702160,702582,
1,CW,2021-03-25,509613,509459,509766,72.5,72.43,72.58,702910,702673,...,39380,39329,39432,5.63,5.57,5.69,702910,702673,703148,
2,CW,2021-03-26,506032,505756,506307,72.14,72.03,72.24,701516,701329,...,39421,39400,39442,5.65,5.63,5.67,701516,701329,701702,
3,CW,2021-03-27,487746,487613,487879,69.78,69.73,69.84,698961,698798,...,38843,38754,38932,5.59,5.48,5.69,698961,698798,699124,
4,CW,2021-03-28,472836,472649,473023,68.02,67.94,68.1,695181,694996,...,39128,39108,39147,5.66,5.63,5.68,695181,694996,695366,


In [26]:
# Add the ICU dataset on the same (state, collection_date) key.
# The shared column names in mrd were already suffixed _x/_y by the previous
# merge, so nothing collides here and the ICU columns keep their original
# unsuffixed names (e.g. 'Count LL', 'geocoded_state').
all_beds = pd.merge(mrd, icu_beds, on = ['state', 'collection_date'])
all_beds

Unnamed: 0,state,collection_date,Inpatient Beds Occupied Estimated,Count LL_x,Count UL_x,Percentage of Inpatient Beds Occupied Estimated,Percentage LL_x,Percentage UL_x,Total Inpatient Beds_x,Total LL_x,...,Staffed Adult ICU Beds Occupied Estimated,Count LL,Count UL,Percentage of Staffed Adult ICU Beds Occupied Estimated,Percentage LL,Percentage UL,Total Staffed Adult ICU Beds,Total LL,Total UL,geocoded_state
0,CW,2021-03-24,509974,509803,510145,72.61,72.53,72.69,702371,702160,...,59394,59378,59410,70.54,70.48,70.60,84199,84180,84218,
1,CW,2021-03-25,509613,509459,509766,72.50,72.43,72.58,702910,702673,...,59337,59298,59376,70.63,70.51,70.76,84008,83978,84037,
2,CW,2021-03-26,506032,505756,506307,72.14,72.03,72.24,701516,701329,...,59171,59160,59182,70.51,70.46,70.56,83919,83902,83936,
3,CW,2021-03-27,487746,487613,487879,69.78,69.73,69.84,698961,698798,...,57814,57791,57837,69.05,68.98,69.13,83723,83705,83741,
4,CW,2021-03-28,472836,472649,473023,68.02,67.94,68.10,695181,694996,...,56174,56155,56192,67.24,67.17,67.30,83546,83530,83562,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1607,WI,2021-04-19,7432,7426,7439,61.80,61.58,62.01,12027,12014,...,816,813,818,51.94,51.51,52.37,1570,1570,1570,POINT (-89.732969 44.639954)
1608,WI,2021-04-20,7864,7855,7873,65.47,65.22,65.72,12012,11999,...,887,886,888,56.14,55.96,56.31,1580,1580,1580,POINT (-89.732969 44.639954)
1609,WI,2021-04-21,7923,7913,7933,65.74,65.41,66.06,12053,12034,...,894,894,894,56.37,56.37,56.37,1586,1586,1586,POINT (-89.732969 44.639954)
1610,WI,2021-04-22,7849,7840,7858,65.25,65.25,65.25,12030,12030,...,907,907,907,57.08,57.08,57.08,1589,1589,1589,POINT (-89.732969 44.639954)


In [27]:
# Rename collection_date to date so the key column has the same name as in
# new_york_data, which the final merge joins on.
all_beds.rename(columns = {'collection_date':'date'}, inplace = True) 
all_beds.head()

Unnamed: 0,state,date,Inpatient Beds Occupied Estimated,Count LL_x,Count UL_x,Percentage of Inpatient Beds Occupied Estimated,Percentage LL_x,Percentage UL_x,Total Inpatient Beds_x,Total LL_x,...,Staffed Adult ICU Beds Occupied Estimated,Count LL,Count UL,Percentage of Staffed Adult ICU Beds Occupied Estimated,Percentage LL,Percentage UL,Total Staffed Adult ICU Beds,Total LL,Total UL,geocoded_state
0,CW,2021-03-24,509974,509803,510145,72.61,72.53,72.69,702371,702160,...,59394,59378,59410,70.54,70.48,70.6,84199,84180,84218,
1,CW,2021-03-25,509613,509459,509766,72.5,72.43,72.58,702910,702673,...,59337,59298,59376,70.63,70.51,70.76,84008,83978,84037,
2,CW,2021-03-26,506032,505756,506307,72.14,72.03,72.24,701516,701329,...,59171,59160,59182,70.51,70.46,70.56,83919,83902,83936,
3,CW,2021-03-27,487746,487613,487879,69.78,69.73,69.84,698961,698798,...,57814,57791,57837,69.05,68.98,69.13,83723,83705,83741,
4,CW,2021-03-28,472836,472649,473023,68.02,67.94,68.1,695181,694996,...,56174,56155,56192,67.24,67.17,67.3,83546,83530,83562,


### Remapping the NYT dataframe's US state names to two-letter codes

In [28]:
# Lookup table from full US state / territory name to its two-letter code.
# Used to convert the full names in new_york_data['state'] into the two-letter
# codes that the healthdata.gov frames use in their 'state' column.
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}

In [29]:
# Translate the full state names in the NYT data into two-letter codes so they
# match the 'state' codes used by the healthdata.gov frames.
# Series.replace is used instead of Series.map because it leaves values that are
# not dictionary keys untouched: re-running this cell is then a no-op rather
# than turning the already-converted codes into NaN, and a name missing from
# the dictionary stays visible instead of silently becoming NaN.
new_york_data['state'] = new_york_data['state'].replace(us_state_abbrev)

In [30]:
print(new_york_data)

            date state  fips   cases  deaths
0     2020-01-21    WA    53       1       0
1     2020-01-22    WA    53       1       0
2     2020-01-23    WA    53       1       0
3     2020-01-24    IL    17       1       0
4     2020-01-24    WA    53       1       0
...          ...   ...   ...     ...     ...
23054 2021-04-25    VA    51  654210   10691
23055 2021-04-25    WA    53  397006    5478
23056 2021-04-25    WV    54  151382    2821
23057 2021-04-25    WI    55  656668    7473
23058 2021-04-25    WY    56   57696     705

[23059 rows x 5 columns]


### Final merge

In [31]:
# Join the NYT case/death counts with the combined hospital data on (state, date).
# pd.merge defaults to an inner join, so only (state, date) pairs present in
# both frames survive. In particular the all_beds rows with state 'CW' drop out,
# because 'CW' is not one of the codes produced by us_state_abbrev.
final_dataset = pd.merge(new_york_data, all_beds, on = ['state', 'date'])
final_dataset

Unnamed: 0,date,state,fips,cases,deaths,Inpatient Beds Occupied Estimated,Count LL_x,Count UL_x,Percentage of Inpatient Beds Occupied Estimated,Percentage LL_x,...,Staffed Adult ICU Beds Occupied Estimated,Count LL,Count UL,Percentage of Staffed Adult ICU Beds Occupied Estimated,Percentage LL,Percentage UL,Total Staffed Adult ICU Beds,Total LL,Total UL,geocoded_state
0,2021-03-24,AL,1,512711,10487,9911,9911,9911,71.57,71.57,...,1263,1263,1263,79.43,79.43,79.43,1590,1590,1590,POINT (-86.844516 32.756889)
1,2021-03-24,AK,2,61411,298,898,898,898,60.43,60.43,...,96,96,96,75.59,75.59,75.59,127,127,127,POINT (-151.631889 63.631126)
2,2021-03-24,AZ,4,837849,16842,9628,9628,9628,68.21,68.21,...,1061,1061,1061,49.58,49.58,49.58,2140,2140,2140,POINT (-111.664616 34.293239)
3,2021-03-24,AR,5,329177,5560,5160,5160,5160,67.93,67.93,...,680,680,680,68.62,68.62,68.62,991,991,991,POINT (-92.439162 34.899825)
4,2021-03-24,CA,6,3650833,58160,44478,44354,44602,74.91,74.25,...,5014,5003,5025,71.66,71.18,72.14,6997,6984,7010,POINT (-119.662127 37.211164)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1576,2021-04-23,VA,51,652321,10666,11698,11401,11995,73.19,67.28,...,1339,1306,1372,70.55,64.38,76.72,1898,1841,1955,POINT (-78.666382 37.510861)
1577,2021-04-23,WA,53,395354,5477,9126,8296,9956,74.99,52.69,...,962,864,1060,75.87,50.32,101.42,1268,1134,1402,POINT (-120.592492 47.411715)
1578,2021-04-23,WV,54,150693,2813,3880,3880,3880,78.42,78.42,...,458,442,474,75.70,75.70,75.70,605,583,627,POINT (-80.613729 38.642579)
1579,2021-04-23,WI,55,655560,7452,7872,7273,8471,65.47,48.63,...,911,832,990,57.33,37.20,77.46,1589,1429,1749,POINT (-89.732969 44.639954)


## Changing LL and UL column names 

In [32]:
# Give the _x-suffixed lower/upper-limit columns (the ones that came from the
# all-patients frame) descriptive names, in a single in-place rename.
# 'Total Inpatient Beds_x' and 'geocoded_state_x' are not renamed by this cell.
final_dataset.rename(
    columns={
        'Count LL_x': 'Count LL_inpatient beds occupied',
        'Count UL_x': 'Count UL_inpatient beds occupied',
        'Percentage LL_x': 'Percentage LL_inpatient beds occupied',
        'Percentage UL_x': 'Percentage UL_inpatient beds occupied',
        'Total LL_x': 'Total LL_inpatient beds occupied',
        'Total UL_x': 'Total UL_inpatient beds occupied',
    },
    inplace=True,
)

In [33]:
# Give the _y-suffixed lower/upper-limit columns (the ones that came from the
# COVID-patients frame) descriptive names, in a single in-place rename.
# 'Total Inpatient Beds_y' and 'geocoded_state_y' are not renamed by this cell.
final_dataset.rename(
    columns={
        'Count LL_y': 'Count LL_occupied by COVID-19 patients',
        'Count UL_y': 'Count UL_occupied by COVID-19 patients',
        'Percentage LL_y': 'Percentage LL_occupied by COVID-19 patients',
        'Percentage UL_y': 'Percentage UL_occupied by COVID-19 patients',
        'Total LL_y': 'Total LL_occupied by COVID-19 patients',
        'Total UL_y': 'Total UL_occupied by COVID-19 patients',
    },
    inplace=True,
)

In [34]:
# Give the unsuffixed lower/upper-limit columns (the ones that came from the
# ICU frame) descriptive names, in a single in-place rename.
final_dataset.rename(
    columns={
        'Count LL': 'Count LL_ICU Beds Occupied',
        'Count UL': 'Count UL_ICU Beds Occupied',
        'Percentage LL': 'Percentage LL_ICU Beds Occupied',
        'Percentage UL': 'Percentage UL_ICU Beds Occupied',
        'Total LL': 'Total LL_ICU Beds Occupied',
        'Total UL': 'Total UL_ICU Beds Occupied',
    },
    inplace=True,
)

## Final dataset column names 

In [35]:
# list of columns in final dataset
list(final_dataset.columns)

['date',
 'state',
 'fips',
 'cases',
 'deaths',
 'Inpatient Beds Occupied Estimated',
 'Count LL_inpatient beds occupied',
 'Count UL_inpatient beds occupied',
 'Percentage of Inpatient Beds Occupied Estimated',
 'Percentage LL_inpatient beds occupied',
 'Percentage UL_inpatient beds occupied',
 'Total Inpatient Beds_x',
 'Total LL_inpatient beds occupied',
 'Total UL_inpatient beds occupied',
 'geocoded_state_x',
 'Inpatient Beds Occupied by COVID-19 Patients Estimated',
 'Count LL_occupied by COVID-19 patients',
 'Count UL_occupied by COVID-19 patients',
 'Percentage of Inpatient Beds Occupied by COVID-19 Patients Estimated',
 'Percentage LL_occupied by COVID-19 patients',
 'Percentage UL_occupied by COVID-19 patients',
 'Total Inpatient Beds_y',
 'Total LL_occupied by COVID-19 patients',
 'Total UL_occupied by COVID-19 patients',
 'geocoded_state_y',
 'Staffed Adult ICU Beds Occupied Estimated',
 'Count LL_ICU Beds Occupied',
 'Count UL_ICU Beds Occupied',
 'Percentage of Staffed A

## Checking the final dataset

In [36]:
final_dataset

Unnamed: 0,date,state,fips,cases,deaths,Inpatient Beds Occupied Estimated,Count LL_inpatient beds occupied,Count UL_inpatient beds occupied,Percentage of Inpatient Beds Occupied Estimated,Percentage LL_inpatient beds occupied,...,Staffed Adult ICU Beds Occupied Estimated,Count LL_ICU Beds Occupied,Count UL_ICU Beds Occupied,Percentage of Staffed Adult ICU Beds Occupied Estimated,Percentage LL_ICU Beds Occupied,Percentage UL_ICU Beds Occupied,Total Staffed Adult ICU Beds,Total LL_ICU Beds Occupied,Total UL_ICU Beds Occupied,geocoded_state
0,2021-03-24,AL,1,512711,10487,9911,9911,9911,71.57,71.57,...,1263,1263,1263,79.43,79.43,79.43,1590,1590,1590,POINT (-86.844516 32.756889)
1,2021-03-24,AK,2,61411,298,898,898,898,60.43,60.43,...,96,96,96,75.59,75.59,75.59,127,127,127,POINT (-151.631889 63.631126)
2,2021-03-24,AZ,4,837849,16842,9628,9628,9628,68.21,68.21,...,1061,1061,1061,49.58,49.58,49.58,2140,2140,2140,POINT (-111.664616 34.293239)
3,2021-03-24,AR,5,329177,5560,5160,5160,5160,67.93,67.93,...,680,680,680,68.62,68.62,68.62,991,991,991,POINT (-92.439162 34.899825)
4,2021-03-24,CA,6,3650833,58160,44478,44354,44602,74.91,74.25,...,5014,5003,5025,71.66,71.18,72.14,6997,6984,7010,POINT (-119.662127 37.211164)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1576,2021-04-23,VA,51,652321,10666,11698,11401,11995,73.19,67.28,...,1339,1306,1372,70.55,64.38,76.72,1898,1841,1955,POINT (-78.666382 37.510861)
1577,2021-04-23,WA,53,395354,5477,9126,8296,9956,74.99,52.69,...,962,864,1060,75.87,50.32,101.42,1268,1134,1402,POINT (-120.592492 47.411715)
1578,2021-04-23,WV,54,150693,2813,3880,3880,3880,78.42,78.42,...,458,442,474,75.70,75.70,75.70,605,583,627,POINT (-80.613729 38.642579)
1579,2021-04-23,WI,55,655560,7452,7872,7273,8471,65.47,48.63,...,911,832,990,57.33,37.20,77.46,1589,1429,1749,POINT (-89.732969 44.639954)


### Export of the files 

In [42]:
# Export is currently disabled; uncomment the lines below to write the merged
# dataset to disk as CSV and JSON.
#final_dataset.to_csv('final_covid.csv')
#final_dataset.to_json('file.json', orient = 'split', compression = 'infer', index = True)
# https://towardsdatascience.com/automatically-update-data-sources-in-python-e424dbea68d0

### Reference:
* Data Import (https://www.datacamp.com/community/tutorials/importing-data-into-pandas)
* Dict map (https://gist.github.com/rogerallen/1583593)
* Pandas Indexing (https://www.shanelynn.ie/merge-join-dataframes-python-pandas-index-1/)
* Remap (https://www.geeksforgeeks.org/using-dictionary-to-remap-values-in-pandas-dataframe-columns/)
* NYT data (https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv)
* Healthdata.gov data (https://healthdata.gov/dataset/covid-19-estimated-patient-impact-and-hospital-capacity-state)