In [1]:
import pandas as pd
import datetime

In [2]:
# Raw CSV of the US deaths time series in the CSSEGISandData/COVID-19 GitHub repository.
deaths_url = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
    'time_series_covid19_deaths_US.csv'
)

In [3]:
# Fetch the CSV behind `deaths_url` and parse it into a DataFrame.
raw_data_death = pd.read_csv(filepath_or_buffer=deaths_url)

In [4]:
# Preview the first five rows of the raw download.
raw_data_death.head(n=5)

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,5/4/20,5/5/20,5/6/20,5/7/20,5/8/20,5/9/20,5/10/20,5/11/20,5/12/20,5/13/20
0,16,AS,ASM,16,60.0,,American Samoa,US,-14.271,-170.132,...,0,0,0,0,0,0,0,0,0,0
1,316,GU,GUM,316,66.0,,Guam,US,13.4443,144.7937,...,5,5,5,5,5,5,5,5,5,5
2,580,MP,MNP,580,69.0,,Northern Mariana Islands,US,15.0979,145.6739,...,2,2,2,2,2,2,2,2,2,2
3,630,PR,PRI,630,72.0,,Puerto Rico,US,18.2208,-66.5901,...,97,99,99,102,107,108,111,113,114,115
4,850,VI,VIR,850,78.0,,Virgin Islands,US,18.3358,-64.8963,...,4,4,4,4,4,4,4,5,6,6


In [9]:
# rename columns
def covid_rename_columns(input_data):
    """Return a copy of ``input_data`` with the location columns renamed.

    The mapping applied is:

    * ``Province_State`` -> ``subregion``
    * ``Country_Region`` -> ``country``
    * ``Lat`` -> ``lat``
    * ``Long_`` -> ``long``

    Every other column keeps its name; the frame passed in is not modified.
    """
    new_names = {
        'Province_State': 'subregion',
        'Country_Region': 'country',
        'Lat': 'lat',
        'Long_': 'long',
    }
    return input_data.rename(columns=new_names)

In [10]:
# replace na with empty string for subregion column
def covid_fill_missing(input_data):
    """Return a copy of ``input_data`` with missing ``subregion`` values set to ''.

    Only the ``subregion`` column is filled; missing values in any other
    column are left as they are, and the frame passed in is not modified.
    """
    replacements = {'subregion': ''}
    filled = input_data.fillna(value=replacements)
    return filled

In [11]:
# Apply the column renaming to the raw frame and keep the result as `deaths`.
deaths = raw_data_death.pipe(covid_rename_columns)

In [12]:
# Preview the first five rows to check the renamed columns.
deaths.head(n=5)

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,subregion,country,lat,long,...,5/4/20,5/5/20,5/6/20,5/7/20,5/8/20,5/9/20,5/10/20,5/11/20,5/12/20,5/13/20
0,16,AS,ASM,16,60.0,,American Samoa,US,-14.271,-170.132,...,0,0,0,0,0,0,0,0,0,0
1,316,GU,GUM,316,66.0,,Guam,US,13.4443,144.7937,...,5,5,5,5,5,5,5,5,5,5
2,580,MP,MNP,580,69.0,,Northern Mariana Islands,US,15.0979,145.6739,...,2,2,2,2,2,2,2,2,2,2
3,630,PR,PRI,630,72.0,,Puerto Rico,US,18.2208,-66.5901,...,97,99,99,102,107,108,111,113,114,115
4,850,VI,VIR,850,78.0,,Virgin Islands,US,18.3358,-64.8963,...,4,4,4,4,4,4,4,5,6,6


In [13]:
# create function to reshape data like we did for confirmed cases
def covid_melt_data(input_data, value_var_name):
    """Reshape a wide time-series frame into long format (one row per place and date).

    Parameters
    ----------
    input_data : pandas.DataFrame
        Wide frame holding the identifier columns ``country``, ``subregion``,
        ``lat`` and ``long`` plus one column per date, labelled in the
        ``%m/%d/%y`` pattern (for example ``5/4/20``).
    value_var_name : str
        Name given to the column that receives the melted values.

    Returns
    -------
    pandas.DataFrame
        Long frame with the four identifier columns, ``date_RAW`` (the
        original date label as text) and ``value_var_name``.

    Raises
    ------
    KeyError
        If any of the four identifier columns is missing from ``input_data``.

    Notes
    -----
    Only date-labelled columns are melted. Other non-identifier columns
    (e.g. ``UID``, ``FIPS``, ``Admin2``) are left out; otherwise their values
    would be stacked into the value column and their labels into
    ``date_RAW``, which cannot be parsed as dates later. If no date-labelled
    column is found, every non-identifier column is melted, as before.
    """
    id_columns = ['country', 'subregion', 'lat', 'long']
    other_columns = [label for label in input_data.columns if label not in id_columns]

    # A label counts as a date only if it parses with the same format that is
    # used downstream to convert `date_RAW`; anything else becomes NaT here.
    parsed_labels = pd.to_datetime(
        pd.Series(other_columns, dtype='object').astype(str),
        format='%m/%d/%y',
        errors='coerce',
    )
    date_columns = [
        label
        for label, is_date in zip(other_columns, parsed_labels.notna())
        if is_date
    ]

    output_data = input_data.melt(
        id_vars=id_columns,
        # None keeps pandas' default of melting all remaining columns.
        value_vars=date_columns or None,
        var_name='date_RAW',
        value_name=value_var_name,
    )
    return output_data

In [14]:
# clean up how we dealt with dates
def covid_convert_dates(input_data):
    """Swap the text column ``date_RAW`` for a parsed ``date`` column.

    The ``date_RAW`` values are parsed with the ``%m/%d/%y`` format and stored
    in a column named ``date``; ``date_RAW`` is then removed. A new frame is
    returned and the frame passed in is not modified.
    """
    parsed_dates = pd.to_datetime(input_data.date_RAW, format='%m/%d/%y')
    with_dates = input_data.assign(date=parsed_dates)
    return with_dates.drop(columns=['date_RAW'])

In [15]:
# only keep columns we want, sort
def covid_rearrange_data(input_data,value_var_name):
    """Keep the reporting columns, sort the rows, and renumber the index.

    The result holds ``country``, ``subregion``, ``date``, ``lat``, ``long``
    and the column named by ``value_var_name``, in that order. Rows are
    sorted by ``country``, ``subregion``, ``date``, ``lat`` and ``long``, and
    the index is reset to 0..n-1 with the old index discarded.
    """
    keep_columns = ['country', 'subregion', 'date', 'lat', 'long', value_var_name]
    sort_keys = ['country', 'subregion', 'date', 'lat', 'long']

    selected = input_data.filter(items=keep_columns)
    ordered = selected.sort_values(by=sort_keys)
    return ordered.reset_index(drop=True)

In [None]:
# def covid_get_data(input_url, value_var_name):
#     covid_data_inprocess = pd.read_csv(input_url)
#     covid_data_inprocess = covid_rename_columns(covid_data_inprocess)
#     covid_data_inprocess = covid_fill_missing(covid_data_inprocess)
#     covid_data_inprocess = covid_melt_data(covid_data_inprocess,value_var_name)
#     covid_data_inprocess = covid_convert_dates(covid_data_inprocess)
#     covid_data_inprocess = covid_rearrange_data(covid_data_inprocess, value_var_name)
#     return(covid_data_inprocess)