In [16]:
import pandas as pd, pickle, numpy as np, matplotlib.pyplot as plt, matplotlib.dates as mdates, matplotlib as mpl
import matplotlib.lines as mlines

In [17]:
#with open('covid_data.p','rb') as f:
#    data = pickle.load(f)

MemoryError: 

In [3]:
def createMobilityChange(df, cols_to_average, rolling_ave, new_col_name):
    """Add a smoothed column that averages several mobility columns.

    Each column in ``cols_to_average`` is cast to float, the row-wise mean of
    those columns is taken, and a trailing rolling mean over ``rolling_ave``
    rows of that mean is stored in ``new_col_name``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the source columns. It is modified in place (the source
        columns are re-typed and ``new_col_name`` is added).
    cols_to_average : list of str
        Names of the columns whose row-wise mean is smoothed.
    rolling_ave : int
        Rolling window length, in rows.
    new_col_name : str
        Name of the column that receives the result.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    for col in cols_to_average:
        # Replace the column outright so the float dtype actually sticks.
        df[col] = df[col].astype(float)
    # NOTE(review): the window runs over consecutive rows of ``df`` in their
    # current order with no grouping, so if the frame stacks several counties
    # the average crosses county boundaries - confirm this is intended.
    df[new_col_name] = df[cols_to_average].mean(axis=1).rolling(window=rolling_ave).mean()
    return df

In [4]:
# Create the county-level mobility change column as the average of the retail/recreation
# and workplaces percent-change columns; 14 is passed as the rolling-window argument.
# NOTE(review): `data` must already exist in the kernel - the only visible load
# (the pickle cell above) is commented out, so this cell fails on a fresh kernel.
cols_to_average = ['retail_and_recreation_percent_change_from_baseline_goog_cnty',
                   'workplaces_percent_change_from_baseline_goog_cnty']
data = createMobilityChange(data, cols_to_average, 14, 'mobility_change_cnty')

In [5]:
def addFeatures(df):
    """Add derived density, rate and share columns to ``df`` in place.

    New columns are ratios of existing count columns: population density,
    deaths per 100k, and the share of each ``pir_*``, ``unins_*``, ``hi_*``,
    ``e_*``, ``r_*`` and age-55-plus group within its matching total column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every source column referenced below. It is modified
        in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the derived columns added.
    """
    def total(*cols):
        # Plain left-to-right Series addition, so a missing value in any
        # component makes the whole sum missing (unlike DataFrame.sum).
        names = iter(cols)
        running = df[next(names)]
        for name in names:
            running = running + df[name]
        return running

    df['pop_density'] = df['pop_total'] / df['amount_land']
    df['deaths_per_100k'] = df['deaths_cdc'] / df['pop_total'] * 100000

    df['pir_200_plus_pct'] = df['pir_200_plus'] / df['pir_total']
    df['pir_150_plus_pct'] = total('pir_150_184', 'pir_185_199', 'pir_200_plus') / df['pir_total']

    df['unins_pct'] = total('unins_0_18', 'unins_18_34', 'unins_35_64', 'unins_65_plus') / df['unins_total']

    df['hi_75_plus_pct'] = total('hi_75_99', 'hi_100_124', 'hi_125_149', 'hi_150_199', 'hi_200_plus') / df['hi_total']
    df['hi_100_plus_pct'] = total('hi_100_124', 'hi_125_149', 'hi_150_199', 'hi_200_plus') / df['hi_total']
    # The 150+ share starts at the 150-199 bracket; the 125-149 bracket
    # belongs only to the 75+ and 100+ shares above.
    df['hi_150_plus_pct'] = total('hi_150_199', 'hi_200_plus') / df['hi_total']

    df['e_bach_plus_pct'] = total('e_bach', 'e_mast', 'e_prof', 'e_doct') / df['e_total']
    df['e_mast_plus_pct'] = total('e_mast', 'e_prof', 'e_doct') / df['e_total']

    for group in ('white', 'black', 'native', 'asian'):
        df['r_' + group + '_pct'] = df['r_' + group] / df['r_total']

    age_brackets = ['55_59', '60_61', '62_64', '65_66', '67_69', '70_74', '75_79', '80_84', '85_110']
    men_55_plus = total(*['m_' + bracket for bracket in age_brackets])
    women_55_plus = total(*['f_' + bracket for bracket in age_brackets])
    df['age_55_plus_pct'] = (men_55_plus + women_55_plus) / (df['m_total'] + df['f_total'])

    return df

In [6]:
def addPercentiles(df, percentile_cols):
    """Attach within-state, population-weighted percentile columns.

    For every state, counties (one row per ``state_code``/``fips_code`` pair
    with a non-null ``pop_total``) are sorted by each column in
    ``percentile_cols``; the percentile is the cumulative ``pop_total`` up to
    and including that county divided by the state's total. The result is
    left-merged back onto every row of the input as ``<col>_percentile``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``state_code``, ``fips_code``, ``pop_total`` and the
        inputs needed by ``addFeatures``. ``addFeatures`` is applied to it
        before the percentiles are computed.
    percentile_cols : list of str
        Columns to rank; each must exist after ``addFeatures`` has run.

    Returns
    -------
    pandas.DataFrame
        The feature-augmented frame plus one ``<col>_percentile`` column per
        entry in ``percentile_cols``. Rows whose county was dropped for a
        missing ``pop_total`` get NaN percentiles.
    """
    # The derived features do not depend on the state being processed, so
    # compute them once instead of once per state on the full frame.
    data = addFeatures(df)

    key_cols = ['state_code', 'fips_code']
    out_cols = key_cols + [col + '_percentile' for col in percentile_cols]

    per_state = []
    for state in data.state_code.unique():
        counties = (
            data[data.state_code == state]
            .dropna(subset=['pop_total'])
            .drop_duplicates(subset=key_cols)
        )
        state_pop = counties['pop_total'].sum()
        for percentile_col in percentile_cols:
            # sort_values places missing values last, so counties with no
            # value for this column end up with the highest percentiles.
            counties = counties.sort_values(by=[percentile_col])
            counties[percentile_col + '_percentile'] = counties['pop_total'].cumsum() / state_pop
        per_state.append(counties[out_cols])

    percentiles = pd.concat(per_state)
    return pd.merge(data, percentiles, how='left', on=key_cols)

In [7]:
# Attach percentile columns
percentile_cols = [
    'p_c_i', 'pop_density', 'pir_200_plus_pct', 'pir_150_plus_pct', 'unins_pct',
    'hi_75_plus_pct', 'hi_100_plus_pct', 'hi_150_plus_pct', 'e_bach_plus_pct', 'e_mast_plus_pct',
    'r_white_pct', 'r_black_pct', 'r_native_pct', 'r_asian_pct', 'age_55_plus_pct',
]
data = addPercentiles(data, percentile_cols)

# Add low (0) / high (1) group flags: 0 when the percentile is below 0.5,
# 1 otherwise (which includes rows whose percentile is missing).
for grp_col, pct_col in [
    ('pir_grp', 'pir_200_plus_pct_percentile'),
    ('unins_grp', 'unins_pct_percentile'),
    ('pop_density_grp', 'pop_density_percentile'),
    ('e_grp', 'e_bach_plus_pct_percentile'),
    ('r_b_grp', 'r_black_pct_percentile'),
    ('r_w_grp', 'r_white_pct_percentile'),
    ('age_55_plus_grp', 'age_55_plus_pct_percentile'),
]:
    data[grp_col] = [0 if pct < 0.5 else 1 for pct in data[pct_col]]


In [8]:
# Get max death count in each county
def getDeathCounties(df, state, death_col, death_min = 100):
    """List the counties of one state whose peak ``death_col`` reaches a minimum.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``state_code``, ``fips_code``, ``county_name`` and
        ``death_col`` columns.
    state : str
        Value of ``state_code`` to keep.
    death_col : str
        Column whose per-county maximum is compared with ``death_min``.
    death_min : number, default 100
        Counties whose maximum is below this value are left out.

    Returns
    -------
    tuple of (list, list)
        ``fips_code`` values and the matching ``county_name`` values, both
        ordered from the highest to the lowest maximum.
    """
    in_state = df[df.state_code == state]
    peak = in_state.groupby(['fips_code', 'county_name'])[[death_col]].max().reset_index()
    # Sort on the requested column (not a fixed name) after thresholding.
    peak = peak[peak[death_col] >= death_min].sort_values(by=death_col, ascending=False)
    return list(peak['fips_code']), list(peak['county_name'])

In [9]:
def plotCountyDeathCurves(df, state='NY', plot_col = 'deaths_per_100k',
                          death_min_col = 'deaths_cdc', death_min = 100, rolling_ave = 14,
                         start_date = '2020-03-01'):
    """Plot per-county death curves for one state in a 1x2 figure.

    Counties are those returned by ``getDeathCounties`` for ``death_min_col``
    and ``death_min``, after rows with a null ``pop_total`` are removed.
    The left panel shows ``plot_col`` summed per date; the right panel shows
    the ``rolling_ave``-row rolling mean of its day-over-day difference.
    Both panels only display dates on or after ``start_date``.
    """
    state_rows = df[df.state_code == state]
    state_rows = state_rows[state_rows.pop_total.notnull()]
    fips_codes, county_names = getDeathCounties(state_rows, state, death_min_col, death_min)
    counties = list(zip(fips_codes, county_names))

    fig = plt.figure(constrained_layout=True, figsize=(15,5))
    grid = fig.add_gridspec(1,2)
    county_filter = ' (Counties > ' + str(death_min) + ' deaths)'

    def county_series(fips_code, smooth):
        # One row per date for a single county; smoothing is applied before
        # the date cut so the window can use days earlier than start_date.
        series = state_rows[state_rows.fips_code == fips_code]
        series = series.groupby(['date'])[plot_col].agg('sum').reset_index()
        if smooth:
            series[plot_col] = series[plot_col].diff().rolling(window=rolling_ave).mean()
        return series[series['date'] >= start_date]

    def draw_panel(cell, title, y_label, tick_format, smooth):
        # Draw one subplot with a line and a legend entry per county.
        ax = fig.add_subplot(cell)
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel(y_label)
        ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter(tick_format))

        handles = []
        for idx, (fips_code, county_name) in enumerate(counties):
            color = 'C' + str(idx)
            curve = county_series(fips_code, smooth)
            ax.plot(curve['date'], curve[plot_col], color=color)
            handles.append(mlines.Line2D([],[], color=color, label=county_name))

        # Month major ticks labelled as month-day, with daily minor ticks
        ax.xaxis.set_major_locator(mdates.MonthLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
        ax.xaxis.set_minor_locator(mdates.DayLocator())

        ax.legend(handles=handles, loc='upper left', title='County')

    ### PLOT 1: cumulative values
    draw_panel(grid[0,0], 'Total Deaths' + county_filter,
               'Total Deaths per 100k', '{x:,.0f}', smooth=False)

    ### PLOT 2: rolling average of daily change
    draw_panel(grid[0,1], str(rolling_ave) + ' Day Rolling Average Daily Deaths' + county_filter,
               'Ave Daily Deaths per 100k', '{x:,.1f}', smooth=True)

In [10]:
def plotGroupedDeathCurves(df, states=['NY'], plot_col = 'deaths_per_100k',
                          death_min_col = 'deaths_cdc', death_min = 0, rolling_ave = 14, bycol='pir_grp',
                          start_date = '2020-03-01'):
    """Plot death curves for the low (0) and high (1) groups of ``bycol``.

    Rows are limited to ``states``, to non-null ``pop_total`` and to rows
    where ``death_min_col`` is at least ``death_min``. For each group,
    ``pop_total`` and ``deaths_cdc`` are summed per date and turned into
    deaths per 100k, stored under ``plot_col``. The left panel shows that
    value; the right panel shows the ``rolling_ave``-row rolling mean of its
    day-over-day difference. Only dates on or after ``start_date`` are drawn.
    """
    rows = df[df.state_code.isin(states)]
    rows = rows[rows.pop_total.notnull()]
    rows = rows[rows[death_min_col] >= death_min]

    fig = plt.figure(constrained_layout=True, figsize=(15,5))
    grid = fig.add_gridspec(1,2)
    county_filter = ' (Counties > ' + str(death_min) + ' deaths)'
    group_labels = ["Low","High"]

    def group_series(group_value, smooth):
        # Pool all rows of the group per date, then derive the rate from the
        # pooled totals (not from an average of per-county rates).
        totals = rows[rows[bycol] == group_value]
        totals = totals.groupby(['date'])[['pop_total','deaths_cdc']].agg('sum').reset_index()
        totals[plot_col] = totals['deaths_cdc'] / totals['pop_total'] * 100000
        if smooth:
            totals[plot_col] = totals[plot_col].diff().rolling(window=rolling_ave).mean()
        return totals[totals['date'] >= start_date]

    def draw_panel(cell, title, y_label, tick_format, smooth):
        # Draw one subplot with a line and a legend entry per group.
        ax = fig.add_subplot(cell)
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel(y_label)
        ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter(tick_format))

        handles = []
        for group_value, label in enumerate(group_labels):
            color = 'C' + str(group_value)
            curve = group_series(group_value, smooth)
            ax.plot(curve['date'], curve[plot_col], color=color)
            handles.append(mlines.Line2D([],[], color=color, label=label))

        # Month major ticks labelled as month-day, with daily minor ticks
        ax.xaxis.set_major_locator(mdates.MonthLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
        ax.xaxis.set_minor_locator(mdates.DayLocator())

        ax.legend(handles=handles, loc='upper left', title=bycol)

    ### PLOT 1: cumulative values
    draw_panel(grid[0,0], 'Total Deaths' + county_filter,
               'Total Deaths per 100k', '{x:,.0f}', smooth=False)

    ### PLOT 2: rolling average of daily change
    draw_panel(grid[0,1], str(rolling_ave) + ' Day Rolling Average Daily Deaths' + county_filter,
               'Daily Deaths per 100k', '{x:,.1f}', smooth=True)

In [11]:
def plotInteractions(df, x_col, y_col, z_col, states):
    """Scatter three columns against each other in a 2x2 figure.

    Only rows with ``deaths_per_100k`` above zero in ``states`` are used.
    Top row: ``z_col`` against ``x_col`` and against ``y_col``, coloured by
    ``date``. Bottom left: ``y_col`` against ``x_col`` on the latest date,
    coloured by ``z_col``. Bottom right: a 3D scatter of all three columns,
    coloured by ``date``.
    """
    rows = df[(df.deaths_per_100k > 0) & df.state_code.isin(states)]

    fig = plt.figure(constrained_layout=True, figsize=(10,10))
    grid = fig.add_gridspec(2,2)

    def scatter_panel(cell, frame, horiz_col, vert_col, **colour_args):
        # Flat scatter with an empty title and the column names as axis labels
        ax = fig.add_subplot(cell)
        ax.set_title('')
        ax.set_xlabel(horiz_col)
        ax.set_ylabel(vert_col)
        ax.scatter(frame[horiz_col], frame[vert_col], s=10, **colour_args)

    # Create plots
    scatter_panel(grid[0,0], rows, x_col, z_col, c=rows['date'])
    scatter_panel(grid[0,1], rows, y_col, z_col, c=rows['date'])

    # Snapshot of the most recent date only
    latest = rows[rows.date == rows.date.max()]
    scatter_panel(grid[1,0], latest, x_col, y_col, c=latest[z_col], cmap='Reds')

    ax3d = fig.add_subplot(grid[1,1], projection='3d')
    ax3d.set_xlabel(x_col)
    ax3d.set_ylabel(y_col)
    ax3d.set_zlabel(z_col)
    ax3d.scatter(rows[x_col], rows[y_col], rows[z_col], c=rows['date'], s=10)
    ax3d.set_zlim(bottom=0)
    ax3d.view_init(10, 200)

In [12]:
def stateGraphs(df, states, case_col, death_col, start_date, rolling_ave):
    """Plot pooled case and death curves for a set of states in a 2x2 figure.

    Rows for ``states`` are summed per date. Each panel draws cases in blue
    on the left axis and deaths in red on a twin right axis:

    * top left - cumulative totals
    * top right - ``rolling_ave``-row rolling mean of the daily change
    * bottom left - cumulative totals per 100k of the summed ``pop_total``
    * bottom right - rolling mean of the daily change of the per-100k values

    Rolling values are computed on the full history; only dates on or after
    ``start_date`` are drawn.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``state_code``, ``date``, ``pop_total``, ``case_col`` and
        ``death_col`` columns. It is not modified.
    states : list of str
        ``state_code`` values to pool together.
    case_col, death_col : str
        Columns holding the cumulative case and death counts.
    start_date : str or datetime-like
        First date to display; compared directly with the ``date`` column.
    rolling_ave : int
        Rolling window length, in rows (one row per date after aggregation).
    """
    fig = plt.figure(constrained_layout=True, figsize=(12,6))
    gs = fig.add_gridspec(2,2)

    # Data manipulation: one row per date, pooled over the selected states
    daily = df[df.state_code.isin(states)]
    daily = daily.groupby(['date'])[[case_col, death_col, 'pop_total']].agg('sum').reset_index()
    daily[case_col+'_rolling'] = daily[case_col].diff().rolling(window=rolling_ave).mean()
    daily[death_col+'_rolling'] = daily[death_col].diff().rolling(window=rolling_ave).mean()
    daily['cases_per_100k'] = daily[case_col] / daily['pop_total'] * 100000
    daily['deaths_per_100k'] = daily[death_col] / daily['pop_total'] * 100000
    daily['cases_per_100k_rolling'] = daily['cases_per_100k'].diff().rolling(window=rolling_ave).mean()
    daily['deaths_per_100k_rolling'] = daily['deaths_per_100k'].diff().rolling(window=rolling_ave).mean()
    # Cut to the display window only after the rolling columns are built
    daily = daily[daily['date']>=start_date]

    def twin_panel(cell, title, cases_label, deaths_label, cases_series, deaths_series, tick_format):
        # Draw one subplot: cases on the left axis, deaths on a twin right axis.
        cases_ax = fig.add_subplot(cell)
        cases_ax.set_title(title)
        cases_ax.set_xlabel('Date')
        cases_ax.set_ylabel(cases_label)
        cases_ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter(tick_format))
        cases_ax.plot(daily['date'], daily[cases_series], color='blue')

        deaths_ax = cases_ax.twinx()
        deaths_ax.set_ylabel(deaths_label)
        deaths_ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter(tick_format))
        deaths_ax.plot(daily['date'], daily[deaths_series], color='red')

        # Date ticks are set on the twin axis, as in the other plotting cells
        deaths_ax.xaxis.set_major_locator(mdates.MonthLocator())
        deaths_ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
        deaths_ax.xaxis.set_minor_locator(mdates.DayLocator())

        cases_handle = mlines.Line2D([],[], color='blue', label='Cases')
        deaths_handle = mlines.Line2D([],[], color='red', label='Deaths')
        deaths_ax.legend(handles=[cases_handle, deaths_handle], loc='upper left')

    ### PLOT 1 -------------------
    twin_panel(gs[0,0], 'Total Cases and Deaths', 'Cases', 'Deaths',
               case_col, death_col, '{x:,.0f}')

    ### PLOT 2 -------------------
    twin_panel(gs[0,1], 'Ave. Daily Cases and Deaths', 'Cases', 'Deaths',
               case_col+'_rolling', death_col+'_rolling', '{x:,.0f}')

    ### PLOT 3 -------------------
    twin_panel(gs[1,0], 'Total Cases and Deaths per 100k',
               'Total Cases per 100k', 'Total Deaths per 100k',
               'cases_per_100k', 'deaths_per_100k', '{x:,.0f}')

    ### PLOT 4 -------------------
    # Labels name what each axis actually shows (average daily values per 100k)
    twin_panel(gs[1,1], 'Ave Daily Cases and Deaths per 100k',
               'Ave Daily Cases per 100k', 'Ave Daily Deaths per 100k',
               'cases_per_100k_rolling', 'deaths_per_100k_rolling', '{x:,.2f}')

In [13]:
def statesCompare(df, states, case_col, death_col, start_date, rolling_ave):
    """Compare death curves across states in a 2x2 figure, one line per state.

    Rows are summed per state and date, then four panels are drawn:

    * top left - cumulative deaths
    * top right - ``rolling_ave``-row rolling mean of daily deaths
    * bottom right - rolling mean of daily deaths per 100k
    * bottom left - cumulative deaths per 100k (y-axis anchored at zero)

    Rolling values are computed on each state's full history; only dates on
    or after ``start_date`` are drawn, in every panel.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``state_code``, ``date``, ``pop_total``, ``case_col`` and
        ``death_col`` columns. It is not modified.
    states : list of str
        ``state_code`` values to draw, in legend/colour order.
    case_col : str
        Aggregated alongside the deaths but not plotted.
    death_col : str
        Column holding the cumulative death count.
    start_date : str or datetime-like
        First date to display; compared directly with the ``date`` column.
    rolling_ave : int
        Rolling window length, in rows (one row per date after aggregation).
    """
    states = list(states)

    fig = plt.figure(constrained_layout=True, figsize=(12,6))
    gs = fig.add_gridspec(2,2)

    # Aggregate only the states that will be drawn
    totals = df[df.state_code.isin(states)]
    totals = totals.groupby(['state_code','date'])[[case_col, death_col, 'pop_total']].agg('sum').reset_index()

    def per_100k(state_rows):
        return state_rows[death_col] / state_rows['pop_total'] * 100000

    def daily_average(series):
        return series.diff().rolling(window=rolling_ave).mean()

    def draw_panel(cell, title, tick_format, compute, zero_floor=False):
        # Draw one subplot; `compute` maps a state's date-sorted rows to the
        # series to plot and runs before the start_date cut.
        ax = fig.add_subplot(cell)
        ax.set_title(title)
        ax.set_xlabel('Date')
        ax.set_ylabel('Deaths')
        ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter(tick_format))

        handles = []
        for idx, state in enumerate(states):
            color = 'C' + str(idx)
            state_rows = totals[totals.state_code == state].sort_values(by='date')
            values = compute(state_rows)
            shown = state_rows['date'] >= start_date
            ax.plot(state_rows.loc[shown, 'date'], values[shown], color=color)
            handles.append(mlines.Line2D([],[], color=color, label=state))

        if zero_floor:
            # Anchor the y-axis at zero directly instead of drawing a hidden
            # zero line, which would also stretch the x-axis to every date.
            ax.set_ylim(bottom=0)

        ax.xaxis.set_major_locator(mdates.MonthLocator())
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
        ax.xaxis.set_minor_locator(mdates.DayLocator())

        ax.legend(handles=handles, loc='upper left')

    ### PLOT 1: cumulative deaths
    draw_panel(gs[0,0], 'Cumulative Deaths', '{x:,.0f}',
               lambda state_rows: state_rows[death_col])

    ### PLOT 2: average daily deaths
    draw_panel(gs[0,1], 'Average Daily Deaths', '{x:,.0f}',
               lambda state_rows: daily_average(state_rows[death_col]))

    ### PLOT 3: average daily deaths per 100k
    draw_panel(gs[1,1], 'Average Daily Deaths per 100k', '{x:,.1f}',
               lambda state_rows: daily_average(per_100k(state_rows)))

    ### PLOT 4: total deaths per 100k
    draw_panel(gs[1,0], 'Total Deaths per 100k', '{x:,.1f}', per_100k, zero_floor=True)

In [14]:
def graphMobility(df, states, start_date, rolling_ave):
    """Plot the smoothed state-level mobility change, one line per state.

    The mobility change is the row-wise mean of the two state-level
    retail/recreation and workplaces percent-change columns, converted with
    ``pd.to_numeric``. Rows without a value are dropped, one row is kept per
    ``date``/``state_code`` pair, and a ``rolling_ave``-row rolling mean is
    taken per state before cutting to dates on or after ``start_date``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``date``, ``state_code`` and the two ``*_goog_st``
        columns. It is read from the argument and not modified.
    states : list of str
        ``state_code`` values to draw, in legend/colour order.
    start_date : str or datetime-like
        First date to display; compared directly with the ``date`` column.
    rolling_ave : int
        Rolling window length, in rows.
    """
    states = list(states)
    mobility_cols = ["retail_and_recreation_percent_change_from_baseline_goog_st",
                     "workplaces_percent_change_from_baseline_goog_st"]

    fig = plt.figure(constrained_layout=True, figsize=(8,5))
    gs = fig.add_gridspec(1,1)

    # Work on a narrow copy of the caller's frame (requested states, needed
    # columns) rather than on a notebook-level global.
    rows = df.loc[df['state_code'].isin(states), ['date', 'state_code'] + mobility_cols]
    state_daily = (
        rows[['date', 'state_code']]
        .assign(mobility_change=rows[mobility_cols].apply(pd.to_numeric).mean(axis=1))
        .dropna(subset=['mobility_change'])
        .drop_duplicates(subset=['date', 'state_code'])
    )

    # Set up subplot, axis and titles
    ax1 = fig.add_subplot(gs[0,0])
    ax1.set_title('Change in Mobility')
    ax1.set_xlabel('Date')
    ax1.set_ylabel('Change in Mobility')
    ax1.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))

    legend_lines = []
    for idx, state in enumerate(states):
        color = 'C' + str(idx)
        # Smooth on the full history, then cut to the display window
        curve = state_daily[state_daily['state_code'] == state].sort_values(by=['date'])
        smoothed = curve['mobility_change'].rolling(window=rolling_ave).mean()
        shown = curve['date'] >= start_date

        ax1.plot(curve.loc[shown, 'date'], smoothed[shown], color=color)
        legend_lines.append(mlines.Line2D([],[], color=color, label=state))

    # Month major ticks labelled as month-day, with daily minor ticks
    ax1.xaxis.set_major_locator(mdates.MonthLocator())
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
    ax1.xaxis.set_minor_locator(mdates.DayLocator())

    # Put on the legend
    ax1.legend(handles=legend_lines, loc='lower left')

In [15]:
def plotStateTotals(df, sum_col, plot_col, min_deaths):
    """Bar-chart a per-state total taken from the latest date in ``df``.

    Rows on the maximum ``date`` with ``sum_col`` of at least ``min_deaths``
    are summed per ``state_code`` and ordered by ``sum_col``, largest first.
    A ``<sum_col>_per_100k`` column is derived from the summed ``pop_total``,
    and ``plot_col`` (either of those columns) is drawn as one bar per state.
    """
    latest = df[df['date'] == df.date.max()]
    qualifying = latest[latest[sum_col] >= min_deaths]

    by_state = qualifying.groupby(['state_code'])[[sum_col, 'pop_total']].agg('sum')
    by_state = by_state.sort_values(by=[sum_col], ascending=False).reset_index()
    by_state[sum_col+'_per_100k'] = by_state[sum_col] * 100000 / by_state['pop_total']

    # Bars sit at the 0..n-1 row positions and are labelled with state codes
    positions = by_state.index
    plt.figure(figsize=(15,6))
    plt.bar(positions, by_state[plot_col], width=0.75)
    plt.xticks(positions, by_state.state_code)
    plt.ylabel(plot_col)
    plt.show()