In addition to the unemployment data, three other datasets are used here:
- Local County health outcomes (https://catalog.data.gov/dataset/places-local-data-for-better-health-place-data-2020-release)
- Vaccine hesitancy and social vulnerability indices (https://data.cdc.gov/Vaccinations/Social-Vulnerability-Index/ypqf-r5qs)
- Vaccine Hesitancy

In [None]:
import os
from datetime import datetime as dt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from toolz import thread_first, take
%matplotlib inline
# Raise pandas' display limits so wide and long frames are not truncated
# when they are rendered in the notebook.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 500)
pd.set_option('display.width', 200)

def printer(data, sample=True, n=5, name=None, pprint=False):
    """Print the shape of *data* and return a small preview of its rows.

    Args:
        data: Frame to inspect; must expose ``shape``, ``sample`` and ``head``.
        sample: When true, preview randomly sampled rows; otherwise the
            first rows.
        n: Maximum number of preview rows. For sampling it is capped at the
            number of rows in *data*.
        name: Optional label printed in front of the shape.
        pprint: When true, also print the preview instead of only
            returning it.

    Returns:
        The preview rows (so a notebook cell ending in this call displays them).
    """
    header = (name, data.shape) if name else (data.shape,)
    print(*header)
    print('-' * 10)

    if sample:
        # Never ask for more rows than exist; sampling would raise otherwise.
        preview = data.sample(min(n, data.shape[0]))
    else:
        preview = data.head(n)

    if pprint:
        print(preview)
    return preview

In [None]:
# Load the BLS county-level labor-force report (pipe-delimited text).
# The leading report lines are skipped and the column names are supplied
# explicitly; FIPS codes are read as strings so leading zeros survive.
unemploy = pd.read_csv(
    'https://www.bls.gov/web/metro/laucntycur14.txt',
    skiprows=5,
    sep='|',
    header=0,
    names=['area_code', 'fips_state_code', 'fips_county_code', 'county', 'month_year',
           'civilian_labor_force', 'employed', 'unemployed_level', 'unemployed_rate'],
    # NOTE(review): hard-coded row count, presumably to stop before the
    # report footer -- confirm it still matches the published file.
    nrows=45066,
    na_values=['-'],
    dtype={'fips_state_code': str, 'fips_county_code': str},
)

# Trim surrounding whitespace from every text column.
for col in unemploy:
    if unemploy[col].dtype in ('str', 'object'):
        unemploy[col] = unemploy[col].str.strip()

# Split "County, ST" into its two parts. n=1 keeps any further ", " inside the
# state part instead of raising, and reindex guarantees both columns exist;
# names without a ", " get a missing state.
county_state = unemploy['county'].str.split(', ', n=1, expand=True).reindex(columns=[0, 1])
unemploy['county'] = county_state[0]
unemploy['state'] = county_state[1]

# Drop the literal "(p)" marker (presumably "preliminary") before parsing.
# regex=False is explicit because the default of str.replace changed in
# pandas 2.0, and the parentheses must be matched literally.
month_text = unemploy['month_year'].str.replace('(p)', '', regex=False).str.strip()
unemploy['month_year'] = pd.to_datetime(month_text, format='%b-%y')

# Five-character county key: state code followed by county code.
unemploy['fips'] = unemploy['fips_state_code'] + unemploy['fips_county_code']
printer(unemploy)

In [None]:
# Load the county-level COVID file, keeping FIPS codes as strings.
covid_counties = pd.read_csv('inputs/us-counties.csv', dtype={'fips': 'str'})

# Truncate every report date to its month so rows can be grouped per county-month.
report_dates = pd.to_datetime(covid_counties['date'])
covid_counties['month_year'] = report_dates.to_numpy().astype('datetime64[M]')

# Keep the last case/death figures seen within each (fips, county, month).
# NOTE(review): 'last' follows file order -- presumably the file is sorted by date; confirm.
covid_counties = (
    covid_counties
    .groupby(['fips', 'county', 'month_year'], as_index=False)
    .agg({'cases': 'last', 'deaths': 'last'})
)

# Month-over-month change per county. The first month of a county has no
# predecessor, so its previous value is taken as 0.
prev_cases = covid_counties.groupby('fips')['cases'].transform('shift').fillna(0)
prev_deaths = covid_counties.groupby('fips')['deaths'].transform('shift').fillna(0)
covid_counties['case_diff'] = covid_counties['cases'] - prev_cases
covid_counties['death_diff'] = covid_counties['deaths'] - prev_deaths

printer(covid_counties, sample=False, n=20)

In [None]:
# Outer-join monthly unemployment with monthly COVID figures on county and month.
# Overlapping column names from the unemployment side get the '_full' suffix,
# and the '_merge' indicator records which side(s) each row came from.
unemploy_cols = ['fips', 'month_year', 'county', 'civilian_labor_force', 'employed',
                 'unemployed_level', 'unemployed_rate', 'state']
merged = pd.merge(
    unemploy[unemploy_cols],
    covid_counties,
    how='outer',
    on=['fips', 'month_year'],
    indicator=True,
    suffixes=['_full', ''],
)
merged

In [None]:
# Plot unemployment level against new monthly cases for one random county.
#
# Only counties matched in both sources are eligible: `merged` is an outer
# join, so a one-sided county has no COVID county name or no state, and
# building the title from it would fail.
matched_fips = merged.loc[merged['_merge'] == 'both', 'fips'].unique()
rand = np.random.choice(matched_fips)
county_rows = merged.loc[merged.fips == rand]

# Build the title from the first non-null value of each part; individual rows
# (e.g. months present in only one source) may still have NaN here.
name = ', '.join(
    county_rows[col].dropna().iloc[0]
    for col in ('county', 'state')
    if county_rows[col].notna().any()
)

# unemployed_level is text with thousands separators; convert it for plotting.
county_rows\
.assign(unemployed_count = lambda x: pd.to_numeric(x.unemployed_level.str.replace(',', '')))\
.plot(x='month_year', y=['unemployed_count', 'case_diff'], kind='line', figsize=(20, 10), title=name)

In [None]:
# Short column names for the health measures kept below.
rename = {
    'Binge drinking among adults aged >=18 years': 'binge_drinking_pct',
    'Current lack of health insurance among adults aged 18-64 years': 'lack_health_insurance_pct',
    'Mental health not good for >=14 days among adults aged >=18 years': 'poor_mental_health_pct',
    'No leisure-time physical activity among adults aged >=18 years': 'no_leisure_time_pct',
    'Sleeping less than 7 hours among adults aged >=18 years': 'bad_sleep_pct',
}

outcomes = pd.read_csv('inputs/places-local-outcomes.csv')
outcomes = outcomes[outcomes['LocationName'].notna()]

# Restrict to the measures of interest for 2018, then reshape to one row per
# (state, county) with one column per measure.
keep = ['Mental Health', 'Sleep <7 hours', 'Physical Inactivity', 'Health Insurance', 'Binge Drinking']
wanted_rows = outcomes['Short_Question_Text'].isin(keep) & outcomes['Year'].eq(2018)
outcomes = outcomes[wanted_rows].rename(
    columns={'StateAbbr': 'state', 'LocationName': 'county', 'Measure': 'measure_name', 'Data_Value': 'value'}
)
outcomes = outcomes.pivot_table(index=['state', 'county'], columns=['measure_name'], values='value')
outcomes = outcomes.reset_index().rename(columns=rename)

# Normalise county names into a join key; the vaccine data is given the same
# Virginia and Alaska treatment before the two are merged.
# Louisiana: add the " Parish" suffix.
outcomes['county'] = outcomes['county'].mask(outcomes['state'] == 'LA', outcomes['county'] + ' Parish')
# Virginia: add " City" to every name, then collapse the doubled suffixes that creates.
outcomes['county'] = outcomes['county'].mask(outcomes['state'] == 'VA', outcomes['county'] + ' City')
outcomes['county'] = outcomes['county'].str.replace(' city City', ' City').str.replace('City City', 'City')
# Alaska: keep only the first word of the name.
first_word = outcomes['county'].apply(lambda label: label.split(' ')[0])
outcomes['county'] = outcomes['county'].mask(outcomes['state'] == 'AK', first_word)
printer(outcomes)

In [None]:
# Source column -> short name for the vaccine-hesitancy columns that are kept.
care_about = {
    'FIPS Code': 'fips',
    'County Name': 'county',
    'Estimated hesitant': 'estimated_hesitant',
    'Estimated hesitant or unsure': 'estimated_hesitant_unsure',
    'Social Vulnerability Index (SVI)': 'social_vulnerability_index',
    'SVI Category': 'svi_category',
    'CVAC level of concern for vaccination rollout': 'cvac_level_of_concern',
    'Percent adults fully vaccinated against COVID-19 (as of 6/10/21)': 'percent_adults_vaccinated_by_jun_2021',
    'State Code': 'state',
}

# Read only the columns above; FIPS is read as text and left-padded to 5 characters.
vaccine = pd.read_csv(
    'inputs/vaccine-hesitation.csv',
    dtype={'FIPS Code': 'str'},
    usecols=care_about.keys(),
).rename(columns=care_about)
vaccine['fips'] = vaccine['fips'].str.zfill(5)

# The county field holds "<county>, <state name>"; split it into two columns.
name_parts = vaccine['county'].str.split(', ').tolist()
vaccine[['county', 'state_full_name']] = pd.DataFrame(name_parts, columns=['county', 'state_full_name'])

# Normalise county names into the same join key used for the outcomes data.
county_key = vaccine['county'].str.replace(' County', '').str.replace(' city', ' City')
# Virginia: add " City" to every name, then collapse the doubled suffixes that creates.
county_key = county_key.mask(vaccine['state'] == 'VA', county_key + ' City')
county_key = county_key.str.replace(' city City', ' City').str.replace('City City', 'City')
# Alaska: keep only the first word of the name.
county_key = county_key.mask(vaccine['state'] == 'AK', county_key.apply(lambda label: label.split(' ')[0]))
# FIPS 35013 is given the accented spelling explicitly.
county_key = county_key.mask(vaccine['fips'] == '35013', 'Doña Ana')
vaccine['county'] = county_key

vaccine[vaccine['state'] == 'VA'].head()

In [None]:
# Attach the health outcomes to the vaccine data by (state, county) name.
# Those name columns are only join keys here, so they are dropped afterwards.
health_context = vaccine.merge(outcomes, how='outer', on=['state', 'county'])
health_context = health_context.drop(columns=['county', 'state'])

# Join onto the unemployment/COVID table by FIPS, then keep only the rows
# that table flagged as present in both of its sources.
updated = health_context.merge(merged, how='outer', on='fips')
updated = updated.query("_merge == 'both'").drop(columns='_merge')

printer(updated)

In [None]:
# How each column is summarised within a group.
# svi_simple is carried along (first value per group) so the aggregated
# frames can be plotted by either SVI labelling.
agg = {
    'estimated_hesitant': 'mean',
    'estimated_hesitant_unsure': 'mean',
    'social_vulnerability_index': 'mean',
    'svi_category': 'first',
    'svi_simple': 'first',
    'cvac_level_of_concern': 'mean',
    'percent_adults_vaccinated_by_jun_2021': 'mean',
    'binge_drinking_pct': 'mean',
    'lack_health_insurance_pct': 'mean',
    'poor_mental_health_pct': 'mean',
    'no_leisure_time_pct': 'mean',
    'bad_sleep_pct': 'mean',
    'unemployed_rate': 'mean',
    'case_diff': 'mean',
    'death_diff': 'mean',
}

# Numeric prefixes make the labels sort from most to least vulnerable.
svi_category_labels = {
    'Very High Vulnerability': '1-Very High Vulnerability',
    'High Vulnerability': '2-High Vulnerability',
    'Moderate Vulnerability': '3-Moderate Vulnerability',
    'Low Vulnerability': '4-Low Vulnerability',
    'Very Low Vulnerability': '5-Very Low Vulnerability',
}
svi_simple_labels = {
    'Very High Vulnerability': '1-High',
    'High Vulnerability': '1-High',
    'Moderate Vulnerability': '2-Medium',
    'Low Vulnerability': '3-Low',
    'Very Low Vulnerability': '3-Low',
}

# Both label columns must be derived from the unprefixed category names.
# Deriving svi_simple from the already-prefixed svi_category matches nothing
# and just copies it. Stripping an existing "<digit>-" prefix first also keeps
# this cell correct when it is re-run.
raw_svi = updated['svi_category'].str.replace(r'^\d-', '', regex=True)
updated['svi_category'] = raw_svi.replace(svi_category_labels)
updated['svi_simple'] = raw_svi.replace(svi_simple_labels)

group_by = 'svi_category'
# The grouping column is already part of the result as a key; also listing it
# in the aggregation spec fails or duplicates it depending on pandas version.
agg_spec = {column: how for column, how in agg.items() if column != group_by}
overall_month = updated.groupby([group_by, 'month_year'], as_index=False).agg(agg_spec)
overall_county = updated.groupby([group_by, 'county'], as_index=False).agg(agg_spec)
overall_state = updated.groupby([group_by, 'state'], as_index=False).agg(agg_spec)

In [None]:
# Scatter of average monthly new cases per county against the simplified SVI
# label, coloured by the share of adults lacking health insurance.
fig, ax = plt.subplots(1, figsize=(15, 8))
# Other palettes tried: sns.diverging_palette(250, 30, l=65, center="dark", as_cmap=True),
# sns.color_palette("coolwarm", as_cmap=True), sns.color_palette("crest", as_cmap=True)
pal = sns.color_palette("rocket_r", as_cmap=True)

# Counties averaging 50000 or more new cases are left out of the plot.
plot_data = overall_county.query("case_diff < 50000").sort_values('svi_simple')

# Scale the colour range by the variable that is actually mapped to hue;
# the original used the range of unemployed_rate, a different column.
hue_max = plot_data['lack_health_insurance_pct'].max()
sns.scatterplot(data=plot_data, x='svi_simple', y='case_diff', hue='lack_health_insurance_pct', size='case_diff',
                hue_norm=(0, hue_max), ax=ax, palette=pal)

In [None]:
# Average monthly new deaths over time, one line per SVI category.
fig, ax = plt.subplots(nrows=1, figsize=(30, 10))
sns.lineplot(
    data=overall_month,
    x='month_year',
    y='death_diff',
    hue='svi_category',
    palette="rocket",
    sort=True,
    ax=ax,
)

In [None]:
# Average monthly new cases over time, coloured by lack of health insurance.
fig, ax = plt.subplots(nrows=1, figsize=(30, 10))
# NOTE(review): unemployed_rate is rescaled to a 0-1 fraction here, but the
# plot call below does not reference that column -- confirm what was intended.
monthly = overall_month.assign(unemployed_rate = lambda x: x['unemployed_rate'].fillna(0) / 100)
sns.lineplot(
    data=monthly,
    x='month_year',
    y='case_diff',
    hue='lack_health_insurance_pct',
    sort=True,
    ax=ax,
)