# Import packages

In [1]:
import requests
import json
import pandas as pd
import numpy as np
from io import StringIO
from bs4 import BeautifulSoup as bs

from datetime import date
import covidcast

import geopandas as gpd

# Let wide frames render up to 100 columns before pandas truncates the display.
pd.options.display.max_columns = 100

In [2]:
# Download the NYT county-level mask-use table and parse it as CSV.
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/mask-use/mask-use-by-county.csv'
# A timeout keeps a stalled connection from hanging the notebook indefinitely.
response = requests.get(url, timeout=60)
# Fail loudly on an HTTP error instead of handing an error page to the CSV parser.
response.raise_for_status()
s = response.text
nymask = pd.read_csv(StringIO(s))

In [5]:
# Normalise COUNTYFP to a five-character string code. The column is presumably
# parsed as an integer by read_csv, which strips leading zeros; zfill restores
# them for every short code, not only the four-character ones.
nymask['COUNTYFP'] = nymask['COUNTYFP'].astype(str).str.zfill(5)

In [6]:
# Download the NYT county-level cases/deaths table and parse it as CSV.
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
# A timeout keeps a stalled connection from hanging the notebook indefinitely.
response = requests.get(url, timeout=60)
# Fail loudly on an HTTP error instead of handing an error page to the CSV parser.
response.raise_for_status()
s = response.text
df = pd.read_csv(StringIO(s))
# Quick sanity check of what was loaded: dimensions and column names.
print(df.shape)
print(df.columns)

(897801, 6)
Index(['date', 'county', 'state', 'fips', 'cases', 'deaths'], dtype='object')


In [7]:
# Fetch the "smoothed_wearing_mask" signal from the "fb-survey" source at
# county resolution for 2020-10-01 through 2020-11-04.
survey_start = date(2020, 10, 1)
survey_end = date(2020, 11, 4)
mask_ind = covidcast.signal(
    "fb-survey", "smoothed_wearing_mask",
    survey_start, survey_end,
    "county",
)

In [8]:
# Keep only rows whose geo_value does not end in '000'
# (presumably aggregate codes rather than individual counties -- TODO confirm).
ends_in_000 = mask_ind['geo_value'].str.endswith('000')
mask_fip = mask_ind[~ends_in_000]

In [9]:
# Compare the number of distinct geo_value codes before and after the filter.
all_code_counts = mask_ind['geo_value'].value_counts()
kept_code_counts = mask_fip['geo_value'].value_counts()
print(all_code_counts.shape)
kept_code_counts.shape

(640,)


(592,)

In [10]:
# Average the signal value per geo_value over the fetched date range and
# expose the result as a two-column frame: FIPS, ind_mask.
mask_ind_means = (
    mask_fip
    .groupby('geo_value')['value']
    .mean()
    .rename_axis('FIPS')
    .reset_index(name='ind_mask')
)
mask_ind_means

Unnamed: 0,FIPS,ind_mask
0,01003,75.669396
1,01069,84.805330
2,01073,88.936083
3,01089,89.685856
4,01097,82.273652
...,...,...
587,55133,85.785520
588,55139,88.268850
589,55141,82.577264
590,56021,72.517789


In [11]:
# Load the local county statistics file; its "Unnamed: 0" column becomes the index.
election = pd.read_csv(
    'data/county_statistics.csv',
    index_col="Unnamed: 0",
)

In [12]:
# Drop the rows labelled 'Unassigned'. The explicit copy makes the result an
# independent frame, so the column assignment below does not write into a
# slice of the loaded frame (which triggers SettingWithCopyWarning).
election = election.loc[election['county'] != 'Unassigned'].copy()
# Rewrite 'county' as "<county>, <state>"; the 'state' column itself is kept.
# NOTE: re-running this cell without reloading `election` appends the state again.
election['county'] = election['county'] + ', ' + election['state']

In [13]:
# Columns of the election table that are carried forward.
to_keep = [
    'county',
    'total_votes20',
    'percentage20_Joe_Biden',
]
# Discard rows that have no value in 'votes20_Joe_Biden'.
has_biden_votes = election['votes20_Joe_Biden'].notna()
election = election[has_biden_votes]

In [14]:
# Narrow the election table to the columns listed in to_keep.
merge_elec = election.loc[:, to_keep]

In [15]:
# Sanity check: (rows, columns) of the narrowed election table.
n_rows, n_cols = merge_elec.shape
print((n_rows, n_cols))

(4633, 3)


In [16]:
# Read the county GeoJSON layer straight from the ArcGIS open-data URL.
geo_df = gpd.read_file(
    'https://opendata.arcgis.com/datasets/4cb598ae041348fb92270f102a6783cb_0.geojson'
)


In [17]:
# Attribute columns removed from the layer once the join label has been built.
geo_drop_columns = [
    'Day_1', 'Day_2', 'Day_3', 'Day_4', 'Day_5',
    'Day_6', 'Day_7', 'Day_8', 'Day_9', 'Day_10',
    'Day_11', 'Day_12', 'Day_13', 'Day_14',
    'OBJECTID', 'ST_ID', 'PCTPOVALL_', 'Med_HH_Inc',
    'State_Fata', 'DateChecke', 'url', 'Thumbnail',
    'State_Conf', 'State_Deat', 'State_Reco', 'State_Test',
    'TotalPop', 'NonHispWhP', 'BlackPop', 'AmIndop', 'PacIslPop',
    'OtherPop', 'TwoMorPop', 'HispPop', 'NonHisp', 'Age_85',
    'Age_80_84', 'Age_75_79', 'Age_70_74', 'Age_65_69',
    'Agetotal', 'AsianPop', 'Countyname', 'ST_Name',
]

# Rows to exclude: names containing "Out of" or "Unassigned", and Puerto Rico.
# na=False treats a missing name as "no match" instead of breaking the `~` below.
is_placeholder_name = geo_df['Countyname'].str.contains('Out of|Unassigned', na=False)
is_puerto_rico = geo_df['ST_Name'].str.contains('Puerto Rico', na=False)

# Build the cleaned layer as one chain that returns a new frame, rather than
# assigning into / dropping in place on a filtered slice (SettingWithCopyWarning).
geo_df = (
    geo_df
    .loc[~(is_placeholder_name | is_puerto_rico)]
    # Drop the last 7 remaining rows -- presumably non-county records at the
    # end of the layer; TODO confirm and replace with an explicit filter.
    .iloc[:-7]
    # "<Countyname>, <ST_Abbr>" label, later used as the merge key.
    .assign(county=lambda frame: frame['Countyname'] + ', ' + frame['ST_Abbr'])
    .drop(columns=geo_drop_columns)
)


In [18]:
# Show (rows, columns) of the layer after the cleaning steps.
geo_df.shape

(3142, 45)

In [19]:
# Left-join the election columns on the county label, then the mean
# mask-wearing value on FIPS; rows of geo_df without a match keep NaN.
elec_geo_df = (
    geo_df
    .merge(merge_elec, how='left', on='county', sort=True)
    .merge(mask_ind_means, how='left', on='FIPS')
)
# Count the rows that found no election match.
elec_geo_df['percentage20_Joe_Biden'].isna().sum()

94

In [20]:
# Summarise column dtypes and non-null counts of the merged frame.
elec_geo_df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 3142 entries, 0 to 3141
Data columns (total 48 columns):
 #   Column                  Non-Null Count  Dtype   
---  ------                  --------------  -----   
 0   ST_Abbr                 3142 non-null   object  
 1   FIPS                    3142 non-null   object  
 2   FatalityRa              3142 non-null   float64 
 3   Confirmedb              3142 non-null   float64 
 4   DeathsbyPo              3142 non-null   float64 
 5   Unemployme              3142 non-null   float64 
 6   EM_type                 3142 non-null   object  
 7   EM_date                 3142 non-null   object  
 8   EM_notes                3142 non-null   object  
 9   Confirmed               3142 non-null   int64   
 10  Deaths                  3142 non-null   int64   
 11  Beds_Licen              3142 non-null   int64   
 12  Beds_Staff              3142 non-null   int64   
 13  Beds_ICU                3142 non-null   int64   
 14  Ventilator      

In [23]:
# Write the merged layer to disk as GeoJSON.
elec_geo_df.to_file(
    'data/elec.geojson',
    driver='GeoJSON',
)