# Clustering US Counties by Covid-19 Response Performance

A subproject of the Anti-Rona Task Force's EECS 731 Semester Project

By Benjamin Wyss

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Close every matplotlib figure that may still be open in this kernel session.
plt.close('all')

### Reading Data Sets From CSV Files

##### U.S. County Populations and Statistics
Taken from https://covid19.census.gov/datasets/21843f238cbb46b08615fc53e19e0daf/data?geometry=136.810%2C28.795%2C-136.179%2C67.148 on 11/7/20.

In [12]:
# Load the county-level average-household-size / population-density CSV into a DataFrame.
# The path is relative, so it resolves against the kernel's current working directory.
populations = pd.read_csv('../data/raw/Average_Household_Size_and_Population_Density_-_County.csv')

In [3]:
# Show the raw populations frame as the cell output (pandas elides the middle rows and columns).
populations

Unnamed: 0,FID,COUNTYNS,GEOID,ALAND,AWATER,NAME,State,B25010_001E,B25010_001M,B25010_002E,...,B01001_001E,B01001_001M,B01001_calc_PopDensity,created_user,created_date,last_edited_user,last_edited_date,B01001_calc_PopDensityM,SHAPE_Length,SHAPE_Area
0,1,161526,1001,1539602123,25706961,Autauga County,Alabama,2.59,0.05,2.59,...,55200,,35.853419,esri_demographics,2020/04/01 20:53:36,esri_demographics,2020/04/01 20:53:36,0.0,2.066037,0.150256
1,2,161527,1003,4117546676,1133055836,Baldwin County,Alabama,2.61,0.04,2.66,...,208107,,50.541504,esri_demographics,2020/04/01 20:53:36,esri_demographics,2020/04/01 20:53:36,0.0,4.483746,0.409904
2,3,161528,1005,2292144655,50538698,Barbour County,Alabama,2.49,0.07,2.44,...,25782,,11.247981,esri_demographics,2020/04/01 20:53:36,esri_demographics,2020/04/01 20:53:36,0.0,2.695262,0.223270
3,4,161529,1007,1612167481,9602089,Bibb County,Alabama,2.99,0.14,3.05,...,22527,,13.973114,esri_demographics,2020/04/01 20:53:36,esri_demographics,2020/04/01 20:53:36,0.0,1.887514,0.156473
4,5,161530,1009,1670103911,15015423,Blount County,Alabama,2.77,0.05,2.85,...,57645,,34.515816,esri_demographics,2020/04/01 20:53:36,esri_demographics,2020/04/01 20:53:36,0.0,2.423552,0.164405
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3215,3216,1804553,72145,118777649,57795019,Vega Baja Municipio,Puerto Rico,2.93,0.09,2.93,...,53371,,449.335380,esri_demographics,2020/04/01 20:53:36,esri_demographics,2020/04/01 20:53:36,0.0,0.485650,0.010381
3216,3217,1804554,72147,131541395,552192819,Vieques Municipio,Puerto Rico,3.55,0.38,3.03,...,8771,,66.678630,esri_demographics,2020/04/01 20:53:36,esri_demographics,2020/04/01 20:53:36,0.0,0.675819,0.012181
3217,3218,1804555,72149,92298569,3622639,Villalba Municipio,Puerto Rico,2.96,0.10,3.09,...,22993,,249.115455,esri_demographics,2020/04/01 20:53:36,esri_demographics,2020/04/01 20:53:36,0.0,0.431598,0.008189
3218,3219,1804556,72151,143005179,72592521,Yabucoa Municipio,Puerto Rico,2.91,0.10,2.90,...,34149,,238.795547,esri_demographics,2020/04/01 20:53:36,esri_demographics,2020/04/01 20:53:36,0.0,0.676628,0.012229


##### Live-Updated Covid-19 Cases by U.S. County

Loaded from a live-updating GitHub submodule of https://github.com/nytimes/covid-19-data.

In [6]:
# Load the live county-level case file from the covid-19-data checkout under data/raw.
# The path is relative, so it resolves against the kernel's current working directory.
cases = pd.read_csv('../data/raw/covid-19-data/live/us-counties.csv')

In [7]:
# Show the raw cases frame as the cell output (pandas elides the middle rows).
cases

Unnamed: 0,date,county,state,fips,cases,deaths,confirmed_cases,confirmed_deaths,probable_cases,probable_deaths
0,2020-11-07,Autauga,Alabama,1001.0,2242,30.0,2003.0,28.0,239.0,2.0
1,2020-11-07,Baldwin,Alabama,1003.0,7134,83.0,6019.0,79.0,1115.0,4.0
2,2020-11-07,Barbour,Alabama,1005.0,1090,9.0,755.0,9.0,335.0,0.0
3,2020-11-07,Bibb,Alabama,1007.0,917,15.0,835.0,11.0,82.0,4.0
4,2020-11-07,Blount,Alabama,1009.0,2253,26.0,1716.0,26.0,537.0,0.0
...,...,...,...,...,...,...,...,...,...,...
3238,2020-11-07,Sweetwater,Wyoming,56037.0,681,4.0,642.0,,39.0,
3239,2020-11-07,Teton,Wyoming,56039.0,922,2.0,889.0,,33.0,
3240,2020-11-07,Uinta,Wyoming,56041.0,542,3.0,424.0,,118.0,
3241,2020-11-07,Washakie,Wyoming,56043.0,187,7.0,168.0,,19.0,


##### Reported Mask Use by U.S. County

Loaded from a live-updating GitHub submodule of https://github.com/nytimes/covid-19-data.

Results were collected between 7/2/20 and 7/14/20 via a national NYTimes survey in which participants were asked:

"How often do you wear a mask in public when you expect to be within six feet of another person?"

In [8]:
# Load the per-county mask-use survey file from the covid-19-data checkout under data/raw.
# The path is relative, so it resolves against the kernel's current working directory.
masks = pd.read_csv('../data/raw/covid-19-data/mask-use/mask-use-by-county.csv')

In [9]:
# Show the raw mask-use frame as the cell output (pandas elides the middle rows).
masks

Unnamed: 0,COUNTYFP,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS
0,1001,0.053,0.074,0.134,0.295,0.444
1,1003,0.083,0.059,0.098,0.323,0.436
2,1005,0.067,0.121,0.120,0.201,0.491
3,1007,0.020,0.034,0.096,0.278,0.572
4,1009,0.053,0.114,0.180,0.194,0.459
...,...,...,...,...,...,...
3137,56037,0.061,0.295,0.230,0.146,0.268
3138,56039,0.095,0.157,0.160,0.247,0.340
3139,56041,0.098,0.278,0.154,0.207,0.264
3140,56043,0.204,0.155,0.069,0.285,0.287


##### Covid-19 Lockdown Dates by U.S. County

Taken from https://www.kaggle.com/lin0li/us-lockdown-dates-dataset on 11/7/20.

In [16]:
# Load the lockdown-dates CSV into a DataFrame.
# The path is relative, so it resolves against the kernel's current working directory.
lockdowns = pd.read_csv('../data/raw/lockdown_us.csv')

In [17]:
# Show the raw lockdowns frame as the cell output (pandas elides the middle rows).
lockdowns

Unnamed: 0,Country,State,County,Date,Type
0,United States,Alabama,,2020-04-04,Stay at home
1,United States,Alabama,Birmingham,2020-03-24,Shelter in place
2,United States,Alaska,,2020-03-28,Stay at home
3,United States,Arizona,,2020-03-31,Stay at home
4,United States,California,,2020-03-19,Stay at home
...,...,...,...,...,...
168,United States,Virginia,,2020-03-30,Stay at home
169,United States,Washington,,2020-03-23,Stay at home
170,United States,West Virginia,,2020-03-24,Stay at home
171,United States,Wisconsin,,2020-03-25,Stay at home


### Cleaning the Data Sets

In [14]:
# Reduce the census frame to four fields and give them readable names.
#
# The rename runs BEFORE the column selection so that this cell can be re-run safely:
# DataFrame.rename silently ignores mapping keys that are no longer present
# (errors='ignore' is its default), whereas selecting the raw census codes a second
# time would raise KeyError once `populations` has already been rebound to the
# renamed frame. On a first run the result is identical to select-then-rename.
populations = populations.rename(columns={
    'GEOID': 'fips_code',
    'B01001_001E': 'population',
    'B25010_001E': 'avg_household',
    'B01001_calc_PopDensity': 'population_density',
})
# Column order matches the order the raw codes were originally selected in.
populations = populations[['fips_code', 'population', 'avg_household', 'population_density']]

In [15]:
# Show the cleaned populations frame as the cell output to confirm the renamed columns.
populations

Unnamed: 0,fips_code,population,avg_household,population_density
0,1001,55200,2.59,35.853419
1,1003,208107,2.61,50.541504
2,1005,25782,2.49,11.247981
3,1007,22527,2.99,13.973114
4,1009,57645,2.77,34.515816
...,...,...,...,...
3215,72145,53371,2.93,449.335380
3216,72147,8771,3.55,66.678630
3217,72149,22993,2.96,249.115455
3218,72151,34149,2.91,238.795547
