In [19]:
import pandas as pd
import numpy as np
import seaborn as sns

In [4]:
# load county case data
counties = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/live/us-counties.csv')
counties.head()

Unnamed: 0,date,county,state,fips,cases,deaths,confirmed_cases,confirmed_deaths,probable_cases,probable_deaths
0,2021-04-03,Autauga,Alabama,1001.0,6606,99.0,5654.0,88.0,952.0,11.0
1,2021-04-03,Baldwin,Alabama,1003.0,20519,301.0,14543.0,224.0,5976.0,77.0
2,2021-04-03,Barbour,Alabama,1005.0,2228,55.0,1241.0,36.0,987.0,19.0
3,2021-04-03,Bibb,Alabama,1007.0,2544,58.0,2057.0,35.0,487.0,23.0
4,2021-04-03,Blount,Alabama,1009.0,6455,132.0,4974.0,111.0,1481.0,21.0


In [5]:
# load mask use data
mask_use = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/mask-use/mask-use-by-county.csv')
mask_use

Unnamed: 0,COUNTYFP,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS
0,1001,0.053,0.074,0.134,0.295,0.444
1,1003,0.083,0.059,0.098,0.323,0.436
2,1005,0.067,0.121,0.120,0.201,0.491
3,1007,0.020,0.034,0.096,0.278,0.572
4,1009,0.053,0.114,0.180,0.194,0.459
...,...,...,...,...,...,...
3137,56037,0.061,0.295,0.230,0.146,0.268
3138,56039,0.095,0.157,0.160,0.247,0.340
3139,56041,0.098,0.278,0.154,0.207,0.264
3140,56043,0.204,0.155,0.069,0.285,0.287


In [6]:
# prepare mask data for merging
mask_use["COUNTYFP"] = mask_use["COUNTYFP"].astype(float)
mask_use

Unnamed: 0,COUNTYFP,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS
0,1001.0,0.053,0.074,0.134,0.295,0.444
1,1003.0,0.083,0.059,0.098,0.323,0.436
2,1005.0,0.067,0.121,0.120,0.201,0.491
3,1007.0,0.020,0.034,0.096,0.278,0.572
4,1009.0,0.053,0.114,0.180,0.194,0.459
...,...,...,...,...,...,...
3137,56037.0,0.061,0.295,0.230,0.146,0.268
3138,56039.0,0.095,0.157,0.160,0.247,0.340
3139,56041.0,0.098,0.278,0.154,0.207,0.264
3140,56043.0,0.204,0.155,0.069,0.285,0.287


In [7]:
# merge mask and county data
mask_cases_county = pd.merge(counties, mask_use, right_on = 'COUNTYFP', left_on = 'fips')
mask_cases_county = mask_cases_county.drop('COUNTYFP', axis=1)
mask_cases_county

Unnamed: 0,date,county,state,fips,cases,deaths,confirmed_cases,confirmed_deaths,probable_cases,probable_deaths,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS
0,2021-04-03,Autauga,Alabama,1001.0,6606,99.0,5654.0,88.0,952.0,11.0,0.053,0.074,0.134,0.295,0.444
1,2021-04-03,Baldwin,Alabama,1003.0,20519,301.0,14543.0,224.0,5976.0,77.0,0.083,0.059,0.098,0.323,0.436
2,2021-04-03,Barbour,Alabama,1005.0,2228,55.0,1241.0,36.0,987.0,19.0,0.067,0.121,0.120,0.201,0.491
3,2021-04-03,Bibb,Alabama,1007.0,2544,58.0,2057.0,35.0,487.0,23.0,0.020,0.034,0.096,0.278,0.572
4,2021-04-03,Blount,Alabama,1009.0,6455,132.0,4974.0,111.0,1481.0,21.0,0.053,0.114,0.180,0.194,0.459
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3128,2021-04-03,Sweetwater,Wyoming,56037.0,4037,37.0,3884.0,,153.0,,0.061,0.295,0.230,0.146,0.268
3129,2021-04-03,Teton,Wyoming,56039.0,3624,9.0,3544.0,,80.0,,0.095,0.157,0.160,0.247,0.340
3130,2021-04-03,Uinta,Wyoming,56041.0,2139,12.0,1819.0,,320.0,,0.098,0.278,0.154,0.207,0.264
3131,2021-04-03,Washakie,Wyoming,56043.0,890,26.0,697.0,,193.0,,0.204,0.155,0.069,0.285,0.287


Missing data in some counties based on how they collect data. In order to have consistent numbers within the data, the columns of cases and deaths should be used. The others may be used for reference, but there are too many missing values to be able to make a statement across all counties. Need to create a visualization using counties with college campuses and those without and compare their mask usage.

In [8]:
#clean mask & county data
mask_cases_county = mask_cases_county.drop(columns = ['confirmed_cases', 'confirmed_deaths', 'probable_cases', 'probable_deaths'])
mask_cases_county

Unnamed: 0,date,county,state,fips,cases,deaths,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS
0,2021-04-03,Autauga,Alabama,1001.0,6606,99.0,0.053,0.074,0.134,0.295,0.444
1,2021-04-03,Baldwin,Alabama,1003.0,20519,301.0,0.083,0.059,0.098,0.323,0.436
2,2021-04-03,Barbour,Alabama,1005.0,2228,55.0,0.067,0.121,0.120,0.201,0.491
3,2021-04-03,Bibb,Alabama,1007.0,2544,58.0,0.020,0.034,0.096,0.278,0.572
4,2021-04-03,Blount,Alabama,1009.0,6455,132.0,0.053,0.114,0.180,0.194,0.459
...,...,...,...,...,...,...,...,...,...,...,...
3128,2021-04-03,Sweetwater,Wyoming,56037.0,4037,37.0,0.061,0.295,0.230,0.146,0.268
3129,2021-04-03,Teton,Wyoming,56039.0,3624,9.0,0.095,0.157,0.160,0.247,0.340
3130,2021-04-03,Uinta,Wyoming,56041.0,2139,12.0,0.098,0.278,0.154,0.207,0.264
3131,2021-04-03,Washakie,Wyoming,56043.0,890,26.0,0.204,0.155,0.069,0.285,0.287


In [9]:
#load college data 
colleges = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/colleges/colleges.csv')
colleges

Unnamed: 0,date,state,county,city,ipeds_id,college,cases,cases_2021,notes
0,2021-02-26,Alabama,Madison,Huntsville,100654,Alabama A&M University,41,,
1,2021-02-26,Alabama,Montgomery,Montgomery,100724,Alabama State University,2,,
2,2021-02-26,Alabama,Limestone,Athens,100812,Athens State University,45,10.0,
3,2021-02-26,Alabama,Lee,Auburn,100858,Auburn University,2499,324.0,
4,2021-02-26,Alabama,Montgomery,Montgomery,100830,Auburn University at Montgomery,214,74.0,
...,...,...,...,...,...,...,...,...,...
1944,2021-02-26,Wisconsin,Milwaukee,Milwaukee,240338,Wisconsin Lutheran College,122,2.0,
1945,2021-02-26,Wyoming,Natrona,Casper,240505,Casper College,363,33.0,
1946,2021-02-26,Wyoming,Goshen,Torrington,240596,Eastern Wyoming College,13,1.0,
1947,2021-02-26,Wyoming,Albany,Laramie,240727,University of Wyoming,1970,175.0,


In [10]:
#clean college data
colleges = colleges.drop(columns = ['cases_2021', 'notes'])
colleges

Unnamed: 0,date,state,county,city,ipeds_id,college,cases
0,2021-02-26,Alabama,Madison,Huntsville,100654,Alabama A&M University,41
1,2021-02-26,Alabama,Montgomery,Montgomery,100724,Alabama State University,2
2,2021-02-26,Alabama,Limestone,Athens,100812,Athens State University,45
3,2021-02-26,Alabama,Lee,Auburn,100858,Auburn University,2499
4,2021-02-26,Alabama,Montgomery,Montgomery,100830,Auburn University at Montgomery,214
...,...,...,...,...,...,...,...
1944,2021-02-26,Wisconsin,Milwaukee,Milwaukee,240338,Wisconsin Lutheran College,122
1945,2021-02-26,Wyoming,Natrona,Casper,240505,Casper College,363
1946,2021-02-26,Wyoming,Goshen,Torrington,240596,Eastern Wyoming College,13
1947,2021-02-26,Wyoming,Albany,Laramie,240727,University of Wyoming,1970


In [11]:
#merge college data with mask and case data 
all_data = pd.merge(mask_cases_county, colleges, on = ['county', 'state'], how = 'left')
all_data

Unnamed: 0,date_x,county,state,fips,cases_x,deaths,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS,date_y,city,ipeds_id,college,cases_y
0,2021-04-03,Autauga,Alabama,1001.0,6606,99.0,0.053,0.074,0.134,0.295,0.444,,,,,
1,2021-04-03,Baldwin,Alabama,1003.0,20519,301.0,0.083,0.059,0.098,0.323,0.436,,,,,
2,2021-04-03,Barbour,Alabama,1005.0,2228,55.0,0.067,0.121,0.120,0.201,0.491,,,,,
3,2021-04-03,Bibb,Alabama,1007.0,2544,58.0,0.020,0.034,0.096,0.278,0.572,,,,,
4,2021-04-03,Blount,Alabama,1009.0,6455,132.0,0.053,0.114,0.180,0.194,0.459,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4007,2021-04-03,Sweetwater,Wyoming,56037.0,4037,37.0,0.061,0.295,0.230,0.146,0.268,2021-02-26,Rock Springs,240693,Western Wyoming Community College,85.0
4008,2021-04-03,Teton,Wyoming,56039.0,3624,9.0,0.095,0.157,0.160,0.247,0.340,,,,,
4009,2021-04-03,Uinta,Wyoming,56041.0,2139,12.0,0.098,0.278,0.154,0.207,0.264,,,,,
4010,2021-04-03,Washakie,Wyoming,56043.0,890,26.0,0.204,0.155,0.069,0.285,0.287,,,,,


In [13]:
#clean post-merge
all_data = all_data.drop(columns = ['date_y', 'city', 'ipeds_id', 'cases_y'])
all_data = all_data.rename(columns={'cases_x':'cases', 'date_x':'date'})
all_data

Unnamed: 0,date,county,state,fips,cases,deaths,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS,college
0,2021-04-03,Autauga,Alabama,1001.0,6606,99.0,0.053,0.074,0.134,0.295,0.444,
1,2021-04-03,Baldwin,Alabama,1003.0,20519,301.0,0.083,0.059,0.098,0.323,0.436,
2,2021-04-03,Barbour,Alabama,1005.0,2228,55.0,0.067,0.121,0.120,0.201,0.491,
3,2021-04-03,Bibb,Alabama,1007.0,2544,58.0,0.020,0.034,0.096,0.278,0.572,
4,2021-04-03,Blount,Alabama,1009.0,6455,132.0,0.053,0.114,0.180,0.194,0.459,
...,...,...,...,...,...,...,...,...,...,...,...,...
4007,2021-04-03,Sweetwater,Wyoming,56037.0,4037,37.0,0.061,0.295,0.230,0.146,0.268,Western Wyoming Community College
4008,2021-04-03,Teton,Wyoming,56039.0,3624,9.0,0.095,0.157,0.160,0.247,0.340,
4009,2021-04-03,Uinta,Wyoming,56041.0,2139,12.0,0.098,0.278,0.154,0.207,0.264,
4010,2021-04-03,Washakie,Wyoming,56043.0,890,26.0,0.204,0.155,0.069,0.285,0.287,


In [18]:
#separate into colleges and not colleges
counties_with_colleges = all_data[all_data['college'].notnull()]
counties_without_colleges = all_data[all_data['college'].isnull()]
counties_with_colleges

Unnamed: 0,date,county,state,fips,cases,deaths,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS,college
7,2021-04-03,Calhoun,Alabama,1015.0,14233,310.0,0.152,0.108,0.130,0.167,0.442,Jacksonville State University
15,2021-04-03,Coffee,Alabama,1031.0,5388,111.0,0.101,0.152,0.094,0.186,0.466,Enterprise State Community College
19,2021-04-03,Covington,Alabama,1039.0,4045,111.0,0.187,0.128,0.129,0.201,0.356,Lurleen B. Wallace Community College
27,2021-04-03,Etowah,Alabama,1055.0,13650,344.0,0.096,0.103,0.178,0.122,0.501,Gadsden State Community College
34,2021-04-03,Houston,Alabama,1069.0,10347,275.0,0.085,0.079,0.135,0.268,0.433,Troy University Dothan
...,...,...,...,...,...,...,...,...,...,...,...,...
3987,2021-04-03,Winnebago,Wisconsin,55139.0,20468,210.0,0.074,0.194,0.126,0.156,0.450,University of Wisconsin-Oshkosh
3989,2021-04-03,Albany,Wyoming,56001.0,3984,11.0,0.136,0.100,0.151,0.181,0.432,University of Wyoming
3996,2021-04-03,Goshen,Wyoming,56015.0,1170,23.0,0.201,0.169,0.111,0.223,0.296,Eastern Wyoming College
4001,2021-04-03,Natrona,Wyoming,56025.0,7821,135.0,0.100,0.084,0.094,0.325,0.398,Casper College


In [22]:
#take averages - can add all categories if we want
counties_with_colleges_always = counties_with_colleges['ALWAYS'].mean()
print("With colleges, always:", counties_with_colleges_always)
counties_without_colleges_always = counties_without_colleges['ALWAYS'].mean()
print("Without colleges, always:", counties_without_colleges_always)
counties_with_colleges_never = counties_with_colleges['NEVER'].mean()
print("With colleges, never:", counties_with_colleges_never)
counties_without_colleges_never = counties_with_colleges['NEVER'].mean()
print("Without colleges, never:", counties_without_colleges_never)

With colleges, always: 0.6172396921385372
Without colleges, always: 0.4843588691290463
With colleges, never: 0.050329851566795224
Without colleges, never: 0.050329851566795224
