In [2]:
import pandas as pd
import numpy as np
import seaborn as sns

In [3]:
# load county case data
counties = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/live/us-counties.csv')
counties.head()

Unnamed: 0,date,county,state,fips,cases,deaths,confirmed_cases,confirmed_deaths,probable_cases,probable_deaths
0,2021-04-26,Autauga,Alabama,1001.0,6879,107.0,5820.0,94.0,1059.0,13.0
1,2021-04-26,Baldwin,Alabama,1003.0,20847,305.0,14705.0,229.0,6142.0,76.0
2,2021-04-26,Barbour,Alabama,1005.0,2296,56.0,1282.0,37.0,1014.0,19.0
3,2021-04-26,Bibb,Alabama,1007.0,2584,62.0,2080.0,38.0,504.0,24.0
4,2021-04-26,Blount,Alabama,1009.0,6571,134.0,5064.0,112.0,1507.0,22.0


In [4]:
# load mask use data
mask_use = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/mask-use/mask-use-by-county.csv')
mask_use

Unnamed: 0,COUNTYFP,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS
0,1001,0.053,0.074,0.134,0.295,0.444
1,1003,0.083,0.059,0.098,0.323,0.436
2,1005,0.067,0.121,0.120,0.201,0.491
3,1007,0.020,0.034,0.096,0.278,0.572
4,1009,0.053,0.114,0.180,0.194,0.459
...,...,...,...,...,...,...
3137,56037,0.061,0.295,0.230,0.146,0.268
3138,56039,0.095,0.157,0.160,0.247,0.340
3139,56041,0.098,0.278,0.154,0.207,0.264
3140,56043,0.204,0.155,0.069,0.285,0.287


In [5]:
# prepare mask data for merging
mask_use["COUNTYFP"] = mask_use["COUNTYFP"].astype(float)
mask_use

Unnamed: 0,COUNTYFP,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS
0,1001.0,0.053,0.074,0.134,0.295,0.444
1,1003.0,0.083,0.059,0.098,0.323,0.436
2,1005.0,0.067,0.121,0.120,0.201,0.491
3,1007.0,0.020,0.034,0.096,0.278,0.572
4,1009.0,0.053,0.114,0.180,0.194,0.459
...,...,...,...,...,...,...
3137,56037.0,0.061,0.295,0.230,0.146,0.268
3138,56039.0,0.095,0.157,0.160,0.247,0.340
3139,56041.0,0.098,0.278,0.154,0.207,0.264
3140,56043.0,0.204,0.155,0.069,0.285,0.287


In [6]:
# merge mask and county data
mask_cases_county = pd.merge(counties, mask_use, right_on = 'COUNTYFP', left_on = 'fips')
mask_cases_county = mask_cases_county.drop('COUNTYFP', axis=1)
mask_cases_county

Unnamed: 0,date,county,state,fips,cases,deaths,confirmed_cases,confirmed_deaths,probable_cases,probable_deaths,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS
0,2021-04-26,Autauga,Alabama,1001.0,6879,107.0,5820.0,94.0,1059.0,13.0,0.053,0.074,0.134,0.295,0.444
1,2021-04-26,Baldwin,Alabama,1003.0,20847,305.0,14705.0,229.0,6142.0,76.0,0.083,0.059,0.098,0.323,0.436
2,2021-04-26,Barbour,Alabama,1005.0,2296,56.0,1282.0,37.0,1014.0,19.0,0.067,0.121,0.120,0.201,0.491
3,2021-04-26,Bibb,Alabama,1007.0,2584,62.0,2080.0,38.0,504.0,24.0,0.020,0.034,0.096,0.278,0.572
4,2021-04-26,Blount,Alabama,1009.0,6571,134.0,5064.0,112.0,1507.0,22.0,0.053,0.114,0.180,0.194,0.459
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3128,2021-04-26,Sweetwater,Wyoming,56037.0,4159,37.0,3997.0,,162.0,,0.061,0.295,0.230,0.146,0.268
3129,2021-04-26,Teton,Wyoming,56039.0,3719,9.0,3636.0,,83.0,,0.095,0.157,0.160,0.247,0.340
3130,2021-04-26,Uinta,Wyoming,56041.0,2158,12.0,1838.0,,320.0,,0.098,0.278,0.154,0.207,0.264
3131,2021-04-26,Washakie,Wyoming,56043.0,897,26.0,703.0,,194.0,,0.204,0.155,0.069,0.285,0.287


Missing data in some counties based on how they collect data. In order to have consistent numbers within the data, the columns of cases and deaths should be used. The others may be used for reference, but there are too many missing values to be able to make a statement across all counties. Need to create a visualization using counties with college campuses and those without and compare their mask usage.

In [7]:
#clean mask & county data
mask_cases_county = mask_cases_county.drop(columns = ['confirmed_cases', 'confirmed_deaths', 'probable_cases', 'probable_deaths'])
mask_cases_county

Unnamed: 0,date,county,state,fips,cases,deaths,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS
0,2021-04-26,Autauga,Alabama,1001.0,6879,107.0,0.053,0.074,0.134,0.295,0.444
1,2021-04-26,Baldwin,Alabama,1003.0,20847,305.0,0.083,0.059,0.098,0.323,0.436
2,2021-04-26,Barbour,Alabama,1005.0,2296,56.0,0.067,0.121,0.120,0.201,0.491
3,2021-04-26,Bibb,Alabama,1007.0,2584,62.0,0.020,0.034,0.096,0.278,0.572
4,2021-04-26,Blount,Alabama,1009.0,6571,134.0,0.053,0.114,0.180,0.194,0.459
...,...,...,...,...,...,...,...,...,...,...,...
3128,2021-04-26,Sweetwater,Wyoming,56037.0,4159,37.0,0.061,0.295,0.230,0.146,0.268
3129,2021-04-26,Teton,Wyoming,56039.0,3719,9.0,0.095,0.157,0.160,0.247,0.340
3130,2021-04-26,Uinta,Wyoming,56041.0,2158,12.0,0.098,0.278,0.154,0.207,0.264
3131,2021-04-26,Washakie,Wyoming,56043.0,897,26.0,0.204,0.155,0.069,0.285,0.287


In [8]:
#load college data 
colleges = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/colleges/colleges.csv')
colleges

Unnamed: 0,date,state,county,city,ipeds_id,college,cases,cases_2021,notes
0,2021-02-26,Alabama,Madison,Huntsville,100654,Alabama A&M University,41,,
1,2021-02-26,Alabama,Montgomery,Montgomery,100724,Alabama State University,2,,
2,2021-02-26,Alabama,Limestone,Athens,100812,Athens State University,45,10.0,
3,2021-02-26,Alabama,Lee,Auburn,100858,Auburn University,2499,324.0,
4,2021-02-26,Alabama,Montgomery,Montgomery,100830,Auburn University at Montgomery,214,74.0,
...,...,...,...,...,...,...,...,...,...
1944,2021-02-26,Wisconsin,Milwaukee,Milwaukee,240338,Wisconsin Lutheran College,122,2.0,
1945,2021-02-26,Wyoming,Natrona,Casper,240505,Casper College,363,33.0,
1946,2021-02-26,Wyoming,Goshen,Torrington,240596,Eastern Wyoming College,13,1.0,
1947,2021-02-26,Wyoming,Albany,Laramie,240727,University of Wyoming,1970,175.0,


In [9]:
#clean college data
colleges = colleges.drop(columns = ['cases_2021', 'notes'])
colleges

Unnamed: 0,date,state,county,city,ipeds_id,college,cases
0,2021-02-26,Alabama,Madison,Huntsville,100654,Alabama A&M University,41
1,2021-02-26,Alabama,Montgomery,Montgomery,100724,Alabama State University,2
2,2021-02-26,Alabama,Limestone,Athens,100812,Athens State University,45
3,2021-02-26,Alabama,Lee,Auburn,100858,Auburn University,2499
4,2021-02-26,Alabama,Montgomery,Montgomery,100830,Auburn University at Montgomery,214
...,...,...,...,...,...,...,...
1944,2021-02-26,Wisconsin,Milwaukee,Milwaukee,240338,Wisconsin Lutheran College,122
1945,2021-02-26,Wyoming,Natrona,Casper,240505,Casper College,363
1946,2021-02-26,Wyoming,Goshen,Torrington,240596,Eastern Wyoming College,13
1947,2021-02-26,Wyoming,Albany,Laramie,240727,University of Wyoming,1970


In [10]:
#merge college data with mask and case data 
all_data = pd.merge(mask_cases_county, colleges, on = ['county', 'state'], how = 'left')
all_data

Unnamed: 0,date_x,county,state,fips,cases_x,deaths,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS,date_y,city,ipeds_id,college,cases_y
0,2021-04-26,Autauga,Alabama,1001.0,6879,107.0,0.053,0.074,0.134,0.295,0.444,,,,,
1,2021-04-26,Baldwin,Alabama,1003.0,20847,305.0,0.083,0.059,0.098,0.323,0.436,,,,,
2,2021-04-26,Barbour,Alabama,1005.0,2296,56.0,0.067,0.121,0.120,0.201,0.491,,,,,
3,2021-04-26,Bibb,Alabama,1007.0,2584,62.0,0.020,0.034,0.096,0.278,0.572,,,,,
4,2021-04-26,Blount,Alabama,1009.0,6571,134.0,0.053,0.114,0.180,0.194,0.459,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4007,2021-04-26,Sweetwater,Wyoming,56037.0,4159,37.0,0.061,0.295,0.230,0.146,0.268,2021-02-26,Rock Springs,240693,Western Wyoming Community College,85.0
4008,2021-04-26,Teton,Wyoming,56039.0,3719,9.0,0.095,0.157,0.160,0.247,0.340,,,,,
4009,2021-04-26,Uinta,Wyoming,56041.0,2158,12.0,0.098,0.278,0.154,0.207,0.264,,,,,
4010,2021-04-26,Washakie,Wyoming,56043.0,897,26.0,0.204,0.155,0.069,0.285,0.287,,,,,


In [11]:
#clean post-merge
all_data = all_data.drop(columns = ['date_y', 'city', 'ipeds_id', 'cases_y'])
all_data = all_data.rename(columns={'cases_x':'cases', 'date_x':'date'})
all_data

Unnamed: 0,date,county,state,fips,cases,deaths,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS,college
0,2021-04-26,Autauga,Alabama,1001.0,6879,107.0,0.053,0.074,0.134,0.295,0.444,
1,2021-04-26,Baldwin,Alabama,1003.0,20847,305.0,0.083,0.059,0.098,0.323,0.436,
2,2021-04-26,Barbour,Alabama,1005.0,2296,56.0,0.067,0.121,0.120,0.201,0.491,
3,2021-04-26,Bibb,Alabama,1007.0,2584,62.0,0.020,0.034,0.096,0.278,0.572,
4,2021-04-26,Blount,Alabama,1009.0,6571,134.0,0.053,0.114,0.180,0.194,0.459,
...,...,...,...,...,...,...,...,...,...,...,...,...
4007,2021-04-26,Sweetwater,Wyoming,56037.0,4159,37.0,0.061,0.295,0.230,0.146,0.268,Western Wyoming Community College
4008,2021-04-26,Teton,Wyoming,56039.0,3719,9.0,0.095,0.157,0.160,0.247,0.340,
4009,2021-04-26,Uinta,Wyoming,56041.0,2158,12.0,0.098,0.278,0.154,0.207,0.264,
4010,2021-04-26,Washakie,Wyoming,56043.0,897,26.0,0.204,0.155,0.069,0.285,0.287,


In [12]:
#separate into colleges and not colleges
counties_with_colleges = all_data[all_data['college'].notnull()]
counties_without_colleges = all_data[all_data['college'].isnull()]
counties_with_colleges

Unnamed: 0,date,county,state,fips,cases,deaths,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS,college
7,2021-04-26,Calhoun,Alabama,1015.0,14405,312.0,0.152,0.108,0.130,0.167,0.442,Jacksonville State University
15,2021-04-26,Coffee,Alabama,1031.0,5453,115.0,0.101,0.152,0.094,0.186,0.466,Enterprise State Community College
19,2021-04-26,Covington,Alabama,1039.0,4098,116.0,0.187,0.128,0.129,0.201,0.356,Lurleen B. Wallace Community College
27,2021-04-26,Etowah,Alabama,1055.0,13737,352.0,0.096,0.103,0.178,0.122,0.501,Gadsden State Community College
34,2021-04-26,Houston,Alabama,1069.0,10464,280.0,0.085,0.079,0.135,0.268,0.433,Troy University Dothan
...,...,...,...,...,...,...,...,...,...,...,...,...
3987,2021-04-26,Winnebago,Wisconsin,55139.0,20959,213.0,0.074,0.194,0.126,0.156,0.450,University of Wisconsin-Oshkosh
3989,2021-04-26,Albany,Wyoming,56001.0,4184,11.0,0.136,0.100,0.151,0.181,0.432,University of Wyoming
3996,2021-04-26,Goshen,Wyoming,56015.0,1202,23.0,0.201,0.169,0.111,0.223,0.296,Eastern Wyoming College
4001,2021-04-26,Natrona,Wyoming,56025.0,7920,136.0,0.100,0.084,0.094,0.325,0.398,Casper College


In [31]:
#take averages 

counties_with_colleges_always = counties_with_colleges['ALWAYS'].mean()
print("With colleges, always:", counties_with_colleges_always)
counties_without_colleges_always = counties_without_colleges['ALWAYS'].mean()
print("Without colleges, always:", counties_without_colleges_always)

counties_with_colleges_frequently = counties_with_colleges['FREQUENTLY'].mean()
print("With colleges, frequently:", counties_with_colleges_frequently)
counties_without_colleges_frequently = counties_without_colleges['FREQUENTLY'].mean()
print("Without colleges, frequently:", counties_without_colleges_frequently)

counties_with_colleges_sometimes = counties_with_colleges['SOMETIMES'].mean()
print("With colleges, sometimes:", counties_with_colleges_sometimes)
counties_without_colleges_sometimes = counties_without_colleges['SOMETIMES'].mean()
print("Without colleges, sometimes:", counties_without_colleges_sometimes)

counties_with_colleges_rarely = counties_with_colleges['RARELY'].mean()
print("With colleges, rarely:", counties_with_colleges_rarely)
counties_without_colleges_rarely = counties_without_colleges['RARELY'].mean()
print("Without colleges, rarely:", counties_without_colleges_rarely)

counties_with_colleges_never = counties_with_colleges['NEVER'].mean()
print("With colleges, never:", counties_with_colleges_never)
counties_without_colleges_never = counties_without_colleges['NEVER'].mean()
print("Without colleges, never:", counties_without_colleges_never)

avg_mask_data = {'Always' : [counties_with_colleges_always*250000, counties_without_colleges_always*250000],
                'Frequently' : [counties_with_colleges_frequently*250000, counties_without_colleges_frequently*250000],
                'Sometimes' : [counties_with_colleges_sometimes*250000, counties_without_colleges_sometimes*250000],
                'Rarely' : [counties_with_colleges_rarely*250000, counties_without_colleges_rarely*250000],
                'Never' : [counties_with_colleges_never*250000, counties_without_colleges_never*250000]}
avg_mask_df = pd.DataFrame(avg_mask_data, columns = ['Always', 'Frequently', 'Sometimes', 'Rarely', 'Never'], 
                           index=['colleges', 'no colleges'])
print(avg_mask_df)

With colleges, always: 0.6172396921385372
Without colleges, always: 0.4843588691290463
With colleges, frequently: 0.18708301264430993
Without colleges, frequently: 0.21113269493844045
With colleges, sometimes: 0.09179549202858756
Without colleges, sometimes: 0.1276105791153671
With colleges, rarely: 0.05350082462891723
Without colleges, rarely: 0.08965161878704947
With colleges, never: 0.050329851566795224
Without colleges, never: 0.08724806201550364
                    Always    Frequently     Sometimes        Rarely  \
colleges     154309.923035  46770.753161  22948.873007  13375.206157   
no colleges  121089.717282  52783.173735  31902.644779  22412.904697   

                    Never  
colleges     12582.462892  
no colleges  21812.015504  


In [32]:
# chi squared testing of mask wearing proportions in counties with colleges vs. counties without
from scipy.stats import chi2_contingency

values = chi2_contingency(avg_mask_df)
print(values)

(10590.9137105301, 0.0, 4, array([[137696.17439848,  49775.6455459 ,  27425.03276471,
         17893.58166145,  17196.78388124],
       [137703.46591841,  49778.28134979,  27426.48502128,
         17894.52919254,  17197.69451433]]))
