In [6]:
import pandas

In [7]:
import csv

In [8]:
mask = pandas.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/mask-use/mask-use-by-county.csv')

mask
# note: this data is at the county level for the United States

Unnamed: 0,COUNTYFP,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS
0,1001,0.053,0.074,0.134,0.295,0.444
1,1003,0.083,0.059,0.098,0.323,0.436
2,1005,0.067,0.121,0.120,0.201,0.491
3,1007,0.020,0.034,0.096,0.278,0.572
4,1009,0.053,0.114,0.180,0.194,0.459
...,...,...,...,...,...,...
3137,56037,0.061,0.295,0.230,0.146,0.268
3138,56039,0.095,0.157,0.160,0.247,0.340
3139,56041,0.098,0.278,0.154,0.207,0.264
3140,56043,0.204,0.155,0.069,0.285,0.287


COUNTYFP: The county FIPS code.
NEVER: The estimated share of people in this county who would say never in response to the question “How often do you wear a mask in public when you expect to be within six feet of another person?”
RARELY: The estimated share of people in this county who would say rarely
SOMETIMES: The estimated share of people in this county who would say sometimes
FREQUENTLY: The estimated share of people in this county who would say frequently
ALWAYS: The estimated share of people in this county who would say always


NOTE: Calculate a Mask Score for each county = (%NEVER*0) + (%RARELY*1) + (%SOMETIMES*2) +
(%FREQUENTLY*3) + (%ALWAYS*4)

Source: (Maloney, 2020): https://www.medrxiv.org/content/10.1101/2020.10.03.20206326v2.full.pdf



We are interested in county level data in Washington State, California, and Oregon. I need to:

(a) Identify the corresponding county names of the County FIP codes (census data) to append to the the mask data
(b) Filter the data to only include counties in WA,CA, and OR
(c) Create a mask score for each county
(d) Change variable names to prepare the data to be merged with my other group members


County Fip codes:
    
Area Type: county 
Geoid structure: state + county

https://github.com/kjhealy/fips-codes/blob/master/state_and_county_fips_master.csv


In [9]:
fip = pandas.read_csv('https://raw.githubusercontent.com/kjhealy/fips-codes/master/state_and_county_fips_master.csv')

fip

Unnamed: 0,fips,name,state
0,0,UNITED STATES,
1,1000,ALABAMA,
2,1001,Autauga County,AL
3,1003,Baldwin County,AL
4,1005,Barbour County,AL
...,...,...,...
3190,56037,Sweetwater County,WY
3191,56039,Teton County,WY
3192,56041,Uinta County,WY
3193,56043,Washakie County,WY


In [10]:
#Next, we need merge the 'fip' data with the 'mask' data by fip code

#To do this, the variable names for fip code need to match up.
#mask data:'COUNTYFP'
#fip:'fips'

#step1: change column names in mask data to lowercase#

mask= mask.rename(columns=str.lower)

#step 2: in the fips data, change the 'fips' column to 'countyfp'      
fip.columns = ['countyfp', 'name','state']

fip.columns #double check the column  names have been changed 

#use pandas Data.frame merge function to merge fip and mask by 'countyfp'

mask_data=mask.merge(fip, how='left', on='countyfp')

mask_data #now the mask data has county names and state in addition to the fip codes, which I will need to merge with the group


#right = data frame to merge with (fip)
#how = merge format (left)
#on = column or index to merge on ('countyfp')


Unnamed: 0,countyfp,never,rarely,sometimes,frequently,always,name,state
0,1001,0.053,0.074,0.134,0.295,0.444,Autauga County,AL
1,1003,0.083,0.059,0.098,0.323,0.436,Baldwin County,AL
2,1005,0.067,0.121,0.120,0.201,0.491,Barbour County,AL
3,1007,0.020,0.034,0.096,0.278,0.572,Bibb County,AL
4,1009,0.053,0.114,0.180,0.194,0.459,Blount County,AL
...,...,...,...,...,...,...,...,...
3137,56037,0.061,0.295,0.230,0.146,0.268,Sweetwater County,WY
3138,56039,0.095,0.157,0.160,0.247,0.340,Teton County,WY
3139,56041,0.098,0.278,0.154,0.207,0.264,Uinta County,WY
3140,56043,0.204,0.155,0.069,0.285,0.287,Washakie County,WY


In [11]:
#mask_data includes data from the entire US. For this class project, we are only interested in including WA, CA,  and OR

#filter the mask_data to only include WA,CA, and OR

states = ['WA','CA','OR']

mask_CA_WA_OR= mask_data[mask_data.state.isin(states)]   
  
mask_CA_WA_OR #dataframe that only include CA,WA,and OR



Unnamed: 0,countyfp,never,rarely,sometimes,frequently,always,name,state
186,6001,0.019,0.008,0.055,0.123,0.795,Alameda County,CA
187,6003,0.025,0.085,0.088,0.190,0.612,Alpine County,CA
188,6005,0.045,0.013,0.099,0.188,0.655,Amador County,CA
189,6007,0.015,0.043,0.111,0.204,0.626,Butte County,CA
190,6009,0.045,0.019,0.098,0.276,0.562,Calaveras County,CA
...,...,...,...,...,...,...,...,...
2987,53069,0.045,0.057,0.079,0.161,0.658,Wahkiakum County,WA
2988,53071,0.083,0.022,0.061,0.193,0.641,Walla Walla County,WA
2989,53073,0.042,0.028,0.061,0.122,0.747,Whatcom County,WA
2990,53075,0.002,0.009,0.049,0.310,0.629,Whitman County,WA


In [12]:
#Determine whether there are any NAs in the mask_CA_WA_OR dataframe#

mask_CA_WA_OR.isnull().sum() #there are no missing values in any columns


countyfp      0
never         0
rarely        0
sometimes     0
frequently    0
always        0
name          0
state         0
dtype: int64

In [14]:
#NOTE: Calculate a Mask Score for each county 
#score= (%NEVER0) + (%RARELY1) + (%SOMETIMES2) + (%FREQUENTLY3) + (%ALWAYS*4)

#note this is in decimal format#

mask_CA_WA_OR['mask_score'] = mask_CA_WA_OR['never']*0 + mask_CA_WA_OR['rarely']*1 + mask_CA_WA_OR['sometimes']*2 + mask_CA_WA_OR['frequently']*3+ mask_CA_WA_OR['always']*4

mask_CA_WA_OR


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mask_CA_WA_OR['mask_score'] = mask_CA_WA_OR['never']*0 + mask_CA_WA_OR['rarely']*1 + mask_CA_WA_OR['sometimes']*2 + mask_CA_WA_OR['frequently']*3+ mask_CA_WA_OR['always']*4


Unnamed: 0,countyfp,never,rarely,sometimes,frequently,always,name,state,mask_score
186,6001,0.019,0.008,0.055,0.123,0.795,Alameda County,CA,3.667
187,6003,0.025,0.085,0.088,0.190,0.612,Alpine County,CA,3.279
188,6005,0.045,0.013,0.099,0.188,0.655,Amador County,CA,3.395
189,6007,0.015,0.043,0.111,0.204,0.626,Butte County,CA,3.381
190,6009,0.045,0.019,0.098,0.276,0.562,Calaveras County,CA,3.291
...,...,...,...,...,...,...,...,...,...
2987,53069,0.045,0.057,0.079,0.161,0.658,Wahkiakum County,WA,3.330
2988,53071,0.083,0.022,0.061,0.193,0.641,Walla Walla County,WA,3.287
2989,53073,0.042,0.028,0.061,0.122,0.747,Whatcom County,WA,3.504
2990,53075,0.002,0.009,0.049,0.310,0.629,Whitman County,WA,3.553


In [16]:
#make sure the variable names are correct for merging with my group members#

#rename county and state to County and State#


mask_CA_WA_OR.columns = ['countyfp', 'never','rarely','sometimes','frequently','always', 'County','State','mask_score']

mask_CA_WA_OR.columns



Index(['countyfp', 'never', 'rarely', 'sometimes', 'frequently', 'always',
       'County', 'State', 'mask_score'],
      dtype='object')

In [19]:
#The variable 'County' currently includes 'Name County'. We want to remove "county" and trailing spaces so that this varibale only includes name. 

#Ex: "Alameda County" -> "Alameda"

mask_CA_WA_OR['County'] = mask_CA_WA_OR['County'].str.replace(' County', '')

#we don't need the fip code variable anymore.

del mask_CA_WA_OR['countyfp']

mask_CA_WA_OR





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mask_CA_WA_OR['County'] = mask_CA_WA_OR['County'].str.replace(' County', '')


Unnamed: 0,never,rarely,sometimes,frequently,always,County,State,mask_score
186,0.019,0.008,0.055,0.123,0.795,Alameda,CA,3.667
187,0.025,0.085,0.088,0.190,0.612,Alpine,CA,3.279
188,0.045,0.013,0.099,0.188,0.655,Amador,CA,3.395
189,0.015,0.043,0.111,0.204,0.626,Butte,CA,3.381
190,0.045,0.019,0.098,0.276,0.562,Calaveras,CA,3.291
...,...,...,...,...,...,...,...,...
2987,0.045,0.057,0.079,0.161,0.658,Wahkiakum,WA,3.330
2988,0.083,0.022,0.061,0.193,0.641,Walla Walla,WA,3.287
2989,0.042,0.028,0.061,0.122,0.747,Whatcom,WA,3.504
2990,0.002,0.009,0.049,0.310,0.629,Whitman,WA,3.553


In [21]:
#export to .csv#

mask_CA_WA_OR.to_csv('bandtlaw.pubpol.542.clean.covid.mask.data.csv')

