In [6]:
import requests
import json
import pandas as pd
import io
pd.set_option('display.max_rows', 500)

# New York Times Dataset

## Historical Cases and Deaths by Counties

In [7]:
from io import StringIO
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
s = requests.get(url).text
df6 = pd.read_csv(StringIO(s))
print(df6.shape)
print(df6.columns)
df6.head(100)

(894555, 6)
Index(['date', 'county', 'state', 'fips', 'cases', 'deaths'], dtype='object')


Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0.0
1,2020-01-22,Snohomish,Washington,53061.0,1,0.0
2,2020-01-23,Snohomish,Washington,53061.0,1,0.0
3,2020-01-24,Cook,Illinois,17031.0,1,0.0
4,2020-01-24,Snohomish,Washington,53061.0,1,0.0
5,2020-01-25,Orange,California,6059.0,1,0.0
6,2020-01-25,Cook,Illinois,17031.0,1,0.0
7,2020-01-25,Snohomish,Washington,53061.0,1,0.0
8,2020-01-26,Maricopa,Arizona,4013.0,1,0.0
9,2020-01-26,Los Angeles,California,6037.0,1,0.0


In [8]:
df6.fips.value_counts()

53061.0    349
17031.0    346
6059.0     345
6037.0     344
4013.0     344
          ... 
48269.0     83
2230.0      80
32009.0     52
48301.0     48
15005.0     25
Name: fips, Length: 3218, dtype: int64

## Mask Wearing Data

In [9]:
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/mask-use/mask-use-by-county.csv'
s = requests.get(url).text
df9 = pd.read_csv(StringIO(s))
print(df9.shape)
print(df9.columns)
df9.head(100)

(3142, 6)
Index(['COUNTYFP', 'NEVER', 'RARELY', 'SOMETIMES', 'FREQUENTLY', 'ALWAYS'], dtype='object')


Unnamed: 0,COUNTYFP,NEVER,RARELY,SOMETIMES,FREQUENTLY,ALWAYS
0,1001,0.053,0.074,0.134,0.295,0.444
1,1003,0.083,0.059,0.098,0.323,0.436
2,1005,0.067,0.121,0.12,0.201,0.491
3,1007,0.02,0.034,0.096,0.278,0.572
4,1009,0.053,0.114,0.18,0.194,0.459
5,1011,0.031,0.04,0.144,0.286,0.5
6,1013,0.102,0.053,0.257,0.137,0.451
7,1015,0.152,0.108,0.13,0.167,0.442
8,1017,0.117,0.037,0.15,0.136,0.56
9,1019,0.135,0.027,0.161,0.158,0.52


# CDC Datatsets

## United States COVID-19 Cases and Deaths by State over Time

In [10]:
url = 'https://data.cdc.gov/resource/9mfq-cb36.json'
df11 = pd.read_json(url)
print(df11.shape)
print(df11.columns)
df11.head(100)

(1000, 15)
Index(['submission_date', 'state', 'tot_cases', 'new_case', 'pnew_case',
       'tot_death', 'new_death', 'pnew_death', 'created_at', 'consent_cases',
       'consent_deaths', 'conf_cases', 'prob_cases', 'conf_death',
       'prob_death'],
      dtype='object')


Unnamed: 0,submission_date,state,tot_cases,new_case,pnew_case,tot_death,new_death,pnew_death,created_at,consent_cases,consent_deaths,conf_cases,prob_cases,conf_death,prob_death
0,2020-12-08T00:00:00.000,NM,109947,0,0.0,1756,0,0.0,2020-12-09 14:45:40.234,,Not agree,,,,
1,2021-01-01T00:00:00.000,FL,1300528,0,6063.0,21673,0,7.0,2021-01-02 14:50:51.219,Not agree,Not agree,,,,
2,2020-04-30T00:00:00.000,IA,7145,302,0.0,162,14,0.0,2020-05-01 21:00:19.025,Not agree,Not agree,,,,
3,2020-06-25T00:00:00.000,NE,18346,125,0.0,260,3,0.0,2020-06-26 19:18:27.809,Not agree,Not agree,,,,
4,2020-02-24T00:00:00.000,CA,10,0,,0,0,,2020-03-26 16:22:39.452,Not agree,Not agree,,,,
5,2020-11-14T00:00:00.000,VA,201961,1161,191.0,3800,1,0.0,2020-11-15 15:12:13.050,Agree,Agree,183455.0,18506.0,3527.0,273.0
6,2020-12-02T00:00:00.000,NY,346492,5775,0.0,10117,56,0.0,2020-12-03 15:21:08.669,Not agree,Not agree,,,,
7,2020-01-23T00:00:00.000,WY,0,0,,0,0,,2020-03-26 16:22:39.452,Agree,Agree,,,,
8,2020-05-05T00:00:00.000,GA,28196,602,1.0,1258,47,0.0,2020-05-06 12:23:08.000,Agree,Agree,28182.0,14.0,1258.0,0.0
9,2020-01-29T00:00:00.000,MA,0,0,,0,0,,2020-03-26 16:22:39.452,Agree,Agree,,,,


## Provisional COVID-19 Death Counts in the US by County

In [11]:
url = 'https://data.cdc.gov/resource/kn79-hsxy.json'
df14 = pd.read_json(url)
print(df14.shape)
print(df14.columns)
df14.head(100)

(1000, 9)
Index(['data_as_of', 'start_week', 'end_week', 'state_name', 'county_name',
       'county_fips_code', 'urban_rural_code', 'covid_death', 'total_death'],
      dtype='object')


Unnamed: 0,data_as_of,start_week,end_week,state_name,county_name,county_fips_code,urban_rural_code,covid_death,total_death
0,2020-12-30T00:00:00.000,2020-02-01T00:00:00.000,2020-12-26T00:00:00.000,AK,Anchorage Borough,2020,Medium metro,95,1913
1,2020-12-30T00:00:00.000,2020-02-01T00:00:00.000,2020-12-26T00:00:00.000,AK,Fairbanks North Star Borough,2090,Small metro,20,471
2,2020-12-30T00:00:00.000,2020-02-01T00:00:00.000,2020-12-26T00:00:00.000,AL,Autauga County,1001,Medium metro,39,458
3,2020-12-30T00:00:00.000,2020-02-01T00:00:00.000,2020-12-26T00:00:00.000,AL,Baldwin County,1003,Small metro,109,2040
4,2020-12-30T00:00:00.000,2020-02-01T00:00:00.000,2020-12-26T00:00:00.000,AL,Barbour County,1005,Noncore,17,253
5,2020-12-30T00:00:00.000,2020-02-01T00:00:00.000,2020-12-26T00:00:00.000,AL,Bibb County,1007,Large fringe metro,20,175
6,2020-12-30T00:00:00.000,2020-02-01T00:00:00.000,2020-12-26T00:00:00.000,AL,Blount County,1009,Large fringe metro,17,449
7,2020-12-30T00:00:00.000,2020-02-01T00:00:00.000,2020-12-26T00:00:00.000,AL,Butler County,1013,Noncore,27,208
8,2020-12-30T00:00:00.000,2020-02-01T00:00:00.000,2020-12-26T00:00:00.000,AL,Calhoun County,1015,Small metro,145,1453
9,2020-12-30T00:00:00.000,2020-02-01T00:00:00.000,2020-12-26T00:00:00.000,AL,Chambers County,1017,Micropolitan,11,340


## Geopandas File

In [43]:
import geopandas as gpd
data = gpd.read_file('https://opendata.arcgis.com/datasets/4cb598ae041348fb92270f102a6783cb_0.geojson')
print(data.shape)
data.head(100)

(3331, 88)


Unnamed: 0,OBJECTID,Countyname,ST_Name,ST_Abbr,ST_ID,FIPS,FatalityRa,Confirmedb,DeathsbyPo,PCTPOVALL_,...,Day_11,Day_12,Day_13,Day_14,NewCasebyP,Inpat_Occ,ICU_Occ,Shape__Area,Shape__Length,geometry
0,1,Autauga,Alabama,AL,1,1001,1.16144,7742.67,89.92644,13.8,...,53.0,48.0,61.0,39.0,66.545566,96.320346,100.0,2209382000.0,246839.9,"POLYGON ((-86.41312 32.70739, -86.41219 32.526..."
1,2,Baldwin,Alabama,AL,1,1003,1.20165,6450.73,77.515113,9.8,...,200.0,166.0,203.0,125.0,49.994955,74.029401,120.089286,5770469000.0,728445.1,"MULTIPOLYGON (((-87.78878 31.29877, -87.78849 ..."
2,3,Barbour,Alabama,AL,1,1005,2.156863,6149.27,132.631325,30.9,...,7.0,20.0,27.0,0.0,8.038262,56.711409,88.571429,3258643000.0,307285.2,"POLYGON ((-85.25609 32.13767, -85.25569 32.135..."
3,4,Bibb,Alabama,AL,1,1007,2.444208,8401.79,205.357143,21.8,...,28.0,23.0,32.0,15.0,84.821429,19.047619,,2310715000.0,227887.0,"POLYGON ((-87.02685 33.24646, -87.02572 33.209..."
4,5,Blount,Alabama,AL,1,1009,1.327434,8205.39,108.921162,13.2,...,38.0,54.0,79.0,16.0,29.391425,76.190476,92.857143,2456058000.0,286306.8,"POLYGON ((-86.44507 34.24954, -86.40902 34.205..."
5,6,Bullock,Alabama,AL,1,1011,2.444444,8877.49,217.005326,42.5,...,5.0,5.0,6.0,1.0,78.911028,41.964286,,2261614000.0,244911.3,"POLYGON ((-85.87382 32.26938, -85.87508 32.267..."
6,7,Butler,Alabama,AL,1,1013,2.910737,7855.69,228.658537,24.5,...,11.0,11.0,7.0,6.0,81.300813,34.0,65.714286,2795264000.0,218921.1,"POLYGON ((-86.44820 31.96463, -86.44662 31.846..."
7,8,Calhoun,Alabama,AL,1,1015,1.6134,8515.27,137.385476,19.5,...,103.0,112.0,172.0,36.0,34.127602,78.726708,93.717277,2300057000.0,263083.7,"POLYGON ((-85.53009 33.94142, -85.53248 33.889..."
8,9,Chambers,Alabama,AL,1,1017,2.622814,7145.62,187.416332,18.7,...,15.0,15.0,28.0,8.0,47.597799,44.239631,,2222371000.0,207869.4,"POLYGON ((-85.18874 32.88973, -85.18474 32.870..."
9,10,Cherokee,Alabama,AL,1,1019,1.529903,5523.97,84.511371,16.3,...,14.0,15.0,20.0,7.0,11.524278,22.039474,,2275916000.0,235526.6,"POLYGON ((-85.51361 34.52383, -85.48437 34.392..."


In [44]:
data.columns

Index(['OBJECTID', 'Countyname', 'ST_Name', 'ST_Abbr', 'ST_ID', 'FIPS',
       'FatalityRa', 'Confirmedb', 'DeathsbyPo', 'PCTPOVALL_', 'Unemployme',
       'Med_HH_Inc', 'State_Fata', 'DateChecke', 'EM_type', 'EM_date',
       'EM_notes', 'url', 'Thumbnail', 'Confirmed', 'Deaths', 'Age_85',
       'Age_80_84', 'Age_75_79', 'Age_70_74', 'Age_65_69', 'Beds_Licen',
       'Beds_Staff', 'Beds_ICU', 'Ventilator', 'POP_ESTIMA', 'POVALL_201',
       'Unemployed', 'Median_Hou', 'Recovered', 'Active', 'State_Conf',
       'State_Deat', 'State_Reco', 'State_Test', 'AgedPop', 'NewCases',
       'NewDeaths', 'TotalPop', 'NonHispWhP', 'BlackPop', 'AmIndop',
       'AsianPop', 'PacIslPop', 'OtherPop', 'TwoMorPop', 'HispPop', 'Wh_Alone',
       'Bk_Alone', 'AI_Alone', 'As_Alone', 'NH_Alone', 'SO_Alone', 'Two_More',
       'Not_Hisp', 'Age_Less15', 'Age_15_24', 'Age_25_34', 'Age_Over75',
       'Agetotal', 'NonHisp', 'Age_35_64', 'Age_65_74', 'Day_1', 'Day_2',
       'Day_3', 'Day_4', 'Day_5', 'Day_6'

In [45]:
data.Countyname.value_counts()

Unassigned    52
Washington    31
Jefferson     26
Franklin      25
Lincoln       24
              ..
Nantucket      1
Baca           1
Arkansas       1
Marin          1
Comal          1
Name: Countyname, Length: 1969, dtype: int64

In [46]:
data.tail(100)

Unnamed: 0,OBJECTID,Countyname,ST_Name,ST_Abbr,ST_ID,FIPS,FatalityRa,Confirmedb,DeathsbyPo,PCTPOVALL_,...,Day_11,Day_12,Day_13,Day_14,NewCasebyP,Inpat_Occ,ICU_Occ,Shape__Area,Shape__Length,geometry
3231,3232,Out of CO,Colorado,,0,80008,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3232,3233,Unassigned,Colorado,,0,90008,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3233,3234,Out of CT,Connecticut,,0,80009,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3234,3235,Unassigned,Connecticut,,0,90009,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3235,3236,Out of DE,Delaware,,0,80010,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3236,3237,Unassigned,Delaware,,0,90010,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3237,3238,Out of DC,District of Columbia,,0,80011,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3238,3239,Unassigned,District of Columbia,,0,90011,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3239,3240,Out of FL,Florida,,0,80012,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3240,3241,Unassigned,Florida,,0,90012,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,


In [47]:
data.shape

(3331, 88)

In [48]:
data[data.Countyname.str.contains("Out of")]

Unnamed: 0,OBJECTID,Countyname,ST_Name,ST_Abbr,ST_ID,FIPS,FatalityRa,Confirmedb,DeathsbyPo,PCTPOVALL_,...,Day_11,Day_12,Day_13,Day_14,NewCasebyP,Inpat_Occ,ICU_Occ,Shape__Area,Shape__Length,geometry
3221,3222,Out of AL,Alabama,,0,80001,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3223,3224,Out of AK,Alaska,,0,80002,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3225,3226,Out of AZ,Arizona,,0,80004,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3227,3228,Out of AR,Arkansas,,0,80005,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3229,3230,Out of CA,California,,0,80006,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3231,3232,Out of CO,Colorado,,0,80008,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3233,3234,Out of CT,Connecticut,,0,80009,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3235,3236,Out of DE,Delaware,,0,80010,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3237,3238,Out of DC,District of Columbia,,0,80011,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,
3239,3240,Out of FL,Florida,,0,80012,0.0,0.0,0.0,0.0,...,,,,,0.0,,,,,


In [50]:
data = data[~data.Countyname.str.contains("Out of")]

In [51]:
data.shape

(3279, 88)

In [52]:
data = data[~data.Countyname.str.contains("Unassigned")]

In [53]:
data.shape

(3227, 88)

In [54]:
data = data[~data.ST_Name.str.contains("Puerto Rico")]

In [55]:
data.shape

(3149, 88)

In [56]:
data.tail(50)

Unnamed: 0,OBJECTID,Countyname,ST_Name,ST_Abbr,ST_ID,FIPS,FatalityRa,Confirmedb,DeathsbyPo,PCTPOVALL_,...,Day_11,Day_12,Day_13,Day_14,NewCasebyP,Inpat_Occ,ICU_Occ,Shape__Area,Shape__Length,geometry
3099,3100,Richland,Wisconsin,WI,55,55103,1.139351,6566.15,74.811532,14.3,...,5.0,9.0,5.0,0.0,5.754733,22.285714,47.619048,2891060000.0,229945.1,"POLYGON ((-90.19175 43.25000, -90.19245 43.167..."
3100,3101,Rock,Wisconsin,WI,55,55105,0.95339,8101.56,77.239485,10.9,...,83.0,82.0,62.0,54.0,25.133483,52.56745,54.986523,3481339000.0,236865.6,"POLYGON ((-88.77657 42.66763, -88.77650 42.492..."
3101,3102,Rusk,Wisconsin,WI,55,55107,1.131419,8121.86,91.892274,14.3,...,3.0,2.0,6.0,7.0,28.274546,11.666667,,4901985000.0,301952.3,"POLYGON ((-90.95685 45.63927, -90.86025 45.638..."
3102,3103,St. Croix,Wisconsin,WI,55,55109,0.437736,7386.22,32.332152,4.7,...,34.0,50.0,60.0,45.0,24.527839,22.207792,,3818979000.0,258103.4,"POLYGON ((-92.55999 45.21042, -92.37510 45.209..."
3103,3104,Sauk,Wisconsin,WI,55,55111,0.652494,7394.67,48.249778,8.6,...,38.0,13.0,28.0,42.0,59.144889,29.457364,40.0,4172037000.0,297495.9,"POLYGON ((-90.17647 43.64223, -89.93668 43.641..."
3104,3105,Sawyer,Wisconsin,WI,55,55113,0.862069,7738.49,66.711141,14.5,...,14.0,7.0,11.0,5.0,109.163685,26.857143,,7214795000.0,360111.8,"POLYGON ((-91.17528 46.15725, -91.12518 46.155..."
3105,3106,Shawano,Wisconsin,WI,55,55115,1.453993,11295.23,164.231787,9.6,...,25.0,27.0,19.0,11.0,22.060986,56.493506,,4674658000.0,357583.6,"POLYGON ((-89.05507 45.02967, -88.98168 45.028..."
3106,3107,Sheboygan,Wisconsin,WI,55,55117,0.812395,10341.6,84.01469,7.5,...,54.0,62.0,25.0,66.0,10.39357,42.255639,35.338346,2566945000.0,201057.9,"POLYGON ((-87.94445 43.89220, -87.84163 43.891..."
3107,3108,Taylor,Wisconsin,WI,55,55119,1.063234,8754.65,93.0825,10.3,...,16.0,9.0,13.0,4.0,39.192632,30.059524,0.0,5136703000.0,306754.0,"POLYGON ((-90.04227 45.38187, -90.04248 45.368..."
3108,3109,Trempealeau,Wisconsin,WI,55,55121,0.972763,10474.83,101.895252,8.0,...,12.0,13.0,10.0,12.0,64.533659,79.591837,,3754854000.0,308453.6,"POLYGON ((-91.16562 44.59699, -91.16600 44.509..."


In [64]:
data = data.dropna(subset=['Countyname'], inplace=True)

In [65]:
data.shape

AttributeError: 'NoneType' object has no attribute 'shape'