In [1]:
import numpy as np
import pandas as pd
import sklearn

### Cleaning AQI data

In [2]:
# Load the raw AQI table (file name suggests daily, per-county readings for 2020)
# and preview its first five rows.
raw_data_aqi = pd.read_csv(
    filepath_or_buffer="data/daily_aqi_by_county_2020.csv",
)
raw_data_aqi.head(n=5)


Unnamed: 0,State Name,county Name,State Code,County Code,Date,AQI,Category,Defining Parameter,Defining Site,Number of Sites Reporting
0,Alabama,Baldwin,1,3,2020-01-01,48,Good,PM2.5,01-003-0010,1
1,Alabama,Baldwin,1,3,2020-01-04,13,Good,PM2.5,01-003-0010,1
2,Alabama,Baldwin,1,3,2020-01-07,14,Good,PM2.5,01-003-0010,1
3,Alabama,Baldwin,1,3,2020-01-10,39,Good,PM2.5,01-003-0010,1
4,Alabama,Baldwin,1,3,2020-01-13,29,Good,PM2.5,01-003-0010,1


In [3]:
# Discard the code/category/site metadata columns and rename the remaining
# location and date columns to short lowercase names; AQI is left untouched.
data_aqi_dropped = (
    raw_data_aqi
    .drop(
        columns=[
            "State Code",
            "County Code",
            "Category",
            "Defining Site",
            "Number of Sites Reporting",
            "Defining Parameter",
        ]
    )
    .rename(
        columns={
            "State Name": "state",
            "county Name": "county",
            "Date": "date",
        }
    )
)
data_aqi_dropped.head()

Unnamed: 0,state,county,date,AQI
0,Alabama,Baldwin,2020-01-01,48
1,Alabama,Baldwin,2020-01-04,13
2,Alabama,Baldwin,2020-01-07,14
3,Alabama,Baldwin,2020-01-10,39
4,Alabama,Baldwin,2020-01-13,29


In [4]:
# Reshape long -> wide: one row per (state, county), one column per date,
# cells holding the AQI value. pivot_table's default aggregation is the mean,
# so any repeated (state, county, date) entries would be averaged.
data_aqi_pivoted = data_aqi_dropped.pivot_table(
    values="AQI",
    index=["state", "county"],
    columns="date",
)
data_aqi_pivoted

Unnamed: 0_level_0,date,2020-01-01,2020-01-02,2020-01-03,2020-01-04,2020-01-05,2020-01-06,2020-01-07,2020-01-08,2020-01-09,2020-01-10,...,2020-10-26,2020-10-27,2020-10-28,2020-10-29,2020-10-30,2020-10-31,2020-11-01,2020-11-02,2020-11-03,2020-11-04
state,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alabama,Baldwin,48.0,,,13.0,,,14.0,,,39.0,...,,,,,,,,,,
Alabama,Clay,,,,12.0,,,16.0,,,31.0,...,,,,,,,,,,
Alabama,DeKalb,38.0,28.0,35.0,31.0,32.0,35.0,37.0,41.0,41.0,31.0,...,,,,,,,,,,
Alabama,Elmore,,,,,,,,,,,...,,,,,,,,,,
Alabama,Etowah,28.0,,,15.0,,,17.0,,,30.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wyoming,Sublette,43.0,42.0,42.0,44.0,44.0,43.0,42.0,41.0,40.0,42.0,...,,,,,,,,,,
Wyoming,Sweetwater,55.0,39.0,39.0,40.0,39.0,42.0,42.0,43.0,39.0,40.0,...,,,,,,,,,,
Wyoming,Teton,39.0,40.0,39.0,41.0,40.0,39.0,39.0,38.0,39.0,40.0,...,,,,,,,,,,
Wyoming,Uinta,40.0,38.0,40.0,40.0,38.0,41.0,40.0,38.0,37.0,39.0,...,,,,,,,,,,


### Cleaning COVID-19 data

In [5]:
# Load the raw COVID-19 table (file name suggests daily, per-county US figures)
# and preview its first five rows.
raw_data_covid = pd.read_csv(
    filepath_or_buffer="data/us_counties_covid19_daily.csv",
)
raw_data_covid.head(n=5)

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0.0
1,2020-01-22,Snohomish,Washington,53061.0,1,0.0
2,2020-01-23,Snohomish,Washington,53061.0,1,0.0
3,2020-01-24,Cook,Illinois,17031.0,1,0.0
4,2020-01-24,Snohomish,Washington,53061.0,1,0.0


In [6]:
# Keep only the location, date and case-count columns, ordered to match the
# AQI table (state, county, date, value). Selecting the wanted columns already
# excludes "fips" and "deaths", so no separate drop step is needed.
data_covid_dropped = raw_data_covid[["state", "county", "date", "cases"]]
data_covid_dropped.head()

Unnamed: 0,state,county,date,cases
0,Washington,Snohomish,2020-01-21,1
1,Washington,Snohomish,2020-01-22,1
2,Washington,Snohomish,2020-01-23,1
3,Illinois,Cook,2020-01-24,1
4,Washington,Snohomish,2020-01-24,1


In [7]:
# Reshape long -> wide: one row per (state, county), one column per date,
# cells holding the case count. pivot_table's default aggregation is the mean,
# so any repeated (state, county, date) entries would be averaged.
data_covid_pivoted = data_covid_dropped.pivot_table(
    values="cases",
    index=["state", "county"],
    columns="date",
)
data_covid_pivoted

Unnamed: 0_level_0,date,2020-01-21,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,2020-01-28,2020-01-29,2020-01-30,...,2020-11-25,2020-11-26,2020-11-27,2020-11-28,2020-11-29,2020-11-30,2020-12-01,2020-12-02,2020-12-03,2020-12-04
state,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alabama,Autauga,,,,,,,,,,,...,2686.0,2704.0,2716.0,2735.0,2751.0,2780.0,2818.0,2873.0,2893.0,2945.0
Alabama,Baldwin,,,,,,,,,,,...,8473.0,8576.0,8603.0,8733.0,8820.0,8890.0,9051.0,9163.0,9341.0,9501.0
Alabama,Barbour,,,,,,,,,,,...,1170.0,1170.0,1171.0,1173.0,1175.0,1178.0,1189.0,1206.0,1214.0,1217.0
Alabama,Bibb,,,,,,,,,,,...,1162.0,1170.0,1173.0,1179.0,1188.0,1196.0,1204.0,1239.0,1252.0,1270.0
Alabama,Blount,,,,,,,,,,,...,2855.0,2879.0,2888.0,2922.0,2946.0,2997.0,3061.0,3100.0,3158.0,3231.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wyoming,Teton,,,,,,,,,,,...,1476.0,1476.0,1547.0,1570.0,1606.0,1644.0,1655.0,1678.0,1693.0,1724.0
Wyoming,Uinta,,,,,,,,,,,...,1016.0,1016.0,1037.0,1037.0,1046.0,1070.0,1089.0,1121.0,1149.0,1175.0
Wyoming,Unknown,,,,,,,,,,,...,,,,,,,,,,
Wyoming,Washakie,,,,,,,,,,,...,394.0,394.0,414.0,416.0,427.0,440.0,467.0,486.0,496.0,517.0
