In [76]:
import pandas as pd

In [77]:
census_df = pd.read_csv("./acs2017_census_tract_data.csv", 
                        usecols=['County', 'State', 'TotalPop', 
                                 'Poverty', 'IncomePerCap', 'TractId'])
covid_df = pd.read_csv("./COVID_county_data.csv", usecols=['date','county','state','cases','deaths'])

census_renamer = {
    "County": "Name",
    "TotalPop": "Population",
    "IncomePerCap": "PerCapitaIncome",
    "TractID": "ID",
}

covid_renamer = {
    "date": "Month",
    "cases": "Cases",
    "deaths": "Deaths",
    "county": "Name",
    "state": "State",
}

In [78]:
census_df = census_df.rename(columns=census_renamer)
covid_df = covid_df.rename(columns=covid_renamer)

In [79]:
print(census_df.columns)
print(covid_df.columns)

Index(['TractId', 'State', 'Name', 'Population', 'PerCapitaIncome', 'Poverty'], dtype='object')
Index(['Month', 'Name', 'State', 'Cases', 'Deaths'], dtype='object')


In [80]:
census_df.head(2)

Unnamed: 0,TractId,State,Name,Population,PerCapitaIncome,Poverty
0,1001020100,Alabama,Autauga County,1845,33018.0,10.7
1,1001020200,Alabama,Autauga County,2172,18996.0,22.4


In [81]:
census_list = census_df[["Name", "State"]].copy()
county_info = census_df.groupby(['Name', 'State'])['Population'].sum().reset_index().sort_values(by=['State', 'Name'])
county_info = county_info.reset_index().drop(columns=['index'])



In [82]:
poverty_group = census_df.groupby(['Name', 'State'])['Poverty'].sum().reset_index().sort_values(by=['State', 'Name'])
county_entry_count = census_df.groupby(['Name', 'State']).size().reset_index(name='EntryCount').sort_values(by=['State', 'Name'])
poverty_group['Poverty'] /= county_entry_count['EntryCount']
poverty_group = poverty_group.reset_index().drop(columns=['index'])

In [83]:
percap_group = census_df.groupby(['Name', 'State'])['PerCapitaIncome'].sum().reset_index().sort_values(by=['State', 'Name'])
per_cap_entry = census_df.groupby(['Name', 'State']).size().reset_index(name='PerCapEntry').sort_values(by=['State', 'Name'])
percap_group['PerCapitaIncome'] /= per_cap_entry['PerCapEntry']
percap_group = percap_group.reset_index().drop(columns=['index'])

In [84]:
county_info['Poverty'] = poverty_group['Poverty']
county_info['PerCapitaIncome'] = percap_group['PerCapitaIncome']

In [85]:
county_info

Unnamed: 0,Name,State,Population,Poverty,PerCapitaIncome
0,Autauga County,Alabama,55036,14.558333,26588.166667
1,Baldwin County,Alabama,203360,12.471875,28220.375000
2,Barbour County,Alabama,26201,27.755556,17891.666667
3,Bibb County,Alabama,22580,13.925000,21799.000000
4,Blount County,Alabama,57667,16.422222,21598.444444
...,...,...,...,...,...
3215,Sweetwater County,Wyoming,44527,11.691667,32898.083333
3216,Teton County,Wyoming,22923,7.550000,47706.250000
3217,Uinta County,Wyoming,20758,14.733333,27086.000000
3218,Washakie County,Wyoming,8253,13.066667,26828.333333


In [86]:
county_info[(county_info['Name'] == 'Loudoun County') & (county_info['State'] == 'Virginia')]

Unnamed: 0,Name,State,Population,Poverty,PerCapitaIncome
2968,Loudoun County,Virginia,374558,3.824615,49615.769231


In [87]:
county_info[(county_info['Name'] == 'Washington County') & (county_info['State'] == 'Oregon')]

Unnamed: 0,Name,State,Population,Poverty,PerCapitaIncome
2241,Washington County,Oregon,572071,10.446154,34970.817308


In [88]:
county_info[(county_info['Name'] == 'Harlan County') & (county_info['State'] == 'Kentucky')]

Unnamed: 0,Name,State,Population,Poverty,PerCapitaIncome
1040,Harlan County,Kentucky,27548,33.318182,16010.363636


In [89]:
county_info[(county_info['Name'] == 'Malheur County') & (county_info['State'] == 'Oregon')]

Unnamed: 0,Name,State,Population,Poverty,PerCapitaIncome
2230,Malheur County,Oregon,30421,21.3625,15720.625


In [90]:
covid_df

Unnamed: 0,Month,Name,State,Cases,Deaths
0,2020-01-21,Snohomish,Washington,1,0.0
1,2020-01-22,Snohomish,Washington,1,0.0
2,2020-01-23,Snohomish,Washington,1,0.0
3,2020-01-24,Cook,Illinois,1,0.0
4,2020-01-24,Snohomish,Washington,1,0.0
...,...,...,...,...,...
1050253,2021-02-20,Sweetwater,Wyoming,3645,34.0
1050254,2021-02-20,Teton,Wyoming,3318,9.0
1050255,2021-02-20,Uinta,Wyoming,2024,12.0
1050256,2021-02-20,Washakie,Wyoming,876,26.0


In [105]:
modmonth_covid_df = covid_df.copy()
modmonth_covid_df['Month'] = modmonth_covid_df['Month'].str.slice(0,-3)

Covid_monthly = modmonth_covid_df.groupby(['Name','State','Month'])['Cases'].sum().reset_index()
Covid_monthly = Covid_monthly.sort_values(by=['State', 'Name']).reset_index().drop(columns='index')


In [112]:
deaths_group = modmonth_covid_df.groupby(['Name','State','Month'])['Deaths'].sum().reset_index()
deaths_group = deaths_group.sort_values(by=['State', 'Name']).reset_index().drop(columns='index')
# percap_group = census_df.groupby(['Name', 'State'])['PerCapitaIncome'].sum().reset_index().sort_values(by=['State', 'Name'])
# per_cap_entry = census_df.groupby(['Name', 'State']).size().reset_index(name='PerCapEntry').sort_values(by=['State', 'Name'])
Covid_monthly['Deaths'] = deaths_group['Deaths']


In [113]:
Covid_monthly

Unnamed: 0,Name,State,Month,Cases,Deaths
0,Autauga,Alabama,2020-03,43,0.0
1,Autauga,Alabama,2020-04,731,45.0
2,Autauga,Alabama,2020-05,3660,107.0
3,Autauga,Alabama,2020-06,11110,228.0
4,Autauga,Alabama,2020-07,24241,551.0
...,...,...,...,...,...
37252,Weston,Wyoming,2020-10,2730,0.0
37253,Weston,Wyoming,2020-11,9688,12.0
37254,Weston,Wyoming,2020-12,13656,62.0
37255,Weston,Wyoming,2021-01,17420,114.0


In [118]:
Covid_monthly[(Covid_monthly['Name'] == 'Malheur') & (Covid_monthly['State'] == 'Oregon') & (Covid_monthly['Month'] == '2020-08')]

Unnamed: 0,Name,State,Month,Cases,Deaths
25897,Malheur,Oregon,2020-08,28163,459.0


In [119]:
Covid_monthly[(Covid_monthly['Name'] == 'Malheur') & (Covid_monthly['State'] == 'Oregon') & (Covid_monthly['Month'] == '2021-02')]

Unnamed: 0,Name,State,Month,Cases,Deaths
25903,Malheur,Oregon,2021-02,65951,1137.0


In [117]:
Covid_monthly[(Covid_monthly['Name'] == 'Malheur') & (Covid_monthly['State'] == 'Oregon') & (Covid_monthly['Month'] == '2021-02')]

Unnamed: 0,Name,State,Month,Cases,Deaths
25903,Malheur,Oregon,2021-02,65951,1137.0


In [None]:
Covid_summary