# Two-Week Average Positivity Rate by Zip Code

In [89]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Show up to 100 columns when a DataFrame is displayed, so wide frames are not elided.
pd.options.display.max_columns = 100

In [105]:
# Load the ZIP-code-level COVID-19 cases/tests/deaths CSV from the working directory.
df = pd.read_csv(filepath_or_buffer='COVID-19_Cases__Tests__and_Deaths_by_ZIP_Code.csv')

In [106]:
# List the distinct ZIP Code values to spot placeholders such as 'Unknown'.
df.loc[:, 'ZIP Code'].unique()

array(['Unknown', '60601', '60602', '60603', '60604', '60605', '60606',
       '60607', '60608', '60609', '60611', '60610', '60612', '60613',
       '60614', '60619', '60827', '60615', '60621', '60616', '60617',
       '60640', '60652', '60622', '60618', '60628', '60620', '60623',
       '60624', '60625', '60626', '60666', '60629', '60630', '60631',
       '60632', '60661', '60633', '60634', '60642', '60636', '60637',
       '60638', '60639', '60641', '60643', '60644', '60645', '60646',
       '60647', '60649', '60651', '60653', '60654', '60655', '60656',
       '60657', '60659', '60660', '60707'], dtype=object)

In [107]:
# Drop the rows whose ZIP Code is the placeholder 'Unknown'; every other ZIP code is kept.
df = df.loc[df['ZIP Code'].ne('Unknown')]

In [108]:
# Keep only week numbers 42 and 43, which this analysis treats as the last two weeks.
df = df[df['Week Number'].isin([42, 43])]

In [109]:
# Display the filtered frame to check the result of the ZIP-code and week filters.
df

Unnamed: 0,ZIP Code,Week Number,Week Start,Week End,Cases - Weekly,Cases - Cumulative,Case Rate - Weekly,Case Rate - Cumulative,Tests - Weekly,Tests - Cumulative,Test Rate - Weekly,Test Rate - Cumulative,Percent Tested Positive - Weekly,Percent Tested Positive - Cumulative,Deaths - Weekly,Deaths - Cumulative,Death Rate - Weekly,Death Rate - Cumulative,Population,Row ID,ZIP Code Location
8,60601,43,10/18/2020,10/24/2020,16.0,273.0,109.0,1860.3,538.0,8137,3666,55448.0,0.0,0.0,0,6,0.0,40.9,14675,60601-43,POINT (-87.622844 41.886262)
11,60602,43,10/18/2020,10/24/2020,1.0,29.0,80.0,2331.2,32.0,898,2572,72186.5,0.1,0.1,0,0,0.0,0.0,1244,60602-43,POINT (-87.628309 41.883136)
23,60601,42,10/11/2020,10/17/2020,36.0,257.0,245.0,1751.3,738.0,7599,5029,51781.9,0.0,0.0,0,6,0.0,40.9,14675,60601-42,POINT (-87.622844 41.886262)
24,60602,42,10/11/2020,10/17/2020,5.0,28.0,402.0,2250.8,51.0,866,4100,69614.1,0.1,0.1,0,0,0.0,0.0,1244,60602-42,POINT (-87.628309 41.883136)
48,60603,42,10/11/2020,10/17/2020,1.0,15.0,85.0,1277.7,36.0,533,3066,45400.3,0.0,0.0,0,0,0.0,0.0,1174,60603-42,POINT (-87.625473 41.880112)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1955,60666,43,10/18/2020,10/24/2020,,,,,0.0,0,0,0.0,0.0,0.0,0,0,0.0,0.0,0,60666-43,POINT (-87.896371 41.979511)
1988,60707,42,10/11/2020,10/17/2020,45.0,680.0,105.0,1580.7,,4389,0,10202.5,0.0,0.1,0,13,0.0,30.2,43019,60707-42,POINT (-87.808283 41.921777)
1989,60707,43,10/18/2020,10/24/2020,29.0,709.0,67.0,1648.1,,4389,0,10202.5,0.0,0.1,0,13,0.0,30.2,43019,60707-43,POINT (-87.808283 41.921777)
2020,60827,42,10/11/2020,10/17/2020,4.0,116.0,14.0,405.9,,808,0,2827.4,0.0,0.1,0,8,0.0,28.0,28577,60827-42,POINT (-87.633087 41.650765)


In [94]:
# Pool both weeks per ZIP code: summed weekly cases divided by summed weekly tests.
# This gives a finer-grained rate than the percent-positive columns already in the dataset.
# NOTE(review): a ZIP code whose weekly test counts are all missing (e.g. 60707 in the
# preview above) sums to 0 tests, so the division yields inf when it has cases and NaN
# when it has none -- verify before relying on posRate for those ZIP codes.
two_wk_avg_pos = pd.DataFrame(
    df.groupby(['ZIP Code'])[['Cases - Weekly', 'Tests - Weekly']]
    .sum()
    .pipe(lambda totals: totals['Cases - Weekly'] / totals['Tests - Weekly'])
)

In [95]:
# Move the ZIP code out of the index into a regular column and name both columns,
# so the frame can be merged on 'ZCTA' (e.g. into the census dataframe).
two_wk_avg_pos = two_wk_avg_pos.reset_index()
two_wk_avg_pos.columns = ['ZCTA', 'posRate']

In [96]:
# O'Hare has no residents, so its rate is 0 cases / 0 tests = NaN; record it as 0.
# Fill with the number 0, not the string '0': a string would turn posRate into a mixed
# float/str object column and break numeric sorting, comparisons and plotting downstream.
# NOTE(review): this zeroes every NaN rate, not only O'Hare's -- confirm O'Hare is the only one.
two_wk_avg_pos['posRate'] = two_wk_avg_pos['posRate'].fillna(0)

In [97]:
# Keep a single row per ZIP code (the first one encountered), so each ZIP code appears once.
df = df.drop_duplicates(subset='ZIP Code')

In [98]:
# Pull out each ZIP code together with its point location into a separate frame.
# The coordinates will be used later to calculate distance to testing centers.
coords = df.loc[:, ['ZIP Code', 'ZIP Code Location']]

In [99]:
# Rename to the column names used for the merge ('ZCTA' is the join key).
coords = coords.rename(columns={'ZIP Code': 'ZCTA', 'ZIP Code Location': 'coords'})

In [100]:
# Inner-join the 2-week positivity rates with the ZIP locations on the shared 'ZCTA' key.
coords_posRate = two_wk_avg_pos.merge(coords, how='inner', on='ZCTA')

In [104]:
# Preview the merged table of ZIP code, positivity rate and location.
coords_posRate

Unnamed: 0,ZCTA,posRate,coords
0,60601,0.0407524,POINT (-87.622844 41.886262)
1,60602,0.0722892,POINT (-87.628309 41.883136)
2,60603,0.0149254,POINT (-87.625473 41.880112)
3,60604,0.0133333,POINT (-87.629029 41.878153)
4,60605,0.0373884,POINT (-87.623449 41.867824)
5,60606,0.0445104,POINT (-87.63676 41.882634)
6,60607,0.0382979,POINT (-87.652727 41.876104)
7,60608,0.056224,POINT (-87.670366 41.849879)
8,60609,0.0751295,POINT (-87.653382 41.812017)
9,60610,0.0348957,POINT (-87.63581 41.90455)


In [103]:
# Pickle the merged data frame so it can be loaded and merged later.
import pickle

# The context manager closes the file even if pickle.dump raises, so the handle never leaks.
with open('2_week_pos.pickle', 'wb') as pickle_out:
    pickle.dump(coords_posRate, pickle_out)