# Testing Sites Data Frame

In [57]:
import pandas as pd
import numpy as np
pd.set_option('display.max_rows', 150)

In [58]:
df = pd.read_csv('COVID-19_Testing_Sites.csv')

In [59]:
print(df.shape)
df.head()

(132, 5)


Unnamed: 0,Facility,Phone,Address,Web Site,Location
0,ACCESS Kedzie Family Health Center,(800) 836-7633,"3229-3243 W 47th Pl Chicago, IL 60632",https://www.achn.net,
1,Howard Brown Health 63rd St,(773) 388-1600,"641 W 63rd St Chicago, IL 60621",https://howardbrown.org,POINT (-87.64124500000001 41.77981400000001)
2,Norwegian American Hospital,(773) 292-8363,"1044 N Francisco Ave Chicago, IL 60622",https://www.nahospital.org/,POINT (-87.699288 41.900481)
3,Aayu Clinics - Lakeview,(773) 227-3669,"1645 A W School St Chicago, IL 60657",https://www.aayuclinics.com/services-1,POINT (-87.670228 41.941508)
4,Michigan Avenue Primary Care,(312) 994-3000,"180 N Michigan Ave #1720 Chicago, IL 60601",https://www.michiganavenueprimarycare.com/covi...,POINT (-87.624569 41.885151)


In [60]:
df = df[['Facility', 'Address', 'Location']]

In [61]:
df.Location[0] = 'POINT (-87.705330 41.805860)'

In [62]:
# Dropping Howard Brown Health Mobile since there is no specific location attached to the testing site
df.dropna(inplace=True)
df.shape

(131, 3)

In [63]:
# import pickle with zip code coordinates
import pickle
zip_coords = pd.read_pickle(r'2_week_pos.pickle')

In [64]:
zip_coords.head()

Unnamed: 0,ZCTA,posRate,coords
0,60601,0.0407524,POINT (-87.622844 41.886262)
1,60602,0.0722892,POINT (-87.628309 41.883136)
2,60603,0.0149254,POINT (-87.625473 41.880112)
3,60604,0.0133333,POINT (-87.629029 41.878153)
4,60605,0.0373884,POINT (-87.623449 41.867824)


In [65]:
import geopandas as gpd
from scipy.spatial import cKDTree
from shapely.geometry import Point
from shapely import wkt

# Coercing each geometry column from string to geometry object
df['Location'] = df['Location'].apply(wkt.loads)
zip_coords['coords'] = zip_coords['coords'].apply(wkt.loads)

# Creating geometry dataframes
gpd1 = gpd.GeoDataFrame(df, geometry='Location')

gpd2 = gpd.GeoDataFrame(zip_coords, geometry='coords')

# Defining function for calculating nearest distance from each zip code to a testing center
def ckdnearest(gdA, gdB):
    nA = np.array(list(gdA.geometry.apply(lambda x: (x.x, x.y))))
    nB = np.array(list(gdB.geometry.apply(lambda x: (x.x, x.y))))
    btree = cKDTree(nB)
    dist, idx = btree.query(nA, k=1)
    gdf = pd.concat(
        [gdA.reset_index(drop=True), gdB.loc[idx, gdB.columns != 'geometry'].reset_index(drop=True),
         pd.Series(dist, name='dist')], axis=1)
    return gdf

# Reversing the order so that we get distance from zip codes to testing centers instead of testing 
# centers to zip codes
df_distance = ckdnearest(gpd2, gpd1)

In [66]:
df_distance.head()

Unnamed: 0,ZCTA,posRate,coords,Facility,Address,Location,dist
0,60601,0.0407524,POINT (-87.62284 41.88626),Near North Health Service Corporation: Winfiel...,"1276 N Clybourn Ave Chicago, IL 60610",POINT (-87.64184 41.90541),0.002052
1,60602,0.0722892,POINT (-87.62831 41.88314),Near North Health Service Corporation: Winfiel...,"1276 N Clybourn Ave Chicago, IL 60610",POINT (-87.64184 41.90541),0.004248
2,60603,0.0149254,POINT (-87.62547 41.88011),Near North Health Service Corporation: Winfiel...,"1276 N Clybourn Ave Chicago, IL 60610",POINT (-87.64184 41.90541),0.005119
3,60604,0.0133333,POINT (-87.62903 41.87815),Cedar Crosse Research Center,"800 S Wells St Suite M-15 Chicago, IL 60607",POINT (-87.63355 41.87140),0.008123
4,60605,0.0373884,POINT (-87.62345 41.86782),Clear Wellness Group,"1605 S Michigan Ave #1 Chicago, IL 60616",POINT (-87.62399 41.85967),0.008169


In [67]:
df_distance.shape

(59, 7)

In [68]:
# trim this down to only include distance, zip code, and posRate
dist_posRate = df_distance[['ZCTA', 'posRate', 'dist']]

In [69]:
import pickle
pickle_out = open('dist_posRate.pickle', 'wb')
pickle.dump(dist_posRate, pickle_out)
pickle_out.close()