#### DRAFT of methodology three (population-weighted centroids and average public transit time)

In [52]:
GOOGLE_TOKEN = ""


In [2]:
import pandas as pd
import numpy as np
import geopandas as gpd
import requests

In [40]:
from utils.get_data import get_lib_gdf, get_ct_gdf
from utils.plotting import creating_foliumn_map
from utils.acs_agg import acs_cleaning, acs_join

In [4]:
lib_file_path = "../data/library_locations.csv"
libs = get_lib_gdf(lib_file_path)

ct_file_path = "../data/census_tract_boundaries.geojson"
cts = get_ct_gdf(ct_file_path)

In [5]:
CRS = "EPSG:4326"
calc_CRS = "EPSG:3857"

In [6]:
libs = libs.to_crs(CRS)

In [7]:
cts.loc[:, 'tractce10'] = cts.loc[:, 'tractce10'].astype('int').astype('str')


#### tract-library assignments

In [8]:
gdf = cts.copy()
library_df = libs.copy()

In [9]:
# join in population-weighted centorids here (in CRS 4326)
# https://www.census.gov/geographies/reference-files/time-series/geo/centers-population.html

pw_centroids = pd.read_csv("https://www2.census.gov/geo/docs/reference/cenpop2020/tract/CenPop2020_Mean_TR17.txt")
pw_centroids = pw_centroids[pw_centroids.loc[:, 'COUNTYFP']==31]
pw_centroids.drop(columns=['STATEFP', 'COUNTYFP'], inplace=True)
pw_centroids.columns = map(str.lower, pw_centroids.columns)
pw_centroids.columns = pw_centroids.columns.str.strip()
pw_centroids.rename(columns = {'tractce':'tractce10'}, inplace=True)
pw_centroids.loc[:, 'tractce10'] = pw_centroids.loc[:, 'tractce10'].astype('str')

In [49]:
gdf = pd.merge(gdf, pw_centroids, on='tractce10', how='left')
gdf.loc[:, 'centroid'] = gpd.points_from_xy(gdf['longitude'], gdf['latitude'])
gdf = gdf.to_crs(calc_CRS)

In [11]:
library_df = library_df.to_crs(calc_CRS)
library_df.loc[:, 'bufferzone'] = library_df['geometry'].buffer(1609)
library_df.set_geometry('bufferzone', inplace=True)
library_df = library_df.to_crs(CRS)

In [12]:
# final result is the intersection dataset of census tracts within a bufferzone
intersect_df = gpd.sjoin(gdf, library_df, how='left', predicate='intersects')

#### calculate transit time

In [13]:
intersect_df.loc[:, 'lib_geometry'] = gpd.points_from_xy(intersect_df.loc[:, 'lon'],\
                                                            intersect_df.loc[:, 'lat'], crs=CRS) #.to_crs(calc_CRS)


In [14]:
ENDPOINT = "https://maps.googleapis.com/maps/api/distancematrix/json"

def transit_time(tract, library):
    try:
        slat = str(tract.y)
        slng = str(tract.x)
        elat = str(library.y)
        elng = str(library.x)

        url = ENDPOINT + "?origins=" + slat + "%2C" + slng + "&mode=transit" + "&destinations=" + elat + "%2C" + elng + "&key=" + GOOGLE_TOKEN
        response = requests.get(url)
        results = response.json()
        time = results.get('rows')[0].get('elements')[0].get('duration').get('value')
    except:
        time = np.nan
    return time

In [16]:
# code for getting intersect_df: 

intersect_df.loc[:, 'time'] = intersect_df.apply(lambda row: transit_time(row['lib_geometry'],\
                                                                                    row['centroid']), axis=1)
# load saved intersect_df
# intersect_df = pd.read_csv("../output/intersect_df.csv")

#### Remaining census tracts not overlapping the lib 1-mile bufferzone 

In [18]:
outside_df = intersect_df[intersect_df.isnull().any(axis=1)]
outside_df = outside_df[~outside_df.loc[:, 'namelsad10'].isin(['Census Tract 9800','Census Tract 7706.02'])].reset_index()


In [19]:
outside = intersect_df[intersect_df.isnull().any(axis=1)].loc[:, ['namelsad10',
       'geoid10', 'geometry_left', 'centroid']]

In [23]:
outside_gdf = gpd.GeoDataFrame(outside, geometry="centroid", crs=CRS)


In [21]:
library_df = library_df.to_crs(calc_CRS)
library_df.loc[:, 'bufferzone'] = library_df['geometry'].buffer(9654)
library_df.set_geometry('bufferzone', inplace=True)
library_df = library_df.to_crs(CRS)

In [24]:
intersect_df_remaining = gpd.sjoin(outside_gdf, library_df, how='left', predicate='intersects')

In [25]:
intersect_df_remaining

Unnamed: 0,namelsad10,geoid10,geometry_left,centroid,index_right,name,hours of operation,address,city,state,zip,phone,website,location,lat,lon,geometry
3,Census Tract 8412,17031841200,"MULTIPOLYGON (((-87.68813 41.85569, -87.68816 ...",POINT (-87.68193 41.85498),65.0,Gage Park,"Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri...",2807 W. 55th St.,Chicago,IL,60632.0,(312) 747-0032,https://www.chipublib.org/locations/29/,"(41.79357525220078, -87.69413004003073)",41.793575,-87.694130,POINT (-9762065.902 5130107.997)
3,Census Tract 8412,17031841200,"MULTIPOLYGON (((-87.68813 41.85569, -87.68816 ...",POINT (-87.68193 41.85498),80.0,Sherman Park,"Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri...",5440 S. Racine Ave.,Chicago,IL,60609.0,(312) 747-0477,https://www.chipublib.org/locations/64/,"(41.79476901885989, -87.65502837616037)",41.794769,-87.655028,POINT (-9757713.124 5130286.242)
3,Census Tract 8412,17031841200,"MULTIPOLYGON (((-87.68813 41.85569, -87.68816 ...",POINT (-87.68193 41.85498),33.0,Archer Heights,"Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri...",5055 S. Archer Ave.,Chicago,IL,60632.0,(312) 747-9241,https://www.chipublib.org/locations/5/,"(41.80110836194246, -87.72648385568911)",41.801108,-87.726484,POINT (-9765667.512 5131232.845)
3,Census Tract 8412,17031841200,"MULTIPOLYGON (((-87.68813 41.85569, -87.68816 ...",POINT (-87.68193 41.85498),21.0,Back of the Yards,"Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri...",2111 W. 47th St.,Chicago,IL,60609.0,(312) 747-9595,https://www.chipublib.org/locations/9/,"(41.808384423666354, -87.67756963874196)",41.808384,-87.677570,POINT (-9760222.406 5132319.436)
3,Census Tract 8412,17031841200,"MULTIPOLYGON (((-87.68813 41.85569, -87.68816 ...",POINT (-87.68193 41.85498),61.0,Brighton Park,Closed for building improvements,4314 S. Archer Ave.,Chicago,IL,60632.0,(312) 747-0666,https://www.chipublib.org/locations/14/,"(41.81516027827089, -87.70273371160454)",41.815160,-87.702734,POINT (-9763023.658 5133331.438)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
778,Census Tract 1103,17031110300,"MULTIPOLYGON (((-87.77812 41.97922, -87.77812 ...",POINT (-87.78314 41.97441),54.0,Mayfair,"Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri...",4400 W. Lawrence Ave.,Chicago,IL,60630.0,(312) 744-1254,https://www.chipublib.org/locations/49/,"(41.968242773953044, -87.737968778247)",41.968243,-87.737969,POINT (-9766946.008 5156223.552)
778,Census Tract 1103,17031110300,"MULTIPOLYGON (((-87.77812 41.97922, -87.77812 ...",POINT (-87.78314 41.97441),6.0,Albany Park,"Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri...",3401 W. Foster Ave.,Chicago,IL,60625.0,(773) 539-5450,https://www.chipublib.org/locations/3/,"(41.97557881655979, -87.71361314512697)",41.975579,-87.713613,POINT (-9764234.751 5157321.971)
778,Census Tract 1103,17031110300,"MULTIPOLYGON (((-87.77812 41.97922, -87.77812 ...",POINT (-87.78314 41.97441),9.0,Oriole Park,"Mon. & Wed., Noon-8; Tues. & Thurs., 10-6; Fri...",7454 W. Balmoral Ave.,Chicago,IL,60656.0,(312) 744-1965,https://www.chipublib.org/locations/57/,"(41.978098111742476, -87.81420029973704)",41.978098,-87.814200,POINT (-9775432.062 5157699.212)
778,Census Tract 1103,17031110300,"MULTIPOLYGON (((-87.77812 41.97922, -87.77812 ...",POINT (-87.78314 41.97441),41.0,Roden,"Mon. & Wed., 10-6; Tues. & Thurs., Noon-8; Fri...",6083 N. Northwest Hwy.,Chicago,IL,60631.0,(312) 744-1478,https://www.chipublib.org/locations/60/,"(41.99199760467718, -87.79820642998901)",41.991998,-87.798206,POINT (-9773651.632 5159780.799)


In [26]:
intersect_df_remaining.loc[:, 'lib_geometry'] = gpd.points_from_xy(intersect_df_remaining.loc[:, 'lon'],\
                                                            intersect_df_remaining.loc[:, 'lat'], crs=CRS) #.to_crs(calc_CRS)

intersect_df_remaining.loc[:, 'time'] = intersect_df_remaining.apply(lambda row: transit_time(row['lib_geometry'],\
                                                                                        row['centroid']), axis=1)


In [27]:
time_df = pd.DataFrame(intersect_df_remaining, columns=['namelsad10', 'name', 'time'])


In [28]:
filter_df = intersect_df_remaining.groupby('namelsad10', as_index=False).agg({'time':'min'})


In [30]:
outside_merge = pd.merge(filter_df, intersect_df_remaining, how='left', on=['namelsad10', 'time'])


In [31]:
outside_merge = outside_merge[["namelsad10", "name"]]


In [32]:
outside_df.drop(columns=['hours of operation',
                         'address',
                         'city',
                         'state',
                         'zip',
                         'phone',
                         'website',
                         'location',
                         'lat', 
                         'lon', 
                         'name'], inplace=True)

In [33]:
outside_intersect_df = pd.merge(outside_df, outside_merge, how='left', on=['namelsad10'])

In [35]:
outside_df = pd.merge(outside_intersect_df, library_df.drop(columns=["geometry"]), how='left', on=['name'])

In [36]:
intersect_df_all = intersect_df.append(outside_df, ignore_index=True)

  intersect_df_all = intersect_df.append(outside_df, ignore_index=True)


In [38]:
intersect_df_all.loc[:, 'lib_geometry'] = gpd.points_from_xy(intersect_df_all.loc[:, 'lon'], intersect_df_all.loc[:, 'lat'])


In [39]:
intersect_df_all.loc[:, 'time'] = intersect_df_all.apply(lambda row: transit_time(row['lib_geometry'],\
                                                                                        row['centroid']), axis=1)


In [41]:
min_intersect_df = intersect_df_all.groupby('namelsad10')[['time']].min().reset_index()

In [42]:
joined_df = min_intersect_df.merge(intersect_df, how='left', on=['namelsad10', 'time'])


In [43]:
joined_df.shape

(815, 29)

In [44]:
joined_df = joined_df[~joined_df["time"].isnull()]

In [45]:
joined_df.shape

(783, 29)

In [51]:
joined_df

Unnamed: 0,namelsad10,time,statefp10,name10,commarea_n,commarea,geoid10,notes,tractce10,countyfp10,...,state,zip,phone,website,location,lat,lon,geometry_right,lib_geometry,color_x
0,Census Tract 1001,1169.0,,,,,,,,,...,,,,,,,,,,pink
1,Census Tract 1002,452.0,17,1002,10,10,17031100200,,100200,031,...,IL,60631.0,(312) 744-1478,https://www.chipublib.org/locations/60/,"(41.99199760467718, -87.79820642998901)",41.991998,-87.798206,POINT (-9773651.632 5159780.799),POINT (-87.79821 41.99200),lightgreen
2,Census Tract 1003,700.0,17,1003,10,10,17031100300,,100300,031,...,IL,60631.0,(312) 744-1478,https://www.chipublib.org/locations/60/,"(41.99199760467718, -87.79820642998901)",41.991998,-87.798206,POINT (-9773651.632 5159780.799),POINT (-87.79821 41.99200),lightgreen
3,Census Tract 1004,1226.0,17,1004,10,10,17031100400,,100400,031,...,IL,60656.0,(312) 744-1965,https://www.chipublib.org/locations/57/,"(41.978098111742476, -87.81420029973704)",41.978098,-87.814200,POINT (-9775432.062 5157699.212),POINT (-87.81420 41.97810),pink
4,Census Tract 1005,387.0,17,1005,10,10,17031100500,,100500,031,...,IL,60656.0,(312) 744-1965,https://www.chipublib.org/locations/57/,"(41.978098111742476, -87.81420029973704)",41.978098,-87.814200,POINT (-9775432.062 5157699.212),POINT (-87.81420 41.97810),pink
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
809,Census Tract 8439,849.0,17,8439,42,42,17031843900,Small area in CA 43,843900,031,...,IL,60649.0,(312) 747-5281,https://www.chipublib.org/locations/66/,"(41.76254141905116, -87.56387694448408)",41.762541,-87.563877,POINT (-9747566.193 5125475.402),POINT (-87.56388 41.76254),lightblue
810,Census Tract 901,1706.0,,,,,,,,,...,,,,,,,,,,pink
811,Census Tract 902,696.0,,,,,,,,,...,,,,,,,,,,pink
812,Census Tract 903,811.0,,,,,,,,,...,,,,,,,,,,pink


In [50]:
creating_foliumn_map(joined_df, gdf)

IndexError: single positional indexer is out-of-bounds

In [47]:
acs_filepath = "../data/census_cook_county_dta.csv"
acs_data = acs_cleaning(acs_filepath)
agg_df = acs_join(acs_data, joined_df)


ValueError: cannot convert float NaN to integer