# Compile Beats Dataset

In [17]:
#library imports: 
import numpy as np
import pandas as pd
import geopandas as gpd
import geoplot
import libpysal as lp
import shapely

In [18]:
#Run helper script to get helper functions
%run Geoprocessing.ipynb

../raw_data/Boundaries - Census Blocks - 2010.geojson EPSG:3435


# Importation and Data Setup:

In [19]:
#Read Data
beats = read_spatial("../raw_data/Boundaries - Police Beats (current).geojson")
l = read_spatial("../raw_data/CTA_RailStations")
bus = read_spatial("../raw_data/CTA_BusStops")
metra = read_spatial("../raw_data/Metra_Stations")
streets = read_spatial("../raw_data/Street Center Lines.geojson")
bus_data = read_spatial("../intermediate_data/bus_geog")

#Separate business data
rest = bus_data[bus_data["category"]=="restaurant"]
bar = bus_data[bus_data["category"]=="bar"]
business = bus_data[bus_data["category"]=="business"]
daycare = bus_data[bus_data["category"]=="daycare"]
entertainment = bus_data[bus_data["category"]=="entertainment"]

../raw_data/Boundaries - Police Beats (current).geojson EPSG:3435
../raw_data/CTA_RailStations epsg:3435
../raw_data/CTA_BusStops EPSG:3435
../raw_data/Metra_Stations epsg:3435
../raw_data/Street Center Lines.geojson EPSG:3435
../intermediate_data/bus_geog EPSG:3435


# Aggregate Point to Beats Data

In [20]:
beats = join_to_beats(beats, "beat_num", l, "count_l_stops")
beats = join_to_beats(beats, "beat_num", bus, "count_bus_stops")
beats = join_to_beats(beats, "beat_num", metra, "count_metra_stops")
beats = join_to_beats(beats, "beat_num", rest, "count_restaurants")
beats = join_to_beats(beats, "beat_num", bar, "count_bars")
beats = join_to_beats(beats, "beat_num", daycare, "count_daycares")
beats = join_to_beats(beats, "beat_num", entertainment, "count_entertainment")
beats = join_to_beats(beats, "beat_num", business, "count_businesses")

# Handle Street Data

In [21]:
#Filter Out non-pedestrian Streets (ie highways):
#Syntax help: https://www.kite.com/python/answers/how-to-filter-a-pandas-dataframe-with-a-list-by-%60in%60-or-%60not-in%60-in-python
streets_do_not_want = ["ER", "EXPY", "HWY", "PKWY", "ORD", "XR", "TOLL"]
street_filter = ~streets.street_typ.isin(streets_do_not_want)
streets = streets[street_filter]

In [22]:
#Spatial operation to get all strings into exactly one polygon
#Help on overlay code here::https://stackoverflow.com/questions/60794994/geopandas-split-lines-at-intersection-with-polygons-retain-polygon-id-in-new
#Help with renaming after groupby: https://stackoverflow.com/questions/44416287/renaming-columns-after-group-by-and-sum-in-pandas-dataframe

#First: separate line segments so that they fall into exactly one police beat
#Second: same spatial join as above
#Third: Compute distance (in feet) of each segment
#Fourth/Fifth: Keep only the beat_num and distance columns
#Sixth:group by beat and sum all distances in the beat
#seventh:Add the column with the summarized distances back to the beat dataset


streets_separated = gpd.overlay(streets, beats, how="union", keep_geom_type=False).explode().reset_index(drop=True)
streets_separated = gpd.sjoin(streets_separated, 
                             beats[["beat_num", "geometry"]], 
                             how="inner", 
                             op='intersects')
streets_separated["distance"]= streets_separated["geometry"].length
streets_separated = streets_separated[["beat_num_left", "distance"]]
streets_separated.rename(columns={"beat_num_left":"beat_num"}, inplace=True)
streets_separated = streets_separated.groupby("beat_num").sum("distance").reset_index().rename(columns={'distance':'road_distance_ft'})
beats = beats.merge(streets_separated, how ="left", on="beat_num")

# Crosswalk to Beats From Blocks: 

In [29]:
#Create Crosswalk From Block to Beat:
#Read Block spatial data and population by block
#Then join the two
blocks = read_spatial("../raw_data/Boundaries - Census Blocks - 2010.geojson")
pop_10 = pd.read_csv("../raw_data/Population_by_2010_Census_Block.csv", 
                     dtype={"CENSUS BLOCK FULL":"object", "CENSUS BLOCK":"object"})
blocks = blocks.merge(pop_10, left_on="geoid10", right_on="CENSUS BLOCK FULL", how="left")

../raw_data/Boundaries - Census Blocks - 2010.geojson EPSG:3435


In [32]:
beats = convert_block_to_beat(blocks, "TOTAL POPULATION")

# Calculate Distance to Nearest:

In [33]:
beats["centroid"]=beats.centroid
beats = dist_to_nearest(beats, police_station, "centroid", "dist_to_police")
beats = dist_to_nearest(beats, hospital, "centroid", "dist_to_hospital")

# Spatial Lag

In [None]:
#NEED TO DO THIS once we get crime code

# Write Data

In [34]:
beats.columns
#ADD MORE HERE WHEN WE DO SPATIAL LAG!
beats_to_write = beats[['beat', 'beat_num', 'district', 'sector', 'count_l_stops',
                'count_bus_stops', 'count_metra_stops', 'count_restaurants',
       'count_bars', 'count_daycares', 'count_entertainment',
       'count_businesses', 'road_distance_ft', 'TOTAL POPULATION',
       'dist_to_police', 'dist_to_hospital']]

beats_to_write.to_csv("../intermediate_data/beats.csv")



Unnamed: 0,beat,beat_num,district,sector,count_l_stops,count_bus_stops,count_metra_stops,count_restaurants,count_bars,count_daycares,count_entertainment,count_businesses,road_distance_ft,TOTAL POPULATION,dist_to_police,dist_to_hospital
0,1,1713,17,1,3.0,26.0,0.0,63.0,9.0,6.0,1.0,92.0,169276.405792,13283.675264,5454.068890,3258.006066
1,0,3100,31,0,0.0,93.0,0.0,0.0,0.0,0.0,0.0,0.0,488603.452752,56.714879,14303.236540,5505.368479
2,5,1651,16,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,24133.489436,0.000000,38726.686536,25868.283862
3,1,1914,19,1,0.0,43.0,0.0,48.0,6.0,4.0,3.0,76.0,124959.498246,10907.560838,6630.602051,504.915576
4,1,1915,19,1,0.0,44.0,0.0,25.0,5.0,2.0,2.0,53.0,255537.190832,12773.552374,4192.196166,1499.099104
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
272,1,0314,03,1,0.0,32.0,1.0,4.0,0.0,2.0,0.0,31.0,146501.114666,6933.302291,6089.593215,3607.271850
273,2,0825,08,2,0.0,24.0,0.0,25.0,1.0,1.0,0.0,44.0,185910.945625,7765.801732,6275.886974,4468.936600
274,1,0313,03,1,0.0,29.0,0.0,11.0,0.0,3.0,0.0,26.0,122982.936047,5786.813132,5850.013614,2680.003108
275,2,0823,08,2,0.0,46.0,0.0,68.0,6.0,6.0,0.0,135.0,293583.479638,22290.925549,333.560711,5770.857541
