# Script to Process Geospatial Data to the Police Beat Level

Gabe Morrison

In [1]:
#library imports: 
import numpy as np
import pandas as pd
import geopandas as gpd

In [2]:
def read_spatial(path):
    '''
    Load a spatial data file and reproject it to EPSG:3435
    Input: path to the data file
    Output: a gpd object whose coordinates are expressed in EPSG:3435
    '''
    # Read and reproject in one step so every layer shares the same CRS
    return gpd.read_file(path).to_crs("EPSG:3435")

In [3]:
# Load every layer through read_spatial so they all end up in EPSG:3435.
# The order of the paths matches the order of the names on the left.
beats, l, bus, metra = (
    read_spatial(layer_path)
    for layer_path in (
        "../raw_data/Boundaries - Police Beats (current).geojson",
        "../raw_data/CTA_RailStations",
        "../raw_data/CTA_BusStops",
        "../raw_data/Metra_Stations",
    )
)

In [4]:
def join_to_beats(polygon_data, poly_unique_id, other_file, new_column_name):
    '''
    A function that creates a new polygon gpd with an added column
        counting the points from other_file that intersect each polygon
    Inputs:
        polygon_data (gpd): the beats gpd (as a polygon); must have a
                    "geometry" column
        poly_unique_id (string): the unique identifier of the polygon spatial data
        other_file (gpd): another point-based gpd. The number of points in each beat will 
                    be added to the beats as a new column
        new_column_name (string): the name of the column to be added to the beats dataframe
    Output:
        a new dataframe holding polygon_data plus the new column; polygons
        that contain no points get 0. polygon_data itself is not modified.
    Note: .size suggestion from here: https://stackoverflow.com/questions/19384532/get-statistics-for-each-group-such-as-count-mean-etc-using-pandas-groupby
    '''
    # Tag every point with the id of the polygon it intersects. Only the id
    # and geometry of the polygons are passed so no other polygon columns
    # are carried into the join result.
    # No predicate keyword is given on purpose: "intersects" is sjoin's
    # default, and the keyword was renamed from `op` to `predicate` between
    # geopandas releases, so relying on the default works with either.
    spatial_join = gpd.sjoin(other_file,
                             polygon_data[[poly_unique_id, "geometry"]],
                             how="inner")
    # One row per polygon id with the number of points matched to it
    count = spatial_join.groupby(poly_unique_id).size().reset_index(name=new_column_name)
    # Left merge keeps every polygon; polygons without points come back as NaN
    updated_polygon_data = polygon_data.merge(count, on=poly_unique_id, how="left")
    # Assign the filled column back explicitly. Calling fillna(inplace=True)
    # on a column selection is chained assignment, which is not guaranteed
    # to write through to the dataframe.
    updated_polygon_data[new_column_name] = updated_polygon_data[new_column_name].fillna(0)
    return updated_polygon_data

In [5]:
# Add one count column per transit layer to the beats, in this order:
# L stops, bus stops, Metra stops. Each pass feeds the updated beats
# into the next one.
for stops, column_name in (
    (l, "count_l_stops"),
    (bus, "count_bus_stops"),
    (metra, "count_metra_stops"),
):
    beats = join_to_beats(beats, "beat_num", stops, column_name)

In [6]:
# Display the beats table to inspect the three new stop-count columns
beats

Unnamed: 0,beat,beat_num,district,sector,geometry,count_l_stops,count_bus_stops,count_metra_stops
0,1,1713,17,1,"MULTIPOLYGON (((1155178.845 1934389.237, 11551...",3.0,26.0,0.0
1,0,3100,31,0,"MULTIPOLYGON (((1120121.641 1934005.453, 11201...",0.0,93.0,0.0
2,5,1651,16,5,"MULTIPOLYGON (((1100218.515 1934337.688, 10991...",0.0,0.0,0.0
3,1,1914,19,1,"MULTIPOLYGON (((1171459.214 1932310.572, 11716...",0.0,43.0,0.0
4,1,1915,19,1,"MULTIPOLYGON (((1173557.904 1930964.812, 11737...",0.0,44.0,0.0
...,...,...,...,...,...,...,...,...
272,1,0314,03,1,"MULTIPOLYGON (((1187301.942 1865532.830, 11873...",0.0,32.0,1.0
273,2,0825,08,2,"MULTIPOLYGON (((1162653.112 1865493.004, 11626...",0.0,24.0,0.0
274,1,0313,03,1,"MULTIPOLYGON (((1181918.041 1865379.952, 11820...",0.0,29.0,0.0
275,2,0823,08,2,"MULTIPOLYGON (((1158738.406 1864742.269, 11587...",0.0,46.0,0.0
