In [6]:
# script goal: using geopandas, select all points within buffer distance of line
# and sum some data on them; goal is that it will be faster than using ESRI.

import os
import geopandas as gpd

# EPSG code of the target projection; this notebook uses it as the CRS for the SACOG region
crs_sacog = 2226

# input GeoJSON files: the line to buffer and the point records to select from
# NOTE(review): absolute, machine-specific paths -- consider a configurable data directory
line_in = r"/Users/darrenconly/GIS/GeoJSON_samples/line_stockton_bl_wgs84.geojson"
pts_in = r"/Users/darrenconly/GIS/GeoJSON_samples/Sacramento_Dispatch_Data_2018.geojson"

# load the line layer, then reproject it to the SACOG CRS
df_line = gpd.read_file(line_in)
df_line = df_line.to_crs(epsg=crs_sacog)

# same two steps for the point layer
df_pts = gpd.read_file(pts_in)
df_pts = df_pts.to_crs(epsg=crs_sacog)

# preview the reprojected line layer
df_line.head()


Unnamed: 0,geometry
0,"LINESTRING (6719105.253 1958799.477, 6717862.7..."


In [25]:
# select all points within buffer distance of line

# buffer radius in the units of the projected CRS
# (the "_ft" suffix assumes that CRS is in feet -- TODO confirm)
buff_dist_ft = 500

# IMPORTANT: buffer() returns a GeoSeries (one polygon per line feature), so it
# must be merged into a single polygon object before being passed to within().
# Per the original author's note, skipping the union makes within() return only
# False values.
_line_buffers = df_line.buffer(buff_dist_ft)

# GeoPandas 1.0 deprecates the `unary_union` attribute in favour of the
# `union_all()` method; prefer the new API when available and fall back to the
# old one so the notebook still runs on older GeoPandas versions.
if hasattr(_line_buffers, "union_all"):
    buffer = _line_buffers.union_all()
else:
    buffer = _line_buffers.unary_union

# within() already yields a boolean mask, so it can index .loc directly
# (no `== True` comparison needed)
df_selection = df_pts.loc[df_pts.geometry.within(buffer)]
df_selection.head()

Unnamed: 0,OBJECTID,Record_ID,Call_Type,Description,Reporting_Officer,Unit_ID,Report_Created,Location,Police_District,Beat,...,X_Coordinate,Y_Coordinate,Day_of_Week,Occurence_DateTime,Received_DateTime,Dispatch_DateTime,Enroute_DateTime,At_Scene_DateTime,Clear_DateTime,geometry
3,396,6817467,983,CHECK ON HAZARD,1021.0,1B79,N,STOCKTON BLVD / 14TH AVE,6,6B,...,6719063,1958742,Mon,2018-01-01T21:47:15,2018-01-01T21:47:15,2018-01-01T21:47:15,2018-01-01T21:47:15,2018-01-01T21:47:15,2018-01-01T21:53:24,POINT (6719059.225 1958743.828)
7,1010,6817994,TSTOP,TRAFFIC STOP,1004.0,1A77,N,STOCKTON BLVD / 11TH AVE,6,6B,...,6718676,1959603,Tue,2018-01-02T10:01:25,2018-01-02T10:01:25,2018-01-02T10:01:25,2018-01-02T10:01:25,2018-01-02T10:01:25,2018-01-02T10:12:24,POINT (6718672.225 1959604.828)
8,1021,6818414,SSTOP,SUBJECT STOP,1021.0,1B79,N,3631 STOCKTON BLVD,6,6B,...,6718862,1959435,Tue,2018-01-02T19:10:53,2018-01-02T19:10:53,2018-01-02T19:10:53,2018-01-02T19:10:53,2018-01-02T19:10:53,2018-01-02T19:46:33,POINT (6718858.225 1959436.828)
10,1270,6818232,415,DISTURBANCE-CLARIFY,,,N,8TH AVE / 45TH ST,6,6A,...,6717697,1960621,Tue,2018-01-02T16:01:41,2018-01-02T16:01:41,,,,2018-01-02T16:03:32,POINT (6717693.226 1960622.828)
14,1475,6820158,415,DISTURBANCE-CLARIFY,1016.0,1C65,N,4600 BROADWAY,6,6B,...,6718207,1961178,Wed,2018-01-04T02:37:42,2018-01-04T02:37:42,2018-01-04T02:40:44,2018-01-04T02:40:53,2018-01-04T02:48:14,2018-01-04T03:25:20,POINT (6718203.226 1961179.827)


In [28]:
# list the columns available on the selected points
# (this cell only inspects column names; no aggregation is computed here)
df_selection.columns


Index(['OBJECTID', 'Record_ID', 'Call_Type', 'Description',
       'Reporting_Officer', 'Unit_ID', 'Report_Created', 'Location',
       'Police_District', 'Beat', 'Grid', 'X_Coordinate', 'Y_Coordinate',
       'Day_of_Week', 'Occurence_DateTime', 'Received_DateTime',
       'Dispatch_DateTime', 'Enroute_DateTime', 'At_Scene_DateTime',
       'Clear_DateTime', 'geometry'],
      dtype='object')