In [36]:
#Imports
import pandas as pd
import numpy as np
from collections import Counter

In [37]:
#Import dataset from CSV file
column_types = {
    "Crash Date": str,
    "Crash Time": str,     
    "Borough": str,      
    "Zip Code": str,  
    "Latitude": float,
    "Longitude": float, 
    "Location": str,
    "On Street Name": str, 
    "Cross Street Name": str, 
    "Off Street Name": str,         
    "Numbers of Persons injured": int,      
    "Numbers of Persons killed": int,         
    "Number of Pedestrians injured": int,   
    "Number of Pedestrians killed": int,         
    "Number of cyclists injured": int,    
    "Number of cyclists killed": int,         
    "Number of motorists injured": int,   
    "Number of motorists killed": int,       
    "Contributing Factor Vehicle 1": str,       
    "Contributing Factor Vehicle 2": str,  
    "Contributing Factor Vehicle 3": str,      
    "Contributing Factor Vehicle 4": str,    
    "Contributing Factor Vehicle 5": str,
    "Collision ID": int,
    "Vehicle Type Code 1": str,
    "Vehicle Type Code 2": str,
    "Vehicle Type Code 3": str,
    "Vehicle Type Code 4": str,
    "Vehicle Type Code 5": str
}
crashes = pd.read_csv("collisions.csv", dtype=column_types, low_memory=False)

In [38]:
#Define Grid over New York City
#Distance between these two points is about 71.43km,
#so about 50km lateral and 50km longitudinal
south_west_corner = (40.485347, -74.276931)
north_east_corner = (40.948379, -73.689515)
grid_dist = (north_east_corner[0] - south_west_corner[0]) / 500 #50km / 500 = 100m between each grid point

In [39]:
#For testing, remove when training
crashes = crashes.head(10000)

In [40]:
#Fitlter rows without latitude and longitude values or 0 values
crashes = crashes.dropna(subset=['LATITUDE', 'LONGITUDE'])
crashes = crashes[(crashes['LATITUDE'] != 0) & (crashes['LONGITUDE'] != 0)]

In [41]:
#Map lat and long in the dataset to the closest point on the grid
grid_points = []
for _, row in crashes.iterrows():
    lat = south_west_corner[0] + np.round((row['LATITUDE'] - south_west_corner[0]) / grid_dist) * grid_dist
    lon = south_west_corner[1] + np.round((row['LONGITUDE'] - south_west_corner[1]) / grid_dist) * grid_dist
    grid_points.append((lat, lon))
    print(lat, lon)

40.666855544 -73.866684648
40.683524696 -73.917618168
40.709454488 -73.956512856
40.867811432 -73.831494216
40.671485864 -73.89724476
40.751127368 -73.974108072
40.701119912 -73.888910184
40.676116184 -73.755556968
40.59647468 -74.001889992
40.666855544 -73.789821336
40.681672567999996 -73.975034136
40.651112456 -73.958364984
40.872441752 -73.904653272
40.783539608 -73.825011768
40.74927524 -73.993555416
40.74464492 -73.770373992
40.75390556 -73.885205928
40.803913016 -73.937065512
40.631665112 -74.088013944
40.623330536 -73.958364984
40.66592948 -73.984294776
40.687228952 -73.977812328
40.711306616 -73.950956472
40.817803976 -73.910209656
40.704824168 -73.90187508
40.784465672 -73.953734664
40.663151288 -73.960217112
40.720567256 -73.888910184
40.607587448 -74.138947464
40.8557726 -73.86946284
40.790022056 -73.939843704
40.64277788 -74.015780952
40.843733768 -73.924100616
40.894667288 -73.862054328
40.852068344 -73.9528086
40.862255048 -73.912987848
40.66592948 -73.900022952
40.766870

40.663151288 -73.848163368
40.780761416000004 -73.909283592
40.696489592 -73.962995304
40.617774151999996 -74.021337336
40.676116184 -73.914839976
40.627960856 -73.916692104
40.650186392 -73.930583064
40.81409972 -73.94817828000001
40.844659832000005 -73.914839976
40.715010872 -73.75370484
40.575175208 -74.104683096
40.634443304 -74.032450104
40.71223268 -73.899096888
40.842807704 -73.825011768
40.615922024 -74.08708788
40.80483908 -73.912061784
40.752053432000004 -74.000963928
40.867811432 -73.921322424
40.70297204 -73.964847432
40.853920472 -73.914839976
40.645556072 -73.987072968
40.653890648 -73.923174552
40.859476856 -73.904653272
40.835399192000004 -73.839828792
40.832621 -73.899096888
40.631665112 -73.9065054
40.68908108 -73.78611708
40.8557726 -73.88798412
40.773352904 -73.89261444
40.824286424 -73.873167096
40.829842808 -73.914839976
40.601105 -73.90187508
40.7400146 -73.791673464
40.865959304 -73.872241032
40.715936936 -73.987072968
40.82336036 -73.891688376
40.710380552 -73.

40.615922024 -73.983368712
40.678894376 -73.760187288
40.86040292 -73.894466568
40.711306616 -73.748148456
40.890036968000004 -73.91113572
40.80483908 -73.935213384
40.658520968 -73.890762312
40.586287976 -73.93428732
40.710380552 -73.955586792
40.836325256 -73.92502668
40.681672567999996 -74.004668184
40.663151288 -73.950030408
40.885406648 -73.900949016
40.516833176 -74.19728949600001
40.845585896 -73.91113572
40.72612364 -74.00374212
40.66129916 -73.92502668
40.71223268 -73.956512856
40.706676296 -73.923174552
40.825212488 -73.923174552
40.865959304 -73.868536776
40.586287976 -73.9528086
40.629812984 -73.935213384
40.766870456 -73.9065054
40.885406648 -73.834272408
40.623330536 -73.960217112
40.686302888 -73.92039636
40.606661384 -73.754630904
40.664077352 -73.734257496
40.669633736 -73.909283592
40.740940664 -73.90187508
40.719641192 -73.762039416
40.709454488 -73.937065512
40.708528424 -73.792599528
40.710380552 -73.98522084
40.88818484 -73.850015496
40.832621 -73.863906456
40.760

In [43]:
#Count how many accidents there are per point on the grid
accident_counts = Counter(grid_points)

for point, count in accident_counts.items():
    print(f"Grid Point {point}: {count} accidents")

Grid Point (40.666855544, -73.866684648): 1 accidents
Grid Point (40.683524696, -73.917618168): 1 accidents
Grid Point (40.709454488, -73.956512856): 2 accidents
Grid Point (40.867811432, -73.831494216): 2 accidents
Grid Point (40.671485864, -73.89724476): 2 accidents
Grid Point (40.751127368, -73.974108072): 5 accidents
Grid Point (40.701119912, -73.888910184): 1 accidents
Grid Point (40.676116184, -73.755556968): 1 accidents
Grid Point (40.59647468, -74.001889992): 1 accidents
Grid Point (40.666855544, -73.789821336): 2 accidents
Grid Point (40.681672567999996, -73.975034136): 2 accidents
Grid Point (40.651112456, -73.958364984): 1 accidents
Grid Point (40.872441752, -73.904653272): 1 accidents
Grid Point (40.783539608, -73.825011768): 2 accidents
Grid Point (40.74927524, -73.993555416): 1 accidents
Grid Point (40.74464492, -73.770373992): 2 accidents
Grid Point (40.75390556, -73.885205928): 1 accidents
Grid Point (40.803913016, -73.937065512): 4 accidents
Grid Point (40.631665112, -

In [None]:
#Create complete map containing all grid points, assign 0 where there is no entry in accident_counts. Use for NN training