# NYC Accidents Data Exploration

In [14]:
# Import dependencies
import html

import folium
import numpy as np
import pandas as pd

# Read the sampled NYPD motor-vehicle collision records into a DataFrame.
# The path is relative to the directory the notebook is run from.
ACCIDENT_CSV_PATH = './data/NYPD_Motor_Vehicle_Collisions_sampled.csv'
accident_data = pd.read_csv(ACCIDENT_CSV_PATH)

In [23]:
# Preview the first five rows to check the available columns and sample values.
accident_data.head()

Unnamed: 0,DATE,TIME,BOROUGH,ZIP CODE,LATITUDE,LONGITUDE,LOCATION,ON STREET NAME,CROSS STREET NAME,OFF STREET NAME,...,CONTRIBUTING FACTOR VEHICLE 2,CONTRIBUTING FACTOR VEHICLE 3,CONTRIBUTING FACTOR VEHICLE 4,CONTRIBUTING FACTOR VEHICLE 5,UNIQUE KEY,VEHICLE TYPE CODE 1,VEHICLE TYPE CODE 2,VEHICLE TYPE CODE 3,VEHICLE TYPE CODE 4,VEHICLE TYPE CODE 5
0,06/18/2016,5:20,BRONX,10456.0,40.824067,-73.90871,"(40.8240665, -73.9087095)",EAST 163 STREET,3 AVENUE,,...,Unspecified,,,,3463614,PASSENGER VEHICLE,,,,
1,06/18/2016,7:10,BRONX,10472.0,40.826916,-73.87203,"(40.8269163, -73.8720302)",METCALF AVENUE,WATSON AVENUE,,...,Unspecified,,,,3464214,PASSENGER VEHICLE,PASSENGER VEHICLE,,,
2,06/18/2016,7:20,,,40.701455,-73.98962,"(40.7014547, -73.9896203)",,,,...,Unspecified,,,,3463782,PASSENGER VEHICLE,PASSENGER VEHICLE,,,
3,06/18/2016,7:30,,,,,,47 STREET,,,...,Unspecified,,,,3465413,PASSENGER VEHICLE,OTHER,,,
4,06/18/2016,7:45,QUEENS,11422.0,40.665256,-73.735334,"(40.665256, -73.7353338)",SOUTH CONDUIT AVENUE,FRANCIS LEWIS BOULEVARD,,...,Unspecified,,,,3463318,PASSENGER VEHICLE,PASSENGER VEHICLE,,,


In [95]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
# NOTE(review): the saved output of this cell reports 975,764 rows, while the
# per-column count cell reports 10,000 -- the outputs appear to come from
# different runs/datasets. Confirm with Restart Kernel -> Run All.
accident_data.describe()

Unnamed: 0,ZIP CODE,LATITUDE,LONGITUDE,NUMBER OF PERSONS INJURED,NUMBER OF PERSONS KILLED,NUMBER OF PEDESTRIANS INJURED,NUMBER OF PEDESTRIANS KILLED,NUMBER OF CYCLIST INJURED,NUMBER OF CYCLIST KILLED,NUMBER OF MOTORIST INJURED,NUMBER OF MOTORIST KILLED,UNIQUE KEY
count,720386.0,771401.0,771401.0,975764.0,975764.0,975764.0,975764.0,975764.0,975764.0,975764.0,975764.0,975764.0
mean,10808.078445,40.722982,-73.923256,0.255354,0.001224,0.053982,0.000691,0.020909,7.6e-05,0.191262,0.00046,2029945.753199
std,566.952546,0.07737,0.086025,0.656131,0.03676,0.246789,0.026545,0.151044,0.008708,0.663912,0.024059,1515381.600845
min,10000.0,40.498949,-74.254532,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.0
25%,10075.0,40.6691,-73.979237,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,245746.75
50%,11204.0,40.723494,-73.933938,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3122593.5
75%,11236.0,40.765579,-73.869941,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3367943.25
max,11697.0,40.912869,-73.700597,43.0,5.0,15.0,2.0,6.0,1.0,43.0,5.0,3612908.0


In [15]:
# Non-null value count for every column (not a single row count).
# Columns whose count is below the maximum (e.g. BOROUGH, LATITUDE) contain
# missing values.
print(accident_data.count())

DATE                             10000
TIME                             10000
BOROUGH                           6396
ZIP CODE                          6395
LATITUDE                          6687
LONGITUDE                         6687
LOCATION                          6687
ON STREET NAME                    6610
CROSS STREET NAME                 5197
OFF STREET NAME                   2174
NUMBER OF PERSONS INJURED        10000
NUMBER OF PERSONS KILLED         10000
NUMBER OF PEDESTRIANS INJURED    10000
NUMBER OF PEDESTRIANS KILLED     10000
NUMBER OF CYCLIST INJURED        10000
NUMBER OF CYCLIST KILLED         10000
NUMBER OF MOTORIST INJURED       10000
NUMBER OF MOTORIST KILLED        10000
CONTRIBUTING FACTOR VEHICLE 1     9950
CONTRIBUTING FACTOR VEHICLE 2     8320
CONTRIBUTING FACTOR VEHICLE 3      700
CONTRIBUTING FACTOR VEHICLE 4      196
CONTRIBUTING FACTOR VEHICLE 5       40
UNIQUE KEY                       10000
VEHICLE TYPE CODE 1               9779
VEHICLE TYPE CODE 2      

## Map data

In [123]:
# Map data.

# Starting coordinates to load map view.
NYC_coordinates = (40.7142700, -74.0059700)

# Limit number of points to plot for testing.
MAX_RECORDS = 1000

# HTML shown inside the popup of each accident marker.
# Placeholders: {0} on-street name, {1} cross-street name, {2} contributing factor.
POPUP_TEMPLATE = """
                <ul>
                    <li><strong>On street</strong>: {0}</li>
                    <li><strong>Cross street</strong>: {1}</li>
                    <li><strong>Reason</strong>: {2}</li>
                </ul>"""


def popup_text(value, missing='N/A'):
    """Return ``value`` as HTML-safe text for use inside a marker popup.

    Args:
        value: A cell value from the accident data (a string, or NaN/None
            when the field is missing).
        missing: Text to show when ``value`` is missing.

    Returns:
        ``missing`` if ``value`` is null, otherwise ``str(value)`` with HTML
        special characters escaped.
    """
    # Without this check a missing street name is rendered as the text "nan".
    if pd.isna(value):
        return missing
    # The text comes straight from the CSV, so escape it before embedding in HTML.
    return html.escape(str(value))


# Create Map object.
# Named ``accident_map`` (not ``map``) so the built-in ``map()`` is not
# shadowed for the rest of the kernel session.
accident_map = folium.Map(location=NYC_coordinates, zoom_start=12)

# Plot accidents.
# NOTE(review): ``folium.MarkerCluster`` and ``folium.element.IFrame`` are kept
# as originally written; newer folium releases appear to expose these as
# ``folium.plugins.MarkerCluster`` and ``folium.IFrame`` -- confirm against
# the installed version.
marker_cluster = folium.MarkerCluster().add_to(accident_map)

# Take the first MAX_RECORDS rows, then keep only those where both
# coordinates are available (a marker cannot be placed otherwise).
plottable_accidents = (
    accident_data
    .iloc[:MAX_RECORDS]
    .dropna(subset=['LATITUDE', 'LONGITUDE'])
)

for _, accident in plottable_accidents.iterrows():
    accident_metadata = POPUP_TEMPLATE.format(
        popup_text(accident['ON STREET NAME']),
        popup_text(accident['CROSS STREET NAME']),
        popup_text(accident['CONTRIBUTING FACTOR VEHICLE 1']))
    iframe = folium.element.IFrame(html=accident_metadata, width=250, height=100)
    popup = folium.Popup(iframe, max_width=2650)
    folium.Marker(
        location=[accident['LATITUDE'], accident['LONGITUDE']],
        icon=folium.Icon(color='red', icon='asterisk'),
        popup=popup).add_to(marker_cluster)

# Last expression of the cell: renders the interactive map inline.
accident_map