In [1]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import seaborn as sns
import bokeh
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.plotting import figure
from bokeh.io import show, output_notebook, output_file
from bokeh.models import Legend
import folium
import random
from folium import plugins

In [2]:
# Retrieve "Motor Vehicle Collisions - Crashes" data
url = 'https://data.cityofnewyork.us/api/views/h9gi-nx95/rows.csv?accessType=DOWNLOAD'
# low_memory=False lets pandas infer each column's dtype from the whole file
# rather than chunk by chunk, which avoids the mixed-type DtypeWarning and
# gives every column a single consistent dtype.
df = pd.read_csv(url, low_memory=False)
print(df.shape)
df.head()

  interactivity=interactivity, compiler=compiler, result=result)


(1673826, 29)


Unnamed: 0,CRASH DATE,CRASH TIME,BOROUGH,ZIP CODE,LATITUDE,LONGITUDE,LOCATION,ON STREET NAME,CROSS STREET NAME,OFF STREET NAME,...,CONTRIBUTING FACTOR VEHICLE 2,CONTRIBUTING FACTOR VEHICLE 3,CONTRIBUTING FACTOR VEHICLE 4,CONTRIBUTING FACTOR VEHICLE 5,COLLISION_ID,VEHICLE TYPE CODE 1,VEHICLE TYPE CODE 2,VEHICLE TYPE CODE 3,VEHICLE TYPE CODE 4,VEHICLE TYPE CODE 5
0,09/22/2016,16:07,,,40.788506,-73.98107,POINT (-73.98107 40.788506),WEST 84 STREET,,,...,Other Vehicular,Other Vehicular,Other Vehicular,,3525213,SPORT UTILITY / STATION WAGON,SPORT UTILITY / STATION WAGON,PASSENGER VEHICLE,PASSENGER VEHICLE,
1,10/11/2016,16:05,,,40.680244,-73.984146,POINT (-73.984146 40.680244),DOUGLASS STREET,,,...,Unspecified,,,,3539075,MOTORCYCLE,PASSENGER VEHICLE,,,
2,09/30/2016,10:21,,,40.688213,-73.919815,POINT (-73.919815 40.688213),BROADWAY,,,...,Driver Inattention/Distraction,,,,3530249,PASSENGER VEHICLE,,,,
3,10/13/2016,10:00,,,40.700665,-74.01542,POINT (-74.01542 40.700665),BROOKLYN BATTERY TUNNEL,,,...,Unspecified,,,,3543214,PASSENGER VEHICLE,PASSENGER VEHICLE,,,
4,09/23/2016,9:50,BROOKLYN,11226.0,40.64485,-73.96002,POINT (-73.96002 40.64485),OCEAN AVENUE,BEVERLEY ROAD,,...,Unspecified,,,,3526442,SPORT UTILITY / STATION WAGON,PASSENGER VEHICLE,,,


In [3]:
# List the column labels of the loaded frame
print(df.columns)

Index(['CRASH DATE', 'CRASH TIME', 'BOROUGH', 'ZIP CODE', 'LATITUDE',
       'LONGITUDE', 'LOCATION', 'ON STREET NAME', 'CROSS STREET NAME',
       'OFF STREET NAME', 'NUMBER OF PERSONS INJURED',
       'NUMBER OF PERSONS KILLED', 'NUMBER OF PEDESTRIANS INJURED',
       'NUMBER OF PEDESTRIANS KILLED', 'NUMBER OF CYCLIST INJURED',
       'NUMBER OF CYCLIST KILLED', 'NUMBER OF MOTORIST INJURED',
       'NUMBER OF MOTORIST KILLED', 'CONTRIBUTING FACTOR VEHICLE 1',
       'CONTRIBUTING FACTOR VEHICLE 2', 'CONTRIBUTING FACTOR VEHICLE 3',
       'CONTRIBUTING FACTOR VEHICLE 4', 'CONTRIBUTING FACTOR VEHICLE 5',
       'COLLISION_ID', 'VEHICLE TYPE CODE 1', 'VEHICLE TYPE CODE 2',
       'VEHICLE TYPE CODE 3', 'VEHICLE TYPE CODE 4', 'VEHICLE TYPE CODE 5'],
      dtype='object')


In [4]:
# Convert date column to datetime.
# The export writes dates as MM/DD/YYYY (e.g. 09/22/2016); passing the format
# explicitly skips per-value format inference on the full frame and rules out
# a day-first misreading of ambiguous dates.
df['CRASH DATE'] = pd.to_datetime(df['CRASH DATE'], format='%m/%d/%Y')

In [205]:
# Set date interval
year = 2019

# Keep only the crashes whose CRASH DATE falls in `year`.
# The boolean mask already selects a new set of rows, so only that subset is
# copied instead of duplicating the whole frame first.
interval_data = df.loc[df['CRASH DATE'].dt.year == year].copy()

In [206]:
# Compare the (rows, columns) of the full frame with the year-filtered subset
print(df.shape, interval_data.shape)

(1673826, 29) (211470, 29)


In [207]:
# Columns needed for the bokeh time-series plots: when the crash happened plus
# the injured/killed counts for persons, pedestrians, cyclists and motorists
time_series_columns = [
    'CRASH DATE', 'CRASH TIME',
    'NUMBER OF PERSONS INJURED', 'NUMBER OF PERSONS KILLED',
    'NUMBER OF PEDESTRIANS INJURED', 'NUMBER OF PEDESTRIANS KILLED',
    'NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED',
    'NUMBER OF MOTORIST INJURED', 'NUMBER OF MOTORIST KILLED',
]
# Independent copy, so the derived columns added later never touch interval_data
time_series_df = interval_data.loc[:, time_series_columns].copy()

In [208]:
# Create column to store hour of day.
# CRASH TIME holds "H:MM" / "HH:MM" strings (e.g. 9:50, 16:07); an explicit
# format avoids slow per-value inference when parsing them.
time_series_df['HOUR OF DAY'] = pd.to_datetime(time_series_df['CRASH TIME'], format='%H:%M').dt.hour

In [209]:
# Create column to store day of the week (Monday=0 ... Sunday=6).
# CRASH DATE is already a datetime column at this point (the year filter used
# its .dt accessor), so the accessor can be applied directly.
time_series_df['WEEKDAY'] = time_series_df['CRASH DATE'].dt.weekday

In [210]:
# Create column to store hour of the week: 24 hours for every full day
# since Monday plus the hour within the current day
time_series_df['HOUR OF WEEK'] = time_series_df['WEEKDAY'].mul(24).add(time_series_df['HOUR OF DAY'])

In [211]:
# Create column to store month (1-12), read straight from the datetime column
crash_dates = time_series_df['CRASH DATE']
time_series_df['MONTH'] = crash_dates.dt.month

In [212]:
# Merge number of injured and dead for persons, pedestrians, cyclists, and motorists.
# Each entry is (merged column, injured column, killed column).
merged_columns = [
    ('TOTAL', 'NUMBER OF PERSONS INJURED', 'NUMBER OF PERSONS KILLED'),
    ('PEDESTRIANS', 'NUMBER OF PEDESTRIANS INJURED', 'NUMBER OF PEDESTRIANS KILLED'),
    ('CYCLISTS', 'NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED'),
    ('MOTORISTS', 'NUMBER OF MOTORIST INJURED', 'NUMBER OF MOTORIST KILLED'),
]
for merged_name, injured_name, killed_name in merged_columns:
    time_series_df[merged_name] = time_series_df[injured_name].add(time_series_df[killed_name])

In [213]:
# Drop not needed columns: the raw date/time and the per-outcome counts have
# all been folded into the derived columns
raw_columns = [
    'CRASH DATE', 'CRASH TIME',
    'NUMBER OF PERSONS INJURED', 'NUMBER OF PERSONS KILLED',
    'NUMBER OF PEDESTRIANS INJURED', 'NUMBER OF PEDESTRIANS KILLED',
    'NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED',
    'NUMBER OF MOTORIST INJURED', 'NUMBER OF MOTORIST KILLED',
]
time_series_df = time_series_df.drop(columns=raw_columns)

In [214]:
# Renumber the rows 0..n-1; the old index labels (inherited from the
# unfiltered frame) are discarded rather than kept as a column
time_series_df = time_series_df.reset_index(drop=True)

In [215]:
# The TOTAL column (built from the PERSONS columns) is of type float,
# change to integer
time_series_df = time_series_df.astype({'TOTAL': int})
time_series_df.head()

Unnamed: 0,HOUR OF DAY,WEEKDAY,HOUR OF WEEK,MONTH,TOTAL,PEDESTRIANS,CYCLISTS,MOTORISTS
0,11,1,35,12,0,0,0,0
1,12,1,36,12,1,0,0,1
2,14,1,38,12,0,0,0,0
3,20,1,44,12,0,0,0,0
4,21,1,45,12,0,0,0,0


In [216]:
# Names of the casualty columns: everything after the first four columns
# (HOUR OF DAY, WEEKDAY, HOUR OF WEEK, MONTH)
accident_types = time_series_df.columns[4:].tolist()

# Empty frame with one column per casualty type, used as the template for
# the per-plot data frames
base_data = pd.DataFrame(columns=accident_types)

base_data.head()

Unnamed: 0,TOTAL,PEDESTRIANS,CYCLISTS,MOTORISTS


In [276]:
# Aggregate the casualty counts per hour of day, weekday, hour of week and month.

# Set to True to store each bucket's share of the casualty type's total
# instead of the absolute count.
relative_freq = False

days = ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')
months = ('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December')


def _sum_by_bucket(frame, key, value_cols, expected_keys, relative=False):
    """Sum ``value_cols`` of ``frame`` within every bucket of column ``key``.

    Args:
        frame: DataFrame holding ``key`` and all of ``value_cols``.
        key: Name of the column to group by.
        value_cols: Names of the columns to sum.
        expected_keys: Every bucket value that must appear in the result;
            buckets without any rows are reported as 0.
        relative: If True, divide each sum by that column's grand total.

    Returns:
        DataFrame with one row per entry of ``expected_keys`` (in that order)
        and the columns ``value_cols`` followed by ``key``.
    """
    value_cols = list(value_cols)
    sums = (frame.groupby(key)[value_cols]
                 .sum()
                 .reindex(list(expected_keys), fill_value=0)
                 .rename_axis(key))
    if relative:
        # Series of column totals aligns on the column labels
        sums = sums / frame[value_cols].sum()
    return sums.reset_index()[value_cols + [key]]


# Hour of day. Buckets are labelled 1-24 instead of 0-23, so label k holds the
# crashes of clock hour k-1. The shift is applied AFTER summing; filtering the
# data with already-shifted labels would drop hour 0 and leave bucket 24 empty.
hour_of_day_df = _sum_by_bucket(time_series_df, 'HOUR OF DAY', accident_types,
                                range(24), relative_freq)
hour_of_day_df['HOUR OF DAY'] += 1
unique_hours = hour_of_day_df['HOUR OF DAY'].tolist()

# Days in week (0=Monday ... 6=Sunday), then replaced by the day names.
# The name is looked up from the weekday value, not from the row position.
unique_days = list(range(len(days)))
day_in_week_df = _sum_by_bucket(time_series_df, 'WEEKDAY', accident_types,
                                unique_days, relative_freq)
day_in_week_df['WEEKDAY'] = day_in_week_df['WEEKDAY'].map(lambda day: days[day])

# Hours in week, labelled 1-168 with the same shift-after-summing rule
hour_of_week_df = _sum_by_bucket(time_series_df, 'HOUR OF WEEK', accident_types,
                                 range(24 * len(days)), relative_freq)
hour_of_week_df['HOUR OF WEEK'] += 1
unique_hours_week = hour_of_week_df['HOUR OF WEEK'].tolist()

# Month in year (1-12), then replaced by the month names
unique_months = list(range(1, len(months) + 1))
month_in_year_df = _sum_by_bucket(time_series_df, 'MONTH', accident_types,
                                  unique_months, relative_freq)
month_in_year_df['MONTH'] = month_in_year_df['MONTH'].map(lambda month: months[month - 1])

# One ColumnDataSource per plot, in the order the figures are built:
# hour of day, day of week, hour of week, month. Handing the sources (rather
# than the raw frames) to the glyphs lets all bars of a figure share one source.
hour_of_day_source = ColumnDataSource(hour_of_day_df)
day_in_week_source = ColumnDataSource(day_in_week_df)
hour_of_week_source = ColumnDataSource(hour_of_week_df)
month_in_year_source = ColumnDataSource(month_in_year_df)
sources = [hour_of_day_source, day_in_week_source, hour_of_week_source, month_in_year_source]

In [278]:
# Prepare for Bokeh Plots
# Call output_notebook() so that bokeh output is displayed in the notebook
output_notebook()

In [309]:
# One colour per casualty series, in the order of accident_types
custom = ['#FF6666', '#6699FF', '#33CC00', '#FFCC33']
palette = sns.color_palette(custom)
pal = palette.as_hex()

# Label the y-axis according to what the aggregation cell stored: shares of
# the total when relative_freq is set, absolute counts otherwise.
y_label = 'Relative Frequency' if relative_freq else 'Number of Injured or Killed'

# Tick every 12 hours along the hour-of-week axis (0, 12, 24, ...)
x_range_hours_in_week = list(range(0, hour_of_week_df.shape[0] + 1, 12))


def _bar_figure(title, x_label, **figure_kwargs):
    """Return an 850x400 figure without toolbar that uses the shared y-axis label.

    Extra keyword arguments (e.g. ``x_range``) are passed on to ``figure``.
    """
    return figure(plot_height=400, plot_width=850, title=title,
                  x_axis_label=x_label, y_axis_label=y_label,
                  toolbar_location=None, tools="", **figure_kwargs)


# Plot for "HOUR IN DAY"; the buckets are labelled 1-24, so tick all 24 of them
p_hour_in_day = _bar_figure('Accidents for every Hour of the Day', 'Hour of the Day')
p_hour_in_day.xaxis.ticker = list(range(1, 25))

# Plot for "DAYS IN WEEK" (categorical axis with the day names)
p_days_in_week = _bar_figure('Accidents for every Day of the Week', 'Day of the Week',
                             x_range=FactorRange(*days))

# Plot for "HOUR IN WEEK"
p_hour_in_week = _bar_figure('Accidents for every Hour of the Week', 'Hour of the Week')
p_hour_in_week.xaxis.ticker = x_range_hours_in_week

# Plot for "MONTHS IN YEAR" (categorical axis with the month names)
p_month_in_year = _bar_figure('Accidents for every Month of the Year', 'Month of the Year',
                              x_range=FactorRange(*months))

# Figures and their x columns, in the same order as `sources`
ps = [p_hour_in_day, p_days_in_week, p_hour_in_week, p_month_in_year]
xs = ('HOUR OF DAY', 'WEEKDAY', 'HOUR OF WEEK', 'MONTH')

for j, p in enumerate(ps):
    bar = {}
    items = []

    # One bar series per casualty type. Every series starts muted and is
    # toggled by clicking its legend entry.
    for indx, i in enumerate(accident_types):
        bar[i] = p.vbar(x=xs[j],
                        muted_alpha=0.03,
                        fill_alpha=0.7,
                        line_color=(0,0,0,0.0),
                        muted=True,
                        width=0.6,
                        top=i,
                        color=pal[indx],
                        muted_color=pal[indx],
                        source=sources[j])
        items.append((i, [bar[i]]))

    # Legend placed outside the plot area, on the left
    legend = Legend(items=items, location=(0,220))
    p.add_layout(legend, 'left')
    p.legend.click_policy="mute"

[0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 120, 132, 144, 156, 168]


In [310]:
# Render each of the figures collected in `ps`, in order
for p in ps:
    show(p)

Weekly (Monday, Tuesday, …)
Monthly (January, February, …)
Hour of day (0-23)
Hour of week (168 hours per week)


What to change above:
- Month labels could be abbreviated (Jan, Feb, ...)
- Change the bars so that they line up with the axis ticks
- Change the hour-of-week axis so that it is easier to interpret
- Add a hover tool to show the values

# Predictions

In [21]:
# [latitude, longitude] pair used as the centre of the folium maps
NY_coor = [40.730610, -73.935242]

In [22]:
# Base map centred on NY_coor; the bare name on the last line displays it
pred_map = folium.Map(location=NY_coor, zoom_start = 12)
pred_map

In [23]:
def rgb_to_hex(rgb):
    """Format an (R, G, B) triple of integers as an upper-case '#RRGGBB' string.

    Args:
        rgb: Sequence of three integers (tuple, list, array, ...). Each value
            is written as at least two hex digits, so values are expected to
            lie in 0-255.

    Returns:
        The colour as a string such as '#00FF00'.

    Raises:
        TypeError: If ``rgb`` does not hold exactly three integers.
    """
    # %-formatting only unpacks tuples, so normalise any other sequence first
    hexi = '#%02X%02X%02X' % tuple(rgb)
    return hexi

# Fresh base map for the prediction grid
pred_map = folium.Map(location=NY_coor, zoom_start = 11)

# Grid map: n steps along X (longitude) by m steps along Y (latitude),
# starting at an offset from the map centre and advancing by `spacing` degrees
n = 60
m = 50
start_coor_X = NY_coor[1] - 0.077
start_coor_Y = NY_coor[0] - 0.06
spacing = 0.0035

# Seeded generator so the placeholder values (and the rendered map) are the
# same on every run
rng = np.random.RandomState(42)

# Grid points as [X, Y] = [longitude, latitude]
coor = []

for i in range(n):
    # X only depends on the outer index, so compute it once per column
    loc_X = start_coor_X + (i * spacing)
    for j in range(m):
        loc_Y = start_coor_Y + (j * spacing)
        coor.append([loc_X, loc_Y])
        # Random placeholder value in 128-255, used both as the green channel
        # of the marker colour and as the number shown in the popup
        pred = int(np.round(rng.uniform(low=0.5, high=1) * 255))
        col = rgb_to_hex((0, pred, 0))
        # folium expects [latitude, longitude]
        folium.Circle(location=[loc_Y, loc_X],
          popup=("Number of injured and dead: {}".format(pred)),
          radius = 80,
          fill=True,
          fill_color=col,
          color = col,
          opacity = 0.8, fill_opacity=0.8).add_to(pred_map)
pred_map

In [59]:
# Five monthly timestamps, each paired with one of the first five grid points
timestamps = ['2017-06-02', '2017-07-02', '2017-08-02', '2017-09-02', '2017-10-02']
points = [{'time': stamp, 'coordinates': coor[k]} for k, stamp in enumerate(timestamps)]


def _marker_feature(point):
    """Build the GeoJSON Point feature (house icon marker) for one timestamped point."""
    return {
        'type': 'Feature',
        'geometry': {
            'type': 'Point',
            'coordinates': point['coordinates'],
        },
        'properties': {
            'time': point['time'],
            'id': 'house',
            'icon': 'marker',
            'iconstyle': {
                'iconUrl': 'http://downloadicons.net/sites/default/files/small-house-with-a-chimney-icon-70053.png',
                'iconSize': [20, 20]
            }
        }
    }


features = [_marker_feature(point) for point in points]

# A LineString through the same points with one timestamp per vertex,
# using a green circle icon and a zero-weight line style
track_feature = {
    'type': 'Feature',
    'geometry': {
        'type': 'LineString',
        'coordinates': [point['coordinates'] for point in points],
    },
    'properties': {
        'times': [point['time'] for point in points],
        'icon': 'circle',
        'iconstyle': {
            'fillColor': 'green',
            'fillOpacity': 0.6,
            'stroke': 'false',
            'radius': 5
        },
        'style': {'weight': 0},
        'id': 'man'
    }
}
features.append(track_feature)

print(features)

m = folium.Map(location=NY_coor, zoom_start=10)

# Options for the time slider, kept together so they are easy to tune
timeline_options = dict(
    period='P1M',
    add_last_point=True,
    auto_play=False,
    loop=False,
    max_speed=1,
    loop_button=True,
    date_options='YYYY/MM/DD',
    time_slider_drag_update=True,
    duration='P2M',
)
feature_collection = {'type': 'FeatureCollection', 'features': features}
plugins.TimestampedGeoJson(feature_collection, **timeline_options).add_to(m)

m

[{'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [-74.012242, 40.670609999999996]}, 'properties': {'time': '2017-06-02', 'id': 'house', 'icon': 'marker', 'iconstyle': {'iconUrl': 'http://downloadicons.net/sites/default/files/small-house-with-a-chimney-icon-70053.png', 'iconSize': [20, 20]}}}, {'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [-74.012242, 40.67411]}, 'properties': {'time': '2017-07-02', 'id': 'house', 'icon': 'marker', 'iconstyle': {'iconUrl': 'http://downloadicons.net/sites/default/files/small-house-with-a-chimney-icon-70053.png', 'iconSize': [20, 20]}}}, {'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [-74.012242, 40.677609999999994]}, 'properties': {'time': '2017-08-02', 'id': 'house', 'icon': 'marker', 'iconstyle': {'iconUrl': 'http://downloadicons.net/sites/default/files/small-house-with-a-chimney-icon-70053.png', 'iconSize': [20, 20]}}}, {'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [-74.012242, 4