In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from ipyleaflet import Map, Icon, Marker
from ipywidgets import interact
import ipywidgets as widgets
from datetime import datetime, timedelta
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn import metrics
import collections

In [2]:
# Location of formatted data
CRASH_DATA_URL = r"https://raw.githubusercontent.com/Clavum/Winningham_CS_Capstone/main/crash_data.csv"

# Every chart and the prediction below read from this single DataFrame.
crash_data = pd.read_csv(CRASH_DATA_URL)

In [3]:
# plt.figure(figsize=(6.1458, 8.03125))
# plt.axis([40.01, 40.12, -82.47, -82.36])
# plt.imshow(img, extent=[40.01, 40.12, -82.47, -82.36])
# plt.scatter('Latitude', 'Longitude', c='NumberOfUnits', data=crash_data)
# plt.xlabel('Latitude')
# plt.ylabel('Longitude')
#plt.annotate('local max', xy=(2, 1), xytext=(3, 1.5),
#             arrowprops=dict(facecolor='black', shrink=0.05),
#             )
# plt.show()

## Welcome to the Newark Traffic Reporting and Prediction System

### Historical (Descriptive) Traffic Data:
---

#### Chart 1: Traffic Incident Location Map

In [4]:
# Traffic incident location map: one fixed (non-draggable) marker per row of crash_data

MARKER_ICON_URL = 'https://raw.githubusercontent.com/Clavum/Winningham_CS_Capstone/main/images/marker.png'

center = (40.065, -82.415)
m = Map(center=center, zoom=15)

for index, crash_row in crash_data.iterrows():
    m.add_layer(
        Marker(
            location=(crash_row["Latitude"], crash_row["Longitude"]),
            # The marker title carries the crash's MannerOfCollision value
            title=crash_row["MannerOfCollision"],
            # A separate 10x10 icon object is built for every marker
            icon=Icon(icon_url=MARKER_ICON_URL, icon_size=[10, 10], icon_anchor=[5, 5]),
            draggable=False,
        )
    )

display(m)

Map(center=[40.065, -82.415], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoo…

#### Chart 2: Pie Chart for Various Statistics

In [5]:
def pie_chart_function(statistic):
    """Draw a pie chart showing how often each value of one crash_data column occurs.

    Parameters
    ----------
    statistic : str
        Name of the crash_data column to summarise (for example 'Weather').

    The legend lists every distinct value followed by its count and its
    percentage of all rows, in the order the values first appear in the data.
    """
    # Counter tallies values in first-seen order, so no try/except is needed
    # to detect a key that has not been counted yet.
    value_counts = collections.Counter(crash_data[statistic])
    total_count = sum(value_counts.values())

    sizes = list(value_counts.values())
    labels = [
        "{!s} ({}, {:.1f}%)".format(value, count, count / total_count * 100)
        for value, count in value_counts.items()
    ]

    plt.pie(sizes, startangle=90)
    plt.legend(labels, loc="center left", bbox_to_anchor=(0.8, 0.5)) # Create legend in the middle right
    plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.

    plt.show()

In [6]:
def send_to_pie_chart(Statistic):
    """Forward the selected statistic to pie_chart_function.

    This is the callback handed to interact; the capitalised parameter name
    matches the `Statistic` keyword passed to interact alongside it.
    """
    pie_chart_function(Statistic)
    
# crash_data columns offered as choices for the pie chart
list_statistics = ['CrashSeverity', 'NumberOfUnits', 'MannerOfCollision',
                   'Weather', 'LightCondition', 'RoadCondition', 'AnimalRelated',
                   'AlcoholRelated', 'SpeedRelated', 'TeenRelated']

# interact returns the function it wraps; the trailing semicolon keeps that
# function's repr from being echoed as extra cell output.
interact(send_to_pie_chart, Statistic=list_statistics);

interactive(children=(Dropdown(description='Statistic', options=('CrashSeverity', 'NumberOfUnits', 'MannerOfCo…

<function __main__.send_to_pie_chart(Statistic)>

#### Chart 3: Incident Frequency by Month, Day, or Hour

In [7]:
def frequency_chart_function(time_range):
    """Plot a bar chart of incident counts grouped by month, weekday, or hour.

    Parameters
    ----------
    time_range : str
        "Month", "Day" (day of the week) or "Hour". Any other value places
        every incident in a single bucket numbered 0.

    Reads the CrashDateTime column of crash_data, which is parsed with the
    format "%m/%d/%Y %H:%M". Prints a short notice instead of plotting when
    there are no incidents at all.
    """
    # Which part of the parsed timestamp each supported range groups by.
    bucket_of = {
        "Month": lambda moment: moment.month,
        "Day": lambda moment: moment.weekday(),  # Monday is 0
        "Hour": lambda moment: moment.hour,
    }.get(time_range, lambda moment: 0)

    frequency_counts = collections.Counter(
        bucket_of(datetime.strptime(stamp, "%m/%d/%Y %H:%M"))
        for stamp in crash_data["CrashDateTime"]
    )
    if not frequency_counts:
        # With no buckets the figure width below would be zero.
        print("No incidents to plot.")
        return

    # Sort by bucket number so bars appear in calendar/clock order.
    ordered_frequencies = sorted(frequency_counts.items())
    buckets = [bucket for bucket, _ in ordered_frequencies]
    counts = [count for _, count in ordered_frequencies]
    positions = range(len(buckets))

    # Figure width grows with the number of bars so tick labels stay readable.
    plt.figure(figsize=(len(buckets) * 0.65, 4))
    plt.bar(positions, counts, align='center')
    plt.xticks(positions, format_number_list(buckets, time_range), rotation=40)
    plt.ylabel('Count')
    plt.show()
    
def format_number_list(number_list, time_range):
    """Translate numeric time buckets into human-readable labels.

    Examples: 1 -> "January" for "Month", 5 -> "Saturday" for "Day",
    19 -> "7:00PM" for "Hour".

    Parameters
    ----------
    number_list : list
        Month numbers (1-12), weekday numbers (0-6, Monday is 0) or hours (0-23).
    time_range : str
        "Month", "Day" or "Hour"; selects which mapping is applied.

    Returns
    -------
    list
        A new list of labels. The input list is left unmodified. For any
        other time_range the values are returned unchanged (as a copy).

    Raises
    ------
    KeyError
        If a number falls outside the range of the selected mapping.
    """
    month_names = ["January", "February", "March", "April", "May", "June", "July", "August",
                   "September", "October", "November", "December"]
    day_names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

    label_maps = {
        "Month": dict(enumerate(month_names, start=1)),
        "Day": dict(enumerate(day_names)),
        # 12-hour clock: hours 0 and 12 both display as 12
        "Hour": {hour: "{}:00{}".format(hour % 12 or 12, "AM" if hour < 12 else "PM")
                 for hour in range(24)},
    }

    label_map = label_maps.get(time_range)
    if label_map is None:
        return list(number_list)
    return [label_map[number] for number in number_list]

In [8]:
def send_to_frequency_chart(TimeRange):
    """Forward the selected time range to frequency_chart_function.

    This is the callback handed to interact; the capitalised parameter name
    matches the `TimeRange` keyword passed to interact alongside it.
    """
    frequency_chart_function(TimeRange)
    
# Groupings offered as choices for the frequency chart
list_time_range = ['Month', 'Day', 'Hour']

# interact returns the function it wraps; the trailing semicolon keeps that
# function's repr from being echoed as extra cell output.
interact(send_to_frequency_chart, TimeRange=list_time_range);

interactive(children=(Dropdown(description='TimeRange', options=('Month', 'Day', 'Hour'), value='Month'), Outp…

<function __main__.send_to_frequency_chart(TimeRange)>

### Future (Predictive) Method

#### Projected Incidents in Time Range

In [52]:
def _count_incidents_per_period(incident_times, period_days):
    """Count incidents in consecutive windows of period_days days.

    Windows start at the earliest timestamp. Windows that contain no incident
    are kept with a count of zero, so list index == period number.
    """
    if not incident_times:
        return []
    window = timedelta(days=period_days)
    first_incident = min(incident_times)
    last_period = (max(incident_times) - first_incident) // window
    counts = [0] * (last_period + 1)
    for moment in incident_times:
        counts[(moment - first_incident) // window] += 1
    return counts


def predict_incidents(period):
    """Print a linear-regression estimate of the incident count for the next period.

    The CrashDateTime column of crash_data is parsed with the format
    "%m/%d/%Y %H:%M" and bucketed into consecutive windows of `period` days,
    starting at the earliest incident. A straight line is fitted to the
    per-window counts and evaluated one window past the last observed one.
    The mean absolute and mean squared error of the fit on the historical
    windows are printed as well.

    Parameters
    ----------
    period : int or float
        Window length in days; must be positive.

    Raises
    ------
    ValueError
        If period is not positive.
    """
    if period <= 0:
        raise ValueError("period must be a positive number of days")

    # Parse to real datetimes first: the raw strings do not sort chronologically,
    # and the bucketing below must not depend on the row order of the CSV.
    incident_times = [
        datetime.strptime(value, "%m/%d/%Y %H:%M")
        for value in crash_data.CrashDateTime
    ]
    period_counts = _count_incidents_per_period(incident_times, period)

    # Remove last period because it might not be full, for example, it might
    # include 10 days of data when it should include 20, thus skewing results
    complete_counts = period_counts[:-1]
    if not complete_counts:
        print("Not enough data to predict incidents for a", period, "day period.")
        return

    X = [[period_index] for period_index in range(len(complete_counts))]
    y = complete_counts

    regressor = LinearRegression()
    regressor.fit(X, y)

    # One window past the last (possibly partial) observed window.
    next_period = len(period_counts)
    # A falling trend can extrapolate below zero; a count cannot be negative.
    future_count = max(0, round(float(regressor.predict([[next_period]])[0])))

    print("In the next", period, "days, there will be approximately", future_count, "incidents.\n\n")
    y_pred = regressor.predict(X)
    print('Error Analysis and Confidence (the lower the better):\n')
    print('Mean Absolute Error:', metrics.mean_absolute_error(y, y_pred))
    print('Mean Squared Error:', metrics.mean_squared_error(y, y_pred))

In [53]:
def send_to_predict_function(Period):
    """Forward the selected period to predict_incidents.

    This is the callback handed to interact; the capitalised parameter name
    matches the `Period` keyword passed to interact alongside it.
    """
    predict_incidents(Period)
    
# Slider picks the period length passed to send_to_predict_function (1-30, default 7).
# interact returns the function it wraps; the trailing semicolon keeps that
# function's repr from being echoed as extra cell output.
interact(send_to_predict_function, Period=widgets.IntSlider(min=1, max=30, step=1, value=7));

interactive(children=(IntSlider(value=7, description='Period', max=30, min=1), Output()), _dom_classes=('widge…

<function __main__.send_to_predict_function(Period)>

---
#### Made by Clavum Corporation
---