In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import requests
import gmaps
import os
import json
import sys
sys.path.insert(0, 'Data')
from config import g_key

# Part I results: read the accident records CSV into a DataFrame.
accident_csv_path = "Data/accident_data.csv"
accident_data = pd.read_csv(accident_csv_path, encoding="utf-8")

# Preview the first rows to confirm the load worked.
accident_data.head()

Unnamed: 0,ID,Source,TMC,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
0,A-1,MapQuest,201.0,3,2016-02-08 05:46:00,2016-02-08 11:00:00,39.865147,-84.058723,,,...,False,False,False,False,False,False,Night,Night,Night,Night
1,A-2,MapQuest,201.0,2,2016-02-08 06:07:59,2016-02-08 06:37:59,39.928059,-82.831184,,,...,False,False,False,False,False,False,Night,Night,Night,Day
2,A-3,MapQuest,201.0,2,2016-02-08 06:49:27,2016-02-08 07:19:27,39.063148,-84.032608,,,...,False,False,False,False,True,False,Night,Night,Day,Day
3,A-4,MapQuest,201.0,3,2016-02-08 07:23:34,2016-02-08 07:53:34,39.747753,-84.205582,,,...,False,False,False,False,False,False,Night,Day,Day,Day
4,A-5,MapQuest,201.0,2,2016-02-08 07:39:07,2016-02-08 08:09:07,39.627781,-84.188354,,,...,False,False,False,False,True,False,Day,Day,Day,Day


In [16]:
# Data cleanup

# Drop every accident whose Start_Time begins with 2016, 2017, or 2019 -- the
# full dataset is too large to handle on laptops. Series.str.match anchors the
# pattern at the start of the string, so only the year field is tested; digits
# appearing later in the timestamp cannot trigger a false match.
starts_in_excluded_year = accident_data["Start_Time"].str.match(r"(?:2016|2017|2019)")

# .copy() makes the filtered frame independent of accident_data, so the column
# assignments below do not write into a slice (avoids SettingWithCopyWarning).
accident_data_revised = accident_data[~starts_in_excluded_year].copy()

# Splits the Start_Time column on whitespace into separate Date and Time columns
accident_data_revised[["Date", "Time"]] = accident_data_revised["Start_Time"].str.split(expand=True)

# Renames columns to be more readable
accident_data_revised = accident_data_revised.rename(
    columns={"Start_Lat": "Lat", "Start_Lng": "Lng", "Weather_Condition": "Weather"}
)

# Filters and rearranges dataset to display most useful columns
accident_data_revised = accident_data_revised[["Date", "Time", "Lat", "Lng", "City", "State",
         "County", "Weather", "Temperature(F)", "Severity", "Nautical_Twilight"]]

# Names of the kept columns that contain at least one NaN. The result is stored
# (previously it was computed and thrown away) so it can be inspected later.
columns_with_nan = accident_data_revised.columns[accident_data_revised.isna().any()].tolist()

# reset_index() without drop=True keeps the original row labels as a new
# "index" column and renumbers the rows from 0.
accident_data_revised = accident_data_revised.reset_index()

accident_data_revised.head(20)

Unnamed: 0,index,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity,Nautical_Twilight
0,626043,2018-12-31,23:54:51,40.740047,-73.818512,Flushing,NY,Queens,Heavy Rain,46.9,3,Night
1,626045,2018-12-31,22:40:11,40.83437,-73.864113,Bronx,NY,Bronx,Rain,46.4,3,Night
2,626355,2018-12-31,12:35:54,38.441975,-88.953049,Dix,IL,Jefferson,Light Rain,57.0,3,Day
3,626407,2018-12-31,22:32:27,30.241417,-97.726158,Austin,TX,Travis,Partly Cloudy,39.9,2,Night
4,626462,2018-12-31,23:49:55,32.77306,-96.744247,Dallas,TX,Dallas,Clear,43.0,2,Night
5,626463,2018-12-31,23:49:24,32.81842,-96.802391,Dallas,TX,Dallas,Clear,43.0,2,Night
6,626563,2018-12-31,18:46:51,39.902153,-104.988914,Denver,CO,Adams,Snow,8.6,3,Night
7,626574,2018-12-31,22:26:47,32.192139,-110.857933,Tucson,AZ,Pima,Light Rain,37.4,2,Night
8,626599,2018-12-31,22:40:29,35.008537,-105.664352,Moriarty,NM,Torrance,Light Snow,27.9,3,Night
9,626602,2018-12-31,22:37:23,35.423836,-108.312744,Continental Divide,NM,McKinley,Light Snow,25.0,3,Night


In [17]:
# Build a "lat, lng" text key for every accident: both coordinates are cast to
# strings (zero-padded on the left to a minimum width of 2 and 3 characters)
# and then joined with a comma.
lat_text = accident_data_revised['Lat'].astype(str).str.zfill(2)
lng_text = accident_data_revised['Lng'].astype(str).str.zfill(3)
accident_data_revised['Coordinates'] = lat_text + ', ' + lng_text

# Build a "City, State" label in the same way.
city_and_state = accident_data_revised['City'] + ", " + accident_data_revised['State']
accident_data_revised['Location'] = city_and_state
accident_data_revised.head()

Unnamed: 0,index,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity,Nautical_Twilight,Coordinates,Location
0,626043,2018-12-31,23:54:51,40.740047,-73.818512,Flushing,NY,Queens,Heavy Rain,46.9,3,Night,"40.740047, -73.818512","Flushing, NY"
1,626045,2018-12-31,22:40:11,40.83437,-73.864113,Bronx,NY,Bronx,Rain,46.4,3,Night,"40.83437, -73.864113","Bronx, NY"
2,626355,2018-12-31,12:35:54,38.441975,-88.953049,Dix,IL,Jefferson,Light Rain,57.0,3,Day,"38.441975, -88.953049","Dix, IL"
3,626407,2018-12-31,22:32:27,30.241417,-97.726158,Austin,TX,Travis,Partly Cloudy,39.9,2,Night,"30.241417, -97.726158","Austin, TX"
4,626462,2018-12-31,23:49:55,32.77306,-96.744247,Dallas,TX,Dallas,Clear,43.0,2,Night,"32.77306, -96.744247","Dallas, TX"


In [18]:
# Count how many accident rows share each Coordinates string ...
accidents_per_coordinate = accident_data_revised['Coordinates'].value_counts()

# ... and attach that count to every row as a 'frequency' column.
accident_data_revised['frequency'] = accident_data_revised['Coordinates'].map(accidents_per_coordinate)
accident_data_revised.head()

Unnamed: 0,index,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity,Nautical_Twilight,Coordinates,Location,frequency
0,626043,2018-12-31,23:54:51,40.740047,-73.818512,Flushing,NY,Queens,Heavy Rain,46.9,3,Night,"40.740047, -73.818512","Flushing, NY",1
1,626045,2018-12-31,22:40:11,40.83437,-73.864113,Bronx,NY,Bronx,Rain,46.4,3,Night,"40.83437, -73.864113","Bronx, NY",3
2,626355,2018-12-31,12:35:54,38.441975,-88.953049,Dix,IL,Jefferson,Light Rain,57.0,3,Day,"38.441975, -88.953049","Dix, IL",1
3,626407,2018-12-31,22:32:27,30.241417,-97.726158,Austin,TX,Travis,Partly Cloudy,39.9,2,Night,"30.241417, -97.726158","Austin, TX",1
4,626462,2018-12-31,23:49:55,32.77306,-96.744247,Dallas,TX,Dallas,Clear,43.0,2,Night,"32.77306, -96.744247","Dallas, TX",1


In [19]:
# Order the rows by accident frequency, highest counts first
accident_data_sorted = accident_data_revised.sort_values('frequency', ascending=False)
accident_data_sorted.head()

Unnamed: 0,index,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity,Nautical_Twilight,Coordinates,Location,frequency
422301,1107358,2018-04-30,17:55:45,34.858925,-82.259857,Greenville,SC,Greenville,Clear,75.0,3,Day,"34.858925, -82.259857","Greenville, SC",324
85627,770684,2018-11-14,18:21:08,34.858925,-82.259857,Greenville,SC,Greenville,Light Rain,44.1,3,Night,"34.858925, -82.259857","Greenville, SC",324
169729,854786,2018-10-24,07:28:09,34.858925,-82.259857,Greenville,SC,Greenville,Clear,,3,Day,"34.858925, -82.259857","Greenville, SC",324
75379,760436,2018-11-09,18:02:50,34.858925,-82.259857,Greenville,SC,Greenville,Overcast,52.0,3,Day,"34.858925, -82.259857","Greenville, SC",324
75410,760467,2018-11-09,19:34:31,34.858925,-82.259857,Greenville,SC,Greenville,Overcast,52.0,3,Night,"34.858925, -82.259857","Greenville, SC",324


In [20]:
# Reduce to one row per unique Coordinates value: the first occurrence in the
# frequency-sorted frame is kept and every later repeat is discarded
# (so the other per-accident details of the discarded rows are lost).
is_repeated_coordinate = accident_data_sorted.duplicated(subset=['Coordinates'])
coordinates_by_frequency = accident_data_sorted[~is_repeated_coordinate]
coordinates_by_frequency.head()

Unnamed: 0,index,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity,Nautical_Twilight,Coordinates,Location,frequency
422301,1107358,2018-04-30,17:55:45,34.858925,-82.259857,Greenville,SC,Greenville,Clear,75.0,3,Day,"34.858925, -82.259857","Greenville, SC",324
401843,1086900,2018-06-20,07:43:54,34.833031,-82.296837,Greenville,SC,Greenville,Clear,82.9,3,Day,"34.833031, -82.296837","Greenville, SC",285
677624,1363078,2018-01-05,18:40:38,44.966118,-93.26992,Minneapolis,MN,Hennepin,Clear,-0.9,3,Night,"44.966118, -93.26992","Minneapolis, MN",250
492633,1177690,2018-04-03,09:36:09,37.808498,-122.366852,San Francisco,CA,San Francisco,Clear,55.0,3,Day,"37.808498, -122.366852","San Francisco, CA",223
59777,744834,2018-11-02,19:24:57,33.941364,-118.096634,Downey,CA,Los Angeles,Clear,72.0,3,Night,"33.941364, -118.096634","Downey, CA",217


In [25]:
# Number of rows left after de-duplicating, i.e. the count of unique coordinates
total_rows = coordinates_by_frequency.shape[0]
print(total_rows)

416312


In [27]:
# Configure gmaps with the API key imported from config
gmaps.configure(api_key=g_key)

# One point per unique coordinate: Lat/Lng give the position and the accident
# count at that position (as float) is the weight.
locations = coordinates_by_frequency.loc[:, ["Lat", "Lng"]]
frequency = coordinates_by_frequency['frequency'].astype(float)

# Largest count in the data; passed to the layer as max_intensity
maxfrequency = frequency.max()

# Build the heatmap layer, then attach it to a fresh figure
heatmap_layer = gmaps.heatmap_layer(
    locations,
    weights=frequency,
    dissipating=False,
    max_intensity=maxfrequency,
    point_radius=.5,
)
fig = gmaps.figure()
fig.add_layer(heatmap_layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [28]:
# Restrict the frequency-sorted accidents to rows whose State is "NC"
in_north_carolina = accident_data_sorted['State'] == "NC"
NC_accident_data = accident_data_sorted[in_north_carolina]
NC_accident_data.head()

Unnamed: 0,index,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity,Nautical_Twilight,Coordinates,Location,frequency
374491,1059548,2018-06-06,13:45:18,35.766506,-78.735367,Raleigh,NC,Wake,Scattered Clouds,84.0,3,Day,"35.766506, -78.73536700000002","Raleigh, NC",197
594498,1279555,2018-03-23,14:02:14,35.766506,-78.735367,Raleigh,NC,Wake,Partly Cloudy,52.0,3,Day,"35.766506, -78.73536700000002","Raleigh, NC",197
82359,767416,2018-11-13,07:34:47,35.766506,-78.735367,Raleigh,NC,Wake,Overcast,44.1,3,Day,"35.766506, -78.73536700000002","Raleigh, NC",197
619901,1304958,2018-02-05,17:48:51,35.766506,-78.735367,Raleigh,NC,Wake,Partly Cloudy,41.0,3,Day,"35.766506, -78.73536700000002","Raleigh, NC",197
240366,925423,2018-09-27,11:18:36,35.766506,-78.735367,Raleigh,NC,Wake,Light Rain,73.0,3,Day,"35.766506, -78.73536700000002","Raleigh, NC",197


In [32]:
# NC_accident_data holds one row per accident, and every row already carries
# the total count for its coordinates in 'frequency'. Keep a single row per
# coordinate so each location is weighted once, the same way the nationwide
# map is built from coordinates_by_frequency.
NC_unique_coordinates = NC_accident_data.drop_duplicates(subset=['Coordinates'])

# Use the Lat and Lng as locations and frequency (as float) as the weight.
locations = NC_unique_coordinates[["Lat", "Lng"]]
frequency = NC_unique_coordinates['frequency'].astype(float)

# Largest count among the NC locations; passed to the layer as max_intensity
maxfrequency = frequency.max()

fig = gmaps.figure()

# Add Heatmap layer to map
heatmap_layer = gmaps.heatmap_layer(locations, weights=frequency,
                                    dissipating=False, max_intensity=maxfrequency,
                                    point_radius=0.05)
fig.add_layer(heatmap_layer)

# The former plt.savefig('') call was removed: the map is a gmaps widget, not a
# matplotlib figure, so pyplot has nothing of this map to save, and '' is not a
# usable file name.
fig

Figure(layout=FigureLayout(height='420px'))

In [38]:
# Narrow the North Carolina accidents down to rows whose County is "Wake"
in_wake_county = NC_accident_data['County'] == "Wake"
Wake_accident_data = NC_accident_data[in_wake_county]
Wake_accident_data.head()

Unnamed: 0,index,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity,Nautical_Twilight,Coordinates,Location,frequency
374491,1059548,2018-06-06,13:45:18,35.766506,-78.735367,Raleigh,NC,Wake,Scattered Clouds,84.0,3,Day,"35.766506, -78.73536700000002","Raleigh, NC",197
594498,1279555,2018-03-23,14:02:14,35.766506,-78.735367,Raleigh,NC,Wake,Partly Cloudy,52.0,3,Day,"35.766506, -78.73536700000002","Raleigh, NC",197
82359,767416,2018-11-13,07:34:47,35.766506,-78.735367,Raleigh,NC,Wake,Overcast,44.1,3,Day,"35.766506, -78.73536700000002","Raleigh, NC",197
619901,1304958,2018-02-05,17:48:51,35.766506,-78.735367,Raleigh,NC,Wake,Partly Cloudy,41.0,3,Day,"35.766506, -78.73536700000002","Raleigh, NC",197
240366,925423,2018-09-27,11:18:36,35.766506,-78.735367,Raleigh,NC,Wake,Light Rain,73.0,3,Day,"35.766506, -78.73536700000002","Raleigh, NC",197


In [39]:
# Wake_accident_data holds one row per accident, and every row already carries
# the total count for its coordinates in 'frequency'. Keep a single row per
# coordinate so each location is weighted once, the same way the nationwide
# map is built from coordinates_by_frequency.
Wake_unique_coordinates = Wake_accident_data.drop_duplicates(subset=['Coordinates'])

# Use the Lat and Lng as locations and frequency (as float) as the weight.
locations = Wake_unique_coordinates[["Lat", "Lng"]]
frequency = Wake_unique_coordinates['frequency'].astype(float)

# Largest count among the Wake County locations; passed as max_intensity
maxfrequency = frequency.max()

fig = gmaps.figure()

# Add Heatmap layer to map
heatmap_layer = gmaps.heatmap_layer(locations, weights=frequency,
                                    dissipating=False, max_intensity=maxfrequency,
                                    point_radius=0.005)
fig.add_layer(heatmap_layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [40]:
# Wake_accident_data is still ordered by descending frequency, so keeping the
# first row for each Coordinates value and showing the first five rows lists
# the five Wake County locations with the most accidents.
already_listed = Wake_accident_data.duplicated(subset=['Coordinates'])
Wake_hotspots = Wake_accident_data[~already_listed]
Wake_hotspots.head()

Unnamed: 0,index,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity,Nautical_Twilight,Coordinates,Location,frequency
374491,1059548,2018-06-06,13:45:18,35.766506,-78.735367,Raleigh,NC,Wake,Scattered Clouds,84.0,3,Day,"35.766506, -78.73536700000002","Raleigh, NC",197
529856,1214913,2018-04-21,14:51:37,35.814922,-78.605362,Raleigh,NC,Wake,Partly Cloudy,66.9,2,Day,"35.814922, -78.605362","Raleigh, NC",189
68908,753965,2018-11-07,16:13:35,35.825504,-78.621483,Raleigh,NC,Wake,Scattered Clouds,71.1,3,Day,"35.82550399999999, -78.621483","Raleigh, NC",172
237784,922841,2018-09-26,17:56:29,35.835407,-78.669601,Raleigh,NC,Wake,Scattered Clouds,84.0,3,Day,"35.83540700000001, -78.669601","Raleigh, NC",170
177770,862827,2018-10-27,21:12:51,35.794537,-78.582642,Raleigh,NC,Wake,Overcast,53.1,3,Night,"35.794537, -78.582642","Raleigh, NC",165


In [42]:
# Hand-labelled markers for five Wake County hotspots: a descriptive name, the
# (lat, lng) position, and the accident count for each.
worst_five = [
    {
        'name': '#1: I-40WB Exit 239A to Cary and Asheboro',
        'location': (35.766506, -78.735367),
        'frequency': 197,
    },
    {
        'name': '#2: Capital Blvd inbound at I-440 merge',
        'location': (35.814922, -78.605362),
        'frequency': 189,
    },
    {
        'name': '#3: I-440WB at Wake Forest Rd',
        'location': (35.825504, -78.621483),
        'frequency': 172,
    },
    {
        'name': '#4: I-440WB at Glenwood Ave',
        'location': (35.835407, -78.669601),
        'frequency': 170,
    },
    {
        'name': '#5: New Bern Ave inbound at I-440WB',
        'location': (35.794537, -78.582642),
        'frequency': 165,
    },
]

# HTML shown in each marker's info box; {name} and {frequency} are filled in
# from the matching dictionary above.
info_box_template = """
<dl>
<dt>Name</dt><dd>{name}</dd>
<dt>Number frequency</dt><dd>{frequency}</dd>
</dl>
"""

# Marker positions and their rendered info-box text, in the same order
worst_locations = [hotspot['location'] for hotspot in worst_five]
marker_info = [info_box_template.format_map(hotspot) for hotspot in worst_five]

# Put the markers on a new map
fig = gmaps.figure()
marker_layer = gmaps.marker_layer(worst_locations, info_box_content=marker_info)
fig.add_layer(marker_layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [21]:
# Find which coordinates had the most accidents (top 100)
# NOTE(review): every statement in this cell is commented out, so running it
# does nothing and top_US_df is never defined. The table displayed under the
# cell appears to be left over from an earlier run -- either re-enable the
# lines below or delete the cell.
#top_US_df = coordinates_by_frequency.nlargest(100,'frequency')
#top_US_df.head()
#top_US_df.iloc[0:100]

Unnamed: 0,index,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity,Nautical_Twilight,Coordinates,Location,frequency
422301,1107358,2018-04-30,17:55:45,34.858925,-82.259857,Greenville,SC,Greenville,Clear,75.0,3,Day,"34.858925, -82.259857","Greenville, SC",324
401843,1086900,2018-06-20,07:43:54,34.833031,-82.296837,Greenville,SC,Greenville,Clear,82.9,3,Day,"34.833031, -82.296837","Greenville, SC",285
677624,1363078,2018-01-05,18:40:38,44.966118,-93.26992,Minneapolis,MN,Hennepin,Clear,-0.9,3,Night,"44.966118, -93.26992","Minneapolis, MN",250
492633,1177690,2018-04-03,09:36:09,37.808498,-122.366852,San Francisco,CA,San Francisco,Clear,55.0,3,Day,"37.808498, -122.366852","San Francisco, CA",223
59777,744834,2018-11-02,19:24:57,33.941364,-118.096634,Downey,CA,Los Angeles,Clear,72.0,3,Night,"33.941364, -118.096634","Downey, CA",217
