In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import geopandas
import numpy as np
import requests
import gmaps
import os
import json
import seaborn as sns
import sys
sys.path.insert(0, 'Data')
from config import g_key
from shapely.geometry import Point
%matplotlib inline

#Store Part I results into DataFrame
#Load the data to a DataFrame
accident_data = pd.read_csv("Data/accident_data.csv", encoding="utf-8")
accident_data.head()

Unnamed: 0,ID,Source,TMC,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
0,A-1,MapQuest,201.0,3,2016-02-08 05:46:00,2016-02-08 11:00:00,39.865147,-84.058723,,,...,False,False,False,False,False,False,Night,Night,Night,Night
1,A-2,MapQuest,201.0,2,2016-02-08 06:07:59,2016-02-08 06:37:59,39.928059,-82.831184,,,...,False,False,False,False,False,False,Night,Night,Night,Day
2,A-3,MapQuest,201.0,2,2016-02-08 06:49:27,2016-02-08 07:19:27,39.063148,-84.032608,,,...,False,False,False,False,True,False,Night,Night,Day,Day
3,A-4,MapQuest,201.0,3,2016-02-08 07:23:34,2016-02-08 07:53:34,39.747753,-84.205582,,,...,False,False,False,False,False,False,Night,Day,Day,Day
4,A-5,MapQuest,201.0,2,2016-02-08 07:39:07,2016-02-08 08:09:07,39.627781,-84.188354,,,...,False,False,False,False,True,False,Day,Day,Day,Day


In [7]:
# Data cleanup

# Filters out dates containing 2016, 2017, & 2019 -- too much data in dataset to handle with laptops
accident_data_revised = accident_data[~accident_data["Start_Time"].str.contains("2016")]
accident_data_revised = accident_data[~accident_data["Start_Time"].str.contains("2017")]
accident_data_revised = accident_data[~accident_data["Start_Time"].str.contains("2019")]


In [8]:
# Splits the Start_Time column into individual columns
accident_data_revised[["Date", "Time"]] = accident_data_revised["Start_Time"].str.split(expand=True)

In [9]:
# Renames columns to be more readable
accident_data_revised = accident_data_revised.rename(columns={"Start_Lat":"Lat", "Start_Lng":"Lng", "Weather_Condition":"Weather"})

# Filters and rearranges dataset to display most useful columns
accident_data_revised = accident_data_revised[["Date", "Time", "Lat", "Lng", "City", "State", "County", "Weather", "Temperature(F)", "Severity"]]

# Iterates through all column data to find NaN values and adds them to a list
accident_data_revised.columns[accident_data_revised.isna().any()].tolist()
# Output: ['City', 'Weather', 'Temperature(F)']
accident_data_revised.head(10)

Unnamed: 0,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity
0,2016-02-08,05:46:00,39.865147,-84.058723,Dayton,OH,Montgomery,Light Rain,36.9,3
1,2016-02-08,06:07:59,39.928059,-82.831184,Reynoldsburg,OH,Franklin,Light Rain,37.9,2
2,2016-02-08,06:49:27,39.063148,-84.032608,Williamsburg,OH,Clermont,Overcast,36.0,2
3,2016-02-08,07:23:34,39.747753,-84.205582,Dayton,OH,Montgomery,Mostly Cloudy,35.1,3
4,2016-02-08,07:39:07,39.627781,-84.188354,Dayton,OH,Montgomery,Mostly Cloudy,36.0,2
5,2016-02-08,07:44:26,40.10059,-82.925194,Westerville,OH,Franklin,Light Rain,37.9,3
6,2016-02-08,07:59:35,39.758274,-84.230507,Dayton,OH,Montgomery,Overcast,34.0,2
7,2016-02-08,07:59:58,39.770382,-84.194901,Dayton,OH,Montgomery,Overcast,34.0,3
8,2016-02-08,08:00:40,39.778061,-84.172005,Dayton,OH,Montgomery,Mostly Cloudy,33.3,2
9,2016-02-08,08:10:04,40.10059,-82.925194,Westerville,OH,Franklin,Light Rain,37.4,3


In [10]:
# Combine Lat/Lng and City/State
accident_data_revised['Coordinates'] = accident_data_revised['Lat'].astype(str).str.zfill(2) + ', ' + accident_data_revised['Lng'].astype(str).str.zfill(3)

accident_data_revised['Location'] = accident_data_revised['City'] + ", " + accident_data_revised['State']
accident_data_revised.head()

Unnamed: 0,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity,Coordinates,Location
0,2016-02-08,05:46:00,39.865147,-84.058723,Dayton,OH,Montgomery,Light Rain,36.9,3,"39.865147, -84.058723","Dayton, OH"
1,2016-02-08,06:07:59,39.928059,-82.831184,Reynoldsburg,OH,Franklin,Light Rain,37.9,2,"39.92805900000001, -82.831184","Reynoldsburg, OH"
2,2016-02-08,06:49:27,39.063148,-84.032608,Williamsburg,OH,Clermont,Overcast,36.0,2,"39.063148, -84.032608","Williamsburg, OH"
3,2016-02-08,07:23:34,39.747753,-84.205582,Dayton,OH,Montgomery,Mostly Cloudy,35.1,3,"39.747753, -84.20558199999998","Dayton, OH"
4,2016-02-08,07:39:07,39.627781,-84.188354,Dayton,OH,Montgomery,Mostly Cloudy,36.0,2,"39.627781, -84.188354","Dayton, OH"


In [16]:
# Get frequency count of incidents in each location
frequency = accident_data_revised['Location'].value_counts()
frequency.head(15)

Houston, TX          71078
Los Angeles, CA      46191
Charlotte, NC        46140
Dallas, TX           43240
Austin, TX           42005
Atlanta, GA          27883
Raleigh, NC          26778
Miami, FL            22969
Orlando, FL          20584
Baton Rouge, LA      18918
Seattle, WA          18693
Oklahoma City, OK    18309
Nashville, TN        17724
Sacramento, CA       17117
San Antonio, TX      15016
Name: Location, dtype: int64

In [14]:
accident_data_revised[accident_data_revised['Location'] == 'Houston, TX']

Unnamed: 0,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity,Coordinates,Location
261019,2016-11-30,16:08:57,29.716625,-95.551613,Houston,TX,Harris,Clear,66.2,2,"29.716625, -95.551613","Houston, TX"
261020,2016-11-30,15:52:28,29.859201,-95.403114,Houston,TX,Harris,Clear,66.9,2,"29.859201, -95.403114","Houston, TX"
261021,2016-11-30,16:37:00,29.845549,-95.384483,Houston,TX,Harris,Clear,66.2,2,"29.845549, -95.384483","Houston, TX"
261022,2016-11-30,16:36:34,29.758677,-95.374695,Houston,TX,Harris,Clear,66.2,3,"29.758677, -95.374695","Houston, TX"
261026,2016-11-30,16:55:32,29.731260,-95.402122,Houston,TX,Harris,Clear,64.4,3,"29.73126, -95.402122","Houston, TX"
...,...,...,...,...,...,...,...,...,...,...,...,...
2242793,2017-08-28,15:19:40,29.835968,-95.563920,Houston,TX,Harris,Heavy Rain,70.0,2,"29.835968, -95.56392","Houston, TX"
2243257,2017-08-29,12:38:25,29.886770,-95.549150,Houston,TX,Harris,Rain,72.0,2,"29.88677, -95.54915","Houston, TX"
2243263,2017-08-29,15:03:10,29.925419,-95.200690,Houston,TX,Harris,Light Rain,73.0,4,"29.925419, -95.20069","Houston, TX"
2243273,2017-08-29,18:23:25,29.783490,-95.474760,Houston,TX,Harris,,,2,"29.783490000000004, -95.47476","Houston, TX"


In [28]:
top_US_df = accident_data_revised.set_index(['Location'])
top_US_df.head()

Unnamed: 0_level_0,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity,Coordinates
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
"Dayton, OH",2016-02-08,05:46:00,39.865147,-84.058723,Dayton,OH,Montgomery,Light Rain,36.9,3,"39.865147, -84.058723"
"Reynoldsburg, OH",2016-02-08,06:07:59,39.928059,-82.831184,Reynoldsburg,OH,Franklin,Light Rain,37.9,2,"39.92805900000001, -82.831184"
"Williamsburg, OH",2016-02-08,06:49:27,39.063148,-84.032608,Williamsburg,OH,Clermont,Overcast,36.0,2,"39.063148, -84.032608"
"Dayton, OH",2016-02-08,07:23:34,39.747753,-84.205582,Dayton,OH,Montgomery,Mostly Cloudy,35.1,3,"39.747753, -84.20558199999998"
"Dayton, OH",2016-02-08,07:39:07,39.627781,-84.188354,Dayton,OH,Montgomery,Mostly Cloudy,36.0,2,"39.627781, -84.188354"


In [31]:
top_US_df.loc[(top_US_df['Location'] == 'Houston, TX'), 
              (top_US_df['Location'] == 'Los Angeles, CA'),
              (top_US_df['Location'] == 'Charlotte, NC'),
              (top_US_df['Location'] == 'Dallas, TX'),
              (top_US_df['Location'] == 'Austin, TX'),
              (top_US_df['Location'] == 'Atlanta, GA'),
              (top_US_df['Location'] == 'Raleigh, NC'),
              (top_US_df['Location'] == 'Miami, FL'),
              (top_US_df['Location'] == 'Orlando, FL'),
              (top_US_df['Location'] == 'Baton Rouge, LA'),
              (top_US_df['Location'] == 'Seattle, WA'),
              (top_US_df['Location'] == 'Oklahoma City, OK'),
              (top_US_df['Location'] == 'Nashville, TN'),
              (top_US_df['Location'] == 'Sacramento, CA'),
              (top_US_df['Location'] == 'San Antonio,TX')]
top_US_df.head()


KeyError: 'Location'

In [None]:
#To select rows whose column value is in list 
Location = ['Houston, TX', 'Los Angeles, CA', 'Charlotte, NC','Dallas, TX', 'Austin, TX', 'Atlanta, GA', 
            'Raleigh, NC', 'Miami, FL', 'Orlando, FL', 'Baton Rouge, LA', 'Seattle, WA',
            'Oklahoma City, OK', 'Nashville, TN', 'Sacramento, CA', 'San Antonio,TX']
accident_data_revised.Location.isin(Location)

top_US_df= accident_data_revised[accident_data_revised.Location.isin(Location))]
top_US_df.shape


In [20]:
#Create new DataFrame with top 15 cities
top_US_df = pd.DataFrame(accident_data_revised, index = ['Houston, TX', 'Los Angeles, CA', 'Charlotte, NC',
                                                         'Dallas, TX', 'Austin, TX', 'Atlanta, GA', 'Raleigh, NC',
                                                         'Miami, FL', 'Orlando, FL', 'Baton Rouge, LA', 'Seattle, WA',
                                                         'Oklahoma City, OK', 'Nashville, TN', 'Sacramento, CA', 'San Antonio,TX'])
top_US_df.head()

Unnamed: 0,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity,Coordinates,Location
"Houston, TX",,,,,,,,,,,,
"Los Angeles, CA",,,,,,,,,,,,
"Charlotte, NC",,,,,,,,,,,,
"Dallas, TX",,,,,,,,,,,,
"Austin, TX",,,,,,,,,,,,


In [19]:
#Create new DataFrame with top 15 cities
top_US_df = pd.DataFrame(accident_data_revised, columns = ['Date', 'Time', 'Location', 'County', 'Weather', 
                                                           'Temperature(F)', 'Severity', 'Coordinates'])

top_US_df = accident_data_revised.loc[accident_data_revised['Location'] == ['Houston, TX', 'Los Angeles, CA', 'Charlotte, NC',
                                                                           'Dallas, TX', 'Austin, TX', 'Atlanta, GA', 'Raleigh, NC',
                                                                           'Miami, FL', 'Orlando, FL', 'Baton Rouge, LA', 'Seattle, WA',
                                                                           'Oklahoma City, OK', 'Nashville, TN', 'Sacramento, CA', 'San Antonio,TX']
top_US_df.head()

SyntaxError: invalid syntax (<ipython-input-19-4cdfb11ce939>, line 9)

In [11]:
#Configure gmaps
gmaps.configure(api_key=g_key)
#Use the Lat and Lng as locations
locations = accident_data_revised[['Location']]
figure_layout = {
    'width': '400px',
    'height': '400px',
    'border': '1px solid black',
    'padding': '1px'
}
gmaps.figure(layout=figure_layout)
#accident_data_revised.plot(figsize=(20,10))

Figure(layout=FigureLayout(border='1px solid black', height='400px', padding='1px', width='400px'))

In [12]:
fig = gmaps.figure(map_type='HYBRID')
heatmap_layer = gmaps.heatmap_layer(locations)
fig.add_layer(heatmap_layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [None]:
#Configure gmaps
gmaps.configure(api_key=g_key)

# Customize the size of the figure
figure_layout = {
    'width': '400px',
    'height': '300px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'
}
fig = gmaps.figure(layout=figure_layout)
# Assign the marker layer to a variable
markers = gmaps.marker_layer('Coordinates')
# Add the layer to the map
fig.add_layer(markers)
fig


In [None]:
#Add Heatmap layer to map.
heatmap_layer = gmaps.heatmap_layer(locations, weights=humidity,
                                    dissipating=False, max_intensity=maxhumidity,
                                    point_radius=2)
fig.add_layer(heatmap_layer)
fig

In [10]:
# Combining Lattitude and Longitude to create coordinates:
accident_data_revised['Coordinates'] = accident_data_revised[['Lat', 'Lng']].values.tolist()
accident_data_revised.head()

Unnamed: 0,ID,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity,Coordinates,Location
0,A-1,2016-02-08,05:46:00,39.865147,-84.058723,Dayton,OH,Montgomery,Light Rain,36.9,3,"[39.865147, -84.058723]","Dayton, OH"
1,A-2,2016-02-08,06:07:59,39.928059,-82.831184,Reynoldsburg,OH,Franklin,Light Rain,37.9,2,"[39.92805900000001, -82.831184]","Reynoldsburg, OH"
2,A-3,2016-02-08,06:49:27,39.063148,-84.032608,Williamsburg,OH,Clermont,Overcast,36.0,2,"[39.063148, -84.032608]","Williamsburg, OH"
3,A-4,2016-02-08,07:23:34,39.747753,-84.205582,Dayton,OH,Montgomery,Mostly Cloudy,35.1,3,"[39.747753, -84.20558199999998]","Dayton, OH"
4,A-5,2016-02-08,07:39:07,39.627781,-84.188354,Dayton,OH,Montgomery,Mostly Cloudy,36.0,2,"[39.627781, -84.188354]","Dayton, OH"


In [16]:
locations = accident_data_revised[['latitude', 'longitude']]
weights = earthquake_df['magnitude']
fig = gmaps.figure()
fig.add_layer(gmaps.heatmap_layer(locations, weights=weights))
fig
accident_data_revised.head()

AttributeError: module 'gmaps' has no attribute 'datasets'

In [11]:
#Change the coordinates to a geoPoint
accident_data_revised['Coordinates'] = accident_data_revised['Coordinates'].apply(Point)
accident_data_revised.head()

Unnamed: 0,ID,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity,Coordinates,Location
0,A-1,2016-02-08,05:46:00,39.865147,-84.058723,Dayton,OH,Montgomery,Light Rain,36.9,3,POINT (39.865147 -84.058723),"Dayton, OH"
1,A-2,2016-02-08,06:07:59,39.928059,-82.831184,Reynoldsburg,OH,Franklin,Light Rain,37.9,2,POINT (39.92805900000001 -82.83118399999999),"Reynoldsburg, OH"
2,A-3,2016-02-08,06:49:27,39.063148,-84.032608,Williamsburg,OH,Clermont,Overcast,36.0,2,POINT (39.063148 -84.032608),"Williamsburg, OH"
3,A-4,2016-02-08,07:23:34,39.747753,-84.205582,Dayton,OH,Montgomery,Mostly Cloudy,35.1,3,POINT (39.747753 -84.20558199999998),"Dayton, OH"
4,A-5,2016-02-08,07:39:07,39.627781,-84.188354,Dayton,OH,Montgomery,Mostly Cloudy,36.0,2,POINT (39.627781 -84.188354),"Dayton, OH"


In [12]:
# Convert the count df to geodf
accident_data_revised = geopandas.GeoDataFrame(accident_data_revised, geometry='Coordinates')
accident_data_revised.head()

Unnamed: 0,ID,Date,Time,Lat,Lng,City,State,County,Weather,Temperature(F),Severity,Coordinates,Location
0,A-1,2016-02-08,05:46:00,39.865147,-84.058723,Dayton,OH,Montgomery,Light Rain,36.9,3,POINT (39.86515 -84.05872),"Dayton, OH"
1,A-2,2016-02-08,06:07:59,39.928059,-82.831184,Reynoldsburg,OH,Franklin,Light Rain,37.9,2,POINT (39.92806 -82.83118),"Reynoldsburg, OH"
2,A-3,2016-02-08,06:49:27,39.063148,-84.032608,Williamsburg,OH,Clermont,Overcast,36.0,2,POINT (39.06315 -84.03261),"Williamsburg, OH"
3,A-4,2016-02-08,07:23:34,39.747753,-84.205582,Dayton,OH,Montgomery,Mostly Cloudy,35.1,3,POINT (39.74775 -84.20558),"Dayton, OH"
4,A-5,2016-02-08,07:39:07,39.627781,-84.188354,Dayton,OH,Montgomery,Mostly Cloudy,36.0,2,POINT (39.62778 -84.18835),"Dayton, OH"


In [59]:
# Change the coordinates to a geoPoint
accident_data_revised['Coordinates'] = accident_data_revised['Coordinates'].apply(Point)
accident_data_revised.head()

NameError: name 'Point' is not defined