In [8]:
# Dependencies
import gmaps
import pandas as pd
from matplotlib import pyplot as plt
from scipy.stats import pearsonr
from config import g_key

In [9]:
# Read main CSV data file
accidents_df = pd.read_csv("all.csv", low_memory=False)

In [10]:
# Overview of DataFrame
accidents_df

Unnamed: 0.1,Unnamed: 0,Accident_Index,1st_Road_Class,Accident_Severity,Year,Date,Day_of_Week,Latitude,Light_Conditions,Local_Authority_(District),...,Journey_Purpose_of_Driver,Junction_Location,make,model,Propulsion_Code,Sex_of_Driver,Towing_and_Articulation,Vehicle_Manoeuvre,Vehicle_Type,Was_Vehicle_Left_Hand_Drive
0,478587,201001BS70003,B,Slight,2010,2010-01-11,Monday,51.484087,Daylight,Kensington and Chelsea,...,Commuting to/from work,Mid Junction - on roundabout or on main road,CITROEN,BERLINGO FIRST 600,Petrol,Female,No tow/articulation,Turning right,Van / Goods 3.5 tonnes mgw or under,No
1,478588,201001BS70004,A,Slight,2010,2010-01-11,Monday,51.509212,Darkness - lights lit,Kensington and Chelsea,...,Journey as part of work,Mid Junction - on roundabout or on main road,RENAULT,SCENIC DYN DCI 130,Heavy oil,Male,No tow/articulation,Going ahead other,Car,No
2,478589,201001BS70007,Unclassified,Slight,2010,2010-01-02,Saturday,51.513314,Darkness - lights lit,Kensington and Chelsea,...,Other/Not known (2005-10),Mid Junction - on roundabout or on main road,NISSAN,PRIMERA SVE CVT,Petrol,Female,No tow/articulation,Going ahead right-hand bend,Car,No
3,478590,201001BS70007,Unclassified,Slight,2010,2010-01-02,Saturday,51.513314,Darkness - lights lit,Kensington and Chelsea,...,Other/Not known (2005-10),Mid Junction - on roundabout or on main road,MERCEDES,A140 ELEGANCE,Petrol,Female,No tow/articulation,Going ahead other,Car,No
4,478591,201001BS70008,A,Slight,2010,2010-01-04,Monday,51.484361,Darkness - lights lit,Kensington and Chelsea,...,Journey as part of work,Mid Junction - on roundabout or on main road,VAUXHALL,ZAFIRA ELEGANCE DTI,Heavy oil,Male,No tow/articulation,Turning right,Taxi/Private hire car,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1077648,1556235,2016984130916,B,Slight,2016,2016-10-28,Friday,55.058998,Darkness - lights lit,Dumfries and Galloway,...,Commuting to/from work,Cleared junction or waiting/parked at junction...,SEAT,IBIZA ECOMOTIVE TDI,Heavy oil,Female,No tow/articulation,Going ahead other,Car,No
1077649,1556236,2016984130916,B,Slight,2016,2016-10-28,Friday,55.058998,Darkness - lights lit,Dumfries and Galloway,...,Data missing or out of range,Cleared junction or waiting/parked at junction...,RENAULT,MEGANE DYNAMIQUE 16V,Petrol,Not known,No tow/articulation,Parked,Car,No
1077650,1556237,2016984131116,B,Slight,2016,2016-11-01,Tuesday,55.005033,Daylight,Dumfries and Galloway,...,Not known,Mid Junction - on roundabout or on main road,MINI,MINI COOPER,Petrol,Female,No tow/articulation,Going ahead other,Car,No
1077651,1556238,2016984131316,B,Slight,2016,2016-10-29,Saturday,54.989597,Darkness - lights lit,Dumfries and Galloway,...,Not known,Not at or within 20 metres of junction,VAUXHALL,VECTRA SXI 16V,Petrol,Male,No tow/articulation,Going ahead other,Car,No


In [11]:
# List of Columns
accidents_df.columns

Index(['Unnamed: 0', 'Accident_Index', '1st_Road_Class', 'Accident_Severity',
       'Year', 'Date', 'Day_of_Week', 'Latitude', 'Light_Conditions',
       'Local_Authority_(District)', 'Longitude', 'Number_of_Casualties',
       'Number_of_Vehicles', 'Road_Surface_Conditions', 'Road_Type',
       'Special_Conditions_at_Site', 'Speed_limit', 'Time',
       'Urban_or_Rural_Area', 'Weather_Conditions', 'Age_Band_of_Driver',
       'Age_of_Vehicle', 'Engine_Capacity_.CC.', 'Journey_Purpose_of_Driver',
       'Junction_Location', 'make', 'model', 'Propulsion_Code',
       'Sex_of_Driver', 'Towing_and_Articulation', 'Vehicle_Manoeuvre',
       'Vehicle_Type', 'Was_Vehicle_Left_Hand_Drive'],
      dtype='object')

In [12]:
# Get only the accidents in the Birmingham Local_Authority
birmingham_df = accidents_df[accidents_df["Local_Authority_(District)"] == "Birmingham"]
birmingham_df

Unnamed: 0.1,Unnamed: 0,Accident_Index,1st_Road_Class,Accident_Severity,Year,Date,Day_of_Week,Latitude,Light_Conditions,Local_Authority_(District),...,Journey_Purpose_of_Driver,Junction_Location,make,model,Propulsion_Code,Sex_of_Driver,Towing_and_Articulation,Vehicle_Manoeuvre,Vehicle_Type,Was_Vehicle_Left_Hand_Drive
55941,534528,201020D002713,Unclassified,Slight,2010,2010-01-03,Sunday,52.482901,Daylight,Birmingham,...,Other/Not known (2005-10),Mid Junction - on roundabout or on main road,NISSAN,PRIMERA S TD,Heavy oil,Male,No tow/articulation,Reversing,Car,No
55942,534529,201020D004613,Unclassified,Slight,2010,2010-01-02,Saturday,52.473343,Darkness - lights lit,Birmingham,...,Other/Not known (2005-10),Not at or within 20 metres of junction,NISSAN,MICRA SLX,Petrol,Male,No tow/articulation,Parked,Car,No
55943,534530,201020D006513,Unclassified,Slight,2010,2010-01-04,Monday,52.470252,Daylight,Birmingham,...,Other/Not known (2005-10),Not at or within 20 metres of junction,PEUGEOT,206 SW XT,Petrol,Male,No tow/articulation,Waiting to go - held up,Car,No
55944,534531,201020D006913,Unclassified,Slight,2010,2010-01-05,Tuesday,52.477604,Darkness - lights lit,Birmingham,...,Other/Not known (2005-10),Approaching junction or waiting/parked at junc...,AUDI,A3 SPECIAL EDITION,Petrol,Male,No tow/articulation,Going ahead other,Car,No
55945,534532,201020D008813,Unclassified,Slight,2010,2010-01-05,Tuesday,52.484683,Darkness - lights lit,Birmingham,...,Other/Not known (2005-10),Approaching junction or waiting/parked at junc...,PEUGEOT,406 L HDI (90),Heavy oil,Male,No tow/articulation,Slowing or stopping,Car,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
971978,1450565,2016200180973,B,Slight,2016,2016-12-31,Saturday,52.553219,Darkness - lights lit,Birmingham,...,Not known,Not at or within 20 metres of junction,TOYOTA,YARIS T3,Petrol,Male,No tow/articulation,Waiting to go - held up,Car,No
971979,1450566,2016200180973,B,Slight,2016,2016-12-31,Saturday,52.553219,Darkness - lights lit,Birmingham,...,Not known,Not at or within 20 metres of junction,VAUXHALL,CORSA SXI,Petrol,Not known,No tow/articulation,Waiting to go - held up,Car,No
971980,1450567,2016200182792,A,Fatal,2016,2016-07-06,Wednesday,52.540955,Daylight,Birmingham,...,Other,Approaching junction or waiting/parked at junc...,VOLKSWAGEN,PASSAT SE TDI,Heavy oil,Male,No tow/articulation,Going ahead other,Car,No
971985,1450572,2016200184751,A,Slight,2016,2016-06-05,Sunday,52.465962,Daylight,Birmingham,...,Not known,Approaching junction or waiting/parked at junc...,TOYOTA,YARIS GS,Petrol,Male,No tow/articulation,Going ahead other,Car,No


In [13]:
# Need to convert Accident Severity to Numerical Values for the purpose of plotting heat map

#Just take the Latitude, Longitudue and Severity columns
birmingham_heatmap_df = birmingham_df[["Latitude","Longitude","Accident_Severity"]]
birmingham_heatmap_df

# Add a column to represent accident severity as a numerical value

Slight = 100
Serious = 200
Fatal = 300

birmingham_heatmap_df["Accident_Weight"] = ""

condition = ()

birmingham_heatmap_df.loc[birmingham_heatmap_df["Accident_Severity"] == "Slight", "Accident_Weight"] = 1
birmingham_heatmap_df.loc[birmingham_heatmap_df["Accident_Severity"] == "Serious", "Accident_Weight"] = 10
birmingham_heatmap_df.loc[birmingham_heatmap_df["Accident_Severity"] == "Fatal", "Accident_Weight"] = 100
birmingham_heatmap_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Unnamed: 0,Latitude,Longitude,Accident_Severity,Accident_Weight
55941,52.482901,-1.761121,Slight,1
55942,52.473343,-1.854218,Slight,1
55943,52.470252,-1.828907,Slight,1
55944,52.477604,-1.815186,Slight,1
55945,52.484683,-1.800872,Slight,1
...,...,...,...,...
971978,52.553219,-1.885369,Slight,1
971979,52.553219,-1.885369,Slight,1
971980,52.540955,-1.844602,Fatal,100
971985,52.465962,-1.877603,Slight,1


Gave the Accident Severities Numerical weights in a new column so they could be plotted

In [15]:
# Configure gmaps with API key
gmaps.configure(api_key=g_key)

# Create initial figure
fig = gmaps.figure()

# Create humidity Heatmap layer
locations = birmingham_heatmap_df[["Latitude", "Longitude"]]
humidity_heat_layer = gmaps.heatmap_layer(locations)
fig.add_layer(humidity_heat_layer)

# Had marker layer but removed as far too many markers! (21,504)

#Visualize
fig

Figure(layout=FigureLayout(height='420px'))

Created a google maps plot with a heatmap layer based on just the locations.
The plot indicates that from 2010-2016 almost the entirety of Birmingham has had accidents and indicates a greater number of accidents happen in the areas closer to the city center.

This is likely due to the traffic concentration increasing around the city center and from other plots it was determined that rush hour traffic increases the number of accidents. As the city center is the central hub for business in Birmingham this means that the traffic concentration around the city center would be very high during commuting hours.

In [16]:
# Configure gmaps with API key
gmaps.configure(api_key=g_key)

# Create initial figure
fig = gmaps.figure()

# Create humidity Heatmap layer
locations = birmingham_heatmap_df[["Latitude", "Longitude"]]
humidity_heat_layer = gmaps.heatmap_layer(locations, weights=birmingham_heatmap_df["Accident_Weight"])
fig.add_layer(humidity_heat_layer)

# Had marker layer but removed as far too many markers! (21,504)

#Visualize
fig

Figure(layout=FigureLayout(height='420px'))

The accident locations heatmap was repeated, however this time the Accident Severities were added as weights. Again the map seems to indicate more serious and fatal accidents were concentrated in the city center. However this could be just a repeat of the concentrations of accidents further enhanced by the severity. So this map is not clear.

A marker layer was also attempted, but as there were 21,504 accidents, the markers put too much processing strain on the system and covered almost the entirety of Birmingham. Due to the processing load I was unable to zoom in to a level where the markers were spread out enough. So I discarded the marker layer.

In [17]:
## Heatmap for ALL Accidents
# Configure gmaps with API key
gmaps.configure(api_key=g_key)

# Create initial figure
fig = gmaps.figure()

# Create humidity Heatmap layer
locations = accidents_df[["Latitude", "Longitude"]]
humidity_heat_layer = gmaps.heatmap_layer(locations)
fig.add_layer(humidity_heat_layer)

#Visualize
fig

Figure(layout=FigureLayout(height='420px'))

This is a plot of all of the accidents in the filtered data set (years from 2010-2016) and therefore covers most of the United Kingdom.

It is evident that most of the accidents are concentrated in Large Cities and Towns (Urban areas) where there is a large population and therefore concentration of traffic, which would have led to a greater number of accidents.