In [1]:
# Dependencies and Setup
%matplotlib notebook
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import scipy.stats as sts
from scipy.stats import linregress
import numpy as np
import seaborn as sns

In [2]:
#Read in cleased data file
file_6 = "Resources/time_bin.csv"
time_df = pd.read_csv(file_6)

time_df.head()

Unnamed: 0,Index,Year,Date,Day,Time,Severity,Casualties,Location,Gender,Time_f,Time_Bin
0,201001BS70003,2010,2010-01-11,Monday,07:30,Slight,1,Mid Junction - on roundabout or on main road,Female,7,Morning Commute
1,201001BS70004,2010,2010-01-11,Monday,18:35,Slight,1,Mid Junction - on roundabout or on main road,Male,18,Evening Commute
2,201001BS70007,2010,2010-01-02,Saturday,21:21,Slight,1,Mid Junction - on roundabout or on main road,Female,21,Late PM
3,201001BS70007,2010,2010-01-02,Saturday,21:21,Slight,1,Mid Junction - on roundabout or on main road,Female,21,Late PM
4,201001BS70008,2010,2010-01-04,Monday,20:35,Slight,1,Mid Junction - on roundabout or on main road,Male,20,Late PM


In [3]:
accident_sev_tod = time_df.groupby(['Time_Bin','Severity'])['Index'].nunique().unstack().reset_index()
sorted_df=accident_sev_tod.sort_values(by=['Fatal','Serious','Slight'], ascending=False)

bar1 = sorted_df['Fatal']
bar2 = sorted_df['Serious']
bar3 = sorted_df['Slight']

r = sorted_df['Time_Bin']

sum12 = bar1+bar2

plt.figure(figsize=(10,6))
plt.title('Time of the day vs Accidents')
plt.xticks(rotation = 50, horizontalalignment="right")

plt.xlabel("Time of the day")
plt.ylabel("Accident counts")
plt.bar(r,bar1,color='red', label='Fatal')
plt.bar(r,bar2, bottom=bar1,color='orange', label='Serious')
plt.bar(r,bar3,bottom=sum12,color='green',label='Slight')
plt.legend(loc="upper right")
plt.savefig("Images/Accident_distribution_time_of_day.png", bbox_inches = "tight")
plt.show()

<IPython.core.display.Javascript object>

In [5]:
accident_counts = time_df.groupby(['Time_Bin', 'Severity']).size()
accident_counts = accident_counts.unstack(['Severity'])
acc_counts_dropped = accident_counts.drop(accident_counts.columns[[1,2]],axis=1)
total=acc_counts_dropped['Fatal'].sum()
acc_counts_dropped['Percent Fatality']=round(acc_counts_dropped['Fatal']/total*100, 2)
final_acc_df = acc_counts_dropped.reset_index()
plt.figure(figsize=(8,6))
explode = [0.05,0.05,0.05,0.05,0.05]
my_data = acc_counts_dropped['Percent Fatality'].tolist()
my_labels = final_acc_df['Time_Bin'].tolist()
plt.pie(my_data,labels=my_labels,autopct='%1.1f%%', explode=explode)
plt.rcParams['font.size'] = 13
plt.title('Accident Fatality Percentage by Time of Day')
plt.savefig("Images/Accident_Fatality_%_by_Time_of_Day.png", bbox_inches = "tight")
plt.show()

<IPython.core.display.Javascript object>

In [6]:
accident_sev_loc = time_df.groupby(['Location','Severity'])['Index'].nunique().unstack().reset_index()
sorted_df=accident_sev_loc.sort_values(by=['Fatal','Serious','Slight'], ascending=False)

bar1 = sorted_df['Fatal']
bar2 = sorted_df['Serious']
bar3 = sorted_df['Slight']

r = sorted_df['Location']

sum12 = bar1+bar2

plt.figure(figsize=(10,6))
plt.title('Location on road vs Accidents')
plt.xticks(rotation = 50, horizontalalignment="right")

plt.xlabel("Location on the road")
plt.ylabel("Accident counts")
plt.bar(r,bar1,color='red', label='Fatal')
plt.bar(r,bar2, bottom=bar1,color='orange', label='Serious')
plt.bar(r,bar3,bottom=sum12,color='green',label='Slight')
plt.legend(loc="upper right")
plt.savefig("Images/Accident_distribution_location_on_road.png", bbox_inches = "tight")
plt.show()

<IPython.core.display.Javascript object>

In [7]:
accident_sev_gen = time_df.groupby(['Gender','Severity'])['Index'].nunique().unstack().reset_index()
sorted_df=accident_sev_gen.sort_values(by=['Fatal','Serious','Slight'], ascending=False)

bar1 = sorted_df['Fatal']
bar2 = sorted_df['Serious']
bar3 = sorted_df['Slight']

r = sorted_df['Gender']

sum12 = bar1+bar2

plt.figure(figsize=(10,6))
plt.title('Gender vs Accidents')
plt.xticks(rotation = 50, horizontalalignment="right")

plt.xlabel("Gender")
plt.ylabel("Accident counts")
plt.bar(r,bar1,color='red', label='Fatal')
plt.bar(r,bar2, bottom=bar1,color='orange', label='Serious')
plt.bar(r,bar3,bottom=sum12,color='green',label='Slight')
plt.legend(loc="upper right")
plt.savefig("Images/Accident_distribution_Gender.png", bbox_inches = "tight")
plt.show()

<IPython.core.display.Javascript object>

In [8]:
accident_sev_time = time_df.groupby(['Time_f','Severity'])['Index'].nunique().unstack().reset_index()
sorted_df=accident_sev_time.sort_values(by=['Fatal','Serious','Slight'], ascending=False)

bar1 = sorted_df['Fatal']
bar2 = sorted_df['Serious']
bar3 = sorted_df['Slight']

r = sorted_df['Time_f']

sum12 = bar1+bar2

plt.figure(figsize=(10,6))
plt.title('Time of the day vs Accidents')
plt.xticks(rotation = 50, horizontalalignment="right")

plt.xlabel("Time of the day")
plt.ylabel("Accident counts")
plt.bar(r,bar1,color='red', label='Fatal')
plt.bar(r,bar2, bottom=bar1,color='orange', label='Serious')
plt.bar(r,bar3,bottom=sum12,color='green',label='Slight')
plt.legend(loc="upper right")
plt.savefig("Images/Accident_distribution_time_of_day_hrs.png", bbox_inches = "tight")
plt.show()

<IPython.core.display.Javascript object>

In [9]:
accident_sev_yr = time_df.groupby(['Year','Severity'])['Index'].nunique().unstack().reset_index()
sorted_df=accident_sev_yr.sort_values(by=['Fatal','Serious','Slight'], ascending=False)

bar1 = sorted_df['Fatal']
bar2 = sorted_df['Serious']
bar3 = sorted_df['Slight']

r = sorted_df['Year']

sum12 = bar1+bar2

plt.figure(figsize=(10,6))
plt.title('Year vs Accidents')
plt.xticks(rotation = 50, horizontalalignment="right")

plt.xlabel("Year")
plt.ylabel("Accident counts")
plt.bar(r,bar1,color='red', label='Fatal')
plt.bar(r,bar2, bottom=bar1,color='orange', label='Serious')
plt.bar(r,bar3,bottom=sum12,color='green',label='Slight')
plt.legend(loc="upper left")
plt.savefig("Images/Accident_distribution_year.png", bbox_inches = "tight")
plt.show()

<IPython.core.display.Javascript object>

In [20]:
#Read in data file
file_7 = "Resources/traffic_vol_2010_to2016.csv"
vol_df = pd.read_csv(file_7)

vol_df.head(7)

Unnamed: 0,Year,Cars and Taxis,Light Commercial\nVehicles 1,Heavy Goods Vehicles 2,Motorcycles,Buses & Coaches,All motor vehicles
0,2010,241.9,41.4,16.4,2.9,3.2,305.8
1,2011,244.3,42.0,16.0,2.9,3.0,308.2
2,2012,245.5,42.2,15.6,2.9,2.8,309.0
3,2013,246.6,43.8,15.8,2.8,2.9,311.9
4,2014,253.5,46.6,16.2,2.9,2.9,322.2
5,2015,258.1,48.9,16.8,2.9,2.8,329.6
6,2016,263.9,51.7,17.0,3.0,2.6,338.2


In [10]:
time_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1077653 entries, 0 to 1077652
Data columns (total 11 columns):
 #   Column      Non-Null Count    Dtype 
---  ------      --------------    ----- 
 0   Index       1077653 non-null  object
 1   Year        1077653 non-null  int64 
 2   Date        1077653 non-null  object
 3   Day         1077653 non-null  object
 4   Time        1077653 non-null  object
 5   Severity    1077653 non-null  object
 6   Casualties  1077653 non-null  int64 
 7   Location    1077653 non-null  object
 8   Gender      1077653 non-null  object
 9   Time_f      1077653 non-null  int64 
 10  Time_Bin    1077653 non-null  object
dtypes: int64(3), object(8)
memory usage: 90.4+ MB


In [11]:
#Read in cleased data file
file_7 = "Resources/all.csv"
all_df = pd.read_csv(file_7)

all_df.info()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1077653 entries, 0 to 1077652
Data columns (total 33 columns):
 #   Column                       Non-Null Count    Dtype  
---  ------                       --------------    -----  
 0   Unnamed: 0                   1077653 non-null  int64  
 1   Accident_Index               1077653 non-null  object 
 2   1st_Road_Class               1077653 non-null  object 
 3   Accident_Severity            1077653 non-null  object 
 4   Year                         1077653 non-null  int64  
 5   Date                         1077653 non-null  object 
 6   Day_of_Week                  1077653 non-null  object 
 7   Latitude                     1077653 non-null  float64
 8   Light_Conditions             1077653 non-null  object 
 9   Local_Authority_(District)   1077653 non-null  object 
 10  Longitude                    1077653 non-null  float64
 11  Number_of_Casualties         1077653 non-null  int64  
 12  Number_of_Vehicles           1077653 non-n

In [15]:
accident_sev_dist = all_df.groupby(['Local_Authority_(District)','Accident_Severity'])['Accident_Index'].nunique().unstack().reset_index()
sorted_df=accident_sev_dist.sort_values(by=['Fatal','Serious','Slight'], ascending=True)

bar1 = sorted_df['Fatal']
#bar2 = sorted_df['Serious']
#bar3 = sorted_df['Slight']

r = sorted_df['Local_Authority_(District)']

#sum12 = bar1+bar2

plt.figure(figsize=(30,80))
plt.title('Local Authority (District) vs Fatal Accidents')
plt.xticks(rotation = 0, horizontalalignment="right")

plt.xlabel("Number of Fatal accidents")
plt.ylabel("Local_Authority_(District)")
plt.barh(r,bar1,color='red', label='Fatal')
#plt.barh(r,bar2, bottom=bar1,color='green', label='Serious')
#plt.barh(r,bar3,bottom=sum12,color='orange',label='Slight')
plt.legend(loc="upper right")
plt.savefig("Images/Fatal_Accident_distribution_Local_Authority_(District).png", bbox_inches = "tight")
plt.show()

<IPython.core.display.Javascript object>

In [12]:
accident_sev_dist = all_df.groupby(['Local_Authority_(District)','Accident_Severity'])['Accident_Index'].nunique().unstack().reset_index()
sorted_df=accident_sev_dist.sort_values(by=['Serious','Slight'], ascending=True)

bar1 = sorted_df['Serious']
#bar2 = sorted_df['Serious']
#bar3 = sorted_df['Slight']

r = sorted_df['Local_Authority_(District)']

#sum12 = bar1+bar2

plt.figure(figsize=(30,80))
plt.title('Local Authority (District) vs Serious Accidents')
plt.xticks(rotation = 0, horizontalalignment="right")

plt.xlabel("Number of Serious accidents")
plt.ylabel("Local_Authority_(District)")
plt.barh(r,bar1,color='orange', label='Serious')
#plt.barh(r,bar2, bottom=bar1,color='green', label='Serious')
#plt.barh(r,bar3,bottom=sum12,color='orange',label='Slight')
plt.legend(loc="upper right")
plt.savefig("Images/Serious_Accident_distribution_Local_Authority_(District).png", bbox_inches = "tight")
plt.show()

<IPython.core.display.Javascript object>

In [13]:
accident_sev_dist = all_df.groupby(['Local_Authority_(District)','Accident_Severity'])['Accident_Index'].nunique().unstack().reset_index()
sorted_df=accident_sev_dist.sort_values(by=['Slight'], ascending=True)

bar1 = sorted_df['Slight']
#bar2 = sorted_df['Serious']
#bar3 = sorted_df['Slight']

r = sorted_df['Local_Authority_(District)']

#sum12 = bar1+bar2

plt.figure(figsize=(30,80))
plt.title('Local Authority (District) vs Slight Accidents')
plt.xticks(rotation = 0, horizontalalignment="right")

plt.xlabel("Number of Slight accidents")
plt.ylabel("Local_Authority_(District)")
plt.barh(r,bar1,color='green', label='Slight')
#plt.barh(r,bar2, bottom=bar1,color='green', label='Serious')
#plt.barh(r,bar3,bottom=sum12,color='orange',label='Slight')
plt.legend(loc="upper right")
plt.savefig("Images/Slight_Accident_distribution_Local_Authority_(District).png", bbox_inches = "tight")
plt.show()

<IPython.core.display.Javascript object>

In [22]:
accident_sev_dist

Accident_Severity,Local_Authority_(District),Fatal,Serious,Slight
0,Aberdeen City,31,376,1077
1,Aberdeenshire,103,740,1568
2,Adur,7,151,658
3,Allerdale,30,171,1079
4,Amber Valley,17,200,1127
...,...,...,...,...
375,Wychavon,30,220,1211
376,Wycombe,26,283,1510
377,Wyre,17,264,1052
378,Wyre Forest,20,141,967


In [14]:
accident_sev_type = all_df.groupby(['Vehicle_Type','Accident_Severity'])['Accident_Index'].nunique().unstack().reset_index()
sorted_df=accident_sev_type.sort_values(by=['Fatal','Serious','Slight'], ascending=False)

bar1 = sorted_df['Fatal']
bar2 = sorted_df['Serious']
bar3 = sorted_df['Slight']

r = sorted_df['Vehicle_Type']

sum12 = bar1+bar2

plt.figure(figsize=(10,6))
plt.title('Vehicle Type vs Accidents')
plt.xticks(rotation = 50, horizontalalignment="right")

plt.xlabel("Vehicle_Type")
plt.ylabel("Accident counts")
plt.bar(r,bar1,color='red', label='Fatal')
plt.bar(r,bar2, bottom=bar1,color='orange', label='Serious')
plt.bar(r,bar3,bottom=sum12,color='green',label='Slight')
plt.legend(loc="upper right")
plt.savefig("Images/Accident_distribution_Vehicle_Type.png", bbox_inches = "tight")
plt.show()

<IPython.core.display.Javascript object>

In [21]:
accident_sev_type

Accident_Severity,Vehicle_Type,Fatal,Serious,Slight
0,Agricultural vehicle,13.0,45.0,142.0
1,Bus or coach (17 or more pass seats),57.0,531.0,3961.0
2,Car,6932.0,81481.0,569104.0
3,Electric motorcycle,,8.0,27.0
4,Goods 7.5 tonnes mgw and over,218.0,694.0,3180.0
5,Goods over 3.5t. and under 7.5t,55.0,296.0,1595.0
6,Goods vehicle - unknown weight,2.0,30.0,179.0
7,Minibus (8 - 16 passenger seats),27.0,216.0,1110.0
8,Motorcycle - unknown cc,3.0,60.0,156.0
9,Motorcycle 125cc and under,225.0,6572.0,24312.0


In [15]:
accident_sev_age = all_df.groupby(['Age_Band_of_Driver','Accident_Severity'])['Accident_Index'].nunique().unstack().reset_index()
sorted_df=accident_sev_age.sort_values(by=['Fatal','Serious','Slight'], ascending=False)

bar1 = sorted_df['Fatal']
bar2 = sorted_df['Serious']
bar3 = sorted_df['Slight']

r = sorted_df['Age_Band_of_Driver']

sum12 = bar1+bar2

plt.figure(figsize=(10,6))
plt.title('Age Band vs Accidents')
plt.xticks(rotation = 50, horizontalalignment="right")

plt.xlabel("Age Band")
plt.ylabel("Accident counts")
plt.bar(r,bar1,color='red', label='Fatal')
plt.bar(r,bar2, bottom=bar1,color='orange', label='Serious')
plt.bar(r,bar3,bottom=sum12,color='green',label='Slight')
plt.legend(loc="upper right")
plt.savefig("Images/Accident_distribution_Age_Band.png", bbox_inches = "tight")
plt.show()

<IPython.core.display.Javascript object>