<h1>UK ROAD ACCIDENT DATA ANALYSIS</h1>

<h2>INCLUSIVE YEAR 2019-2022</h2>

<H3>Analyst : Mark Anthony D. Trijo</H3>
<hr>

<h2>Data Preparation</h2>

<h3>Importing Libraries</h3>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import f_oneway
import warnings
warnings.filterwarnings('ignore')

<h2>Data Frame</h2>
<hr>

In [2]:
accident = pd.read_csv('datasets//accident_data.csv')

In [3]:
accident

Unnamed: 0,Index,Accident_Severity,Accident Date,Latitude,Light_Conditions,District Area,Longitude,Number_of_Casualties,Number_of_Vehicles,Road_Surface_Conditions,Road_Type,Urban_or_Rural_Area,Weather_Conditions,Vehicle_Type
0,200701BS64157,Serious,5/6/2019,51.506187,Darkness - lights lit,Kensington and Chelsea,-0.209082,1,2,Dry,Single carriageway,Urban,Fine no high winds,Car
1,200701BS65737,Serious,2/7/2019,51.495029,Daylight,Kensington and Chelsea,-0.173647,1,2,Wet or damp,Single carriageway,Urban,Raining no high winds,Car
2,200701BS66127,Serious,26-08-2019,51.517715,Darkness - lighting unknown,Kensington and Chelsea,-0.210215,1,3,Dry,,Urban,,Taxi/Private hire car
3,200701BS66128,Serious,16-08-2019,51.495478,Daylight,Kensington and Chelsea,-0.202731,1,4,Dry,Single carriageway,Urban,Fine no high winds,Bus or coach (17 or more pass seats)
4,200701BS66837,Slight,3/9/2019,51.488576,Darkness - lights lit,Kensington and Chelsea,-0.192487,1,2,Dry,,Urban,,Other vehicle
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
660674,201091NM01760,Slight,18-02-2022,57.374005,Daylight,Highland,-3.467828,2,1,Dry,Single carriageway,Rural,Fine no high winds,Car
660675,201091NM01881,Slight,21-02-2022,57.232273,Darkness - no lighting,Highland,-3.809281,1,1,Frost or ice,Single carriageway,Rural,Fine no high winds,Car
660676,201091NM01935,Slight,23-02-2022,57.585044,Daylight,Highland,-3.862727,1,3,Frost or ice,Single carriageway,Rural,Fine no high winds,Car
660677,201091NM01964,Serious,23-02-2022,57.214898,Darkness - no lighting,Highland,-3.823997,1,2,Wet or damp,Single carriageway,Rural,Fine no high winds,Motorcycle over 500cc


<h3>Converting Data Field into DATE TIME data type</h3>

In [4]:
accident['Accident Date'] = pd.to_datetime(accident['Accident Date'], dayfirst = True, errors = "coerce")

<h3>Converting Data Field into category data type</h3>

In [5]:
accident.dtypes

Index                              object
Accident_Severity                  object
Accident Date              datetime64[ns]
Latitude                          float64
Light_Conditions                   object
District Area                      object
Longitude                         float64
Number_of_Casualties                int64
Number_of_Vehicles                  int64
Road_Surface_Conditions            object
Road_Type                          object
Urban_or_Rural_Area                object
Weather_Conditions                 object
Vehicle_Type                       object
dtype: object

In [6]:
accident['Accident_Severity'] = accident['Accident_Severity'].astype('category')
accident['Light_Conditions'] = accident['Light_Conditions'].astype('category')
accident['District Area'] = accident['District Area'].astype('category')
accident['Number_of_Casualties'] = accident['Number_of_Casualties'].astype('category')
accident['Number_of_Vehicles'] = accident['Number_of_Vehicles'].astype('category')
accident['Road_Surface_Conditions'] = accident['Road_Surface_Conditions'].astype('category')
accident['Road_Type'] = accident['Road_Type'].astype('category')
accident['Urban_or_Rural_Area'] = accident['Urban_or_Rural_Area'].astype('category')
accident['Weather_Conditions'] = accident['Weather_Conditions'].astype('category')
accident['Vehicle_Type'] = accident['Vehicle_Type'].astype('category')

<h3>Data Cleansing</h3>

In [7]:
accident.isnull().sum()

Index                           0
Accident_Severity               0
Accident Date              395672
Latitude                       25
Light_Conditions                0
District Area                   0
Longitude                      26
Number_of_Casualties            0
Number_of_Vehicles              0
Road_Surface_Conditions       726
Road_Type                    4520
Urban_or_Rural_Area            15
Weather_Conditions          14128
Vehicle_Type                    0
dtype: int64

In [8]:
accident['Accident Date'] = accident['Accident Date'].fillna(accident['Accident Date'].mean())
accident['Latitude'] = accident['Latitude'].fillna(accident['Latitude'].mean())
accident['Longitude'] = accident['Longitude'].fillna(accident['Longitude'].mean())
accident['Road_Surface_Conditions'] = accident['Road_Surface_Conditions'].fillna(accident['Road_Surface_Conditions'].mode()[0])
accident['Road_Type'] = accident['Road_Type'].fillna(accident['Road_Type'].mode()[0])
accident['Weather_Conditions'] = accident['Weather_Conditions'].fillna(accident['Weather_Conditions'].mode()[0])
accident['Urban_or_Rural_Area'] = accident['Urban_or_Rural_Area'].fillna(accident['Urban_or_Rural_Area'].mode()[0])

<h2>1. Accident Severity by Weather Conditions</h2>

In [9]:
accident_severity_by_weather = accident.groupby('Weather_Conditions')['Accident_Severity'].value_counts()

In [10]:
accident_severity_by_weather

Weather_Conditions     Accident_Severity
Fine + high winds      Slight                 7134
                       Serious                1245
                       Fatal                   175
Fine no high winds     Slight               454521
                       Serious               73285
                       Fatal                  7207
Fog or mist            Slight                 2963
                       Serious                 483
                       Fatal                    82
Other                  Slight                15184
                       Serious                1801
                       Fatal                   165
Raining + high winds   Slight                 8209
                       Serious                1261
                       Fatal                   145
Raining no high winds  Slight                69380
                       Serious                9468
                       Fatal                   848
Snowing + high winds   Slight            

<h2>2. Accident Severity by Time of Day
</h2>

In [32]:
accident['Hour'] = accident['Accident Date'].dt.hour
severity_by_hour = accident.groupby('Hour')['Accident_Severity'].value_counts()
severity_by_hour

Hour  Accident_Severity
0     Slight               226210
      Serious               35293
      Fatal                  3504
16    Slight               337591
      Serious               52924
      Fatal                  5157
Name: count, dtype: int64

<h2>3. Accidents by Road Type
</h2>

In [33]:
accidents_by_road_type = accident.groupby('Road_Type').size()
accidents_by_road_type

Road_Type
Dual carriageway       99424
One way street         13559
Roundabout             43992
Single carriageway    496663
Slip road               7041
dtype: int64

<h2>4. Accidents by Urban or Rural Areas
</h2>

In [34]:
accidents_by_area = accident.groupby('Urban_or_Rural_Area').size()
accidents_by_area

Urban_or_Rural_Area
Rural          238990
Unallocated        11
Urban          421678
dtype: int64

<h2>5. Number of Casualties vs. Accident Severity
</h2>

In [35]:
casualties_by_severity = accident.groupby('Accident_Severity')['Number_of_Casualties'].count()
casualties_by_severity

Accident_Severity
Fatal        8661
Serious     88217
Slight     563801
Name: Number_of_Casualties, dtype: int64

<h2>6. Accident Frequency by District Area
</h2>

In [36]:
accidents_by_district = accident.groupby('District Area').size()
accidents_by_district

District Area
Aberdeen City    1323
Aberdeenshire    1930
Adur              619
Allerdale        1128
Alnwick           232
                 ... 
Wychavon         1361
Wycombe          1729
Wyre             1238
Wyre Forest       969
York             1897
Length: 422, dtype: int64

<h2>7. Vehicle Type vs. Accident Severity
</h2>

In [37]:
severity_by_vehicle_type = accident.groupby(['Vehicle_Type','Accident_Severity']).count()
severity_by_vehicle_type

Unnamed: 0_level_0,Unnamed: 1_level_0,Index,Accident Date,Latitude,Light_Conditions,District Area,Longitude,Number_of_Casualties,Number_of_Vehicles,Road_Surface_Conditions,Road_Type,Urban_or_Rural_Area,Weather_Conditions,Hour
Vehicle_Type,Accident_Severity,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Agricultural vehicle,Fatal,21,21,21,21,21,21,21,21,21,21,21,21,21
Agricultural vehicle,Serious,282,282,282,282,282,282,282,282,282,282,282,282,282
Agricultural vehicle,Slight,1644,1644,1644,1644,1644,1644,1644,1644,1644,1644,1644,1644,1644
Bus or coach (17 or more pass seats),Fatal,325,325,325,325,325,325,325,325,325,325,325,325,325
Bus or coach (17 or more pass seats),Serious,3373,3373,3373,3373,3373,3373,3373,3373,3373,3373,3373,3373,3373
Bus or coach (17 or more pass seats),Slight,22180,22180,22180,22180,22180,22180,22180,22180,22180,22180,22180,22180,22180
Car,Fatal,6577,6577,6577,6577,6577,6577,6577,6577,6577,6577,6577,6577,6577
Car,Serious,66461,66461,66461,66461,66461,66461,66461,66461,66461,66461,66461,66461,66461
Car,Slight,424954,424954,424954,424954,424954,424954,424954,424954,424954,424954,424954,424954,424954
Data missing or out of range,Fatal,0,0,0,0,0,0,0,0,0,0,0,0,0


<h2>8. Impact of Light Conditions on Accident Severity</h2>

In [38]:
accidents_by_light_condition = accident.groupby(['Light_Conditions','Accident_Severity']).count()
accidents_by_light_condition

Unnamed: 0_level_0,Unnamed: 1_level_0,Index,Accident Date,Latitude,District Area,Longitude,Number_of_Casualties,Number_of_Vehicles,Road_Surface_Conditions,Road_Type,Urban_or_Rural_Area,Weather_Conditions,Vehicle_Type,Hour
Light_Conditions,Accident_Severity,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Darkness - lighting unknown,Fatal,68,68,68,68,68,68,68,68,68,68,68,68,68
Darkness - lighting unknown,Serious,794,794,794,794,794,794,794,794,794,794,794,794,794
Darkness - lighting unknown,Slight,5622,5622,5622,5622,5622,5622,5622,5622,5622,5622,5622,5622,5622
Darkness - lights lit,Fatal,1860,1860,1860,1860,1860,1860,1860,1860,1860,1860,1860,1860,1860
Darkness - lights lit,Serious,19130,19130,19130,19130,19130,19130,19130,19130,19130,19130,19130,19130,19130
Darkness - lights lit,Slight,108345,108345,108345,108345,108345,108345,108345,108345,108345,108345,108345,108345,108345
Darkness - lights unlit,Fatal,45,45,45,45,45,45,45,45,45,45,45,45,45
Darkness - lights unlit,Serious,360,360,360,360,360,360,360,360,360,360,360,360,360
Darkness - lights unlit,Slight,2138,2138,2138,2138,2138,2138,2138,2138,2138,2138,2138,2138,2138
Darkness - no lighting,Fatal,1612,1612,1612,1612,1612,1612,1612,1612,1612,1612,1612,1612,1612


<h2>9. Number of Vehicles Involved in Accidents by Severity
</h2>

In [39]:
accidents_by_vehicles_severity = accident.groupby(['Number_of_Vehicles', 'Accident_Severity']).size()
accidents_by_vehicles_severity

Number_of_Vehicles  Accident_Severity
1                   Fatal                  3885
                    Serious               38940
                    Slight               157962
2                   Fatal                  3467
                    Serious               41578
                    Slight               346950
3                   Fatal                   900
                    Serious                5808
                    Slight                46098
4                   Fatal                   272
                    Serious                1340
                    Slight                 9688
5                   Fatal                    70
                    Serious                 326
                    Slight                 2068
6                   Fatal                    28
                    Serious                 121
                    Slight                  609
7                   Fatal                    17
                    Serious                  63
  

<h2>10.Impact of Road Surface Conditions on Accident Frequency</h2>

In [40]:
accidents_by_surface_condition = accident.groupby('Road_Surface_Conditions').size()
accidents_by_surface_condition

Road_Surface_Conditions
Dry                     448547
Flood over 3cm. deep      1017
Frost or ice             18517
Snow                      5890
Wet or damp             186708
dtype: int64

<h2>11.Weather Conditions and Number of Casualties</h2>

In [41]:
casualties_by_weather = accident.groupby(['Weather_Conditions'])['Number_of_Casualties'].count()
casualties_by_weather

Weather_Conditions
Fine + high winds          8554
Fine no high winds       535013
Fog or mist                3528
Other                     17150
Raining + high winds       9615
Raining no high winds     79696
Snowing + high winds        885
Snowing no high winds      6238
Name: Number_of_Casualties, dtype: int64

<h2>12.Accidents by Road Surface and Accident Severity</h2>

In [42]:
accidents_by_surface_severity = accident.groupby(['Road_Surface_Conditions', 'Accident_Severity']).size()
accidents_by_surface_severity

Road_Surface_Conditions  Accident_Severity
Dry                      Fatal                  5790
                         Serious               61708
                         Slight               381049
Flood over 3cm. deep     Fatal                    23
                         Serious                 152
                         Slight                  842
Frost or ice             Fatal                   193
                         Serious                2007
                         Slight                16317
Snow                     Fatal                    35
                         Serious                 565
                         Slight                 5290
Wet or damp              Fatal                  2620
                         Serious               23785
                         Slight               160303
dtype: int64

<h2>13.Casualties by Road Type</h2>

In [43]:
casualties_by_road_type = accident.groupby('Road_Type')['Number_of_Casualties'].count()
casualties_by_road_type

Road_Type
Dual carriageway       99424
One way street         13559
Roundabout             43992
Single carriageway    496663
Slip road               7041
Name: Number_of_Casualties, dtype: int64

<h2>14.Accidents by Weather and Light Conditions</h2>

In [44]:
accidents_by_weather_light = accident.groupby(['Weather_Conditions', 'Light_Conditions']).size()
accidents_by_weather_light

Weather_Conditions     Light_Conditions           
Fine + high winds      Darkness - lighting unknown        59
                       Darkness - lights lit            1852
                       Darkness - lights unlit            51
                       Darkness - no lighting            796
                       Daylight                         5796
Fine no high winds     Darkness - lighting unknown      5333
                       Darkness - lights lit           93958
                       Darkness - lights unlit          1745
                       Darkness - no lighting          25251
                       Daylight                       408726
Fog or mist            Darkness - lighting unknown        65
                       Darkness - lights lit             921
                       Darkness - lights unlit            37
                       Darkness - no lighting            862
                       Daylight                         1643
Other                  Darkness - 

<h2>15.Accidents by Urban or Rural Areas and Road Type</h2>

In [45]:
accidents_by_area_road_type = accident.groupby(['Urban_or_Rural_Area', 'Road_Type']).size()
accidents_by_area_road_type

Urban_or_Rural_Area  Road_Type         
Rural                Dual carriageway       48715
                     One way street          1193
                     Roundabout             15545
                     Single carriageway    169243
                     Slip road               4294
Unallocated          Dual carriageway           1
                     One way street             0
                     Roundabout                 1
                     Single carriageway         9
                     Slip road                  0
Urban                Dual carriageway       50708
                     One way street         12366
                     Roundabout             28446
                     Single carriageway    327411
                     Slip road               2747
dtype: int64

<h2>16.Correlation Between Vehicle Type and Road Type</h2>

In [46]:
vehicle_by_road_type = accident.groupby(['Vehicle_Type', 'Road_Type']).size()
vehicle_by_road_type

Vehicle_Type                         Road_Type         
Agricultural vehicle                 Dual carriageway        293
                                     One way street           49
                                     Roundabout              129
                                     Single carriageway     1449
                                     Slip road                27
                                                           ...  
Van / Goods 3.5 tonnes mgw or under  Dual carriageway       5107
                                     One way street          723
                                     Roundabout             2294
                                     Single carriageway    25675
                                     Slip road               361
Length: 80, dtype: int64

<h2>16.Number of Accidents per Vehicle Type in Urban vs Rural Areas</h2>

In [47]:
accidents_by_vehicle_area = accident.groupby(['Vehicle_Type', 'Urban_or_Rural_Area']).size()
accidents_by_vehicle_area

Vehicle_Type                           Urban_or_Rural_Area
Agricultural vehicle                   Rural                     675
                                       Unallocated                 0
                                       Urban                    1272
Bus or coach (17 or more pass seats)   Rural                    9025
                                       Unallocated                 2
                                       Urban                   16851
Car                                    Rural                  181922
                                       Unallocated                 8
                                       Urban                  316062
Data missing or out of range           Rural                       0
                                       Unallocated                 0
                                       Urban                       6
Goods 7.5 tonnes mgw and over          Rural                    6156
                                       Unall

<h2>17.Total Number of Accidents by Severity</h2>

In [48]:
accident_severity_count = accident['Accident_Severity'].value_counts()
accident_severity_count

Accident_Severity
Slight     563801
Serious     88217
Fatal        8661
Name: count, dtype: int64

<h2>18.Total Number of Accidents by Light Conditions</h2>

In [49]:
light_conditions_count = accident['Light_Conditions'].value_counts()
light_conditions_count


Light_Conditions
Daylight                       484880
Darkness - lights lit          129335
Darkness - no lighting          37437
Darkness - lighting unknown      6484
Darkness - lights unlit          2543
Name: count, dtype: int64

<h2>19.Accidents by Time of Day (Hour)</h2>

In [50]:
accidents_by_hour = accident['Hour'].value_counts().sort_index()
accidents_by_hour


Hour
0     265007
16    395672
Name: count, dtype: int64

<h2>20.Distribution of Accidents by Weather Conditions</h2>

In [51]:
weather_conditions_count = accident['Weather_Conditions'].value_counts()
weather_conditions_count

Weather_Conditions
Fine no high winds       535013
Raining no high winds     79696
Other                     17150
Raining + high winds       9615
Fine + high winds          8554
Snowing no high winds      6238
Fog or mist                3528
Snowing + high winds        885
Name: count, dtype: int64