<h1>---Importing Necessary Files---</h1>

In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from scipy.stats import f_oneway

<hr>
<h1>---Making a DataFrame for the Dataset---</h1>

In [2]:
# Load the raw accident records into the notebook-wide frame. The relative path
# uses forward slashes, which Python accepts on Windows as well as Linux/macOS;
# a backslash-separated path only resolves on Windows.
ukroadaccident = pd.read_csv('datasets/uk_road_accident.csv')

<hr>
<h1>---Checking if the DataFrame is Working---</h1>

In [3]:
# Show the frame as the cell output; pandas elides the middle rows of a long frame.
ukroadaccident

Unnamed: 0,Index,Accident_Severity,Accident Date,Latitude,Light_Conditions,District Area,Longitude,Number_of_Casualties,Number_of_Vehicles,Road_Surface_Conditions,Road_Type,Urban_or_Rural_Area,Weather_Conditions,Vehicle_Type
0,200701BS64157,Serious,5/6/2019,51.506187,Darkness - lights lit,Kensington and Chelsea,-0.209082,1,2,Dry,Single carriageway,Urban,Fine no high winds,Car
1,200701BS65737,Serious,2/7/2019,51.495029,Daylight,Kensington and Chelsea,-0.173647,1,2,Wet or damp,Single carriageway,Urban,Raining no high winds,Car
2,200701BS66127,Serious,26-08-2019,51.517715,Darkness - lighting unknown,Kensington and Chelsea,-0.210215,1,3,Dry,,Urban,,Taxi/Private hire car
3,200701BS66128,Serious,16-08-2019,51.495478,Daylight,Kensington and Chelsea,-0.202731,1,4,Dry,Single carriageway,Urban,Fine no high winds,Bus or coach (17 or more pass seats)
4,200701BS66837,Slight,3/9/2019,51.488576,Darkness - lights lit,Kensington and Chelsea,-0.192487,1,2,Dry,,Urban,,Other vehicle
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
660674,201091NM01760,Slight,18-02-2022,57.374005,Daylight,Highland,-3.467828,2,1,Dry,Single carriageway,Rural,Fine no high winds,Car
660675,201091NM01881,Slight,21-02-2022,57.232273,Darkness - no lighting,Highland,-3.809281,1,1,Frost or ice,Single carriageway,Rural,Fine no high winds,Car
660676,201091NM01935,Slight,23-02-2022,57.585044,Daylight,Highland,-3.862727,1,3,Frost or ice,Single carriageway,Rural,Fine no high winds,Car
660677,201091NM01964,Serious,23-02-2022,57.214898,Darkness - no lighting,Highland,-3.823997,1,2,Wet or damp,Single carriageway,Rural,Fine no high winds,Motorcycle over 500cc


<h1>---Cleaning the Inconsistencies on the Accident date---</h1>

In [4]:
# Normalise the raw date text in one pass: force string type, trim surrounding
# whitespace, and use '-' as the only separator so '5/6/2019' and '26-08-2019'
# share one shape before parsing.
ukroadaccident['Accident Date'] = (
    ukroadaccident['Accident Date']
    .astype('str')
    .str.strip()
    .str.replace('/', '-')
)

<h1>---Converting the Data Type of the Accident Date---</h1>

In [5]:
# Parse the date text into datetimes. dayfirst=True reads ambiguous values such
# as 5-6-2019 as day-month-year; errors='coerce' turns anything unparseable
# into NaT instead of raising, so check the null counts afterwards.
ukroadaccident['Accident Date'] = pd.to_datetime(ukroadaccident['Accident Date'], dayfirst=True, errors='coerce')

<h1>---Extracting New Columns from Accident Date---</h1>

In [6]:
# Derive calendar columns from the parsed accident date through one shared
# datetime accessor.
accident_date_parts = ukroadaccident['Accident Date'].dt
ukroadaccident['Year'] = accident_date_parts.year
ukroadaccident['Month_Number'] = accident_date_parts.month
ukroadaccident['Month'] = accident_date_parts.month_name()
ukroadaccident['Day'] = accident_date_parts.day
ukroadaccident['DayofWeek'] = accident_date_parts.dayofweek  # Monday=0, Sunday=6

<hr>
<h1>---Checking for Null Values---</h1>

In [7]:
# Count missing values per column to see which columns need imputing.
ukroadaccident.isnull().sum()

Index                          0
Accident_Severity              0
Accident Date                  0
Latitude                      25
Light_Conditions               0
District Area                  0
Longitude                     26
Number_of_Casualties           0
Number_of_Vehicles             0
Road_Surface_Conditions      726
Road_Type                   4520
Urban_or_Rural_Area           15
Weather_Conditions         14128
Vehicle_Type                   0
Year                           0
Month_Number                   0
Month                          0
Day                            0
DayofWeek                      0
dtype: int64

<h1>---Fixing the Null Values---</h1>

<h3>-Numerical Null Values-</h3>

In [8]:
# Overall mean latitude (pandas skips missing values when averaging).
ukroadaccident['Latitude'].mean()

np.float64(52.553865761110956)

In [9]:
# Impute missing latitudes with the mean latitude of the accident's own
# district, so an imputed point stays near where the accident was recorded
# instead of landing on the national centroid. Districts with no known latitude
# at all fall back to the overall mean.
district_mean_latitude = (
    ukroadaccident
    .groupby('District Area', observed=True)['Latitude']
    .transform('mean')
)
ukroadaccident['Latitude'] = (
    ukroadaccident['Latitude']
    .fillna(district_mean_latitude)
    .fillna(ukroadaccident['Latitude'].mean())
)

<p>---------------------------------------------</p>

In [10]:
# Overall mean longitude (pandas skips missing values when averaging).
ukroadaccident['Longitude'].mean()

np.float64(-1.431210368502073)

In [11]:
# Impute missing longitudes with the mean longitude of the accident's own
# district, so an imputed point stays near where the accident was recorded
# instead of landing on the national centroid. Districts with no known
# longitude at all fall back to the overall mean.
district_mean_longitude = (
    ukroadaccident
    .groupby('District Area', observed=True)['Longitude']
    .transform('mean')
)
ukroadaccident['Longitude'] = (
    ukroadaccident['Longitude']
    .fillna(district_mean_longitude)
    .fillna(ukroadaccident['Longitude'].mean())
)

<h3>-Categorical Null Values-</h3>

In [12]:
# Label missing road-surface values with the explicit placeholder 'unaccounted'
# so they stay visible as their own group in later counts.
ukroadaccident['Road_Surface_Conditions'] = ukroadaccident['Road_Surface_Conditions'].fillna('unaccounted')

<p>---------------------------------------------</p>

In [13]:
# Label missing road types with the explicit placeholder 'unaccounted'.
ukroadaccident['Road_Type'] = ukroadaccident['Road_Type'].fillna('unaccounted')

<p>---------------------------------------------</p>

In [14]:
# Most frequent area label; mode() returns a Series, hence the one-row output.
ukroadaccident['Urban_or_Rural_Area'].mode()

0    Urban
Name: Urban_or_Rural_Area, dtype: object

In [15]:
# Replace missing area labels with the most frequent label; mode() returns a
# Series, so [0] picks its first entry.
ukroadaccident['Urban_or_Rural_Area'] = ukroadaccident['Urban_or_Rural_Area'].fillna(ukroadaccident['Urban_or_Rural_Area'].mode()[0])

<p>---------------------------------------------</p>

In [16]:
# Label missing weather values with the explicit placeholder 'unaccounted'.
ukroadaccident['Weather_Conditions'] = ukroadaccident['Weather_Conditions'].fillna('unaccounted')

<h1>---Checking if there are still Null Values---</h1>

In [17]:
# Re-count missing values per column; every count should now be zero.
ukroadaccident.isnull().sum()

Index                      0
Accident_Severity          0
Accident Date              0
Latitude                   0
Light_Conditions           0
District Area              0
Longitude                  0
Number_of_Casualties       0
Number_of_Vehicles         0
Road_Surface_Conditions    0
Road_Type                  0
Urban_or_Rural_Area        0
Weather_Conditions         0
Vehicle_Type               0
Year                       0
Month_Number               0
Month                      0
Day                        0
DayofWeek                  0
dtype: int64

<hr>
<h1>---Checking the Data Type---</h1>

In [18]:
# Print column dtypes and non-null counts before any dtype conversion.
ukroadaccident.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660679 entries, 0 to 660678
Data columns (total 19 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   Index                    660679 non-null  object        
 1   Accident_Severity        660679 non-null  object        
 2   Accident Date            660679 non-null  datetime64[ns]
 3   Latitude                 660679 non-null  float64       
 4   Light_Conditions         660679 non-null  object        
 5   District Area            660679 non-null  object        
 6   Longitude                660679 non-null  float64       
 7   Number_of_Casualties     660679 non-null  int64         
 8   Number_of_Vehicles       660679 non-null  int64         
 9   Road_Surface_Conditions  660679 non-null  object        
 10  Road_Type                660679 non-null  object        
 11  Urban_or_Rural_Area      660679 non-null  object        
 12  Weather_Conditio

<h1>---Fixing the Data Type---</h1>

In [19]:
# Convert the label-like columns to the pandas 'category' dtype, one column at
# a time and in the same order as before.
categorical_columns = [
    'Index',
    'Accident_Severity',
    'Light_Conditions',
    'District Area',
    'Road_Surface_Conditions',
    'Road_Type',
    'Urban_or_Rural_Area',
    'Weather_Conditions',
    'Vehicle_Type',
    'Month',
    'Year',
]
for column_name in categorical_columns:
    ukroadaccident[column_name] = ukroadaccident[column_name].astype('category')

<h1>---Checking again---</h1>

In [20]:
# Print the dtypes again to confirm the category conversions took effect.
ukroadaccident.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660679 entries, 0 to 660678
Data columns (total 19 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   Index                    660679 non-null  category      
 1   Accident_Severity        660679 non-null  category      
 2   Accident Date            660679 non-null  datetime64[ns]
 3   Latitude                 660679 non-null  float64       
 4   Light_Conditions         660679 non-null  category      
 5   District Area            660679 non-null  category      
 6   Longitude                660679 non-null  float64       
 7   Number_of_Casualties     660679 non-null  int64         
 8   Number_of_Vehicles       660679 non-null  int64         
 9   Road_Surface_Conditions  660679 non-null  category      
 10  Road_Type                660679 non-null  category      
 11  Urban_or_Rural_Area      660679 non-null  category      
 12  Weather_Conditio

<hr>
<h1>---20 Questions and Insights---</h1>
<hr>

<h1>~EDA~</h1>

<h1>1. What is the most usual vehicle type?</h1>

In [21]:
# Most frequent vehicle type; mode() returns a Series because ties are possible.
ukroadaccident['Vehicle_Type'].mode()

0    Car
Name: Vehicle_Type, dtype: category
Categories (16, object): ['Agricultural vehicle', 'Bus or coach (17 or more pass seats)', 'Car', 'Data missing or out of range', ..., 'Pedal cycle', 'Ridden horse', 'Taxi/Private hire car', 'Van / Goods 3.5 tonnes mgw or under']

<h2><strong>Insight: </strong> <i>According to the result, the most frequent type of vehicle involved in accidents is the car.</i></h2>

<hr>
<h1>2. What is the usual light condition during the accidents?</h1>

In [22]:
# Most frequent light condition among the recorded accidents.
ukroadaccident['Light_Conditions'].mode()

0    Daylight
Name: Light_Conditions, dtype: category
Categories (5, object): ['Darkness - lighting unknown', 'Darkness - lights lit', 'Darkness - lights unlit', 'Darkness - no lighting', 'Daylight']

<h2> <strong>Insight: </strong> <i>The result shows that accidents are usually happening during daylight.</i> </h2>

<hr>
<h1>3. In terms of road surface conditions, which is the most frequent?</h1>

In [55]:
# Most frequent road surface condition among the recorded accidents.
ukroadaccident['Road_Surface_Conditions'].mode()

0    Dry
Name: Road_Surface_Conditions, dtype: category
Categories (6, object): ['Dry', 'Flood over 3cm. deep', 'Frost or ice', 'Snow', 'Wet or damp', 'unaccounted']

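<h2> <strong>Insight: </strong> <i>Based on the result, most accidents are recorded on dry road surfaces. This reflects how common dry conditions are; it does not show that dry roads are more accident-prone.</i> </h2>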

<hr>
<h1>4. How many number of vehicles are mostly being involved during accidents?</h1>

In [24]:
# Most frequent number of vehicles involved in a single accident.
ukroadaccident['Number_of_Vehicles'].mode()

0    2
Name: Number_of_Vehicles, dtype: int64

<h2> <strong>Insight: </strong> <i>The result shows that two vehicles are mostly involved during road accidents.</i> </h2>

<hr>
<h1>5. What type of road does the accidents usually happen? </h1>

In [25]:
# Most frequent road type among the recorded accidents.
ukroadaccident['Road_Type'].mode()

0    Single carriageway
Name: Road_Type, dtype: category
Categories (6, object): ['Dual carriageway', 'One way street', 'Roundabout', 'Single carriageway', 'Slip road', 'unaccounted']

<h2> <strong>Insight: </strong> <i>According to the result, accidents usually happen on a single carriageway road.</i> </h2>

<h1>6. How many record of accidents per year?</h1>

In [26]:
# Tally accidents per year; value_counts() lists the most frequent year first.
ukroadaccident['Year'].value_counts()

Year
2019    182115
2020    170591
2021    163554
2022    144419
Name: count, dtype: int64

<h2> <strong>Insight: </strong> <i>The result shows that 2019 had the highest number of recorded accidents, with 182,115. The second highest is 2020 with 170,591 records, followed by 2021 with 163,554 records. The year with the fewest records is 2022, with 144,419. </i> </h2>

<h1>7. In terms of weather conditions, when does the least and most accidents record?</h1>

In [27]:
# Tally accidents per weather condition, most frequent first; 'unaccounted' is
# the placeholder used for originally missing values.
ukroadaccident['Weather_Conditions'].value_counts()

Weather_Conditions
Fine no high winds       520885
Raining no high winds     79696
Other                     17150
unaccounted               14128
Raining + high winds       9615
Fine + high winds          8554
Snowing no high winds      6238
Fog or mist                3528
Snowing + high winds        885
Name: count, dtype: int64

<h2> <strong>Insight: </strong> <i>Based on the result, accidents usually happen during 'fine no high winds' weather condition, then the least accident record happens during 'snowing + high winds'.</i> </h2>

<h1>~Aggregation~</h1>

<h1>8. What is the total number of accidents per road type?</h1>

In [28]:
# Count accidents per road type. 'Road_Type' is categorical, so `observed` is
# passed explicitly: it keeps every category in the result and does not rely on
# the implicit pandas default, which is deprecated and scheduled to change.
ukroadaccident.groupby('Road_Type', observed=False).size()

Road_Type
Dual carriageway       99424
One way street         13559
Roundabout             43992
Single carriageway    492143
Slip road               7041
unaccounted             4520
dtype: int64

<h2> <strong>Insight: </strong> <i>We can see from the result that the road type with the highest number of accidents is the Single carriageway, with 492,143 records, followed by the Dual carriageway with 99,424 records. Third is the Roundabout with 43,992 records, then the One way street with 13,559 records, and the road type with the fewest records is the Slip road with 7,041 records. A further 4,520 records have no road type ('unaccounted').</i> </h2>

<h1>9. What is the average of number of casualties involved in accidents by accident severity?</h1>

In [29]:
# Mean number of casualties per accident for each severity level. `observed` is
# explicit because the grouping key is categorical and the implicit pandas
# default is deprecated.
ukroadaccident.groupby('Accident_Severity', observed=False)['Number_of_Casualties'].mean()

Accident_Severity
Fatal      1.903129
Serious    1.467280
Slight     1.331402
Name: Number_of_Casualties, dtype: float64

<h2> <strong>Insight: </strong> <i>The average number of casualties per accident is highest for fatal accidents (1.903129), followed by serious accidents (1.467280), and lowest for slight accidents (1.331402).</i> </h2>

<h1>10. How do accident severities vary across different light conditions?</h1>

In [62]:
# Cross-tabulate accident counts: severity as rows, light condition as columns.
# observed=False keeps combinations that never occur as explicit zero counts,
# independent of the pandas version's default for categorical keys.
(
    ukroadaccident
    .groupby(['Accident_Severity', 'Light_Conditions'], observed=False)
    .size()
    .unstack()
)

Light_Conditions,Darkness - lighting unknown,Darkness - lights lit,Darkness - lights unlit,Darkness - no lighting,Daylight
Accident_Severity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Fatal,68,1860,45,1612,5076
Serious,794,19130,360,7174,60759
Slight,5622,108345,2138,28651,419045


<h2> <strong>Insight: </strong> <i>We can conclude that all the three types of accident severity happens the most during the daylight, while the three of them also happens the least during darkness - lights unlit. </i> </h2>

<h1>11. What is the distribution of accidents by road type and road surface condition?</h1>

In [61]:
# Cross-tabulate accident counts: road type as rows, road surface condition as
# columns. observed=False keeps combinations that never occur as explicit zero
# counts, independent of the pandas version's default for categorical keys.
(
    ukroadaccident
    .groupby(['Road_Type', 'Road_Surface_Conditions'], observed=False)
    .size()
    .unstack()
)

Road_Surface_Conditions,Dry,Flood over 3cm. deep,Frost or ice,Snow,Wet or damp,unaccounted
Road_Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dual carriageway,66205,302,2408,909,29533,67
One way street,10068,5,183,76,3195,32
Roundabout,30698,22,745,232,12209,86
Single carriageway,332698,672,14918,4585,138743,527
Slip road,4714,9,178,60,2074,6
unaccounted,3438,7,85,28,954,8


<h2> <strong>Insight: </strong> <i>The result shows that dry is the most common road surface condition for every road type. Among the recorded conditions (ignoring the 'unaccounted' placeholder), flood over 3cm. deep is the least common for every road type. </i> </h2>

<h1>12. What type of area does accidents happen the most?</h1>

In [32]:
# Count accidents per area type. `observed` is explicit because the grouping
# key is categorical and the implicit pandas default is deprecated.
ukroadaccident.groupby('Urban_or_Rural_Area', observed=False).size()

Urban_or_Rural_Area
Rural          238990
Unallocated        11
Urban          421678
dtype: int64

<h2> <strong>Insight: </strong> <i>According to the result, accidents usually happen at urban areas.</i> </h2>

<h1>13. What month does accidents usually happens?</h1>

In [33]:
# Count accidents per month name. The month categories are unordered, so rows
# come out alphabetically rather than in calendar order. `observed` is explicit
# because the implicit pandas default for categorical keys is deprecated.
ukroadaccident.groupby('Month', observed=False).size()

Month
April        51744
August       53913
December     51836
February     49491
January      52872
July         57445
June         56481
March        54086
May          56352
November     60424
October      59580
September    56455
dtype: int64

<h2> <strong>Insight: </strong> <i>The result shows that accidents usually happen during the month of November.</i> </h2>

<h1>14. What is the distribution of accidents by accident severity and vehicle type?</h1>

In [60]:
# Cross-tabulate accident counts: severity as rows, vehicle type as columns.
# observed=False keeps combinations that never occur (e.g. fatal accidents
# involving a ridden horse) as explicit zero counts, independent of the pandas
# version's default for categorical keys.
(
    ukroadaccident
    .groupby(['Accident_Severity', 'Vehicle_Type'], observed=False)
    .size()
    .unstack()
)

Vehicle_Type,Agricultural vehicle,Bus or coach (17 or more pass seats),Car,Data missing or out of range,Goods 7.5 tonnes mgw and over,Goods over 3.5t. and under 7.5t,Minibus (8 - 16 passenger seats),Motorcycle 125cc and under,Motorcycle 50cc and under,Motorcycle over 125cc and up to 500cc,Motorcycle over 500cc,Other vehicle,Pedal cycle,Ridden horse,Taxi/Private hire car,Van / Goods 3.5 tonnes mgw or under
Accident_Severity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Fatal,21,325,6577,0,216,67,29,189,95,105,339,70,6,0,155,467
Serious,282,3373,66461,0,2321,857,276,2031,1014,1014,3457,767,39,0,1771,4554
Slight,1644,22180,424954,6,14770,5172,1671,13049,6494,6537,21861,4800,152,4,11368,29139


<h2> <strong>Insight: </strong> <i>According to the result, car is the vehicle type that got the highest accident of all the three types of accident severity, while ridden horse has the least.</i> </h2>

<h1>~Correlation~</h1>

<h1>15. Is there a correlation between the number of casualties and number of vehicles?</h1>

In [36]:
# Pearson correlation (the Series.corr default) between casualties and vehicles
# per accident.
ukroadaccident['Number_of_Casualties'].corr(ukroadaccident['Number_of_Vehicles'])

np.float64(0.2288888612692756)

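<h2> <strong>Insight: </strong> <i>The result shows a weak positive correlation (r ≈ 0.23) between the number of casualties and the number of vehicles: accidents involving more vehicles tend to have slightly more casualties, but the relationship is not strong.</i> </h2>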

<h1>16. Is there a correlation between latitude and number of casualties?</h1>

In [37]:
# Pearson correlation (the Series.corr default) between latitude and casualties.
ukroadaccident['Latitude'].corr(ukroadaccident['Number_of_Casualties'])

np.float64(0.032200686625906395)

<h2> <strong>Insight: </strong> <i>Based on the result (r ≈ 0.03), there is practically no correlation between latitude and number of casualties.</i> </h2>

<h1>17. Is there a correlation between longitude and number of casualties?</h1>

In [38]:
# Pearson correlation (the Series.corr default) between longitude and casualties.
ukroadaccident['Longitude'].corr(ukroadaccident['Number_of_Casualties'])

np.float64(-0.0404056457884545)

<h2> <strong>Insight: </strong> <i>The result shows that there is no correlation between longitude and number of casualties.</i> </h2>

<h1>18. Is there a significant difference between accident severity and number of vehicles?</h1>

In [39]:
# List the distinct severity labels; one ANOVA group is built per label.
ukroadaccident['Accident_Severity'].unique()

['Serious', 'Slight', 'Fatal']
Categories (3, object): ['Fatal', 'Serious', 'Slight']

In [40]:
# Vehicle counts split by severity level, one Series per ANOVA group. Each group
# is taken with a single .loc selection (row mask, column label).
vehiserious = ukroadaccident.loc[
    ukroadaccident['Accident_Severity'].eq('Serious'), 'Number_of_Vehicles'
]
vehislight = ukroadaccident.loc[
    ukroadaccident['Accident_Severity'].eq('Slight'), 'Number_of_Vehicles'
]
vehifatal = ukroadaccident.loc[
    ukroadaccident['Accident_Severity'].eq('Fatal'), 'Number_of_Vehicles'
]

In [41]:
# One-way ANOVA on the number of vehicles across the three severity groups;
# f_oneway returns the F statistic (bound to `result`) and the p-value.
result, pvalue = f_oneway(vehiserious, vehislight, vehifatal)
# Show the p-value as the cell output.
pvalue

np.float64(0.0)

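<h2> <strong>Insight: </strong> <i>The p-value is effectively zero (it is too small to be represented and prints as 0.0), so the average number of vehicles differs significantly between the accident severity levels. With more than 660,000 records even a small difference is statistically significant, so this result alone does not show how strongly the two are related.</i> </h2>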

<h1>19. Is there a significant difference between the area and number of casualties?</h1>

In [42]:
# List the distinct area labels; one ANOVA group is built per label.
ukroadaccident['Urban_or_Rural_Area'].unique()

['Urban', 'Rural', 'Unallocated']
Categories (3, object): ['Rural', 'Unallocated', 'Urban']

In [43]:
# Casualty counts split by area type, one Series per ANOVA group. Each group is
# taken with a single .loc selection (row mask, column label).
urbancasualty = ukroadaccident.loc[
    ukroadaccident['Urban_or_Rural_Area'].eq('Urban'), 'Number_of_Casualties'
]
ruralcasualty = ukroadaccident.loc[
    ukroadaccident['Urban_or_Rural_Area'].eq('Rural'), 'Number_of_Casualties'
]
unallocatedcasualty = ukroadaccident.loc[
    ukroadaccident['Urban_or_Rural_Area'].eq('Unallocated'), 'Number_of_Casualties'
]

In [44]:
# One-way ANOVA on the number of casualties across the three area groups;
# f_oneway returns the F statistic (bound to `result`) and the p-value.
result, pvalue = f_oneway(urbancasualty, ruralcasualty, unallocatedcasualty)
# Show the p-value as the cell output.
pvalue

np.float64(0.0)

<h2> <strong>Insight: </strong> <i>According to the result, there is a significant difference between the areas and the number of casualties.</i> </h2>

<h1>20. Is there a significant difference between light conditions and latitude?</h1>

In [45]:
# List the distinct light-condition labels; one ANOVA group is built per label.
ukroadaccident['Light_Conditions'].unique()

['Darkness - lights lit', 'Daylight', 'Darkness - lighting unknown', 'Darkness - lights unlit', 'Darkness - no lighting']
Categories (5, object): ['Darkness - lighting unknown', 'Darkness - lights lit', 'Darkness - lights unlit', 'Darkness - no lighting', 'Daylight']

In [46]:
# Latitudes split by light condition, one Series per ANOVA group. Each group is
# taken with a single .loc selection (row mask, column label).
lighta = ukroadaccident.loc[
    ukroadaccident['Light_Conditions'].eq('Darkness - lights lit'), 'Latitude'
]
lightb = ukroadaccident.loc[
    ukroadaccident['Light_Conditions'].eq('Daylight'), 'Latitude'
]
lightc = ukroadaccident.loc[
    ukroadaccident['Light_Conditions'].eq('Darkness - lighting unknown'), 'Latitude'
]
lightd = ukroadaccident.loc[
    ukroadaccident['Light_Conditions'].eq('Darkness - lights unlit'), 'Latitude'
]
lighte = ukroadaccident.loc[
    ukroadaccident['Light_Conditions'].eq('Darkness - no lighting'), 'Latitude'
]

In [47]:
# One-way ANOVA on latitude across the five light-condition groups; f_oneway
# returns the F statistic (bound to `result`) and the p-value.
result, pvalue = f_oneway(lighta, lightb, lightc, lightd, lighte)
# Show the p-value as the cell output.
pvalue

np.float64(1.224998791423201e-27)

<h2> <strong>Insight: </strong> <i>The result shows that the average latitude differs significantly between light conditions (p ≈ 1.2e-27). This does not mean that light conditions affect latitude; with more than 660,000 records even a small difference between groups is statistically significant.</i> </h2>

<hr>
<h1>---Additional Five Questions about Accident Date---</h1>

<h2>-UniVariate-</h2>

<h1>21. Which day of the week had the most accidents record?</h1>

In [48]:
# Tally accidents per weekday code (0 = Monday ... 6 = Sunday, as produced by
# .dt.dayofweek), most frequent first.
ukroadaccident['DayofWeek'].value_counts()

DayofWeek
5    107178
2     99558
3     99511
4     97900
1     94550
6     89302
0     72680
Name: count, dtype: int64

<h2> <strong>Insight: </strong> <i>Based on the result, accidents mostly occurs on Saturday, and accidents happen the least during Mondays.</i> </h2>

<h2>-BiVariate-</h2>

<h1>22. What is the number of accidents per month in each year?</h1>

In [49]:
# Count accidents for every month/year pair, then pivot years into columns.
# observed=False keeps pairs that never occur as explicit zero counts,
# independent of the pandas version's default for categorical keys.
accidentpermonth = ukroadaccident.groupby(['Month', 'Year'], observed=False).size()
accidentpermonth.unstack()

Year,2019,2020,2021,2022
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
April,14125,13394,12715,11510
August,15044,13366,13415,12088
December,14708,13794,13709,9625
February,13253,14353,10950,10935
January,15355,14133,13417,9967
July,15862,14630,14300,12653
June,15528,14205,13936,12812
March,15049,13494,13202,12341
May,15833,14336,13811,12372
November,16559,14770,15473,13622


<h2> <strong>Insight 1: </strong> <i>Based on each month's share of its year's accidents, <strong>November is the peak month of accidents in most years</strong>, with two consecutive years above 9% (2021 and 2022). However, October also has a high share, with three years at 9% or above, and it edges ahead of November in 2020.</i></h2>
<h2> <strong>Insight 2: </strong> <i><strong>February has the lowest percentage</strong> of accidents, especially in 2021 at just <strong>6.7%.</strong> This suggests that road accidents tend to be more common in October and November, while February is generally a safer month.</i></h2>
<h2> <strong>Insight 3: </strong> <i>Most of the months had high road accident rate on the year 2019, and we could see from the numbers that on the years 2020-2022, the number had decreased and we can conclude that this is  perhaps because of the pandemic.</i> </h2>

<h1>23. What is the state of accident severity across years?</h1>

In [50]:
# Cross-tabulate accident counts: severity as rows, year as columns.
# observed=False keeps combinations that never occur as explicit zero counts,
# independent of the pandas version's default for categorical keys.
(
    ukroadaccident
    .groupby(['Accident_Severity', 'Year'], observed=False)
    .size()
    .unstack()
)

Year,2019,2020,2021,2022
Accident_Severity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fatal,2714,2341,2057,1549
Serious,24322,23121,21997,18777
Slight,155079,145129,139500,124093


<h2> <strong>Insight 1: </strong> <i>From the number itself, it shows that deadly crashes are rare compared to other types of accident severity. <strong>Fatal accidents are only around 1% of all cases.</strong> They even decreased slightly over the years, from 1.49% in 2019 down to 1.07% in 2022.</i></h2>
<h2> <strong>Insight 2: </strong> <i>On the other hand, <strong>slight accidents had the highest consistent record at around 85% every year</strong>.</i> </h2>
<h2> <strong>Insight 3: </strong> <i><strong>Serious accidents</strong> are in the middle, <strong>staying between 13–14%</strong> of each year's accidents.</i></h2>
<h2> <strong>Insight 4: </strong> <i>Accidents across the years were slight and less harmful, a small portion are serious, and very few cases are fatal.</i></h2>

<h2>-MultiVariate-</h2>

<h1>24. What is the average casualties per month, year, and road surface conditions?</h1>

In [54]:
# Mean casualties per accident for every month/year pair, with one column per
# road surface condition. observed=False keeps every category combination in
# the table (combinations with no accidents show NaN), independent of the
# pandas version's default for categorical keys.
(
    ukroadaccident
    .groupby(['Month', 'Year', 'Road_Surface_Conditions'], observed=False)['Number_of_Casualties']
    .mean()
    .unstack()
)

Unnamed: 0_level_0,Road_Surface_Conditions,Dry,Flood over 3cm. deep,Frost or ice,Snow,Wet or damp,unaccounted
Month,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
April,2019,1.360358,1.0,1.2,,1.385888,1.0
April,2020,1.339197,1.538462,1.560606,1.442308,1.44278,1.0
April,2021,1.353704,1.0,1.0,2.0,1.420137,1.181818
April,2022,1.357578,1.0,1.5,1.25,1.485714,1.181818
August,2019,1.383017,1.636364,1.0,1.0,1.458948,1.304348
August,2020,1.375027,1.578947,1.0,2.0,1.450013,1.304348
August,2021,1.385929,2.090909,,1.0,1.463757,1.0
August,2022,1.379569,1.75,1.5,2.0,1.415216,1.4
December,2019,1.286649,1.531915,1.35085,1.181818,1.422541,1.0
December,2020,1.335708,1.708333,1.345857,1.252427,1.401117,1.142857


<h2> <strong>Insight 1: </strong> <i>The <strong>average casualties are slightly higher on wet/damp roads (around 1.40–1.47)</strong> compared to dry roads (around 1.30–1.36) across almost all months and years.</i> </h2>
<h2> <strong>Insight 2: </strong> <i>The <strong>"Flood over 3cm deep" condition</strong> often shows averages <strong>above 1.5 and sometimes above 2.0</strong> casualties per accident.</i> </h2>
<h2> <strong>Insight 3: </strong> <i>Averages for the <strong>frost/ice and snow conditions vary</strong> a lot: both are usually around <strong>1.0</strong>, but at times they spike to above <strong>2.0</strong> (October 2021 has the highest value with 2.2).</i> </h2>
<h2> <strong>Insight 4: </strong> <i>Most accidents happen on dry roads but accidents on wet and flooded roads are a bit more dangerous. Snow and ice road surface condition tend to happen less frequent but it is the most dangerous and causes more casualties when they do happen.</i> </h2>

<h1>25. What is the distribution of accidents by year, month, and rural/urban area?</h1>

In [64]:
# Count accidents for every area/year pair, with one column per month.
# observed=False keeps combinations that never occur (e.g. the 'Unallocated'
# rows for 2021 and 2022) as explicit zero counts, independent of the pandas
# version's default for categorical keys.
(
    ukroadaccident
    .groupby(['Urban_or_Rural_Area', 'Year', 'Month'], observed=False)
    .size()
    .unstack()
)

Unnamed: 0_level_0,Month,April,August,December,February,January,July,June,March,May,November,October,September
Urban_or_Rural_Area,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Rural,2019,5169,6001,5499,4874,5807,6047,5835,5350,5802,5974,5518,5682
Rural,2020,4752,5204,5167,5226,5182,5415,5105,4947,5184,5251,5415,5143
Rural,2021,4555,5256,5106,4144,4925,5277,4943,4391,4873,5456,5260,4830
Rural,2022,3956,4537,3589,3896,3626,4476,4317,4118,4266,4671,4593,4380
Unallocated,2019,0,0,2,0,1,1,1,0,0,0,0,0
Unallocated,2020,1,0,1,0,0,0,1,1,2,0,0,0
Unallocated,2021,0,0,0,0,0,0,0,0,0,0,0,0
Unallocated,2022,0,0,0,0,0,0,0,0,0,0,0,0
Urban,2019,8956,9043,9207,8379,9547,9814,9692,9699,10031,10585,10010,9589
Urban,2020,8641,8162,8626,9127,8951,9215,9099,8546,9150,9519,10269,9289


<h2> <strong>Insight 1: </strong> <i>In all months and years, accidents mostly happen in <strong>urban areas, at roughly 60%–67% of each month's total.</strong> Accidents in <strong>rural areas are lower, at roughly 33%–40%</strong> (the 'Unallocated' counts are negligible).</i></h2>
<h2> <strong>Insight 2: </strong> <i>For example, the share of accidents in <strong>urban areas</strong> was about <strong>65.5% in October 2020</strong> (10,269 of 15,684 accidents), but only about <strong>60.1% in August 2019</strong> (9,043 of 15,044 accidents).</i></h2>
<h2> <strong>Insight 3: </strong> <i>In <strong>rural areas</strong> the pattern is mirrored: about <strong>34.5%</strong> in October 2020 and about <strong>39.9%</strong> in August 2019.</i></h2>
<h2> <strong>Insight 4: </strong> <i>Most accidents happen in urban areas (cities) regardless of the month and year. Rural accidents are fewer, but their share of the monthly total tends to be highest in August.</i></h2>