<h1>---Importing Necessary Files---</h1>

In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from scipy.stats import f_oneway

<hr>
<h1>---Making a DataFrame for the Dataset---</h1>

In [2]:
ukroadaccident = pd.read_csv('datasets\\uk_road_accident.csv')

<hr>
<h1>---Checking if the DataFrame is Working---</h1>

In [3]:
ukroadaccident

Unnamed: 0,Index,Accident_Severity,Accident Date,Latitude,Light_Conditions,District Area,Longitude,Number_of_Casualties,Number_of_Vehicles,Road_Surface_Conditions,Road_Type,Urban_or_Rural_Area,Weather_Conditions,Vehicle_Type
0,200701BS64157,Serious,5/6/2019,51.506187,Darkness - lights lit,Kensington and Chelsea,-0.209082,1,2,Dry,Single carriageway,Urban,Fine no high winds,Car
1,200701BS65737,Serious,2/7/2019,51.495029,Daylight,Kensington and Chelsea,-0.173647,1,2,Wet or damp,Single carriageway,Urban,Raining no high winds,Car
2,200701BS66127,Serious,26-08-2019,51.517715,Darkness - lighting unknown,Kensington and Chelsea,-0.210215,1,3,Dry,,Urban,,Taxi/Private hire car
3,200701BS66128,Serious,16-08-2019,51.495478,Daylight,Kensington and Chelsea,-0.202731,1,4,Dry,Single carriageway,Urban,Fine no high winds,Bus or coach (17 or more pass seats)
4,200701BS66837,Slight,3/9/2019,51.488576,Darkness - lights lit,Kensington and Chelsea,-0.192487,1,2,Dry,,Urban,,Other vehicle
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
660674,201091NM01760,Slight,18-02-2022,57.374005,Daylight,Highland,-3.467828,2,1,Dry,Single carriageway,Rural,Fine no high winds,Car
660675,201091NM01881,Slight,21-02-2022,57.232273,Darkness - no lighting,Highland,-3.809281,1,1,Frost or ice,Single carriageway,Rural,Fine no high winds,Car
660676,201091NM01935,Slight,23-02-2022,57.585044,Daylight,Highland,-3.862727,1,3,Frost or ice,Single carriageway,Rural,Fine no high winds,Car
660677,201091NM01964,Serious,23-02-2022,57.214898,Darkness - no lighting,Highland,-3.823997,1,2,Wet or damp,Single carriageway,Rural,Fine no high winds,Motorcycle over 500cc


<hr>
<h1>---Checking for Null Values---</h1>

In [4]:
ukroadaccident.isnull().sum()

Index                          0
Accident_Severity              0
Accident Date                  0
Latitude                      25
Light_Conditions               0
District Area                  0
Longitude                     26
Number_of_Casualties           0
Number_of_Vehicles             0
Road_Surface_Conditions      726
Road_Type                   4520
Urban_or_Rural_Area           15
Weather_Conditions         14128
Vehicle_Type                   0
dtype: int64

<h1>---Fixing the Null Values---</h1>

<h3>-Numerical Null Values-</h3>

In [5]:
ukroadaccident['Latitude'].mean()

52.553865761110956

In [6]:
ukroadaccident['Latitude'] = ukroadaccident['Latitude'].fillna(ukroadaccident['Latitude'].mean())

<p>---------------------------------------------</p>

In [7]:
ukroadaccident['Longitude'].mean()

-1.4312103685020727

In [8]:
ukroadaccident['Longitude'] = ukroadaccident['Longitude'].fillna(ukroadaccident['Longitude'].mean())

<h3>-Categorical Null Values-</h3>

In [20]:
ukroadaccident['Road_Surface_Conditions'] = ukroadaccident['Road_Surface_Conditions'].fillna('unaccounted')

<p>---------------------------------------------</p>

In [21]:
ukroadaccident['Road_Type'] = ukroadaccident['Road_Type'].fillna('unaccounted')

<p>---------------------------------------------</p>

In [13]:
ukroadaccident['Urban_or_Rural_Area'].mode()

0    Urban
Name: Urban_or_Rural_Area, dtype: object

In [14]:
ukroadaccident['Urban_or_Rural_Area'] = ukroadaccident['Urban_or_Rural_Area'].fillna(ukroadaccident['Urban_or_Rural_Area'].mode()[0])

<p>---------------------------------------------</p>

In [22]:
ukroadaccident['Weather_Conditions'] = ukroadaccident['Weather_Conditions'].fillna('unaccounted')

<h1>---Checking if there are still Null Values---</h1>

In [23]:
ukroadaccident.isnull().sum()

Index                      0
Accident_Severity          0
Accident Date              0
Latitude                   0
Light_Conditions           0
District Area              0
Longitude                  0
Number_of_Casualties       0
Number_of_Vehicles         0
Road_Surface_Conditions    0
Road_Type                  0
Urban_or_Rural_Area        0
Weather_Conditions         0
Vehicle_Type               0
dtype: int64

<hr>
<h1>---Checking the Data Type---</h1>

In [18]:
ukroadaccident.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660679 entries, 0 to 660678
Data columns (total 14 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   Index                    660679 non-null  object 
 1   Accident_Severity        660679 non-null  object 
 2   Accident Date            660679 non-null  object 
 3   Latitude                 660679 non-null  float64
 4   Light_Conditions         660679 non-null  object 
 5   District Area            660679 non-null  object 
 6   Longitude                660679 non-null  float64
 7   Number_of_Casualties     660679 non-null  int64  
 8   Number_of_Vehicles       660679 non-null  int64  
 9   Road_Surface_Conditions  660679 non-null  object 
 10  Road_Type                660679 non-null  object 
 11  Urban_or_Rural_Area      660679 non-null  object 
 12  Weather_Conditions       660679 non-null  object 
 13  Vehicle_Type             660679 non-null  object 
dtypes: f

<hr>
<h1>---Creating a column---</h1>
<h3>The accident date is all together so I want to break them by month and by year.</h3>

In [34]:
ukroadaccident['Accident Date'] = pd.to_datetime(ukroadaccident['Accident Date'], dayfirst = True, errors='coerce')

<h1>---Extracting date infomation using pandas datetime---</h1>

In [37]:
# ukroadaccident['Year'] = ukroadaccident['Accident Date'].dt.year()
# ukroadaccident['Month_Number'] = ukroadaccident['Accident Date'].dt.month
# ukroadaccident['Month Name'] = ukroadaccident['Accident Date'].dt.month_name()
# ukroadaccident['Day'] = ukroadaccident['Accident Date'].dt.day
# ukroadaccident['DayofWeek'] = ukroadaccident['Accident Date'].dt.dayofweek #Monday=0, Sunday=6

<h1>---Changing the Data Type---</h1>

In [35]:
ukroadaccident['Index'] = ukroadaccident['Index'].astype('category')
ukroadaccident['Accident_Severity'] = ukroadaccident['Accident_Severity'].astype('category')
ukroadaccident['Accident Date'] = ukroadaccident['Accident Date'].astype('datetime64[ns]')
ukroadaccident['Light_Conditions'] = ukroadaccident['Light_Conditions'].astype('category')
ukroadaccident['District Area'] = ukroadaccident['District Area'].astype('category')
ukroadaccident['Road_Surface_Conditions'] = ukroadaccident['Road_Surface_Conditions'].astype('category')
ukroadaccident['Road_Type'] = ukroadaccident['Road_Type'].astype('category')
ukroadaccident['Urban_or_Rural_Area'] = ukroadaccident['Urban_or_Rural_Area'].astype('category')
ukroadaccident['Weather_Conditions'] = ukroadaccident['Weather_Conditions'].astype('category')
ukroadaccident['Vehicle_Type'] = ukroadaccident['Vehicle_Type'].astype('category')
ukroadaccident['Month'] = ukroadaccident['Month'].astype('category')
ukroadaccident['Year'] = ukroadaccident['Year'].astype('category')

<h1>---Checking the Updated Data Type---</h1>

In [31]:
ukroadaccident.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660679 entries, 0 to 660678
Data columns (total 15 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   Index                    660679 non-null  category      
 1   Accident_Severity        660679 non-null  category      
 2   Accident Date            265007 non-null  datetime64[ns]
 3   Latitude                 660679 non-null  float64       
 4   Light_Conditions         660679 non-null  category      
 5   District Area            660679 non-null  category      
 6   Longitude                660679 non-null  float64       
 7   Number_of_Casualties     660679 non-null  int64         
 8   Number_of_Vehicles       660679 non-null  int64         
 9   Road_Surface_Conditions  660679 non-null  category      
 10  Road_Type                660679 non-null  category      
 11  Urban_or_Rural_Area      660679 non-null  category      
 12  Weather_Conditio

<hr>
<h1>---20 Questions and Insights---</h1>
<hr>

<h1>~EDA~</h1>

<h1>1. What is the most usual vehicle type?</h1>

In [None]:
ukroadaccident['Vehicle_Type'].mode()

<h2><strong>Insight: </strong> <i>According to the result, the most frequent/usual type of vehicle involve in accidents is car.</i></h2>

<hr>
<h1>2. What is the usual light condition during the accidents?</h1>

In [None]:
ukroadaccident['Light_Conditions'].mode()

<h2> <strong>Insight: </strong> <i>The result shows that accidents are usually happening during daylight.</i> </h2>

<hr>
<h1>3. In terms of road surface conditions, what is the the most frequent?</h1>

In [None]:
ukroadaccident['Road_Surface_Conditions'].mode()

<h2> <strong>Insight: </strong> <i>Based on the result, dry road surface condition are prone to accidents.</i> </h2>

<hr>
<h1>4. How many number of vehicles are mostly being involved during accidents?</h1>

In [None]:
ukroadaccident['Number_of_Vehicles'].mode()

<h2> <strong>Insight: </strong> <i>The result shows that two vehicles are mostly involved during road accidents.</i> </h2>

<hr>
<h1>5. What type of road does the accidents usually happen? </h1>

In [None]:
ukroadaccident['Road_Type'].mode()

<h2> <strong>Insight: </strong> <i>According to the result, accidents usually happen on a single carriageway road.</i> </h2>

<h1>6. How many record of accidents per year?</h1>

In [None]:
ukroadaccident['Year'].value_counts()

<h2> <strong>Insight: </strong> <i>The result shows that the year 2019, had the highest record of accidents with 182,115. The second most hightest is the year 2020 with 170,591 records. Next is 2021 with 163,554 records. While the year that got the least records is 2022 with 144,419. </i> </h2>

<h1>7. In terms of weather conditions, when does the least and most accidents record?</h1>

In [None]:
ukroadaccident['Weather_Conditions'].value_counts()

<h2> <strong>Insight: </strong> <i>Based on the result, accidents usually happen during 'fine no high winds' weather condition, then the least accident record happens during 'snowing + high winds'.</i> </h2>

<h1>~Aggregation~</h1>

<h1>8. What is the total number of accidents per road type?</h1>

In [None]:
ukroadaccident.groupby('Road_Type').size()

<h2> <strong>Insight: </strong> <i>We can see from the result that the road type that had the highest number of accidents is the Single Carriageway with 496,663 records. Followed by Dual Carriageway with 99,424 records. The third is Roundabout that had 43,992 accident record, then the One way street had 13,559 records, and the road type that had the least record is the Slip road with 7,041 records.</i> </h2>

<h1>9. What is the average of number of casualties involved in accidents by accident severity?</h1>

In [None]:
ukroadaccident.groupby('Accident_Severity')['Number_of_Casualties'].mean()

<h2> <strong>Insight: </strong> <i>The average of accident severity in terms of fatal is 1.903129 (highest), serious is 1.467280, and slight is 1.331402 (lowest).</i> </h2>

<h1>10. How do accident severities vary across different light conditions?</h1>

In [None]:
ukroadaccident.groupby(['Accident_Severity', 'Light_Conditions']).size()

<h2> <strong>Insight: </strong> <i>We can conclude that all the three types of accident severity happens the most during the daylight, while the three of them also happens the least during darkness - lights unlit. </i> </h2>

<h1>11. What is the distribution of accidents by road type and road surface condition?</h1>

In [None]:
ukroadaccident.groupby(['Road_Type', 'Road_Surface_Conditions']).size()

<h2> <strong>Insight: </strong> <i>The result shows that all of the road types had the same highest number of accidents in terms of road surface conditions which is dry, they also had the same least number of accidents which is flood over 3cm. deep (road surface condition). </i> </h2>

<h1>12. What type of area does accidents happen the most?</h1>

In [None]:
ukroadaccident.groupby('Urban_or_Rural_Area').size()

<h2> <strong>Insight: </strong> <i>According to the result, accidents usually happen at urban areas.</i> </h2>

<h1>13. What month does accidents usually happens?</h1>

In [None]:
ukroadaccident.groupby('Month').size()

<h2> <strong>Insight: </strong> <i>The result shows that accidents usually happen during the month of November.</i> </h2>

<h1>14. What is the distribution of accidents by accident severity and vehicle type? Look for the most type of vehicle.</h1>

In [None]:
ukroadaccident.groupby(['Accident_Severity','Vehicle_Type']).size()

<h2> <strong>Insight: </strong> <i>According to the result, car is the vehicle type that got the highest accident of all the three types of accident severity.</i> </h2>

<h1>~Correlation~</h1>

In [None]:
ukroadaccident.info()

<h1>15. Is there a correlation between the number of casualties and number of vehicles?</h1>

In [None]:
ukroadaccident['Number_of_Casualties'].corr(ukroadaccident['Number_of_Vehicles'])

<h2> <strong>Insight: </strong> <i>The result shows that number of casualties and number of vehicles has no correlation.</i> </h2>

<h1>16. Is there a correlation betweem latitude number of casualties?</h1>

In [None]:
ukroadaccident['Latitude'].corr(ukroadaccident['Number_of_Casualties'])

<h2> <strong>Insight: </strong> <i>Based on the result, there is no correlation between laltitude and number of casualties.</i> </h2>

<h1>17. Is there a correlation between longitude and number of casualties?</h1>

In [None]:
ukroadaccident['Longitude'].corr(ukroadaccident['Number_of_Casualties'])

<h2> <strong>Insight: </strong> <i>The result shows that there is no correlation between longitude and number of casualties.</i> </h2>

<h1>18. Is there a significant difference between accident severity and number of vehicles?</h1>

In [None]:
ukroadaccident['Accident_Severity'].unique()

In [None]:
vehiserious = ukroadaccident[ukroadaccident['Accident_Severity'] == 'Serious']['Number_of_Vehicles']
vehislight = ukroadaccident[ukroadaccident['Accident_Severity'] == 'Slight']['Number_of_Vehicles']
vehifatal = ukroadaccident[ukroadaccident['Accident_Severity'] == 'Fatal']['Number_of_Vehicles']

In [None]:
result, pvalue = f_oneway(vehiserious, vehislight, vehifatal)
pvalue

<h2> <strong>Insight: </strong> <i>The result shows that there is extremely significance between the accident severity and number of vehicles, it shows that they are very related to each other.</i> </h2>

<h1>19. Is there a significant difference between the area and number of casualties?</h1>

In [None]:
ukroadaccident['Urban_or_Rural_Area'].unique()

In [None]:
urbancasualty = ukroadaccident[ukroadaccident['Urban_or_Rural_Area'] == 'Urban']['Number_of_Casualties']
ruralcasualty = ukroadaccident[ukroadaccident['Urban_or_Rural_Area'] == 'Rural']['Number_of_Casualties']
unallocatedcasualty = ukroadaccident[ukroadaccident['Urban_or_Rural_Area'] == 'Unallocated']['Number_of_Casualties']

In [None]:
result, pvalue = f_oneway(urbancasualty, ruralcasualty, unallocatedcasualty)
pvalue

<h2> <strong>Insight: </strong> <i>According to the result, there is a significant difference between the areas and the number of casualties.</i> </h2>

<h1>20. Is there a significant difference between light conditions and latitude?</h1>

In [None]:
ukroadaccident['Light_Conditions'].unique()

In [None]:
lighta = ukroadaccident[ukroadaccident['Light_Conditions'] == 'Darkness - lights lit']['Latitude']
lightb = ukroadaccident[ukroadaccident['Light_Conditions'] == 'Daylight']['Latitude']
lightc = ukroadaccident[ukroadaccident['Light_Conditions'] == 'Darkness - lighting unknown']['Latitude']
lightd = ukroadaccident[ukroadaccident['Light_Conditions'] == 'Darkness - lights unlit']['Latitude']
lighte = ukroadaccident[ukroadaccident['Light_Conditions'] == 'Darkness - no lighting']['Latitude']

In [None]:
result, pvalue = f_oneway(lighta, lightb, lightc, lightd, lighte)
pvalue

<h2> <strong>Insight: </strong> <i>The result shows that the difference between light conditions and latitude is very significant, mean the light conditions has an extreme effect on the latitude.</i> </h2>