<h1>---Importing Necessary Files---</h1>

In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from scipy.stats import f_oneway

<hr>
<h1>---Making a DataFrame for the Dataset---</h1>

In [2]:
ukroadaccident = pd.read_csv('datasets\\uk_road_accident.csv')

<hr>
<h1>---Checking if the DataFrame is Working---</h1>

In [3]:
ukroadaccident

Unnamed: 0,Index,Accident_Severity,Accident Date,Latitude,Light_Conditions,District Area,Longitude,Number_of_Casualties,Number_of_Vehicles,Road_Surface_Conditions,Road_Type,Urban_or_Rural_Area,Weather_Conditions,Vehicle_Type
0,200701BS64157,Serious,5/6/2019,51.506187,Darkness - lights lit,Kensington and Chelsea,-0.209082,1,2,Dry,Single carriageway,Urban,Fine no high winds,Car
1,200701BS65737,Serious,2/7/2019,51.495029,Daylight,Kensington and Chelsea,-0.173647,1,2,Wet or damp,Single carriageway,Urban,Raining no high winds,Car
2,200701BS66127,Serious,26-08-2019,51.517715,Darkness - lighting unknown,Kensington and Chelsea,-0.210215,1,3,Dry,,Urban,,Taxi/Private hire car
3,200701BS66128,Serious,16-08-2019,51.495478,Daylight,Kensington and Chelsea,-0.202731,1,4,Dry,Single carriageway,Urban,Fine no high winds,Bus or coach (17 or more pass seats)
4,200701BS66837,Slight,3/9/2019,51.488576,Darkness - lights lit,Kensington and Chelsea,-0.192487,1,2,Dry,,Urban,,Other vehicle
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
660674,201091NM01760,Slight,18-02-2022,57.374005,Daylight,Highland,-3.467828,2,1,Dry,Single carriageway,Rural,Fine no high winds,Car
660675,201091NM01881,Slight,21-02-2022,57.232273,Darkness - no lighting,Highland,-3.809281,1,1,Frost or ice,Single carriageway,Rural,Fine no high winds,Car
660676,201091NM01935,Slight,23-02-2022,57.585044,Daylight,Highland,-3.862727,1,3,Frost or ice,Single carriageway,Rural,Fine no high winds,Car
660677,201091NM01964,Serious,23-02-2022,57.214898,Darkness - no lighting,Highland,-3.823997,1,2,Wet or damp,Single carriageway,Rural,Fine no high winds,Motorcycle over 500cc


<hr>
<h1>---Checking for Null Values---</h1>

In [4]:
ukroadaccident.isnull().sum()

Index                          0
Accident_Severity              0
Accident Date                  0
Latitude                      25
Light_Conditions               0
District Area                  0
Longitude                     26
Number_of_Casualties           0
Number_of_Vehicles             0
Road_Surface_Conditions      726
Road_Type                   4520
Urban_or_Rural_Area           15
Weather_Conditions         14128
Vehicle_Type                   0
dtype: int64

<h1>---Fixing the Null Values---</h1>

<h3>-Numerical Null Values-</h3>

In [5]:
ukroadaccident['Latitude'].mean()

np.float64(52.553865761110956)

In [6]:
ukroadaccident['Latitude'] = ukroadaccident['Latitude'].fillna(ukroadaccident['Latitude'].mean())

<p>---------------------------------------------</p>

In [7]:
ukroadaccident['Longitude'].mean()

np.float64(-1.431210368502073)

In [8]:
ukroadaccident['Longitude'] = ukroadaccident['Longitude'].fillna(ukroadaccident['Longitude'].mean())

<h3>-Categorical Null Values-</h3>

In [9]:
ukroadaccident['Road_Surface_Conditions'].mode()

0    Dry
Name: Road_Surface_Conditions, dtype: object

In [10]:
ukroadaccident['Road_Surface_Conditions'] = ukroadaccident['Road_Surface_Conditions'].fillna(ukroadaccident['Road_Surface_Conditions'].mode()[0])

<p>---------------------------------------------</p>

In [11]:
ukroadaccident['Road_Type'].mode()

0    Single carriageway
Name: Road_Type, dtype: object

In [12]:
ukroadaccident['Road_Type'] = ukroadaccident['Road_Type'].fillna(ukroadaccident['Road_Type'].mode()[0])

<p>---------------------------------------------</p>

In [13]:
ukroadaccident['Urban_or_Rural_Area'].mode()

0    Urban
Name: Urban_or_Rural_Area, dtype: object

In [14]:
ukroadaccident['Urban_or_Rural_Area'] = ukroadaccident['Urban_or_Rural_Area'].fillna(ukroadaccident['Urban_or_Rural_Area'].mode()[0])

<p>---------------------------------------------</p>

In [15]:
ukroadaccident['Weather_Conditions'].mode()

0    Fine no high winds
Name: Weather_Conditions, dtype: object

In [16]:
ukroadaccident['Weather_Conditions'] = ukroadaccident['Weather_Conditions'].fillna(ukroadaccident['Weather_Conditions'].mode()[0])

<h1>---Checking if there are still Null Values---</h1>

In [17]:
ukroadaccident.isnull().sum()

Index                      0
Accident_Severity          0
Accident Date              0
Latitude                   0
Light_Conditions           0
District Area              0
Longitude                  0
Number_of_Casualties       0
Number_of_Vehicles         0
Road_Surface_Conditions    0
Road_Type                  0
Urban_or_Rural_Area        0
Weather_Conditions         0
Vehicle_Type               0
dtype: int64

<hr>
<h1>---Checking the Data Type---</h1>

In [18]:
ukroadaccident.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660679 entries, 0 to 660678
Data columns (total 14 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   Index                    660679 non-null  object 
 1   Accident_Severity        660679 non-null  object 
 2   Accident Date            660679 non-null  object 
 3   Latitude                 660679 non-null  float64
 4   Light_Conditions         660679 non-null  object 
 5   District Area            660679 non-null  object 
 6   Longitude                660679 non-null  float64
 7   Number_of_Casualties     660679 non-null  int64  
 8   Number_of_Vehicles       660679 non-null  int64  
 9   Road_Surface_Conditions  660679 non-null  object 
 10  Road_Type                660679 non-null  object 
 11  Urban_or_Rural_Area      660679 non-null  object 
 12  Weather_Conditions       660679 non-null  object 
 13  Vehicle_Type             660679 non-null  object 
dtypes: f

<h1>---Changing the Data Type---</h1>

In [19]:
ukroadaccident['Index'] = ukroadaccident['Index'].astype('category')
ukroadaccident['Accident_Severity'] = ukroadaccident['Accident_Severity'].astype('category')
ukroadaccident['Accident Date'] = ukroadaccident['Accident Date'].astype('datetime64[ns]')
ukroadaccident['Light_Conditions'] = ukroadaccident['Light_Conditions'].astype('category')
ukroadaccident['District Area'] = ukroadaccident['District Area'].astype('category')
ukroadaccident['Road_Surface_Conditions'] = ukroadaccident['Road_Surface_Conditions'].astype('category')
ukroadaccident['Road_Type'] = ukroadaccident['Road_Type'].astype('category')
ukroadaccident['Urban_or_Rural_Area'] = ukroadaccident['Urban_or_Rural_Area'].astype('category')
ukroadaccident['Weather_Conditions'] = ukroadaccident['Weather_Conditions'].astype('category')
ukroadaccident['Vehicle_Type'] = ukroadaccident['Vehicle_Type'].astype('category')

<h1>---Checking the Updated Data Type---</h1>

In [20]:
ukroadaccident.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660679 entries, 0 to 660678
Data columns (total 14 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   Index                    660679 non-null  category      
 1   Accident_Severity        660679 non-null  category      
 2   Accident Date            660679 non-null  datetime64[ns]
 3   Latitude                 660679 non-null  float64       
 4   Light_Conditions         660679 non-null  category      
 5   District Area            660679 non-null  category      
 6   Longitude                660679 non-null  float64       
 7   Number_of_Casualties     660679 non-null  int64         
 8   Number_of_Vehicles       660679 non-null  int64         
 9   Road_Surface_Conditions  660679 non-null  category      
 10  Road_Type                660679 non-null  category      
 11  Urban_or_Rural_Area      660679 non-null  category      
 12  Weather_Conditio

<hr>
<h1>---Creating a column---</h1>
<h3>The accident date is all together so I want to break them by month and by year.</h3>

In [21]:
ukroadaccident['Accident Date'] = pd.to_datetime(ukroadaccident['Accident Date'])
ukroadaccident['Month'] = ukroadaccident['Accident Date'].dt.month_name()
ukroadaccident

Unnamed: 0,Index,Accident_Severity,Accident Date,Latitude,Light_Conditions,District Area,Longitude,Number_of_Casualties,Number_of_Vehicles,Road_Surface_Conditions,Road_Type,Urban_or_Rural_Area,Weather_Conditions,Vehicle_Type,Month
0,200701BS64157,Serious,2019-05-06,51.506187,Darkness - lights lit,Kensington and Chelsea,-0.209082,1,2,Dry,Single carriageway,Urban,Fine no high winds,Car,May
1,200701BS65737,Serious,2019-02-07,51.495029,Daylight,Kensington and Chelsea,-0.173647,1,2,Wet or damp,Single carriageway,Urban,Raining no high winds,Car,February
2,200701BS66127,Serious,2019-08-26,51.517715,Darkness - lighting unknown,Kensington and Chelsea,-0.210215,1,3,Dry,Single carriageway,Urban,Fine no high winds,Taxi/Private hire car,August
3,200701BS66128,Serious,2019-08-16,51.495478,Daylight,Kensington and Chelsea,-0.202731,1,4,Dry,Single carriageway,Urban,Fine no high winds,Bus or coach (17 or more pass seats),August
4,200701BS66837,Slight,2019-03-09,51.488576,Darkness - lights lit,Kensington and Chelsea,-0.192487,1,2,Dry,Single carriageway,Urban,Fine no high winds,Other vehicle,March
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
660674,201091NM01760,Slight,2022-02-18,57.374005,Daylight,Highland,-3.467828,2,1,Dry,Single carriageway,Rural,Fine no high winds,Car,February
660675,201091NM01881,Slight,2022-02-21,57.232273,Darkness - no lighting,Highland,-3.809281,1,1,Frost or ice,Single carriageway,Rural,Fine no high winds,Car,February
660676,201091NM01935,Slight,2022-02-23,57.585044,Daylight,Highland,-3.862727,1,3,Frost or ice,Single carriageway,Rural,Fine no high winds,Car,February
660677,201091NM01964,Serious,2022-02-23,57.214898,Darkness - no lighting,Highland,-3.823997,1,2,Wet or damp,Single carriageway,Rural,Fine no high winds,Motorcycle over 500cc,February


<h3><strong>Note:</strong> I tried first to create the column 'Year' but then I realized that the column 'Month' should comes first so I replaced the word 'year' with 'month_name()' instead, but then when I run it, the year column was still there and I have researched that pandas doesn’t overwrite the old column. I was supposed to set it aside and move on that's why i created this note but on second thought, I am quite bothered by it so I am going to fix it hehehe now.</h3>

In [23]:
ukroadaccident['Accident Date'] = pd.to_datetime(ukroadaccident['Accident Date'])
ukroadaccident['Year'] = ukroadaccident['Accident Date'].dt.year
ukroadaccident

Unnamed: 0,Index,Accident_Severity,Accident Date,Latitude,Light_Conditions,District Area,Longitude,Number_of_Casualties,Number_of_Vehicles,Road_Surface_Conditions,Road_Type,Urban_or_Rural_Area,Weather_Conditions,Vehicle_Type,Month,Year
0,200701BS64157,Serious,2019-05-06,51.506187,Darkness - lights lit,Kensington and Chelsea,-0.209082,1,2,Dry,Single carriageway,Urban,Fine no high winds,Car,May,2019
1,200701BS65737,Serious,2019-02-07,51.495029,Daylight,Kensington and Chelsea,-0.173647,1,2,Wet or damp,Single carriageway,Urban,Raining no high winds,Car,February,2019
2,200701BS66127,Serious,2019-08-26,51.517715,Darkness - lighting unknown,Kensington and Chelsea,-0.210215,1,3,Dry,Single carriageway,Urban,Fine no high winds,Taxi/Private hire car,August,2019
3,200701BS66128,Serious,2019-08-16,51.495478,Daylight,Kensington and Chelsea,-0.202731,1,4,Dry,Single carriageway,Urban,Fine no high winds,Bus or coach (17 or more pass seats),August,2019
4,200701BS66837,Slight,2019-03-09,51.488576,Darkness - lights lit,Kensington and Chelsea,-0.192487,1,2,Dry,Single carriageway,Urban,Fine no high winds,Other vehicle,March,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
660674,201091NM01760,Slight,2022-02-18,57.374005,Daylight,Highland,-3.467828,2,1,Dry,Single carriageway,Rural,Fine no high winds,Car,February,2022
660675,201091NM01881,Slight,2022-02-21,57.232273,Darkness - no lighting,Highland,-3.809281,1,1,Frost or ice,Single carriageway,Rural,Fine no high winds,Car,February,2022
660676,201091NM01935,Slight,2022-02-23,57.585044,Daylight,Highland,-3.862727,1,3,Frost or ice,Single carriageway,Rural,Fine no high winds,Car,February,2022
660677,201091NM01964,Serious,2022-02-23,57.214898,Darkness - no lighting,Highland,-3.823997,1,2,Wet or damp,Single carriageway,Rural,Fine no high winds,Motorcycle over 500cc,February,2022


<h1>---Changing the Data Type of the Columns I Created---</h1>

In [59]:
ukroadaccident['Month'] = ukroadaccident['Month'].astype('category')
ukroadaccident['Year'] = ukroadaccident['Year'].astype('category')

In [60]:
ukroadaccident.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660679 entries, 0 to 660678
Data columns (total 16 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   Index                    660679 non-null  category      
 1   Accident_Severity        660679 non-null  category      
 2   Accident Date            660679 non-null  datetime64[ns]
 3   Latitude                 660679 non-null  float64       
 4   Light_Conditions         660679 non-null  category      
 5   District Area            660679 non-null  category      
 6   Longitude                660679 non-null  float64       
 7   Number_of_Casualties     660679 non-null  int64         
 8   Number_of_Vehicles       660679 non-null  int64         
 9   Road_Surface_Conditions  660679 non-null  category      
 10  Road_Type                660679 non-null  category      
 11  Urban_or_Rural_Area      660679 non-null  category      
 12  Weather_Conditio

<hr>
<h1>---20 Questions and Insights---</h1>
<hr>

<h1>~EDA~</h1>

<h1>1. What is the most usual vehicle type?</h1>

In [48]:
ukroadaccident['Vehicle_Type'].mode()

0    Car
Name: Vehicle_Type, dtype: category
Categories (16, object): ['Agricultural vehicle', 'Bus or coach (17 or more pass seats)', 'Car', 'Data missing or out of range', ..., 'Pedal cycle', 'Ridden horse', 'Taxi/Private hire car', 'Van / Goods 3.5 tonnes mgw or under']

<h2><strong>Insight: </strong> <i>According to the result, the most frequent/usual type of vehicle involve in accidents is car.</i></h2>

<hr>
<h1>2. What is the usual light condition during the accidents?</h1>

In [49]:
ukroadaccident['Light_Conditions'].mode()

0    Daylight
Name: Light_Conditions, dtype: category
Categories (5, object): ['Darkness - lighting unknown', 'Darkness - lights lit', 'Darkness - lights unlit', 'Darkness - no lighting', 'Daylight']

<h2> <strong>Insight: </strong> <i>The result shows that accidents are usually happening during daylight.</i> </h2>

<hr>
<h1>3. In terms of road surface conditions, what is the the most frequent?</h1>

In [50]:
ukroadaccident['Road_Surface_Conditions'].mode()

0    Dry
Name: Road_Surface_Conditions, dtype: category
Categories (5, object): ['Dry', 'Flood over 3cm. deep', 'Frost or ice', 'Snow', 'Wet or damp']

<h2> <strong>Insight: </strong> <i>Based on the result, dry road surface condition are prone to accidents.</i> </h2>

<hr>
<h1>4. How many number of vehicles are mostly being involved during accidents?</h1>

In [51]:
ukroadaccident['Number_of_Vehicles'].mode()

0    2
Name: Number_of_Vehicles, dtype: int64

<h2> <strong>Insight: </strong> <i>The result shows that two vehicles are mostly involved during road accidents.</i> </h2>

<hr>
<h1>5. What type of road does the accidents usually happen? </h1>

In [52]:
ukroadaccident['Road_Type'].mode()

0    Single carriageway
Name: Road_Type, dtype: category
Categories (5, object): ['Dual carriageway', 'One way street', 'Roundabout', 'Single carriageway', 'Slip road']

<h2> <strong>Insight: </strong> <i>According to the result, accidents usually happen on a single carriageway road.</i> </h2>

<h1>6. How many record of accidents per year?</h1>

In [24]:
ukroadaccident['Year'].value_counts()

Year
2019    182115
2020    170591
2021    163554
2022    144419
Name: count, dtype: int64

<h2> <strong>Insight: </strong> <i>The result shows that the year 2019, had the highest record of accidents with 182,115. The second most hightest is the year 2020 with 170,591 records. Next is 2021 with 163,554 records. While the year that got the least records is 2022 with 144,419. </i> </h2>

<h1>7. In terms of weather conditions, when does the least and most accidents record?</h1>

In [47]:
ukroadaccident['Weather_Conditions'].value_counts()

Weather_Conditions
Fine no high winds       535013
Raining no high winds     79696
Other                     17150
Raining + high winds       9615
Fine + high winds          8554
Snowing no high winds      6238
Fog or mist                3528
Snowing + high winds        885
Name: count, dtype: int64

<h2> <strong>Insight: </strong> <i>Based on the result, accidents usually happen during 'fine no high winds' weather condition, then the least accident record happens during 'snowing + high winds'.</i> </h2>

<h1>~Aggregation~</h1>

<h1>8. What is the total number of accidents per road type?</h1>

In [36]:
ukroadaccident.groupby('Road_Type').size()

Road_Type
Dual carriageway       99424
One way street         13559
Roundabout             43992
Single carriageway    496663
Slip road               7041
dtype: int64

<h2> <strong>Insight: </strong> <i>We can see from the result that the road type that had the highest number of accidents is the Single Carriageway with 496,663 records. Followed by Dual Carriageway with 99,424 records. The third is Roundabout that had 43,992 accident record, then the One way street had 13,559 records, and the road type that had the least record is the Slip road with 7,041 records.</i> </h2>

<h1>9. What is the average of number of casualties involved in accidents by accident severity?</h1>

In [39]:
ukroadaccident.groupby('Accident_Severity')['Number_of_Casualties'].mean()

Accident_Severity
Fatal      1.903129
Serious    1.467280
Slight     1.331402
Name: Number_of_Casualties, dtype: float64

<h2> <strong>Insight: </strong> <i>The average of accident severity in terms of fatal is 1.903129 (highest), serious is 1.467280, and slight is 1.331402 (lowest).</i> </h2>

<h1>10. How do accident severities vary across different light conditions?</h1>

In [41]:
ukroadaccident.groupby(['Accident_Severity', 'Light_Conditions']).size()

Accident_Severity  Light_Conditions           
Fatal              Darkness - lighting unknown        68
                   Darkness - lights lit            1860
                   Darkness - lights unlit            45
                   Darkness - no lighting           1612
                   Daylight                         5076
Serious            Darkness - lighting unknown       794
                   Darkness - lights lit           19130
                   Darkness - lights unlit           360
                   Darkness - no lighting           7174
                   Daylight                        60759
Slight             Darkness - lighting unknown      5622
                   Darkness - lights lit          108345
                   Darkness - lights unlit          2138
                   Darkness - no lighting          28651
                   Daylight                       419045
dtype: int64

<h2> <strong>Insight: </strong> <i>We can conclude that all the three types of accident severity happens the most during the daylight, while the three of them also happens the least during darkness - lights unlit. </i> </h2>

<h1>11. What is the distribution of accidents by road type and road surface condition?</h1>

In [43]:
ukroadaccident.groupby(['Road_Type', 'Road_Surface_Conditions']).size()

Road_Type           Road_Surface_Conditions
Dual carriageway    Dry                         66272
                    Flood over 3cm. deep          302
                    Frost or ice                 2408
                    Snow                          909
                    Wet or damp                 29533
One way street      Dry                         10100
                    Flood over 3cm. deep            5
                    Frost or ice                  183
                    Snow                           76
                    Wet or damp                  3195
Roundabout          Dry                         30784
                    Flood over 3cm. deep           22
                    Frost or ice                  745
                    Snow                          232
                    Wet or damp                 12209
Single carriageway  Dry                        336671
                    Flood over 3cm. deep          679
                    Frost or ice      

<h2> <strong>Insight: </strong> <i>The result shows that all of the road types had the same highest number of accidents in terms of road surface conditions which is dry, they also had the same least number of accidents which is flood over 3cm. deep (road surface condition). </i> </h2>

<h1>12. What type of area does accidents happen the most?</h1>

In [44]:
ukroadaccident.groupby('Urban_or_Rural_Area').size()

Urban_or_Rural_Area
Rural          238990
Unallocated        11
Urban          421678
dtype: int64

<h2> <strong>Insight: </strong> <i>According to the result, accidents usually happen at urban areas.</i> </h2>

<h1>13. What month does accidents usually happens?</h1>

In [46]:
ukroadaccident.groupby('Month').size()

Month
April        53656
August       54721
December     50216
February     49042
January      57226
July         55937
June         56259
March        53802
May          57038
November     58687
October      57946
September    56149
dtype: int64

<h2> <strong>Insight: </strong> <i>The result shows that accidents usually happen during the month of November.</i> </h2>

<h1>14. What is the distribution of accidents by accident severity and vehicle type? Look for the most type of vehicle.</h1>

In [56]:
ukroadaccident.groupby(['Accident_Severity','Vehicle_Type']).size()

Accident_Severity  Vehicle_Type                         
Fatal              Agricultural vehicle                         21
                   Bus or coach (17 or more pass seats)        325
                   Car                                        6577
                   Data missing or out of range                  0
                   Goods 7.5 tonnes mgw and over               216
                   Goods over 3.5t. and under 7.5t              67
                   Minibus (8 - 16 passenger seats)             29
                   Motorcycle 125cc and under                  189
                   Motorcycle 50cc and under                    95
                   Motorcycle over 125cc and up to 500cc       105
                   Motorcycle over 500cc                       339
                   Other vehicle                                70
                   Pedal cycle                                   6
                   Ridden horse                                  0
     

<h2> <strong>Insight: </strong> <i>According to the result, car is the vehicle type that got the highest accident of all the three types of accident severity.</i> </h2>

<h1>~Correlation~</h1>

In [61]:
ukroadaccident.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660679 entries, 0 to 660678
Data columns (total 16 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   Index                    660679 non-null  category      
 1   Accident_Severity        660679 non-null  category      
 2   Accident Date            660679 non-null  datetime64[ns]
 3   Latitude                 660679 non-null  float64       
 4   Light_Conditions         660679 non-null  category      
 5   District Area            660679 non-null  category      
 6   Longitude                660679 non-null  float64       
 7   Number_of_Casualties     660679 non-null  int64         
 8   Number_of_Vehicles       660679 non-null  int64         
 9   Road_Surface_Conditions  660679 non-null  category      
 10  Road_Type                660679 non-null  category      
 11  Urban_or_Rural_Area      660679 non-null  category      
 12  Weather_Conditio

<h1>15. Is there a correlation between the number of casualties and number of vehicles?</h1>

In [58]:
ukroadaccident['Number_of_Casualties'].corr(ukroadaccident['Number_of_Vehicles'])

np.float64(0.2288888612692756)

<h2> <strong>Insight: </strong> <i>The result shows that number of casualties and number of vehicles has no correlation.</i> </h2>

<h1>16. Is there a correlation betweem latitude number of casualties?</h1>

In [63]:
ukroadaccident['Latitude'].corr(ukroadaccident['Number_of_Casualties'])

np.float64(0.032200686625906395)

<h2> <strong>Insight: </strong> <i>Based on the result, there is no correlation between laltitude and number of casualties.</i> </h2>

<h1>17. Is there a correlation between longitude and number of casualties?</h1>

In [64]:
ukroadaccident['Longitude'].corr(ukroadaccident['Number_of_Casualties'])

np.float64(-0.0404056457884545)

<h2> <strong>Insight: </strong> <i>The result shows that there is no correlation between longitude and number of casualties.</i> </h2>

<h1>18. Is there a significant difference between accident severity and number of vehicles?</h1>

In [65]:
ukroadaccident['Accident_Severity'].unique()

['Serious', 'Slight', 'Fatal']
Categories (3, object): ['Fatal', 'Serious', 'Slight']

In [67]:
vehiserious = ukroadaccident[ukroadaccident['Accident_Severity'] == 'Serious']['Number_of_Vehicles']
vehislight = ukroadaccident[ukroadaccident['Accident_Severity'] == 'Slight']['Number_of_Vehicles']
vehifatal = ukroadaccident[ukroadaccident['Accident_Severity'] == 'Fatal']['Number_of_Vehicles']

In [68]:
result, pvalue = f_oneway(vehiserious, vehislight, vehifatal)
pvalue

np.float64(0.0)

<h2> <strong>Insight: </strong> <i>The result shows that there is extremely significance between the accident severity and number of vehicles, it shows that they are very related to each other.</i> </h2>

<h1>19. Is there a significant difference between the area and number of casualties?</h1>

In [69]:
ukroadaccident['Urban_or_Rural_Area'].unique()

['Urban', 'Rural', 'Unallocated']
Categories (3, object): ['Rural', 'Unallocated', 'Urban']

In [70]:
urbancasualty = ukroadaccident[ukroadaccident['Urban_or_Rural_Area'] == 'Urban']['Number_of_Casualties']
ruralcasualty = ukroadaccident[ukroadaccident['Urban_or_Rural_Area'] == 'Rural']['Number_of_Casualties']
unallocatedcasualty = ukroadaccident[ukroadaccident['Urban_or_Rural_Area'] == 'Unallocated']['Number_of_Casualties']

In [72]:
result, pvalue = f_oneway(urbancasualty, ruralcasualty, unallocatedcasualty)
pvalue

np.float64(0.0)

<h2> <strong>Insight: </strong> <i>According to the result, there is a significant difference between the areas and the number of casualties.</i> </h2>

<h1>20. Is there a significant difference between light conditions and latitude?</h1>

In [73]:
ukroadaccident['Light_Conditions'].unique()

['Darkness - lights lit', 'Daylight', 'Darkness - lighting unknown', 'Darkness - lights unlit', 'Darkness - no lighting']
Categories (5, object): ['Darkness - lighting unknown', 'Darkness - lights lit', 'Darkness - lights unlit', 'Darkness - no lighting', 'Daylight']

In [77]:
lighta = ukroadaccident[ukroadaccident['Light_Conditions'] == 'Darkness - lights lit']['Latitude']
lightb = ukroadaccident[ukroadaccident['Light_Conditions'] == 'Daylight']['Latitude']
lightc = ukroadaccident[ukroadaccident['Light_Conditions'] == 'Darkness - lighting unknown']['Latitude']
lightd = ukroadaccident[ukroadaccident['Light_Conditions'] == 'Darkness - lights unlit']['Latitude']
lighte = ukroadaccident[ukroadaccident['Light_Conditions'] == 'Darkness - no lighting']['Latitude']

In [78]:
result, pvalue = f_oneway(lighta, lightb, lightc, lightd, lighte)
pvalue

np.float64(1.224998791423201e-27)

<h2> <strong>Insight: </strong> <i>The result shows that the difference between light conditions and latitude is very significant, mean the light conditions has an extreme effect on the latitude.</i> </h2>