In [3]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [34]:
# Load the raw accident records. A forward-slash relative path is used because
# it resolves on Windows, macOS and Linux, whereas a backslash separator only
# works on Windows.
accident_data = pd.read_csv('datasets/accident_data.csv')

In [47]:
# Preview the loaded frame; the rendered table is truncated to its first and last rows.
accident_data

Unnamed: 0,Index,Accident_Severity,Accident Date,Latitude,Light_Conditions,District Area,Longitude,Number_of_Casualties,Number_of_Vehicles,Road_Surface_Conditions,Road_Type,Urban_or_Rural_Area,Weather_Conditions,Vehicle_Type
0,200701BS64157,Serious,05/06/2019,51.506187,Darkness - lights lit,Kensington and Chelsea,-0.209082,1,2,Dry,Single carriageway,Urban,Fine no high winds,Car
1,200701BS65737,Serious,02/07/2019,51.495029,Daylight,Kensington and Chelsea,-0.173647,1,2,Wet or damp,Single carriageway,Urban,Raining no high winds,Car
2,200701BS66127,Serious,26/08/2019,51.517715,Darkness - lighting unknown,Kensington and Chelsea,-0.210215,1,3,Dry,Single carriageway,Urban,Fine no high winds,Taxi/Private hire car
3,200701BS66128,Serious,16/08/2019,51.495478,Daylight,Kensington and Chelsea,-0.202731,1,4,Dry,Single carriageway,Urban,Fine no high winds,Bus or coach (17 or more pass seats)
4,200701BS66837,Slight,03/09/2019,51.488576,Darkness - lights lit,Kensington and Chelsea,-0.192487,1,2,Dry,Single carriageway,Urban,Fine no high winds,Other vehicle
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
660674,201091NM01760,Slight,18/02/2022,57.374005,Daylight,Highland,-3.467828,2,1,Dry,Single carriageway,Rural,Fine no high winds,Car
660675,201091NM01881,Slight,21/02/2022,57.232273,Darkness - no lighting,Highland,-3.809281,1,1,Frost or ice,Single carriageway,Rural,Fine no high winds,Car
660676,201091NM01935,Slight,23/02/2022,57.585044,Daylight,Highland,-3.862727,1,3,Frost or ice,Single carriageway,Rural,Fine no high winds,Car
660677,201091NM01964,Serious,23/02/2022,57.214898,Darkness - no lighting,Highland,-3.823997,1,2,Wet or damp,Single carriageway,Rural,Fine no high winds,Motorcycle over 500cc


In [55]:
# Inspect the Road_Type column on its own.
accident_data['Road_Type']

0         Single carriageway
1         Single carriageway
2         Single carriageway
3         Single carriageway
4         Single carriageway
                 ...        
660674    Single carriageway
660675    Single carriageway
660676    Single carriageway
660677    Single carriageway
660678      Dual carriageway
Name: Road_Type, Length: 660679, dtype: object

<h1>Fill In the Null Values</h1>

In [35]:
# Count the missing values in each column.
accident_data.isna().sum()

Index                          0
Accident_Severity              0
Accident Date                  0
Latitude                      25
Light_Conditions               0
District Area                  0
Longitude                     26
Number_of_Casualties           0
Number_of_Vehicles             0
Road_Surface_Conditions      726
Road_Type                   4520
Urban_or_Rural_Area           15
Weather_Conditions         14128
Vehicle_Type                   0
dtype: int64

In [36]:
# Numeric coordinates: replace missing values with the column mean.
for coordinate_column in ('Latitude', 'Longitude'):
    column_mean = accident_data[coordinate_column].mean(0)
    accident_data[coordinate_column] = accident_data[coordinate_column].fillna(column_mean)

# Categorical columns: replace missing values with the most frequent category.
categorical_columns = (
    'Road_Surface_Conditions',
    'Road_Type',
    'Urban_or_Rural_Area',
    'Weather_Conditions',
)
for categorical_column in categorical_columns:
    most_frequent = accident_data[categorical_column].mode()[0]
    accident_data[categorical_column] = accident_data[categorical_column].fillna(most_frequent)

In [37]:
# Re-check the per-column missing-value counts.
accident_data.isna().sum()

Index                      0
Accident_Severity          0
Accident Date              0
Latitude                   0
Light_Conditions           0
District Area              0
Longitude                  0
Number_of_Casualties       0
Number_of_Vehicles         0
Road_Surface_Conditions    0
Road_Type                  0
Urban_or_Rural_Area        0
Weather_Conditions         0
Vehicle_Type               0
dtype: int64

<h1>Questions:</h1>

<h2>1. Is there a difference in the number of accidents occurring during daylight versus darkness?</h2>

In [39]:
# Number of accidents recorded under each light condition, most frequent first.
light_condition_counts = accident_data["Light_Conditions"].value_counts()
light_condition_counts

Light_Conditions
Daylight                       484880
Darkness - lights lit          129335
Darkness - no lighting          37437
Darkness - lighting unknown      6484
Darkness - lights unlit          2543
Name: count, dtype: int64

<h3>Insight: Daylight dominates accident counts, though darkness still accounts for a significant share tied to reduced visibility.</h3>

<h2>2. Between urban and rural, in which area do accidents most occur?</h2>

In [38]:
# Accident counts split by area type.
area_counts = accident_data["Urban_or_Rural_Area"].value_counts()
area_counts

Urban_or_Rural_Area
Urban          421678
Rural          238990
Unallocated        11
Name: count, dtype: int64

<h3>Insight: Urban areas typically see more accidents due to higher traffic density compared to rural roads.</h3>

<h2>3. What are the accident locations' average coordinates (latitude and longitude)?</h2>

In [40]:
# Mean of each coordinate column.
coordinate_columns = ["Latitude", "Longitude"]
accident_data[coordinate_columns].mean()

Latitude     52.553866
Longitude    -1.431210
dtype: float64

<h3>Insight: The mean coordinates (about 52.55° N, 1.43° W) fall in the English Midlands, giving a rough “geographic midpoint” of accident locations rather than an actual accident hotspot.</h3>

<h2>4. Which kind of vehicle type has the highest distribution in accidents?</h2>

In [41]:
# value_counts() orders by frequency, so the first row is the most common vehicle type.
vehicle_type_counts = accident_data["Vehicle_Type"].value_counts()
vehicle_type_counts.head(1)

Vehicle_Type
Car    497992
Name: count, dtype: int64

<h3>Insight: Cars dominate accident involvement, reflecting their overwhelming presence on the road.</h3>

<h2>5. Among the different road conditions, in which of them does an accident usually happen?</h2>

In [42]:
# Keep only the most frequent road surface condition.
surface_counts = accident_data["Road_Surface_Conditions"].value_counts()
surface_counts.head(1)

Road_Surface_Conditions
Dry    448547
Name: count, dtype: int64

<h3>Insight: Dry roads account for the majority of crashes, showing that accidents are not always weather-related.</h3>

<h2>6. What are the oldest and most recent accident dates in the dataset?</h2>

In [50]:
# "Accident Date" is stored as dd/mm/yyyy text, so min()/max() on the raw column
# would compare strings character by character instead of chronologically.
# Parse to real datetimes first, then format the bounds back to dd/mm/yyyy.
# NOTE(review): assumes every value follows dd/mm/yyyy like the preview rows;
# pd.to_datetime raises if a value does not match the format.
accident_dates = pd.to_datetime(accident_data["Accident Date"], format="%d/%m/%Y")
accident_dates.min().strftime("%d/%m/%Y"), accident_dates.max().strftime("%d/%m/%Y")

('01/01/2019', '31/12/2022')

<h3>Insight: The dataset spans multiple years, helping identify long-term accident patterns.</h3>

<h2>7. Are accidents more frequent on dry roads compared to damp or wet roads?</h2>

In [57]:
# Full breakdown of accident counts by road surface condition.
road_surface_counts = accident_data["Road_Surface_Conditions"].value_counts()
road_surface_counts

Road_Surface_Conditions
Dry                     448547
Wet or damp             186708
Frost or ice             18517
Snow                      5890
Flood over 3cm. deep      1017
Name: count, dtype: int64

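<h3>Insight: Yes—dry roads account for about 68% of accidents (448,547), versus about 28% on wet or damp roads (186,708). These are raw counts, so they mostly reflect how often roads are dry; wet roads may still carry higher risk per mile traveled, which this dataset cannot measure.</h3>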

<h2>8. Which light conditions has the highest number of casualties?</h2>

In [58]:
# Total casualties under each light condition.
casualties_by_light = accident_data.groupby("Light_Conditions")["Number_of_Casualties"]
casualties_by_light.sum()

Light_Conditions
Darkness - lighting unknown      8471
Darkness - lights lit          177108
Darkness - lights unlit          3385
Darkness - no lighting          57715
Daylight                       649889
Name: Number_of_Casualties, dtype: int64

<h3>Insight: Daylight produces more total casualties simply due to traffic volume.</h3>

<h2>9. On average, how many vehicles are typically involved in road accidents, and what does this suggest about crash dynamics?</h2>

In [59]:
# Mean number of vehicles per accident record.
vehicles_per_accident = accident_data["Number_of_Vehicles"]
vehicles_per_accident.mean()

np.float64(1.8312554205597575)

<h3>Insight: Most crashes involve a small number of vehicles, typically two. This indicates that multi-vehicle pile-ups are relatively rare compared to simpler collisions.</h3>

<h2>10. Which accident severity occurs most often, and how does this reflect the overall risk profile of the dataset?</h2>

In [60]:
# Number of accidents at each severity level, most frequent first.
severity_counts = accident_data["Accident_Severity"].value_counts()
severity_counts

Accident_Severity
Slight     563801
Serious     88217
Fatal        8661
Name: count, dtype: int64

<h3>Insight: Slight accidents dominate the dataset, followed by serious ones. Fatal crashes are far fewer, suggesting that while accidents are frequent, most are not life-threatening.</h3>

<h2>11. Under which weather conditions do accidents most frequently occur, and what might this say about driver behavior?</h2>

In [61]:
# Show the modal weather condition alongside the five most frequent conditions.
weather = accident_data["Weather_Conditions"]
weather.mode(), weather.value_counts().head()

(0    Fine no high winds
 Name: Weather_Conditions, dtype: object,
 Weather_Conditions
 Fine no high winds       535013
 Raining no high winds     79696
 Other                     17150
 Raining + high winds       9615
 Fine + high winds          8554
 Name: count, dtype: int64)

<h3>Insight: Clear weather conditions are often linked with the highest number of crashes. This may reflect higher road usage and driver overconfidence when conditions seem safe.</h3>

<h2>12. How does the average number of casualties vary across different severity levels of accidents?</h2>

In [63]:
# Average casualties per accident within each severity level.
casualties_by_severity = accident_data.groupby("Accident_Severity")["Number_of_Casualties"]
casualties_by_severity.mean()

Accident_Severity
Fatal      1.903129
Serious    1.467280
Slight     1.331402
Name: Number_of_Casualties, dtype: float64

<h3>Insight: Fatal and serious crashes have a noticeably higher average casualty count than slight accidents. This indicates severity classification aligns well with casualty outcomes.</h3>

<h2>13. How does the number of vehicles involved typically change across slight, serious, and fatal accidents?</h2>

In [67]:
# Average number of vehicles per accident within each severity level.
vehicles_by_severity = accident_data.groupby("Accident_Severity")["Number_of_Vehicles"]
vehicles_by_severity.mean()

Accident_Severity
Fatal      1.786976
Serious    1.678327
Slight     1.855864
Name: Number_of_Vehicles, dtype: float64

<h3>Insight: Slight accidents involve the most vehicles on average (about 1.86), ahead of fatal (about 1.79) and serious (about 1.68) accidents. This suggests that a higher vehicle count does not by itself make a crash more severe.</h3>

<h2>14. What relationship exists between the number of vehicles and the number of casualties in an accident?</h2>

In [68]:
# Correlation between vehicles involved and casualties (Series.corr defaults to Pearson).
vehicle_counts = accident_data["Number_of_Vehicles"]
casualty_counts = accident_data["Number_of_Casualties"]
vehicle_counts.corr(casualty_counts)

np.float64(0.22888886126927635)

<h3>Insight: The correlation between vehicles and casualties is positive but weak (r ≈ 0.23), meaning more vehicles tend to come with somewhat more casualties. The relationship is far from perfect, implying other factors matter too.</h3>

<h2>15. What is the mode of vehicle types involved in accidents overall?</h2>

In [70]:
# Most frequent vehicle type (mode() returns a Series because ties are possible).
vehicle_types = accident_data["Vehicle_Type"]
vehicle_types.mode()

0    Car
Name: Vehicle_Type, dtype: object

<h3>Insight: Cars usually dominate accident involvement compared to other vehicle types. This reflects their prevalence on the roads.</h3>

<h2>16. What is the average number of casualties per accident?</h2>

In [71]:
# Mean number of casualties per accident record.
casualties_per_accident = accident_data["Number_of_Casualties"]
casualties_per_accident.mean()

np.float64(1.357040257068864)

<h3>Insight: Most accidents involve only 1–2 casualties, with larger pileups being rare.</h3>

<h2>17. Which road surface condition has the highest accident frequency?</h2>

In [72]:
# Road surface condition with the highest accident count.
surface_frequency = accident_data["Road_Surface_Conditions"].value_counts()
surface_frequency.head(1)

Road_Surface_Conditions
Dry    448547
Name: count, dtype: int64

<h3>Insight: Dry roads again appear most, underscoring that volume matters more than adverse conditions.</h3>

<h2>18. Which combination of surface and light conditions has the highest mean casualties?</h2>

In [75]:
# Mean casualties for every (road surface, light condition) pair.
condition_keys = ["Road_Surface_Conditions", "Light_Conditions"]
accident_data.groupby(condition_keys)["Number_of_Casualties"].mean()

Road_Surface_Conditions  Light_Conditions           
Dry                      Darkness - lighting unknown    1.297072
                         Darkness - lights lit          1.352732
                         Darkness - lights unlit        1.301622
                         Darkness - no lighting         1.567877
                         Daylight                       1.321303
Flood over 3cm. deep     Darkness - lighting unknown    1.466667
                         Darkness - lights lit          1.525000
                         Darkness - lights unlit        3.500000
                         Darkness - no lighting         1.411321
                         Daylight                       1.513089
Frost or ice             Darkness - lighting unknown    1.281124
                         Darkness - lights lit          1.366371
                         Darkness - lights unlit        1.353846
                         Darkness - no lighting         1.408670
                         Daylight    

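<h3>Insight: Among the combinations visible in the output, flooded roads (over 3 cm deep) in darkness with unlit street lights have the highest mean casualties (3.5), though this pairing probably rests on very few accidents. Among the more common combinations, dry roads in darkness with no lighting (about 1.57) stand out more than frost or ice at night (about 1.28–1.41).</h3>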

<h2>19. Which weather-light condition pairing leads to the highest accident counts?</h2>

In [77]:
# Accident count for every (weather, light condition) pair.
pairing_keys = ["Weather_Conditions", "Light_Conditions"]
accident_data.groupby(pairing_keys).size()

Weather_Conditions     Light_Conditions           
Fine + high winds      Darkness - lighting unknown        59
                       Darkness - lights lit            1852
                       Darkness - lights unlit            51
                       Darkness - no lighting            796
                       Daylight                         5796
Fine no high winds     Darkness - lighting unknown      5333
                       Darkness - lights lit           93958
                       Darkness - lights unlit          1745
                       Darkness - no lighting          25251
                       Daylight                       408726
Fog or mist            Darkness - lighting unknown        65
                       Darkness - lights lit             921
                       Darkness - lights unlit            37
                       Darkness - no lighting            862
                       Daylight                         1643
Other                  Darkness - 

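<h3>Insight: Fine weather without high winds in daylight is by far the most common pairing (408,726 accidents), which reflects how often those conditions occur rather than how risky they are. Dark and rainy conditions may still amplify risk per trip, but they do not produce the highest counts.</h3>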

<h2>20. Do dual carriageways or single carriageways record higher casualty averages?</h2>

In [79]:
# Average casualties per accident for each road type.
casualties_by_road_type = accident_data.groupby("Road_Type")["Number_of_Casualties"]
casualties_by_road_type.mean()

Road_Type
Dual carriageway      1.477279
One way street        1.192713
Roundabout            1.274891
Single carriageway    1.343788
Slip road             1.423661
Name: Number_of_Casualties, dtype: float64

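<h3>Insight: Single carriageways may dominate accident counts, but dual carriageways record a higher average number of casualties per accident (about 1.48 versus about 1.34). One-way streets have the lowest average (about 1.19).</h3>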

In [78]:
# Average number of vehicles per accident under each weather condition.
vehicles_by_weather = accident_data.groupby("Weather_Conditions")["Number_of_Vehicles"]
vehicles_by_weather.mean()

Weather_Conditions
Fine + high winds        1.779401
Fine no high winds       1.835907
Fog or mist              1.807823
Other                    1.777668
Raining + high winds     1.777951
Raining no high winds    1.831610
Snowing + high winds     1.737853
Snowing no high winds    1.754889
Name: Number_of_Vehicles, dtype: float64

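<h3>Insight: The average number of vehicles per accident varies little with weather (about 1.74–1.84). Snowy conditions show the lowest averages, so adverse weather may involve slightly fewer vehicles per crash; this output does not measure how severe the damage is.</h3>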