<h1>UK Road Accident Analysis</h1>
<h3>Analyst: Jomarie Roperez</h3>

<h1><strong>STEP 1:</strong> Import necessary libraries</h1>

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import f_oneway

import matplotlib.pyplot as plt
import seaborn as sns

<h1><strong>STEP 1.1:</strong>Load dataset</h1>

In [2]:
uk_accident_data = pd.read_csv('datasets\\accident_data.csv')

<h1><strong>STEP 1.2:</strong>Create a copy for EDA (Preserve the original dataset)</h1>

In [3]:
eda_data = uk_accident_data.copy()


In [4]:
eda_data

Unnamed: 0,Index,Accident_Severity,Accident Date,Latitude,Light_Conditions,District Area,Longitude,Number_of_Casualties,Number_of_Vehicles,Road_Surface_Conditions,Road_Type,Urban_or_Rural_Area,Weather_Conditions,Vehicle_Type
0,200701BS64157,Serious,5/6/2019,51.506187,Darkness - lights lit,Kensington and Chelsea,-0.209082,1,2,Dry,Single carriageway,Urban,Fine no high winds,Car
1,200701BS65737,Serious,2/7/2019,51.495029,Daylight,Kensington and Chelsea,-0.173647,1,2,Wet or damp,Single carriageway,Urban,Raining no high winds,Car
2,200701BS66127,Serious,26-08-2019,51.517715,Darkness - lighting unknown,Kensington and Chelsea,-0.210215,1,3,Dry,,Urban,,Taxi/Private hire car
3,200701BS66128,Serious,16-08-2019,51.495478,Daylight,Kensington and Chelsea,-0.202731,1,4,Dry,Single carriageway,Urban,Fine no high winds,Bus or coach (17 or more pass seats)
4,200701BS66837,Slight,3/9/2019,51.488576,Darkness - lights lit,Kensington and Chelsea,-0.192487,1,2,Dry,,Urban,,Other vehicle
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
660674,201091NM01760,Slight,18-02-2022,57.374005,Daylight,Highland,-3.467828,2,1,Dry,Single carriageway,Rural,Fine no high winds,Car
660675,201091NM01881,Slight,21-02-2022,57.232273,Darkness - no lighting,Highland,-3.809281,1,1,Frost or ice,Single carriageway,Rural,Fine no high winds,Car
660676,201091NM01935,Slight,23-02-2022,57.585044,Daylight,Highland,-3.862727,1,3,Frost or ice,Single carriageway,Rural,Fine no high winds,Car
660677,201091NM01964,Serious,23-02-2022,57.214898,Darkness - no lighting,Highland,-3.823997,1,2,Wet or damp,Single carriageway,Rural,Fine no high winds,Motorcycle over 500cc


In [5]:
eda_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660679 entries, 0 to 660678
Data columns (total 14 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   Index                    660679 non-null  object 
 1   Accident_Severity        660679 non-null  object 
 2   Accident Date            660679 non-null  object 
 3   Latitude                 660654 non-null  float64
 4   Light_Conditions         660679 non-null  object 
 5   District Area            660679 non-null  object 
 6   Longitude                660653 non-null  float64
 7   Number_of_Casualties     660679 non-null  int64  
 8   Number_of_Vehicles       660679 non-null  int64  
 9   Road_Surface_Conditions  659953 non-null  object 
 10  Road_Type                656159 non-null  object 
 11  Urban_or_Rural_Area      660664 non-null  object 
 12  Weather_Conditions       646551 non-null  object 
 13  Vehicle_Type             660679 non-null  object 
dtypes: f

In [6]:
eda_data.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Latitude,660654.0,52.553866,1.406922,49.91443,51.49069,52.315641,53.453452,60.757544
Longitude,660653.0,-1.43121,1.38333,-7.516225,-2.332291,-1.411667,-0.232869,1.76201
Number_of_Casualties,660679.0,1.35704,0.824847,1.0,1.0,1.0,1.0,68.0
Number_of_Vehicles,660679.0,1.831255,0.715269,1.0,1.0,2.0,2.0,32.0


<h1><strong>STEP 2:</strong> Check for Missing Values</h1>

In [7]:
eda_data.isnull().sum()

Index                          0
Accident_Severity              0
Accident Date                  0
Latitude                      25
Light_Conditions               0
District Area                  0
Longitude                     26
Number_of_Casualties           0
Number_of_Vehicles             0
Road_Surface_Conditions      726
Road_Type                   4520
Urban_or_Rural_Area           15
Weather_Conditions         14128
Vehicle_Type                   0
dtype: int64

<h1><strong>STEP 3:</strong>Handle Missing Values(still in Progress)</h1>

In [8]:
eda_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660679 entries, 0 to 660678
Data columns (total 14 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   Index                    660679 non-null  object 
 1   Accident_Severity        660679 non-null  object 
 2   Accident Date            660679 non-null  object 
 3   Latitude                 660654 non-null  float64
 4   Light_Conditions         660679 non-null  object 
 5   District Area            660679 non-null  object 
 6   Longitude                660653 non-null  float64
 7   Number_of_Casualties     660679 non-null  int64  
 8   Number_of_Vehicles       660679 non-null  int64  
 9   Road_Surface_Conditions  659953 non-null  object 
 10  Road_Type                656159 non-null  object 
 11  Urban_or_Rural_Area      660664 non-null  object 
 12  Weather_Conditions       646551 non-null  object 
 13  Vehicle_Type             660679 non-null  object 
dtypes: f

<h1><strong>STEP 4:</strong> Exploring Categorical Columns</h1>

In [9]:
# for object type
categorical_columns = eda_data.select_dtypes(include=['object']).columns
categorical_columns


Index(['Index', 'Accident_Severity', 'Accident Date', 'Light_Conditions',
       'District Area', 'Road_Surface_Conditions', 'Road_Type',
       'Urban_or_Rural_Area', 'Weather_Conditions', 'Vehicle_Type'],
      dtype='object')

In [10]:
# unique values for each categorical column
for col in categorical_columns:
    print(f"Unique values in '{col}':\n", eda_data[col].unique(), "\n")


Unique values in 'Index':
 ['200701BS64157' '200701BS65737' '200701BS66127' ... '201091NM01935'
 '201091NM01964' '201091NM02142'] 

Unique values in 'Accident_Severity':
 ['Serious' 'Slight' 'Fatal'] 

Unique values in 'Accident Date':
 ['5/6/2019' '2/7/2019' '26-08-2019' ... '26-12-2022' '25-07-2022'
 '25-12-2022'] 

Unique values in 'Light_Conditions':
 ['Darkness - lights lit' 'Daylight' 'Darkness - lighting unknown'
 'Darkness - lights unlit' 'Darkness - no lighting'] 

Unique values in 'District Area':
 ['Kensington and Chelsea' 'Westminster' 'Richmond upon Thames'
 'Hammersmith and Fulham' 'Hounslow' 'Tower Hamlets' 'City of London'
 'Southwark' 'Camden' 'Hackney' 'Islington' 'Barnet' 'Brent' 'Haringey'
 'Merton' 'Ealing' 'Enfield' 'Greenwich' 'Newham'
 'London Airport (Heathrow)' 'Hillingdon' 'Waltham Forest' 'Redbridge'
 'Barking and Dagenham' 'Bromley' 'Havering' 'Croydon' 'Lambeth'
 'Wandsworth' 'Sutton' 'Bexley' 'Lewisham' 'Harrow' 'Kingston upon Thames'
 'Barrow-in-Furness'

In [11]:
# Count unique values for each categorical column
for col in categorical_columns:
    print(f"Value counts for '{col}':\n", eda_data[col].value_counts(), "\n")


Value counts for 'Index':
 Index
2.01E+12         239478
2.01E+282             4
2.01E+86              4
2.01E+98              4
2.01E+197             4
                  ...  
200804BC08155         1
200804BC08153         1
200804BC08152         1
200804BC08151         1
201091NM02142         1
Name: count, Length: 421020, dtype: int64 

Value counts for 'Accident_Severity':
 Accident_Severity
Slight     563801
Serious     88217
Fatal        8661
Name: count, dtype: int64 

Value counts for 'Accident Date':
 Accident Date
30-11-2019    704
31-01-2019    697
13-07-2019    692
13-11-2021    692
14-08-2019    688
             ... 
30-12-2022    171
25-12-2019    157
25-12-2022    145
10/1/2022     123
25-12-2020    118
Name: count, Length: 1461, dtype: int64 

Value counts for 'Light_Conditions':
 Light_Conditions
Daylight                       484880
Darkness - lights lit          129335
Darkness - no lighting          37437
Darkness - lighting unknown      6484
Darkness - lights unlit 

<h1><strong>STEP 5:</strong>Data Type Conversion</h1>

In [12]:
eda_data['Accident Date'] = pd.to_datetime(
    eda_data['Accident Date'], 
    dayfirst=True, 
    errors='coerce'
    )

<h1><strong>STEP 6:</strong>Categorization of columns</h1>

In [13]:
category_columns = [
    "Index",
    "Accident_Severity",
    "Light_Conditions",
    "Weather_Conditions",
    "Road_Surface_Conditions",
    "Road_Type",
    "Urban_or_Rural_Area",
    "Vehicle_Type"
]
eda_data[category_columns] = eda_data[category_columns].astype('category')


In [14]:
eda_data.dtypes

Index                            category
Accident_Severity                category
Accident Date              datetime64[ns]
Latitude                          float64
Light_Conditions                 category
District Area                      object
Longitude                         float64
Number_of_Casualties                int64
Number_of_Vehicles                  int64
Road_Surface_Conditions          category
Road_Type                        category
Urban_or_Rural_Area              category
Weather_Conditions               category
Vehicle_Type                     category
dtype: object

<h1><strong>STEP 7:</strong>Summary Statistics</h1>

In [15]:
eda_data.describe().T

Unnamed: 0,count,mean,min,25%,50%,75%,max,std
Accident Date,265007.0,2020-11-23 16:22:04.873683456,2019-01-01 00:00:00,2019-12-03 00:00:00,2020-11-07 00:00:00,2021-11-07 00:00:00,2022-12-12 00:00:00,
Latitude,660654.0,52.553866,49.91443,51.49069,52.315641,53.453452,60.757544,1.406922
Longitude,660653.0,-1.43121,-7.516225,-2.332291,-1.411667,-0.232869,1.76201,1.38333
Number_of_Casualties,660679.0,1.35704,1.0,1.0,1.0,1.0,68.0,0.824847
Number_of_Vehicles,660679.0,1.831255,1.0,1.0,2.0,2.0,32.0,0.715269


<h1><strong>Insights/Findings:</strong></h1>

<h1>Question #1: What is the most common accident severity?</h1>


In [16]:
# Accident_Severity = eda_data['Accident_Severity'].value_counts()

In [17]:
# Accident_Severity

<h3>Insight #1:</h3>
Most accidents fall under the "Slight" category (563,801 cases), making up the majority. "Serious" accidents (88,217) are significantly lower, while "Fatal" accidents (8,661) are the least common.

This suggests that while accidents are frequent, most are not severe.

<h1>Question #2: How do accidents vary based on light conditions?</h1>


In [18]:
# Light_Cond_ACDs = eda_data['Light_Conditions'].value_counts()

In [19]:
# Light_Cond_ACDs

<h3><strong>Insight #2-5:</strong></h3>
<ol>
    <li>Most accidents occur in daylight (484,880 cases), likely because of higher traffic volume during the day.</li>
    <li>Darkness with streetlights on (129,335 cases) still accounts for a significant number of accidents, indicating that visibility at night remains a concern.</li>
    <li>Darkness with no lighting (37,437 cases) and unknown lighting (6,484 cases) show fewer accidents, but these might have a higher severity due to poor visibility.</li>
    <li>Darkness with streetlights unlit (2,543 cases) is the least common but could be a critical issue—possibly indicating infrastructure problems.</li>
</ol>






<h1>Question #3: How do accidents vary based on weather conditions?</h1>


In [20]:
# Weather_Cond_ACDs = eda_data['Weather_Conditions'].value_counts()


In [21]:
# Weather_Cond_ACDs

<h3><strong>Insight #6-10: </strong>Insights on Weather Conditions and Accidents:</h3>

<ol>
    <li><b>Most Accidents Happen in Good Weather (Fine, No High Winds) = 520,885 Cases (≈81%)</b>
      <ul>
        <li>Majority of accidents occur in clear weather, likely due to higher traffic volumes and a false sense of security.</li>
        <li>Suggests weather alone isn't the biggest factor—human behavior and road conditions may play a larger role.</li>
      </ul>
    </li>
    <li><b>Rain Increases Accident Risk = 89,311 Cases (≈14%)</b>
      <ul>
        <li>Raining (No High Winds): 79,696 accidents</li>
        <li>Raining + High Winds: 9,615 accidents</li>
        <li>Wet roads reduce traction, leading to longer stopping distances and increased accident risk.</li>
      </ul>
    </li>
    <li><b>High Winds Alone Are Less of a Factor = 8,554 Cases (≈1.3%)</b>
      <ul>
        <li>Fine + High Winds: 8,554 accidents</li>
        <li>Strong winds alone don’t cause many accidents unless combined with rain or snow.</li>
      </ul>
    </li>
    <li><b>Snow and Ice Pose Moderate Risks = 7,123 Cases (≈1.1%)</b>
      <ul>
        <li>Snowing (No High Winds): 6,238 accidents</li>
        <li>Snowing + High Winds: 885 accidents</li>
        <li>Fewer accidents happen in snow as drivers are more cautious, or fewer people drive in extreme conditions.</li>
      </ul>
    </li>
    <li><b>Fog and Mist Are Particularly Dangerous = 3,528 Cases (≈0.5%)</b>
      <ul>
        <li>Though fog-related accidents are lower, poor visibility can lead to high-severity crashes, especially on highways.</li>
      </ul>
    </li>
  </ol>



In [22]:
eda_data.columns


Index(['Index', 'Accident_Severity', 'Accident Date', 'Latitude',
       'Light_Conditions', 'District Area', 'Longitude',
       'Number_of_Casualties', 'Number_of_Vehicles', 'Road_Surface_Conditions',
       'Road_Type', 'Urban_or_Rural_Area', 'Weather_Conditions',
       'Vehicle_Type'],
      dtype='object')

<h1>Question #4: What is the average number of casualties per accident severity?</h1>


In [23]:
# avg_casualties_per_severity = eda_data.groupby("Accident_Severity")["Number_of_Casualties"].mean()

In [24]:
# avg_casualties_per_severity

<h3><strong>Insight #11-13:</strong> Severity and Casualty Rate</h3>
<ol>
  <li>Fatal accidents have the highest average casualties (1.90), meaning they tend to involve more people per incident.</li>
  <li>Serious accidents average 1.47 casualties, while slight accidents have the lowest at 1.33.</li>
  <li>The differences suggest that while slight accidents are more frequent, fatal accidents tend to be more severe in impact.</li>
</ol>


<h1>Question #5: How many accidents occurred for each combination of road type and accident severity?</h1>


In [25]:

# accidents_by_road_type_severity = eda_data.groupby(["Road_Type", "Accident_Severity"]).size()


In [26]:
# accidents_by_road_type_severity

<h3><strong>Insight #14-17:</strong> Road Type & Accident Severity</h3>
<ol>
  <li>Single carriageways have the highest number of accidents across all severities, with over 415,000 slight accidents alone.</li>
  <li>Dual carriageways follow, though they have significantly fewer accidents compared to single carriageways.</li>
  <li>Roundabouts and one-way streets show a lower number of severe accidents, likely due to lower speeds and controlled traffic flow.</li>
  <li>Slip roads have the least number of accidents overall, but their design may still pose specific risks.</li>
</ol>


In [27]:
# vehicles_by_weather = eda_data.groupby("Weather_Conditions")["Number_of_Vehicles"].sum()

In [28]:
# vehicles_by_weather

<h3><strong>Insights #18-22:</strong> Weather Conditions & Vehicle Involvement</h3>
<ol>
  <li>Accidents are most frequent in fine weather with no high winds, involving over 950,000 vehicles.</li>
  <li>Rain (no high winds) is the second most common condition, with around 146,000 vehicles involved.</li>
  <li>Fog, snow, and high winds contribute to a smaller percentage of accidents but may have higher risks per incident.</li>
  <li>The "Other" category suggests unclassified conditions, which might need further investigation.</li>
</ol>


<h1><strong>Question #6:</strong> Does accident severity vary based on road surface conditions and light conditions?</h1>

In [29]:
# eda_data['Accident_Severity'].dtype

In [30]:
# eda_data['Accident_Severity'].unique()

In [31]:
copy_eda_data = eda_data.copy()

In [32]:
# copy_eda_data['Accident_Severity'] = pd.to_numeric(copy_eda_data['Accident_Severity'], errors='coerce')


In [33]:
# copy_eda_data['Accident_Severity'].dtype

In [34]:
# eda_data['Accident_Severity'].unique()

In [35]:
# accident_severity_on_RSFs_and_LCs = eda_data.groupby(
#     ['Road_Surface_Conditions', 'Light_Conditions']
#     )['Accident_Severity'].size().unstack()


In [36]:
# accident_severity_on_RSFs_and_LCs

<h2><strong>Insights #23-27:</strong> Insigts on Road Surface Conditions & Light Conditions Impacting Accidents</h2>

<ol>
    <li><strong>Most Accidents Occur on Dry Roads and in Daylight</strong>  
        <ul>
            <li><strong>356,085 accidents</strong> happened on dry roads in daylight, the highest among all conditions.</li>
            <li>This suggests that accidents are more frequent when visibility is good and roads are clear, possibly due to <strong>higher traffic volumes and driver complacency</strong>.</li>
        </ul>
    </li>
    <li><strong>Wet or Damp Roads Increase Accident Risk in Poor Lighting</strong>  
        <ul>
            <li><strong>52,463 accidents</strong> happened on wet roads under "Darkness - lights lit" conditions.</li>
            <li>Wet roads combined with limited visibility increase braking distances, making accidents more likely.</li>
        </ul>
    </li>
    <li><strong>Frost, Ice, and Snow Lead to Fewer but Severe Accidents</strong>  
        <ul>
            <li>Only <strong>10,172 accidents</strong> on <strong>frosty roads in daylight</strong>, but their severity is often higher.</li>
            <li><strong>Snow-related accidents (1,587 in darkness, 3,415 in daylight)</strong> are low in number, likely due to <strong>fewer vehicles on the road and more cautious driving</strong>.</li>
        </ul>
    </li>
    <li><strong>Flooded Roads Have the Fewest Accidents</strong>  
        <ul>
            <li>Only <strong>573 accidents</strong> occurred on flooded roads in daylight.</li>
            <li>Drivers may avoid flooded areas, explaining the lower accident count.</li>
        </ul>
    </li>
    <li><strong>Darkness Without Lighting is Extremely Dangerous</strong>  
        <ul>
            <li><strong>15,619 accidents</strong> on dry roads in total darkness (no lighting).</li>
            <li><strong>17,424 accidents</strong> on wet roads in total darkness.</li>
            <li>Poor lighting makes hazards harder to see, increasing crash risk.</li>
        </ul>
    </li>
</ol>

<h2>Conclusion:</h2>
<p><strong>Accident severity does vary based on road and lighting conditions.</strong></p>
<ul>
    <li><strong>Dry roads in daylight</strong> see the most accidents, likely due to more traffic.</li>
    <li><strong>Wet and icy roads are dangerous in poor lighting</strong>, increasing accident risk.</li>
    <li><strong>Snow and floods lead to fewer but possibly more severe accidents.</strong></li>
    <li><strong>Dark, unlit roads are a major hazard</strong>, especially when combined with wet conditions.</li>
</ul>


<h1>Question #7 Do different vehicle types have higher accident severity under specific road conditions?</h1>

In [37]:
# accident_severity_on_VTs_onSpecific_RCs = eda_data.groupby(
#     ['Vehicle_Type', 'Road_Surface_Conditions']
#     )['Accident_Severity'].size().unstack()


In [38]:
# accident_severity_on_VTs_onSpecific_RCs

<h1><strong>Insights #28-32</strong> : Insights on Vehicle Types and Accident Severity Under Specific Road Conditions</h1>

<ol>
    <li><strong>Cars Have the Most Accidents Across All Road Conditions</strong>
        <ul>
            <li>337,311 accidents on dry roads and 140,764 on wet roads, the highest in all conditions.</li>
            <li>This is expected due to their high volume on the road.</li>
        </ul>
    </li>
    <li><strong>Motorcycles Are Highly Affected by Road Conditions</strong>
        <ul>
            <li>Motorcycles over 500cc: 7,233 accidents on wet roads, 689 on icy roads, the highest among motorcycles.</li>
            <li>125cc and under: 4,192 accidents on wet roads, showing high vulnerability in slippery conditions.</li>
            <li>Small motorcycles (50cc and under) also show significant risk on frost and ice (224 accidents).</li>
        </ul>
    </li>
    <li><strong>Buses and Coaches See More Accidents on Dry and Wet Roads</strong>
        <ul>
            <li>17,604 accidents on dry roads, 7,355 on wet roads.</li>
            <li>Higher accident numbers may be due to urban traffic congestion rather than severity.</li>
        </ul>
    </li>
    <li><strong>Heavy Goods Vehicles (HGVs) Have More Accidents on Dry & Wet Roads</strong>
        <ul>
            <li>Goods 7.5t+ (11,690 dry, 4,988 wet) and Goods 3.5t-7.5t (4,136 dry, 1,739 wet) suggest braking distances and stability in slippery conditions are factors.</li>
        </ul>
    </li>
    <li><strong>Floods and Ice Affect Larger Vehicles and Motorcycles</strong>
        <ul>
            <li>Motorcycles and HGVs show notable accident counts in frost/ice conditions, highlighting traction loss risks.</li>
            <li>Floods are rare but still dangerous, with HGVs (49 cases) and motorcycles (14 cases) affected the most.</li>
        </ul>
    </li>
</ol>

<h2>Conclusion:</h2>
<ul>
    <li>Accident severity varies by vehicle type and road conditions.</li>
    <li>Cars have the highest number of accidents, primarily due to their volume on the road.</li>
    <li>Motorcycles are highly vulnerable to wet, icy, and snowy conditions.</li>
    <li>Heavy goods vehicles and buses face braking and stability risks in wet and icy conditions.</li>
    <li>Floods are relatively rare but still lead to accidents, especially for HGVs and motorcycles.</li>
</ul>

<h1><strong>Questions #8:</strong> How does accident severity change with weather and urban/rural areas?</h1>

In [39]:
# accident_severity_on_WCs_onSpecific_Urban_or_Rural =  eda_data.groupby(['Weather_Conditions', 'Urban_or_Rural_Area'])['Accident_Severity'].size().unstack()


In [40]:
# accident_severity_on_WCs_onSpecific_Urban_or_Rural

<h1><strong>Insights #32- :</strong> Insights on Weather Conditions and Urban vs. Rural Accidents</h1>

<ol>
    <li><strong>Most Accidents Happen in Urban Areas, Regardless of Weather</strong>
        <ul>
            <li>339,076 accidents in urban areas under "Fine no high winds" conditions (highest overall).</li>
            <li>Higher vehicle density and pedestrian activity contribute to more accidents.</li>
        </ul>
    </li>
    <li><strong>Rural Areas Have More Accidents in Poor Weather Conditions</strong>
        <ul>
            <li><strong>Raining no high winds:</strong> 31,173 rural accidents vs. 48,517 urban.</li>
            <li><strong>Fog or mist:</strong> 2,385 rural accidents vs. 1,143 urban.</li>
            <li>Rural roads often have higher speeds, less lighting, and limited visibility, increasing risks.</li>
        </ul>
    </li>
    <li><strong>High Winds Contribute to Rural Accidents</strong>
        <ul>
            <li><strong>Fine + high winds:</strong> 3,993 rural accidents vs. 4,560 urban.</li>
            <li><strong>Raining + high winds:</strong> 4,723 rural accidents vs. 4,892 urban.</li>
            <li>Strong winds can impact vehicle stability, especially for high-profile vehicles like trucks and motorcycles.</li>
        </ul>
    </li>
    <li><strong>Snow-Related Accidents Are More Evenly Distributed</strong>
        <ul>
            <li><strong>Snowing no high winds:</strong> 3,050 rural vs. 3,188 urban.</li>
            <li><strong>Snowing + high winds:</strong> 571 rural vs. 313 urban.</li>
            <li>Suggests drivers in both areas take precautions or reduce travel during snow events.</li>
        </ul>
    </li>
</ol>

<h2>Conclusion:</h2>
<ul>
    <li>Urban areas experience more accidents overall, mainly due to traffic density.</li>
    <li>Rural areas see higher accident numbers in foggy, windy, and rainy conditions.</li>
    <li>High winds and snow impact rural and urban areas differently but contribute to accidents in both.</li>
    <li>Weather alone doesn’t determine accident risk—road design, traffic flow, and driver behavior also play key roles.</li>
</ul>


<h1><strong>Question #8: </strong>Does the number of vehicles involved correlate with accident severity?</h1>

In [41]:
# corr_NoV_to_AS = eda_data['Number_of_Vehicles'].corr(eda_data['Accident_Severity'])

In [65]:
# corr1 = eda_data['Number_of_Vehicles'].corr(eda_data['Accident_Severity'])

In [66]:
copy_eda_data['Accident_Severity'] = copy_eda_data['Accident_Severity'].astype('category').cat.codes
copy_eda_data['Weather_Conditions'] = copy_eda_data['Weather_Conditions'].astype('category').cat.codes
copy_eda_data['Road_Surface_Conditions'] = copy_eda_data['Road_Surface_Conditions'].astype('category').cat.codes
copy_eda_data['Light_Conditions'] = copy_eda_data['Light_Conditions'].astype('category').cat.codes
copy_eda_data['Urban_or_Rural_Area'] = copy_eda_data['Urban_or_Rural_Area'].astype('category').cat.codes

In [67]:
copy_eda_data['Accident_Severity'].unique()

array([1, 2, 0], dtype=int8)

In [68]:
corr1 = copy_eda_data['Number_of_Vehicles'].corr(copy_eda_data['Accident_Severity'])

In [69]:
corr1

0.0753240098281068

In [70]:
columns_for_corr = [
    'Number_of_Casualties',
    'Number_of_Vehicles',
    'Latitude',
    'Longitude'
]

In [71]:

categorical_cols = ['Accident_Severity', 'Weather_Conditions', 'Road_Surface_Conditions', 
                    'Light_Conditions', 'Urban_or_Rural_Area', 'Vehicle_Type']
forcorr = copy_eda_data[categorical_cols] = copy_eda_data[categorical_cols].apply(lambda x: x.astype('category').cat.codes)


In [72]:
forcorr.corr()

Unnamed: 0,Accident_Severity,Weather_Conditions,Road_Surface_Conditions,Light_Conditions,Urban_or_Rural_Area,Vehicle_Type
Accident_Severity,1.0,0.020628,0.01085,0.032714,0.08128,-0.000414
Weather_Conditions,0.020628,1.0,0.607044,-0.101776,-0.04971,0.000873
Road_Surface_Conditions,0.01085,0.607044,1.0,-0.160788,-0.10088,-0.000681
Light_Conditions,0.032714,-0.101776,-0.160788,1.0,-0.111955,-0.002791
Urban_or_Rural_Area,0.08128,-0.04971,-0.10088,-0.111955,1.0,0.008643
Vehicle_Type,-0.000414,0.000873,-0.000681,-0.002791,0.008643,1.0
