# In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import HeatMap

# Location of the input CSV (update this path for your environment)
DATA_PATH = "/content/dft-road-casualty-statistics-vehicle-provisional-mid-year-unvalidated-2024.csv"

# low_memory=False: let pandas infer each column's dtype from the whole file
# instead of chunk by chunk
df = pd.read_csv(DATA_PATH, low_memory=False)

# Report the frame's dimensions and its column labels
for label, value in (("Dataset Shape:", df.shape), ("Columns in dataset:", df.columns)):
    print(label, value)

# ---------------------------------------
# 📊 Step 1: Data Preprocessing
# ---------------------------------------

# Columns kept for the analysis; every other column of the raw frame is discarded
ANALYSIS_COLUMNS = [
    'collision_year',
    'collision_index',
    'vehicle_type',
    'vehicle_manoeuvre',
    'junction_location',
    'skidding_and_overturning',
    'hit_object_in_carriageway',
    'vehicle_leaving_carriageway',
    'sex_of_driver',
    'age_of_driver',
    'age_band_of_driver',
    'engine_capacity_cc',
    'age_of_vehicle',
    'driver_home_area_type',
    'escooter_flag',
]
df = df.loc[:, ANALYSIS_COLUMNS]

# Parse the collision year (format '%Y') into a datetime column
year_as_datetime = pd.to_datetime(df['collision_year'], format='%Y')
df['collision_year'] = year_as_datetime

# Discard every row that has a missing value in any of the kept columns
df = df.dropna()

# Preview the cleaned frame
print(df.head())

# ---------------------------------------
# 📊 Step 2: Data Analysis & Visualization
# ---------------------------------------

# 1 Accidents by Vehicle Type
# Horizontal bar chart of the ten most frequent vehicle_type values.
plt.figure(figsize=(12,6))
top_vehicle_types = df['vehicle_type'].value_counts().nlargest(10)
# Cast the category labels to strings before plotting. If vehicle_type holds
# numeric codes (the raw DfT files are expected to — confirm against the data
# guide), both x and y would be numeric, and seaborn would then default to a
# vertical orientation that treats the counts as the categories. String labels
# make the chart horizontal and keep the bars in count order. This is a no-op
# when the labels are already strings.
vehicle_type_labels = top_vehicle_types.index.astype(str)
sns.barplot(y=vehicle_type_labels, x=top_vehicle_types.values, palette="magma")
plt.title("Top Vehicle Types Involved in Accidents")
plt.xlabel("Number of Accidents")
plt.ylabel("Vehicle Type")
plt.show()

# 2 Age of Driver vs. Number of Accidents
# Histogram (30 bins, with a KDE overlay) of driver ages.
plt.figure(figsize=(12,6))
# dropna() does not remove sentinel values. DfT road-safety data is documented
# as using -1 for "data missing or out of range" (confirm against the data
# guide), which would otherwise appear as a spurious spike below age 0.
# Keep only non-negative numeric ages for this plot; df itself is left untouched.
driver_ages = pd.to_numeric(df['age_of_driver'], errors='coerce')
driver_ages = driver_ages[driver_ages >= 0]
sns.histplot(driver_ages, bins=30, kde=True, color="blue")
plt.title("Age of Drivers Involved in Accidents")
plt.xlabel("Age of Driver")
plt.ylabel("Number of Accidents")
plt.show()

# 3 Vehicle Manoeuvre at the Time of Collision
# Horizontal bar chart of the ten most frequent vehicle_manoeuvre values.
plt.figure(figsize=(12,6))
top_manoeuvres = df['vehicle_manoeuvre'].value_counts().nlargest(10)
# Cast the category labels to strings before plotting. If vehicle_manoeuvre
# holds numeric codes (the raw DfT files are expected to — confirm against the
# data guide), both x and y would be numeric, and seaborn would then default to
# a vertical orientation that treats the counts as the categories. String
# labels make the chart horizontal and keep the bars in count order. This is a
# no-op when the labels are already strings.
manoeuvre_labels = top_manoeuvres.index.astype(str)
sns.barplot(y=manoeuvre_labels, x=top_manoeuvres.values, palette="viridis")
plt.title("Top Vehicle Manoeuvres During Collisions")
plt.xlabel("Number of Accidents")
plt.ylabel("Vehicle Manoeuvre")
plt.show()

# ---------------------------------------
# 🗺️ Step 3: Visualizing Accident Hotspots
# ---------------------------------------

# The heatmap is only built when the working frame has both coordinate columns.
# NOTE(review): the column subset selected earlier in this script contains
# neither 'Latitude' nor 'Longitude', so as written the "missing" message is
# what runs — confirm whether the coordinates should be kept or merged in.
if not {'Latitude', 'Longitude'}.issubset(df.columns):
    print("Latitude and Longitude columns are missing. Cannot generate heatmap.")
else:
    # Base map centered at (55.3781, -3.4360), zoom level 6
    m = folium.Map(zoom_start=6, location=[55.3781, -3.4360])

    # One [latitude, longitude] pair per row of the frame
    heat_data = df[['Latitude', 'Longitude']].values.tolist()

    # Only the first 10,000 points are put on the layer, for performance
    heat_layer = HeatMap(heat_data[:10000], radius=10)
    heat_layer.add_to(m)

    # Write the map out as a standalone HTML file and report where it went
    m.save("UK_Accident_Hotspots.html")
    print("Heatmap saved as 'UK_Accident_Hotspots.html'")
