# In [None]:
# traffic_accident_analysis.ipynb

# 📦 Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import folium
from folium.plugins import HeatMap

# 📁 Load dataset
df = pd.read_csv('../data/accident_data.csv')  # Adjust the path if needed
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell (and never in a plain script), so print the preview explicitly.
print(df.head())

# 📌 Basic info
df.info()  # info() writes its summary to stdout on its own
print(df.describe())  # describe() only returns a frame; print it to make it visible

# 🔍 Check for null values
# Per-column count of missing entries, largest first.
missing = df.isnull().sum().sort_values(ascending=False)
# Only columns with at least one missing value are reported. An f-string is
# used instead of a second print() argument because the separator space that
# print() inserts after "\n" would shift the first row out of alignment.
print(f"Missing Values:\n{missing[missing > 0]}")

# 💡 Clean data: discard rows missing any field the analysis below relies on
required_columns = ['Start_Time', 'Weather_Condition', 'Start_Lat', 'Start_Lng']
df = df.dropna(subset=required_columns)

# 🕒 Derive time features from the parsed start timestamp
start_times = pd.to_datetime(df['Start_Time'])
df = df.assign(
    Start_Time=start_times,               # replace the raw values with datetimes
    Hour=start_times.dt.hour,             # hour component of the timestamp
    DayOfWeek=start_times.dt.day_name(),  # weekday name of the timestamp
)

# 📊 Hourly distribution of accidents (one bar per value of the 'Hour' column)
fig, ax = plt.subplots(figsize=(10, 5))
sns.countplot(data=df, x='Hour', palette='coolwarm', ax=ax)
ax.set_title('Accidents by Hour of Day')
ax.set_xlabel('Hour')
ax.set_ylabel('Number of Accidents')
ax.grid(True)
fig.tight_layout()
plt.show()

# 📊 Accident counts for the ten most frequent weather conditions
weather_counts = df['Weather_Condition'].value_counts()
top_weather = weather_counts.head(10).index
# Boolean mask selecting only the rows whose condition is in the top ten.
in_top_weather = df['Weather_Condition'].isin(top_weather)

fig, ax = plt.subplots(figsize=(10, 5))
sns.countplot(data=df[in_top_weather], y='Weather_Condition', palette='viridis', ax=ax)
ax.set_title('Top 10 Weather Conditions in Accidents')
ax.set_xlabel('Number of Accidents')
ax.set_ylabel('Weather Condition')
fig.tight_layout()
plt.show()

# 📊 Accidents by road surface condition (skipped when the column is absent)
if 'Surface_Condition' in df.columns:
    # Bars are ordered as value_counts() returns the categories.
    surface_order = df['Surface_Condition'].value_counts().index
    fig, ax = plt.subplots(figsize=(8, 4))
    sns.countplot(x='Surface_Condition', data=df, order=surface_order, ax=ax)
    ax.set_title('Accidents by Road Surface Condition')
    plt.setp(ax.get_xticklabels(), rotation=45)
    fig.tight_layout()
    plt.show()

# 🗺️ Accident Hotspot Map (USA or large city)
# Upper bound on the number of points handed to the heat map (limit for performance).
MAX_HEATMAP_POINTS = 10000

coords = df[['Start_Lat', 'Start_Lng']].dropna()
# DataFrame.sample(n) raises ValueError when n exceeds the number of rows, so
# only down-sample when there are actually more rows than the limit.
if len(coords) > MAX_HEATMAP_POINTS:
    map_data = coords.sample(MAX_HEATMAP_POINTS)
else:
    map_data = coords

if map_data.empty:
    # With no coordinates the mean centre would be NaN; show folium's default
    # view without a heat layer instead of failing.
    print("No accident coordinates available; showing an empty map.")
    m = folium.Map()
else:
    # Centre the map on the mean coordinate of the plotted points.
    map_center = [map_data['Start_Lat'].mean(), map_data['Start_Lng'].mean()]
    m = folium.Map(location=map_center, zoom_start=5)
    HeatMap(map_data.values, radius=7).add_to(m)
m
