In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
import matplotlib.ticker as mtick
import folium
from folium.plugins import HeatMap
import plotly.express as px

In [None]:
#!pip install folium

In [None]:
# Show which Matplotlib style sheets can be passed to plt.style.use().
available_styles = plt.style.available
print(available_styles)

In [None]:
# Global look for every figure below: a Matplotlib style sheet first,
# then seaborn's 'talk' context applied on top of it.
plt.style.use(style="seaborn-v0_8-darkgrid")
sns.set_context(context='talk')

In [None]:
# Load the accident records; the path is relative to the notebook's
# working directory.
DATA_PATH = 'dataset/cleaned_us_accident_data.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Summary statistics of the numeric columns. Leaving the frame as the
# cell's last expression lets the notebook render it as a rich table
# instead of the truncated plain-text form that print() produces.
df.describe()

In [None]:
# Bar chart of how many accidents fall into each severity level.
plt.figure(figsize=(10, 6))
sns.set_style("whitegrid")
ax = sns.countplot(data=df, x='severity', palette='magma')

# Write the formatted count at the top of every bar.
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_center = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f'{bar_height:,.0f}',
        (bar_center, bar_height),
        ha='center', va='bottom', fontsize=12, color='black',
    )

# Thousands separators on the y-axis tick labels.
ax.yaxis.set_major_formatter(
    mtick.FuncFormatter(lambda value, _pos: f'{int(value):,}')
)

ax.set_title('Distribution of Accident Severity', fontsize=14, fontweight='bold')
ax.set_xlabel('Severity Level', fontsize=12)
ax.set_ylabel('Number of Accidents', fontsize=12)
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.show()

In [None]:
#print(df['severity'].value_counts())


In [None]:
# Parse the timestamp column a single time and derive both calendar fields
# from that one result instead of re-parsing the whole column per field.
start_times = pd.to_datetime(df['start_time'])
df['year'] = start_times.dt.year
df['month'] = start_times.dt.month


def _plot_accident_counts(column, palette, title, xlabel):
    """Draw an annotated count plot of ``df[column]`` and return its Axes.

    Parameters
    ----------
    column : str
        Name of the column in ``df`` whose values are counted on the x-axis.
    palette : list
        Colours passed to ``sns.countplot``.
    title : str
        Figure title.
    xlabel : str
        Label for the x-axis.

    Returns
    -------
    The Axes object returned by ``sns.countplot``.
    """
    plt.figure(figsize=(12, 6))
    axis = sns.countplot(x=column, data=df, palette=palette)
    for bar in axis.patches:
        # Bar heights can be floats; ',.0f' keeps the label a whole number
        # (a bare ',' format would print e.g. "1,234.0"), matching the
        # severity chart's labels.
        axis.annotate(f'{bar.get_height():,.0f}',
                      (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                      ha='center', va='bottom', fontsize=12, color='black')
    axis.yaxis.set_major_formatter(mtick.FuncFormatter(lambda x, _: f'{int(x):,}'))
    plt.title(title, fontsize=14, fontweight='bold')
    plt.xlabel(xlabel, fontsize=12)
    plt.ylabel('Number of Accidents', fontsize=12)
    plt.xticks(fontsize=10)
    plt.yticks(fontsize=10)
    plt.show()
    return axis


sns.set_style("whitegrid")

# nunique() ignores missing years, so the palette has exactly one colour
# per bar that countplot will actually draw.
palette_years = sns.color_palette("Blues_r", df['year'].nunique())
ax = _plot_accident_counts('year', palette_years,
                           'Accident Frequency by Year', 'Year')

palette_months = sns.color_palette("coolwarm", 12)
ax = _plot_accident_counts('month', palette_months,
                           'Monthly Accident Frequency', 'Month')



In [None]:
# Upper bound on the number of points sent to the heatmap layer.
HEATMAP_SAMPLE_SIZE = 1_000_000

# DataFrame.sample raises ValueError when n exceeds the number of rows
# (replace=False), so cap the request at the size of the frame.
df_sampled = df.sample(n=min(HEATMAP_SAMPLE_SIZE, len(df)), random_state=42)
map_center = [df_sampled['start_lat'].mean(), df_sampled['start_lng'].mean()]

m = folium.Map(location=map_center, zoom_start=5)

# Build the [lat, lng] pairs in one vectorised step instead of a row-by-row
# iterrows() loop, and drop rows with a missing coordinate so they are never
# handed to the heatmap layer.
heat_data = (
    df_sampled[['start_lat', 'start_lng']]
    .dropna()
    .to_numpy()
    .tolist()
)
HeatMap(heat_data, radius=8, blur=6, min_opacity=0.3).add_to(m)

# Last expression of the cell: the notebook renders the interactive map.
m

In [None]:
# Pairwise correlations between the numeric columns of the dataset.
numeric_df = df.select_dtypes(include=['number'])
corr_matrix = numeric_df.corr()

# Boolean mask that is True on and above the diagonal, so only the lower
# triangle of the symmetric matrix is drawn.
mask = np.triu(np.ones(corr_matrix.shape, dtype=bool))

heatmap_options = dict(
    mask=mask,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    linewidths=0.5,
    cbar=True,
    square=True,
    annot_kws={"size": 10},
)

plt.figure(figsize=(14, 10))
sns.heatmap(corr_matrix, **heatmap_options)
plt.title("Correlation Matrix of Numerical Features", fontsize=16, fontweight="bold")
plt.xticks(rotation=90, fontsize=12)
plt.yticks(rotation=0, fontsize=12)
plt.show()


In [None]:
plt.figure(figsize=(14, 7))
sns.countplot(y='weather_condition', data=df, order=df['weather_condition'].value_counts().index[:10], palette='Set3')
plt.title('Top 10 Weather Conditions for Accidents')
plt.xlabel('Number of Accidents')
plt.ylabel('Weather Condition')
plt.show()


In [None]:
# Count accidents per city once and keep the ten largest; value_counts()
# already sorts in descending order, so head(10) is the top ten.
top_cities = df['city'].value_counts().head(10)

plt.figure(figsize=(12,7))
sns.barplot(y=top_cities.index, x=top_cities, palette='cubehelix')
plt.title("Top 10 Cities with Most Accidents", fontsize=14)
plt.xlabel("Number of Accidents")
plt.ylabel("City")
plt.show()


In [None]:
# NOTE(review): this draws the same top-10 weather condition chart as an
# earlier cell, only with the 'plasma' palette — consider keeping just one.
weather_series = df['weather_condition']
ten_most_common = weather_series.value_counts().index[:10]

plt.figure(figsize=(14,7))
sns.countplot(y=weather_series, order=ten_most_common, palette='plasma')
plt.title("Top 10 Weather Conditions for Accidents", fontsize=14)
plt.xlabel("Number of Accidents")
plt.ylabel("Weather Condition")
plt.show()


In [None]:
# One count plot per road-feature flag, laid out on a 2x2 grid.
features = ['traffic_signal', 'crossing', 'junction', 'stop']

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle("Impact of Road Features on Accidents", fontsize=16, fontweight="bold")

colors = ["#FF9999", "#4682B4"]

# Denominator for the percentage labels: every row of the dataset.
total = len(df)

# axes.flat walks the grid row by row, so features fill it left-to-right,
# top-to-bottom.
for feature, ax in zip(features, axes.flat):
    sns.countplot(data=df, x=feature, palette=colors, ax=ax)

    # NOTE(review): assumes each feature column holds exactly two categories
    # plotted in the order (absent, present) — confirm against the data.
    ax.set_xticklabels(['No', 'Yes'], fontsize=12)

    # Label each bar with its share of all accidents.
    for bar in ax.patches:
        bar_height = bar.get_height()
        share_label = f"{100 * bar_height / total:.1f}%"
        ax.annotate(
            share_label,
            (bar.get_x() + bar.get_width() / 2., bar_height),
            ha='center', va='bottom', fontsize=12, color='black',
        )

    ax.yaxis.set_major_formatter(
        mtick.FuncFormatter(lambda value, _pos: f'{int(value):,}')
    )
    pretty_name = feature.replace('_', ' ').title()
    ax.set_title(f"Impact of {pretty_name} on Accidents", fontsize=14)

# Leave the top 5% of the figure free for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()


In [None]:
#!pip install joypy

In [None]:
import joypy
from matplotlib import cm

# Ridgeline (joy) plot of the accident start hour, one ridge per severity.
# joyplot() builds its own figure, so the size is passed through its
# `figsize` argument; a separate plt.figure() call beforehand would only
# leave an empty extra figure behind and would not size the ridgeline plot.
ridge_fig, ridge_axes = joypy.joyplot(
    data=df[['start_hour', 'severity']],
    by="severity",
    colormap=cm.coolwarm,
    figsize=(12, 8),
)
plt.title('Accidents by Time of Day (Ridgeline Plot)')
plt.xlabel('Hour of the Day')
plt.show()


In [None]:
# Upper bound on the number of points handed to the browser; set to None to
# plot every row. Mirrors the sampling already used for the heatmap cell.
SCATTER_MAX_POINTS = 50_000

# Marker size cannot be drawn for missing values, so drop incomplete rows.
scatter_df = df.dropna(subset=['start_year', 'severity', 'distance(mi)'])
if SCATTER_MAX_POINTS is not None and len(scatter_df) > SCATTER_MAX_POINTS:
    scatter_df = scatter_df.sample(n=SCATTER_MAX_POINTS, random_state=42)

# Animation frames follow the order in which values appear in the data;
# sorting makes the slider advance chronologically.
scatter_df = scatter_df.sort_values('start_year')

# Axis ranges are not recomputed per animation frame, and x is the same
# column as the animation variable, so pin both ranges to the full extent of
# the data to keep every frame's points visible.
# NOTE(review): assumes 'start_year' and 'severity' are numeric — confirm.
year_range = [scatter_df['start_year'].min() - 1, scatter_df['start_year'].max() + 1]
severity_range = [scatter_df['severity'].min() - 0.5, scatter_df['severity'].max() + 0.5]

fig = px.scatter(scatter_df, x='start_year', y='severity', size='distance(mi)',
                 color='state', animation_frame='start_year',
                 range_x=year_range, range_y=severity_range,
                 title="Accidents Over Time",
                 labels={'start_year': 'Year', 'severity': 'Severity'})

fig.show()