# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import HeatMap
import os

# ======================
# OUTPUT FOLDER
# ======================
# exist_ok=True lets the script be re-run when the folder is already present.
os.makedirs(name="outputs", exist_ok=True)

# ======================
# LOAD DATA (row-limited)
# ======================
# Only the first 200,000 rows of the CSV are read.
df = pd.read_csv(filepath_or_buffer="US_Accidents_March23.csv", nrows=200_000)

print(f"Shape: {df.shape}")

# ======================
# CLEANING
# ======================
# Unparseable timestamps become NaT (errors='coerce') so they are removed by
# the dropna below together with rows that lack coordinates.
# NOTE(review): if the file mixes timestamp formats, coercion may turn
# otherwise valid rows into NaT depending on the pandas version -- confirm the
# number of rows dropped here is as expected.
df['Start_Time'] = pd.to_datetime(df['Start_Time'], errors='coerce')

# .copy() makes the filtered frame independent of the one it was derived from,
# so the column assignment right after it cannot trigger SettingWithCopyWarning.
df = df.dropna(subset=['Start_Time', 'Start_Lat', 'Start_Lng']).copy()

# Hour of day (0-23) taken from the parsed start timestamp.
df['Hour'] = df['Start_Time'].dt.hour

def period(h):
    """Return the part-of-day label for an hour value.

    Values below 6 map to "Night", below 12 to "Morning", below 18 to
    "Afternoon"; anything else maps to "Evening".
    """
    # Exclusive upper bounds in ascending order; the first one that h falls
    # under decides the label.
    upper_bounds = ((6, "Night"), (12, "Morning"), (18, "Afternoon"))
    for limit, label in upper_bounds:
        if h < limit:
            return label
    return "Evening"

# Label every record with its part of the day, derived from the hour column.
hours = df['Hour']
df['Time_Period'] = hours.apply(period)

# Switch seaborn to its white-grid theme.
sns.set_style(style="whitegrid")

# ======================
# ACCIDENTS BY HOUR
# ======================
# Count records per hour, then order by hour so the bars run chronologically
# instead of by frequency.
hourly_counts = df['Hour'].value_counts().sort_index()

fig, ax = plt.subplots(figsize=(10, 5))
hourly_counts.plot.bar(ax=ax)
ax.set_title("Accidents by Hour")
ax.set_xlabel("Hour")
ax.set_ylabel("Count")
fig.savefig("outputs/accidents_by_hour.png", bbox_inches='tight')
plt.show()

# ======================
# TIME PERIOD PIE
# ======================
period_counts = df['Time_Period'].value_counts()

fig, ax = plt.subplots(figsize=(6, 6))
# autopct prints each slice's share with one decimal place.
period_counts.plot.pie(ax=ax, autopct='%1.1f%%')
# Blank out the y-axis label so only the title annotates the chart.
ax.set_ylabel("")
ax.set_title("Accidents by Time Period")
fig.savefig("outputs/time_period_distribution.png", bbox_inches='tight')
plt.show()

# ======================
# WEATHER CONDITIONS
# ======================
# value_counts sorts by descending frequency, so the first ten entries are the
# ten most common conditions.
top_conditions = df['Weather_Condition'].value_counts().iloc[:10]

fig, ax = plt.subplots(figsize=(10, 5))
top_conditions.plot.bar(ax=ax)
ax.set_title("Top Weather Conditions")
ax.set_xlabel("Weather")
ax.set_ylabel("Count")
fig.savefig("outputs/weather_conditions.png", bbox_inches='tight')
plt.show()

# ======================
# SEVERITY DISTRIBUTION
# ======================
fig, ax = plt.subplots(figsize=(6, 4))
# One bar per distinct Severity value, sized by the number of records.
sns.countplot(data=df, x='Severity', ax=ax)
ax.set_title("Severity Distribution")
fig.savefig("outputs/severity_distribution.png", bbox_inches='tight')
plt.show()

# ======================
# WEATHER VS SEVERITY HEATMAP
# ======================
# Restrict the comparison to the five most frequent weather conditions.
top_weather = df['Weather_Condition'].value_counts().index[:5]
is_top_weather = df['Weather_Condition'].isin(top_weather)
subset = df.loc[is_top_weather]

# Rows: weather condition, columns: severity, cells: record counts.
ct = pd.crosstab(index=subset['Weather_Condition'], columns=subset['Severity'])

fig, ax = plt.subplots(figsize=(8, 5))
# fmt='d' renders the cell annotations as integers.
sns.heatmap(ct, annot=True, fmt='d', ax=ax)
ax.set_title("Weather vs Severity")
fig.savefig("outputs/weather_vs_severity.png", bbox_inches='tight')
plt.show()

# ======================
# HOTSPOT MAP
# ======================
# Cap the sample at the number of available rows: DataFrame.sample raises
# ValueError when asked for more rows than exist (sampling without
# replacement), which would happen if cleaning left fewer than 10,000 records.
# A fixed random_state makes the generated map reproducible between runs.
sample_size = min(10000, len(df))
sample = df.sample(n=sample_size, random_state=42)

# Initial view: centre [39, -95] at zoom level 4.
m = folium.Map(location=[39, -95], zoom_start=4)
# HeatMap takes (latitude, longitude) pairs.
heat_data = list(zip(sample['Start_Lat'], sample['Start_Lng']))
HeatMap(heat_data).add_to(m)

m.save("outputs/accident_hotspots.html")

# The check mark was previously mis-encoded (UTF-8 bytes shown as cp1252).
print("All graphs saved inside 'outputs' folder ✅")
