In [1]:
pip install pandas numpy matplotlib seaborn folium

Collecting folium
  Downloading folium-0.20.0-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting branca>=0.6.0 (from folium)
  Downloading branca-0.8.2-py3-none-any.whl.metadata (1.7 kB)
Downloading folium-0.20.0-py2.py3-none-any.whl (113 kB)
Downloading branca-0.8.2-py3-none-any.whl (26 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.8.2 folium-0.20.0
Note: you may need to restart the kernel to use updated packages.


In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import HeatMap

# Set global plotting style
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)

# ==============================================================================
# 1. Load Data and Data Preparation
# ==============================================================================
print("--- 1. Data Preparation ---")
df = pd.read_csv('Accident.csv')

# Convert 'Start_Time' to a datetime object
df['Start_Time'] = pd.to_datetime(df['Start_Time'])

# Extract features for Time of Day analysis
df['Hour'] = df['Start_Time'].dt.hour
df['Day_of_Week'] = df['Start_Time'].dt.day_name()

print(f"Total records analyzed: {len(df)}")
print(f"Columns used: Start_Time, Hour, Day_of_Week, Weather_Condition, Road_Condition, Latitude, Longitude")
print("-" * 40)

# ==============================================================================
# 2. Analyze Patterns and Contributing Factors (Visualizations)
# ==============================================================================

# --- Pattern 2.1: Accident Frequency by Hour of Day ---
plt.figure(figsize=(10, 6))
sns.countplot(data=df, x='Hour', palette='viridis', order=sorted(df['Hour'].unique()))
plt.title('Accident Frequency by Hour of Day', fontsize=16)
plt.xlabel('Hour of Day (0=Midnight, 23=11 PM)', fontsize=12)
plt.ylabel('Number of Accidents', fontsize=12)
plt.xticks(range(0, 24))
plt.tight_layout()
plt.savefig('accident_frequency_by_hour.png')
plt.close()
print("Saved: accident_frequency_by_hour.png")

# --- Pattern 2.2: Accident Frequency by Weather Conditions ---
plt.figure(figsize=(10, 6))
# Get value counts and use them to define the order for plotting
weather_counts = df['Weather_Condition'].value_counts().sort_values(ascending=False)
sns.barplot(x=weather_counts.values, y=weather_counts.index, palette='plasma')
plt.title('Accident Frequency by Weather Condition', fontsize=16)
plt.xlabel('Number of Accidents', fontsize=12)
plt.ylabel('Weather Condition', fontsize=12)
plt.tight_layout()
plt.savefig('accident_frequency_by_weather.png')
plt.close()
print("Saved: accident_frequency_by_weather.png")

# --- Pattern 2.3: Accident Frequency by Road Conditions ---
plt.figure(figsize=(10, 6))
# Get value counts and use them to define the order for plotting
road_counts = df['Road_Condition'].value_counts().sort_values(ascending=False)
sns.barplot(x=road_counts.values, y=road_counts.index, palette='rocket')
plt.title('Accident Frequency by Road Surface Condition', fontsize=16)
plt.xlabel('Number of Accidents', fontsize=12)
plt.ylabel('Road Surface Condition', fontsize=12)
plt.tight_layout()
plt.savefig('accident_frequency_by_road.png')
plt.close()
print("Saved: accident_frequency_by_road.png")
print("-" * 40)

# ==============================================================================
# 3. Visualize Accident Hotspots (Geographical Map)
# ==============================================================================
print("--- 3. Hotspot Visualization ---")

# Prepare data for Folium HeatMap
# We only need Latitude and Longitude for the map
hotspot_data = df[['Latitude', 'Longitude']].values.tolist()

if hotspot_data:
    # Get the average coordinates to center the map
    center_lat = df['Latitude'].mean()
    center_lon = df['Longitude'].mean()

    # Create a Folium map centered on the average accident location
    m = folium.Map(location=[center_lat, center_lon], zoom_start=10)

    # Add the HeatMap layer to the map
    # 'radius' controls the spread of each point, 'blur' controls smoothness
    HeatMap(hotspot_data, radius=15, blur=15).add_to(m)

    # Save the map as an HTML file
    map_filename = 'accident_hotspots_map.html'
    m.save(map_filename)
    print(f"Interactive Accident Hotspots map saved to {map_filename}")
else:
    print("Skipping Hotspot visualization: No valid geographic data found.")
print("-" * 40)

--- 1. Data Preparation ---


FileNotFoundError: [Errno 2] No such file or directory: 'Accident.csv'

In [20]:
# --- Step 2: Data Cleaning and Feature Engineering ---

# Convert date/time columns to datetime objects (adjust column names as needed)
# Assuming 'Date' and 'Time' columns exist and need to be combined/parsed.
# If your dataset has a single 'Timestamp' column, simplify this step.

# Example with separate Date and Time columns
if 'Accident_Date' in df.columns and 'Time' in df.columns:
    # Combine Date and Time into a single datetime column
    df['Datetime'] = pd.to_datetime(df['Accident_Date'] + ' ' + df['Time'], errors='coerce')
elif 'Timestamp' in df.columns:
    df['Datetime'] = pd.to_datetime(df['Timestamp'], errors='coerce')
else:
    print("Warning: Could not find suitable date/time columns. Time analysis skipped.")

# Drop rows where Datetime could not be parsed
df.dropna(subset=['Datetime'], inplace=True)

# Extract new time-based features
df['Hour'] = df['Datetime'].dt.hour
df['Day_of_Week'] = df['Datetime'].dt.day_name()

# Handle missing values in key categorical columns (e.g., fill with 'Unknown')
categorical_cols = ['Road_Surface_Conditions', 'Weather_Conditions']
for col in categorical_cols:
    if col in df.columns:
        df[col].fillna('Unknown', inplace=True)

print("\nData preprocessing complete.")
df.head()



KeyError: ['Datetime']

In [11]:
# --- STEP 2: DATA CLEANING & TYPE CONVERSION ---
print("\n--- Data Cleaning & Conversion ---")

# Convert the Start_Time column to a datetime object, crucial for time analysis
df['Start_Time'] = pd.to_datetime(df['Start_Time'])

# Clean Categorical Data: Standardize case for consistent counting
df['Weather_Condition'] = df['Weather_Condition'].str.title()
df['Road_Condition'] = df['Road_Condition'].str.title()# --- STEP 2: DATA CLEANING & TYPE CONVERSION ---
print("\n--- Data Cleaning & Conversion ---")

# Convert the Start_Time column to a datetime object, crucial for time analysis
df['Start_Time'] = pd.to_datetime(df['Start_Time'])

# Clean Categorical Data: Standardize case for consistent counting
df['Weather_Condition'] = df['Weather_Condition'].str.title()
df['Road_Condition'] = df['Road_Condition'].str.title()


--- Data Cleaning & Conversion ---

--- Data Cleaning & Conversion ---
