In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr

# Load the dataset.
# The path is relative, so the CSV must be in the working directory of the
# process running this script/notebook.
DATA_FILE = "Road_Accident_Data.csv"
try:
    df = pd.read_csv(DATA_FILE)
except FileNotFoundError as err:
    # Re-raise the same exception type, but with a hint on how to fix it
    # instead of the bare OS message.
    raise FileNotFoundError(
        err.errno,
        "Dataset not found; place the CSV in the current working directory "
        "or point DATA_FILE at its location",
        DATA_FILE,
    ) from err

# Display the first few rows of the dataset
print("Dataset Preview:")
print(df.head())

# Check for missing values (number of nulls per column)
print("\nMissing Values:")
print(df.isnull().sum())

# Handle missing values: drop every row that has a null in ANY column.
# All later counts and charts are based on the remaining rows only, so report
# how many rows were discarded to keep those numbers interpretable.
rows_before = len(df)
df = df.dropna()
print(f"\nDropped {rows_before - len(df)} of {rows_before} rows containing missing values.")

# 1. Frequency of Accidents Over Time
# Each remaining row of df is counted as one accident.
total_accidents = len(df)
print("\nTotal Number of Accidents:")
print(total_accidents)

# Parse the accident date once, then derive the calendar parts from the
# parsed values and store them as new columns.
accident_dt = pd.to_datetime(df['Accident Date'])
df['Accident Date'] = accident_dt
df['year'] = accident_dt.dt.year
df['month'] = accident_dt.dt.month
df['day_of_week'] = accident_dt.dt.day_name()
df['hour'] = accident_dt.dt.hour

# Bar chart of accident counts per year, in chronological order.
accidents_per_year = df['year'].value_counts().sort_index()
fig, ax = plt.subplots(figsize=(10, 6))
accidents_per_year.plot(kind='bar', color='skyblue', ax=ax)
ax.set_title('Accidents by Year')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Accidents')
plt.show()

# 2. Geographical Distribution
# The ten most frequent values of the Location column, computed once and
# used for both the printed table and the chart.
top_locations = df['Location'].value_counts().head(10)
print("\nTop Accident Locations:")
print(top_locations)

fig, ax = plt.subplots(figsize=(12, 6))
top_locations.plot.bar(ax=ax)
ax.set_title('Top 10 Accident Locations')
ax.set_xlabel('Location')
ax.set_ylabel('Number of Accidents')
plt.show()

# 3. Accident Severity Analysis
# Frequency of each severity value, printed and shown as a pie chart.
severity_counts = df['Severity'].value_counts()
print("\nAccident Severity Distribution:")
print(severity_counts)

plt.figure(figsize=(8, 6))
severity_counts.plot.pie(autopct='%1.1f%%', colors=['red', 'orange', 'yellow'], startangle=140)
plt.title('Accident Severity Distribution')
plt.show()

# Correlation between severity and hour.
# pearsonr needs numeric inputs that are not constant. Severity is handled as
# a categorical column above, and 'hour' is derived from 'Accident Date', so
# it is constant if that column carries no time of day (NOTE(review): confirm
# against the data). Coerce both to numbers, keep only rows where both are
# valid, and skip the computation when it would be undefined.
severity_numeric = pd.to_numeric(df['Severity'], errors='coerce')
hour_numeric = pd.to_numeric(df['hour'], errors='coerce')
usable = severity_numeric.notna() & hour_numeric.notna()
severity_values = severity_numeric[usable]
hour_values = hour_numeric[usable]

if severity_values.nunique() < 2 or hour_values.nunique() < 2:
    # Fewer than two distinct values on either side: correlation is undefined.
    severity_hour_corr = float('nan')
    print("Correlation between accident severity and time of day: not available "
          "(severity and hour must both be numeric and non-constant)")
else:
    severity_hour_corr, _ = pearsonr(severity_values, hour_values)
    print(f"Correlation between accident severity and time of day: {severity_hour_corr:.2f}")

# 4. Demographic Insights
# Age: 20-bin histogram with a kernel density overlay.
print("\nAge Distribution of Individuals Involved:")
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['Age'], bins=20, kde=True, color='purple', ax=ax)
ax.set_title('Age Distribution of Individuals Involved in Accidents')
ax.set_xlabel('Age')
ax.set_ylabel('Frequency')
plt.show()

# Gender: frequency table, then the same counts as a bar chart.
gender_counts = df['Gender'].value_counts()
print("\nGender Distribution:")
print(gender_counts)

fig, ax = plt.subplots(figsize=(8, 6))
gender_counts.plot.bar(color='lightgreen', ax=ax)
ax.set_title('Gender Distribution in Accidents')
ax.set_xlabel('Gender')
ax.set_ylabel('Count')
plt.show()

# 5. Environmental and Road Conditions
def _show_bar_chart(counts, *, figsize, color, title, xlabel, ylabel):
    """Draw ``counts`` as a bar chart on a new figure and display it."""
    plt.figure(figsize=figsize)
    counts.plot(kind='bar', color=color)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()


# Weather: only the five most frequent conditions are reported.
top_weather = df['Weather_Condition'].value_counts().head(5)
print("\nAccidents by Weather Conditions:")
print(top_weather)
_show_bar_chart(
    top_weather,
    figsize=(12, 6),
    color='cornflowerblue',
    title='Top 5 Weather Conditions Leading to Accidents',
    xlabel='Weather Condition',
    ylabel='Number of Accidents',
)

# Lighting conditions: every category is reported.
lighting_counts = df['Lighting_Condition'].value_counts()
print("\nAccidents by Lighting Conditions:")
print(lighting_counts)
_show_bar_chart(
    lighting_counts,
    figsize=(10, 6),
    color='gold',
    title='Accidents by Lighting Conditions',
    xlabel='Lighting Condition',
    ylabel='Number of Accidents',
)

# 6. Vehicle and Driver Information
# The five most frequent values of the Vehicle_Type column, computed once and
# used for both the printed table and the chart.
top_vehicle_types = df['Vehicle_Type'].value_counts().head(5)
print("\nMost Frequent Vehicle Types Involved in Accidents:")
print(top_vehicle_types)

fig, ax = plt.subplots(figsize=(12, 6))
top_vehicle_types.plot.bar(color='salmon', ax=ax)
ax.set_title('Top 5 Vehicle Types Involved in Accidents')
ax.set_xlabel('Vehicle Type')
ax.set_ylabel('Count')
plt.show()

# 7. Temporal Patterns
# value_counts() orders rows by frequency, which scrambles the week and hides
# any weekday/weekend pattern. Reindex to calendar order instead; a weekday
# with no accidents is shown as 0 rather than being left out.
# The names match the English output of Series.dt.day_name(), which is how
# the 'day_of_week' column is built earlier in this script.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
accidents_by_weekday = df['day_of_week'].value_counts().reindex(weekday_order, fill_value=0)

print("\nAccidents by Day of the Week:")
print(accidents_by_weekday)

plt.figure(figsize=(10, 6))
accidents_by_weekday.plot(kind='bar', color='orchid')
plt.title('Accidents by Day of the Week')
plt.xlabel('Day of Week')
plt.ylabel('Number of Accidents')
plt.show()

# 8. Contributing Factors
# The ten most frequent values of the Contributing_Factor column.
top_factors = df['Contributing_Factor'].value_counts().head(10)
print("\nTop Contributing Factors to Accidents:")
print(top_factors)

fig, ax = plt.subplots(figsize=(12, 6))
top_factors.plot.bar(color='skyblue', ax=ax)
ax.set_title('Top 10 Contributing Factors to Accidents')
ax.set_xlabel('Contributing Factor')
ax.set_ylabel('Number of Accidents')
plt.show()

# 9. Injury and Fatality Analysis
# Frequency of every value in the Injury_Severity column.
injury_counts = df['Injury_Severity'].value_counts()
print("\nInjury and Fatality Distribution:")
print(injury_counts)

fig, ax = plt.subplots(figsize=(10, 6))
injury_counts.plot.bar(color='teal', ax=ax)
ax.set_title('Distribution of Injury Severities')
ax.set_xlabel('Injury Severity')
ax.set_ylabel('Count')
plt.show()

# 10. Comparative Analysis
# Frequency of every value in the Area_Type column.
area_counts = df['Area_Type'].value_counts()
print("\nAccidents in Urban vs. Rural Areas:")
print(area_counts)

fig, ax = plt.subplots(figsize=(8, 6))
area_counts.plot.bar(color='lime', ax=ax)
ax.set_title('Urban vs. Rural Accidents')
ax.set_xlabel('Area Type')
ax.set_ylabel('Number of Accidents')
plt.show()

# Closing message once every section has run.
print("Analysis complete. Please review the visualizations and insights.")


FileNotFoundError: [Errno 2] No such file or directory: 'Road_Accident_Data.csv'