In [1]:
# Import the data-handling and plotting libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the accident records from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='Accidentdata.csv')

In [3]:
# Preview the first five rows (head() defaults to n=5)
df.head()

Unnamed: 0,ID,Source,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,Distance(mi),...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
0,A-1,Source2,3,2016-02-08 05:46:00,2016-02-08 11:00:00,39.865147,-84.058723,,,0.01,...,False,False,False,False,False,False,Night,Night,Night,Night
1,A-2,Source2,2,2016-02-08 06:07:59,2016-02-08 06:37:59,39.928059,-82.831184,,,0.01,...,False,False,False,False,False,False,Night,Night,Night,Day
2,A-3,Source2,2,2016-02-08 06:49:27,2016-02-08 07:19:27,39.063148,-84.032608,,,0.01,...,False,False,False,False,True,False,Night,Night,Day,Day
3,A-4,Source2,3,2016-02-08 07:23:34,2016-02-08 07:53:34,39.747753,-84.205582,,,0.01,...,False,False,False,False,False,False,Night,Day,Day,Day
4,A-5,Source2,2,2016-02-08 07:39:07,2016-02-08 08:09:07,39.627781,-84.188354,,,0.01,...,False,False,False,False,True,False,Day,Day,Day,Day


In [4]:
from shapely.geometry import Point, Polygon

In [5]:
# Parse the 'Start_Time' strings into datetimes, accepting any ISO 8601 layout
df['Start_Time'] = df['Start_Time'].pipe(pd.to_datetime, format='ISO8601')

In [6]:
# Add one column per date/time component of 'Start_Time'
# ('year' -> 'Year', 'month' -> 'Month', ... 'second' -> 'Second')
for component in ('year', 'month', 'day', 'hour', 'minute', 'second'):
    df[component.capitalize()] = getattr(df['Start_Time'].dt, component)

In [7]:
# Inspect the column labels, including the date/time part columns added above
df.columns

Index(['ID', 'Source', 'Severity', 'Start_Time', 'End_Time', 'Start_Lat',
       'Start_Lng', 'End_Lat', 'End_Lng', 'Distance(mi)', 'Description',
       'Street', 'City', 'County', 'State', 'Zipcode', 'Country', 'Timezone',
       'Airport_Code', 'Weather_Timestamp', 'Temperature(F)', 'Wind_Chill(F)',
       'Humidity(%)', 'Pressure(in)', 'Visibility(mi)', 'Wind_Direction',
       'Wind_Speed(mph)', 'Precipitation(in)', 'Weather_Condition', 'Amenity',
       'Bump', 'Crossing', 'Give_Way', 'Junction', 'No_Exit', 'Railway',
       'Roundabout', 'Station', 'Stop', 'Traffic_Calming', 'Traffic_Signal',
       'Turning_Loop', 'Sunrise_Sunset', 'Civil_Twilight', 'Nautical_Twilight',
       'Astronomical_Twilight', 'Year', 'Month', 'Day', 'Hour', 'Minute',
       'Second'],
      dtype='object')

In [None]:
# Distribution of accidents by weather conditions.
# Bars are horizontal and the figure height scales with the number of distinct
# conditions, so every label stays readable on a modest canvas. A fixed
# 100x60-inch figure is about 60 megapixels at 100 dpi and is avoided here.
weather_order = df['Weather_Condition'].value_counts().index  # most frequent first
plt.figure(figsize=(12, max(6, 0.3 * len(weather_order))))
sns.countplot(y='Weather_Condition', data=df, order=weather_order)
plt.title('Distribution of Accidents by Weather Conditions')
plt.xlabel('Number of Accidents')
plt.ylabel('Weather Condition')
plt.show()

In [None]:
# Accident counts for each hour of the day, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x='Hour', color='skyblue', ax=ax)
ax.set_title('Distribution of Accidents by Time of Day')
ax.set_xlabel('Hour of Day')
ax.set_ylabel('Number of Accidents')
plt.show()

In [None]:

# Accident hotspots: start longitude vs. start latitude.
# The low alpha lets overlapping points build up darker areas.
fig, ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(data=df, x='Start_Lng', y='Start_Lat', color='red', alpha=0.1, ax=ax)
ax.set_title('Accident Hotspots')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
plt.show()


In [None]:
# Tally accidents per calendar month (value_counts lists the most frequent first)
accidents_by_month = df.loc[:, 'Month'].value_counts()
print(accidents_by_month)

In [None]:
# Count the number of accidents for each hour of the day.
# value_counts() orders by frequency, so sort by the hour index to keep the
# series chronological. A line plot of the unsorted series would connect the
# hours out of order and zigzag across the x-axis.
accidents_by_hour = df['Hour'].value_counts().sort_index()
print(accidents_by_hour)

In [None]:
# Visualize accidents by hour of the day using a line chart
accidents_by_hour.plot(kind='line')
plt.title('Accidents by Hour of the Day')
plt