In [1]:
import pandas as pd
import requests
from datetime import datetime
import matplotlib.pyplot as plt

In [2]:
# Read the Montgomery crash CSV export into a dataframe
filePath = '../Resources/Montgomery_Crash_Data.csv'
df = pd.read_csv(filepath_or_buffer=filePath)

In [None]:
# Show the column labels, then the per-column dtype / non-null summary.
# Only the last expression of a cell is echoed, so a bare `df.columns` in the
# middle of the cell would be evaluated and discarded; display it explicitly.
display(df.columns)
df.info()

In [None]:
# Create a new dataframe with only the columns 'Crash Date/Time' and 'Injury Severity'.
# .copy() makes it independent of `df`, so cells that modify it later are not
# operating on a selection taken from another frame.
df_dateTimeSeverity = df[['Crash Date/Time', 'Injury Severity']].copy()

# Only the last expression of a cell is echoed, so a bare `.head()` before
# `.tail()` would never be shown; display both previews explicitly.
display(df_dateTimeSeverity.head())
display(df_dateTimeSeverity.tail())

In [None]:
# Index the dataframe by 'Crash Date/Time' and convert that index to datetime so
# that datetime accessors (.time, .month, .year) can be used on it later.
# The result is re-assigned instead of using inplace=True, which would mutate a
# frame that was selected out of `df`.
df_dateTimeSeverity = df_dateTimeSeverity.set_index('Crash Date/Time')
df_dateTimeSeverity.index = pd.to_datetime(df_dateTimeSeverity.index)
display(df_dateTimeSeverity.head())
display(df_dateTimeSeverity.tail())

In [None]:
# Tally how many rows carry each 'Injury Severity' value and show the result
count = df_dateTimeSeverity.loc[:, "Injury Severity"].value_counts()
display(count)

In [None]:
# Normalise 'Injury Severity' to sentence case (first letter upper-case, the rest
# lower-case, e.g. 'No apparent injury'). Non-string values are left untouched.
# Series.map on the single text column is used rather than DataFrame.map, which
# only exists in pandas >= 2.1.
df_dateTimeSeverity = df_dateTimeSeverity.assign(
    **{
        'Injury Severity': df_dateTimeSeverity['Injury Severity'].map(
            lambda x: x.capitalize() if isinstance(x, str) else x
        )
    }
)
unique_severity = df_dateTimeSeverity['Injury Severity'].unique()
# Report the distinct severity labels, not the whole dataframe.
print(f"The unique severity values are as follows {unique_severity}")

In [None]:
# Report the number of rows in df_dateTimeSeverity
count = len(df_dateTimeSeverity)
print(f"The total number of accidents is {count}")

In [None]:
# Sort by date and time ('Crash Date/Time'), then preview the first and last rows
df_dateTimeSeverity = df_dateTimeSeverity.sort_values('Crash Date/Time')
display(df_dateTimeSeverity.head(), df_dateTimeSeverity.tail())

In [None]:
# Bar chart of the number of rows per 'Injury Severity' value
df_dateTimeSeverity['Injury Severity'].value_counts().plot.bar(
    title='Injury Severity Count',
    xlabel='Injury Severity',
    ylabel='Number of accidents',
    color='blue',
    alpha=0.7,
    rot=45,
)
plt.tight_layout()
plt.show()


In [None]:
# Split the records into one dataframe per injury severity label
df_no_apparent_injury = df_dateTimeSeverity.loc[
    df_dateTimeSeverity['Injury Severity'].eq('No apparent injury')
]
df_suspected_minor_injury = df_dateTimeSeverity.loc[
    df_dateTimeSeverity['Injury Severity'].eq('Suspected minor injury')
]
df_suspected_serious_injury = df_dateTimeSeverity.loc[
    df_dateTimeSeverity['Injury Severity'].eq('Suspected serious injury')
]
df_fatal_injury = df_dateTimeSeverity.loc[
    df_dateTimeSeverity['Injury Severity'].eq('Fatal injury')
]

# Collect the per-severity dataframes in one list (least to most severe)
injury_severity_list = [
    df_no_apparent_injury,
    df_suspected_minor_injury,
    df_suspected_serious_injury,
    df_fatal_injury,
]

# Preview the first 5 rows of every per-severity dataframe
for severity_df in injury_severity_list:
    display(severity_df.head())

In [None]:
# Display the total number of accidents for each severity
for severity_df in injury_severity_list:
    # The label is read from the first row of the frame.
    # Single quotes are used inside the double-quoted f-string: re-using the outer
    # quote character inside a replacement field is a SyntaxError before Python 3.12.
    display(f"Total number {severity_df['Injury Severity'].iloc[0]} {len(severity_df)}")

In [None]:
# make a new dataframe with the time of day and hour of accident
def makeNewDataFrame(df):
    """Return a copy of ``df`` with time-of-day columns derived from its index.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index is a ``DatetimeIndex`` (the ``.time`` and ``.hour``
        accessors are read from it). The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with two extra columns:
        'Time of Day' (``datetime.time`` objects taken from the index) and
        'HourOfAccident' (integer hour taken from the index).
    """
    df = df.copy()
    df['Time of Day'] = df.index.time
    # Read the hour straight from the DatetimeIndex. Round-tripping the time
    # objects through pd.to_datetime(..., format='%H:%M:%S') is unnecessary and
    # breaks on times that carry fractional seconds.
    df['HourOfAccident'] = df.index.hour
    return df

# Add the time-of-day columns to every per-severity dataframe and keep the
# results in a list ordered from least to most severe
df_injury_list = [
    makeNewDataFrame(severity_df)
    for severity_df in (
        df_no_apparent_injury,
        df_suspected_minor_injury,
        df_suspected_serious_injury,
        df_fatal_injury,
    )
]

# Rebind the individual names to the augmented dataframes held in the list
(
    df_no_apparent_injury,
    df_suspected_minor_injury,
    df_suspected_serious_injury,
    df_fatal_injury,
) = df_injury_list

# Preview the first 5 rows of each augmented dataframe
for severity_df in df_injury_list:
    display(severity_df.head())

In [14]:
# Function to graph the time of day for each injury severity

def graph_injury_severity(df):
    """Plot a histogram of accident counts per hour of day for one severity frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'HourOfAccident' column (integer hours, 0-23) and an
        'Injury Severity' column. The first 'Injury Severity' value is used as
        the label in the figure title.

    Returns
    -------
    None
        A new figure is created and shown with ``plt.show()``.
    """
    # Take the label from the frame that was passed in rather than from a global
    # loop variable, so the title is correct no matter how the function is called.
    # An empty frame has no label to read; fall back to the generic title.
    severity_label = df['Injury Severity'].iloc[0] if len(df) else ''
    title = f'{severity_label} Time of Day'.strip()
    time_labels = [f'{hour:02d}:00' for hour in range(24)]

    plt.figure(figsize=(8, 5))
    # One bin per hour: edges 0..24, with align='left' so each bar is centred on
    # its hour tick. bins=24 would split the *observed* range into 24 equal
    # slices, which do not line up with the hour ticks.
    plt.hist(df['HourOfAccident'], bins=range(25), align='left',
             color='blue', alpha=0.7, rwidth=0.80)
    plt.grid(axis='y')
    plt.title(title)
    plt.xlabel('Time of Day')
    plt.ylabel('Accident Count')
    plt.xticks(range(24), time_labels, rotation=45)
    # Always show the full day, even when some hours have no accidents.
    plt.xlim(-0.5, 23.5)
    plt.tight_layout()
    plt.show()

In [None]:
# Draw the time-of-day histogram once for every dataframe in df_injury_list
for i in df_injury_list:
    graph_injury_severity(i)  



In [None]:
# Accidents by month
df_dateTimeSeverity['Month'] = df_dateTimeSeverity.index.month
plt.figure(figsize=(8, 5))
# One bin per calendar month: edges 1..13, with align='left' so each bar is
# centred on its month tick. bins=12 would split the observed range into 12
# equal slices that do not line up with the integer ticks.
plt.hist(df_dateTimeSeverity['Month'], bins=range(1, 14), align='left',
         color='blue', alpha=0.7, rwidth=0.8)
plt.title('Accidents by Month')
plt.xlabel('Month')
plt.ylabel('Accident Count')
plt.xticks(range(1, 13))
plt.tight_layout()
plt.show()

In [None]:
# Plot accidents by year
df_dateTimeSeverity['Year'] = df_dateTimeSeverity.index.year
# Derive the year range from the data instead of hard-coding it, so the bins and
# ticks stay correct when the dataset covers different years.
first_year = int(df_dateTimeSeverity['Year'].min())
last_year = int(df_dateTimeSeverity['Year'].max())
plt.figure(figsize=(8, 5))
# One bin per year: integer edges with align='left' centre each bar on its year
# tick. A fixed bins=10 would not match the number of distinct years.
plt.hist(df_dateTimeSeverity['Year'], bins=range(first_year, last_year + 2), align='left',
         color='blue', alpha=0.7, rwidth=0.8)
plt.title('Accidents by Year')
plt.xlabel('Year')
plt.ylabel('Accident Count')
plt.xticks(range(first_year, last_year + 1), rotation=45)
plt.tight_layout()
plt.show()