In [None]:
# Dependencies
import json
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests


In [None]:
# City of Chicago open-data JSON endpoints for the three crash-related tables.
# Only the people endpoint is requested in this cell.
traffic_crash_url = "https://data.cityofchicago.org/resource/85ca-t3if.json"
people_url = "https://data.cityofchicago.org/resource/u6pd-qa9d.json"
vehicles_url= "https://data.cityofchicago.org/resource/68nd-jvt3.json"

# Fetch the people records. The timeout keeps a stalled connection from hanging
# the kernel, and raise_for_status() surfaces HTTP errors here instead of letting
# an error payload be parsed as if it were data.
people_response = requests.get(people_url, timeout=30)
people_response.raise_for_status()
people = people_response.json()

# Build the frame from the fetched records (it was previously created empty,
# which discarded everything that had just been downloaded).
people_df = pd.DataFrame(people)

# Show the first raw record to inspect the fields returned by the API.
people[0]

In [None]:
# Locate the three CSV extracts inside the local Resources folder
resources_dir = Path("Resources")
crashes_file = resources_dir / "Crashes.csv"
people_file = resources_dir / "People.csv"
vehicles_file = resources_dir / "Vehicles.csv"

# Read each extract into its own dataframe; all three use the same encoding
csv_encoding = "ISO-8859-1"
crashes_df, people_df, vehicles_df = (
    pd.read_csv(csv_path, encoding=csv_encoding)
    for csv_path in (crashes_file, people_file, vehicles_file)
)

In [None]:
# Display the column labels of the crashes dataframe.
crashes_df.columns


In [None]:
# Number of crash rows per CRASH_MONTH value, ordered by month number
month_column = crashes_df['CRASH_MONTH']
crash_month_summary = month_column.value_counts().sort_index()

# Report every calendar month, falling back to 0 when a month has no rows
for month in range(1, 13):
    count = crash_month_summary.get(month, 0)
    print(f"Month {month}: {count} occurrences")

In [None]:
# Count crash rows for every (month, crash type) pair ...
month_type_counts = crashes_df.groupby(['CRASH_MONTH', 'CRASH_TYPE']).size()

# ... then pivot crash types into columns, using 0 for pairs that never occur
crash_type_by_month = month_type_counts.unstack(fill_value=0)

print(crash_type_by_month)

In [None]:
# Grouped bar chart comparing the crash types for each calendar month.

# Month names used as x-axis labels; position i labels calendar month i + 1.
month_names = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

# Take the month x crash-type counts from `crash_type_by_month` (computed from
# crashes_df in an earlier cell) instead of hand-copied numbers, so the chart
# always reflects the data that is actually loaded.
# Reindexing to 1..12 guarantees exactly one row per calendar month, in order,
# so the bars line up with `month_names` even if a month has no crashes.
monthly_crash_types = crash_type_by_month.reindex(range(1, len(month_names) + 1), fill_value=0)

# Same layout as before: crash types as rows, months ('1'..'12') as columns.
df = monthly_crash_types.T
df.columns = [str(month_number) for month_number in monthly_crash_types.index]

# Create the grouped bar chart: one group per month, one bar per crash type.
ax = df.T.plot(kind='bar', figsize=(10, 6))
ax.set_title('CRASH TYPE Comparison by Month')
ax.set_xlabel('Month')
ax.set_ylabel('Count')

# Bars sit at positions 0..11; label them with the month names.
ax.set_xticks(range(len(month_names)))
ax.set_xticklabels(month_names, rotation=45)

# Move the legend outside the plot area so it does not cover the bars.
ax.legend(title='CRASH TYPE', bbox_to_anchor=(1.05, 1), loc='upper left')

# Show the plot
plt.show()

In [None]:
# The highest counts of "no injury / drive away" accidents are in May and October.
# The highest count of "injury and / or tow due to crash" accidents is in October.

In [None]:
# Number of entries in the INJURIES_FATAL column (len counts every row of the column).
len(crashes_df["INJURIES_FATAL"])

In [None]:
# Keep every crash that recorded at least one fatality. Comparing with `== 1`
# silently dropped crashes in which two or more people died.
fatal_crashes = crashes_df[crashes_df['INJURIES_FATAL'] > 0]

# Number of fatal crashes (rows, not fatalities) in each month.
fatal_injuries_by_month = fatal_crashes.groupby('CRASH_MONTH').size()

print(fatal_injuries_by_month)

In [None]:
# Bucket the crash rows by month, then total INJURIES_FATAL within each bucket
crashes_by_month = crashes_df.groupby('CRASH_MONTH')
fatal_injuries_by_month = crashes_by_month['INJURIES_FATAL'].sum()

# Show the per-month fatal injury totals
print(fatal_injuries_by_month)

In [None]:
# Line chart of the total fatal injuries recorded in each calendar month.

# Month names used as x-axis tick labels; position i labels calendar month i + 1.
month_names = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]

# Group the data by 'CRASH_MONTH' and sum the 'INJURIES_FATAL' values.
# Reindexing to 1..12 guarantees one entry per calendar month: without it, a
# month that is absent from the data leaves fewer tick positions than labels
# and the tick-label call below fails.
fatal_injuries_by_month = (
    crashes_df.groupby('CRASH_MONTH')['INJURIES_FATAL']
    .sum()
    .reindex(range(1, len(month_names) + 1), fill_value=0)
)

# Create a line graph
fig, ax = plt.subplots(figsize=(10, 6))
fatal_injuries_by_month.plot(ax=ax, marker='o', linestyle='-')
ax.set_title('Total Fatal Injuries by Month')
ax.set_xlabel('Month')
ax.set_ylabel('Total Fatal Injuries')

# Customize the x-axis tick labels to display month names
ax.set_xticks(fatal_injuries_by_month.index)
ax.set_xticklabels(month_names, rotation=45)

# Show the plot
ax.grid(True)
plt.show()

In [None]:
# Total fatal injuries by month are highest in July, followed by December and then September.

In [None]:
# Preview the first rows of the crashes dataframe.
crashes_df.head()

In [None]:
# Preview the first rows of the people dataframe.
people_df.head()

In [None]:
# Display the column labels of the people dataframe.
people_df.columns

In [None]:
# Count people-table rows for each distinct CRASH_DATE value
# NOTE(review): people_df presumably holds one row per person involved, so these
# totals would count person records rather than distinct crashes - confirm.
crash_date_by_month = people_df['CRASH_DATE'].value_counts().sort_index()

# Swap the CRASH_DATE labels for parsed timestamps so the series can be resampled
parsed_dates = pd.to_datetime(crash_date_by_month.index)
crash_date_by_month = crash_date_by_month.set_axis(parsed_dates)

# Collapse the per-timestamp counts into one total per calendar month
monthly_counts = crash_date_by_month.resample('M').sum()

print(monthly_counts)

In [None]:
import matplotlib.pyplot as plt

# Count people-table rows for each distinct CRASH_DATE value
crash_date_by_month = people_df['CRASH_DATE'].value_counts().sort_index()

# Swap the CRASH_DATE labels for parsed timestamps so the series can be resampled
parsed_dates = pd.to_datetime(crash_date_by_month.index)
crash_date_by_month = crash_date_by_month.set_axis(parsed_dates)

# Collapse the per-timestamp counts into one total per calendar month
monthly_counts = crash_date_by_month.resample('M').sum()

# Draw the monthly totals as a line with a marker on every month
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(monthly_counts.index, monthly_counts.values, marker='o', linestyle='-')
ax.set_title('Crash Counts by Month')
ax.set_xlabel('Month')
ax.set_ylabel('Crash Count')
ax.grid(True)

# Put one tick on each month and label it with the full month name
custom_tick_labels = monthly_counts.index.strftime('%B').tolist()
ax.set_xticks(monthly_counts.index)
ax.set_xticklabels(custom_tick_labels, rotation=45)

# Render the figure
plt.show()

In [None]:
# Crash count is highest in October, followed by May and then June.

In [None]:
# Display the column labels of the vehicles dataframe.
vehicles_df.columns


In [None]:
# Count vehicle-table rows per calendar month of CRASH_DATE.
# value_counts() on CRASH_DATE is keyed by the raw date values, not by month
# numbers, so looking up 1..12 in it directly never returns a per-month total.
# Parse the distinct date values and regroup the counts by month number first.
# NOTE(review): assumes CRASH_DATE holds values pd.to_datetime can parse, the
# same way the people table's CRASH_DATE is handled in this notebook - confirm.
date_counts = vehicles_df['CRASH_DATE'].value_counts()
date_counts.index = pd.to_datetime(date_counts.index)

# Sum the per-date counts within each month number (1-12, all years combined).
crash_date_summary = date_counts.groupby(date_counts.index.month).sum()

# Report every calendar month, falling back to 0 when a month has no rows.
for month in range(1, 13):
    count = crash_date_summary.get(month, 0)
    print(f"Month {month}: {count} occurrences")

In [None]:
# Preview the first rows of the vehicles dataframe.
vehicles_df.head()

In [None]:
# When are the most fatal months? (time series analysis) – Jian Liu


In [None]:
# Traffic accidents in City of Chicago – When and where is it safest to travel through Chicago, looking at just serious injuries and fatalities, last 5 years (2018-2022)

In [None]:
# What are the most fatal intersections in Chicago?


In [None]:
# What are the most common causes? (summary statistics) – Cynthia Saving


In [None]:
# What driver age is most prevalent? (comparison) – Sonia Singh


In [None]:
# What vehicle age is most common? (correlation) – Tony Quintana


In [None]:
# What vehicle make and/or model is most common? (comparison) – Sean Bell
