In [24]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output
import geopandas as gpd
import folium
from folium.plugins import HeatMap, MarkerCluster

In [25]:
# Load data/clean_data.csv; OCC_DATE is parsed into datetimes while reading
df = pd.read_csv(
    "data/clean_data.csv",
    parse_dates=["OCC_DATE"],
)

In [26]:
# Preview the first rows to sanity-check the loaded columns and dtypes
df.head()

Unnamed: 0,OBJECTID,OCC_DATE,OCC_HOUR,LOCATION_TYPE,NEIGHBOURHOOD_158,LONG_WGS84,LAT_WGS84,MCI_CATEGORY,dayofweek,dayofweek_sin,dayofweek_cos,hour_sin,hour_cos,year,month,isholiday,lat_hour_cos,lat_hour_sin,long_hour_cos,long_hour_sin
0,2,2014-01-01 05:00:00+00:00,3,Commercial Dwelling Unit,South Riverdale,-79.350229,43.646293,Break and Enter,2,0.974928,-0.222521,0.707107,0.707107,2014,1,True,30.86259,30.86259,-56.109085,-56.109085
1,3,2014-01-01 05:00:00+00:00,4,Apartment,North St.James Town,-79.376497,43.666423,Assault,2,0.974928,-0.222521,0.866025,0.5,2014,1,True,21.833211,37.816231,-39.688249,-68.742063
2,4,2014-01-01 05:00:00+00:00,4,"Streets, Roads, Highways",NSA,-85.488744,0.0,Theft Over,2,0.974928,-0.222521,0.866025,0.5,2014,1,True,0.0,0.0,-42.744372,-74.035424
3,5,2014-01-01 05:00:00+00:00,4,"Streets, Roads, Highways",Blake-Jones,-79.344839,43.678946,Assault,2,0.974928,-0.222521,0.866025,0.5,2014,1,True,21.839473,37.827077,-39.67242,-68.714646
4,6,2014-01-01 05:00:00+00:00,2,Bar / Restaurant,Wellington Place,-79.391841,43.646639,Assault,2,0.974928,-0.222521,0.5,0.866025,2014,1,True,37.799098,21.82332,-68.755351,-39.695921


***

In [27]:
# Count incidents for every (year, month, crime category) combination.
# The result is a long-format frame with one row per combination, ready to plot.
crime_YM_counts = (
    df.groupby(['year', 'month', 'MCI_CATEGORY'])
    .size()
    .reset_index(name='CrimeCount')
)

# Build a 'Date' column pointing at the first day of each (year, month) pair
# so the monthly counts can be drawn on a continuous time axis.
crime_YM_counts['Date'] = pd.to_datetime(
    crime_YM_counts[['year', 'month']].assign(day=1)
)


In [28]:
# Preview the monthly counts, including the derived 'Date' column
crime_YM_counts.head()

Unnamed: 0,year,month,MCI_CATEGORY,CrimeCount,Date
0,2014,1,Assault,1363,2014-01-01
1,2014,1,Auto Theft,240,2014-01-01
2,2014,1,Break and Enter,586,2014-01-01
3,2014,1,Robbery,291,2014-01-01
4,2014,1,Theft Over,103,2014-01-01


# Crime Over Time (2014~2022)

In [29]:
# Monthly incident counts over time, one line per crime category.
# The final title is passed straight to px.line instead of passing a
# placeholder title that is then overwritten in update_layout.
fig = px.line(
    crime_YM_counts,
    x='Date',
    y='CrimeCount',
    color='MCI_CATEGORY',
    title='Incidents of Crime over time (2014 ~ 2022)',
    width=800,
    height=400,
)
# Human-readable axis / legend titles and an explicit template
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Crime Count',
    legend_title='Crime Category',
    template='plotly',
)
fig.show()


# Type of Crime by Time of Day

In [30]:
# Count incidents for every (year, hour, crime category) combination.
# The result is a long-format frame with one row per combination, ready to plot.
crime_hr_counts = (
    df.groupby(['year', 'OCC_HOUR', 'MCI_CATEGORY'])
    .size()
    .reset_index(name='CrimeCount')
)
crime_hr_counts.head()

Unnamed: 0,year,OCC_HOUR,MCI_CATEGORY,CrimeCount
0,2014,0,Assault,1194
1,2014,0,Auto Theft,212
2,2014,0,Break and Enter,433
3,2014,0,Robbery,210
4,2014,0,Theft Over,121


In [31]:
def update_plot(selected_year):
    """Show the hourly crime counts for one year, one line per crime category.

    Reads the notebook-level ``crime_hr_counts`` frame, keeps the rows whose
    ``year`` equals ``selected_year`` and renders a Plotly line chart of
    ``CrimeCount`` against ``OCC_HOUR``.

    Parameters
    ----------
    selected_year : int
        Year to display; compared for equality against the ``year`` column.

    Returns
    -------
    None
        The figure is rendered via ``fig.show()`` and not returned.
    """
    # NOTE: output is deliberately not cleared here; that is left to the
    # caller, which knows what else is displayed alongside the chart.
    filtered_data = crime_hr_counts[crime_hr_counts['year'] == selected_year]
    fig = px.line(
        filtered_data,
        x='OCC_HOUR',
        y='CrimeCount',
        color='MCI_CATEGORY',
        # Label keys must be the plotted column names; a key for a column
        # that is not plotted (e.g. 'Month') has no effect on the chart.
        labels={
            'OCC_HOUR': 'Hour of Day',
            'CrimeCount': 'Number of Crimes',
            'MCI_CATEGORY': 'Crime Category',
        },
        title=f'Crime Count for Year {selected_year} by Hour',
    )
    fig.update_layout(showlegend=True)  # Show legend for crime types
    fig.show()

In [32]:
# Get the unique years in the dataset (order of first appearance)
years = crime_hr_counts['year'].unique()

# Create a dropdown widget for selecting the year
dropdown = widgets.Dropdown(options=years, description='year')

# Dedicated output area for the chart. Anything shown from a widget callback
# must be captured by an Output widget to be displayed reliably, and clearing
# it lets each selection replace the previous chart instead of piling up.
plot_output = widgets.Output()


def dropdown_event_handler(change):
    """Redraw the hourly chart when the dropdown selection changes.

    ``change`` is the change notification passed by ``dropdown.observe``;
    ``change.new`` holds the newly selected year.
    """
    with plot_output:
        clear_output(wait=True)  # wait=True avoids flicker between redraws
        update_plot(change.new)


# Attach the event handler to the dropdown's value change event
dropdown.observe(dropdown_event_handler, names='value')

# Display the dropdown widget together with the chart area
display(dropdown, plot_output)

# Initialize the plot with the first year in the dataset
initial_year = years[0] #start 2014
with plot_output:
    update_plot(initial_year)

Dropdown(description='year', options=(2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022), value=2014)

***

# Heat Map

In [34]:
# Count incidents for every (neighbourhood, crime category) pair
type_crime_df = (
    df.groupby(['NEIGHBOURHOOD_158', 'MCI_CATEGORY'])
    .size()
    .reset_index(name='CrimeCount')
)

In [36]:
# Reshape to wide format: one row per neighbourhood and one column per crime
# category, then append Total_Crime as the row-wise sum of the five categories.
wide_format_df = type_crime_df.pivot(
    index='NEIGHBOURHOOD_158',
    columns='MCI_CATEGORY',
    values='CrimeCount',
).assign(
    Total_Crime=lambda counts: counts[
        ['Assault', 'Auto Theft', 'Break and Enter', 'Robbery', 'Theft Over']
    ].sum(axis=1)
)
wide_format_df

MCI_CATEGORY,Assault,Auto Theft,Break and Enter,Robbery,Theft Over,Total_Crime
NEIGHBOURHOOD_158,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt North,673,343,381,226,40,1663
Agincourt South-Malvern West,1124,404,657,236,103,2524
Alderwood,322,227,245,64,57,915
Annex,2403,275,1387,387,272,4724
Avondale,424,130,101,58,40,753
...,...,...,...,...,...,...
Yonge-Doris,693,177,130,150,73,1223
Yonge-Eglinton,667,123,267,106,43,1206
Yonge-St.Clair,280,78,222,31,30,641
York University Heights,2987,1225,1031,599,353,6195


In [40]:
# Inspect the per-neighbourhood totals on their own (Series indexed by neighbourhood)
wide_format_df['Total_Crime']

NEIGHBOURHOOD_158
Agincourt North                 1663
Agincourt South-Malvern West    2524
Alderwood                        915
Annex                           4724
Avondale                         753
                                ... 
Yonge-Doris                     1223
Yonge-Eglinton                  1206
Yonge-St.Clair                   641
York University Heights         6195
Yorkdale-Glen Park              3229
Name: Total_Crime, Length: 159, dtype: int64