<h1>Crime GeoSpatial HeatMap: Chicago Crimes</h1>
<h3>Inclusion Year: 2001-Present</h3>
<h4>Analyst: Rogemson P. Molina</h4>

<h2>Importing Libraries</h2>

In [None]:
import numpy as np
import pandas as pd 
import seaborn as sns 
import matplotlib.pyplot as plt 

import folium
from folium.plugins import HeatMap

import warnings 
# Silence all warnings for the rest of the session.
# NOTE(review): this is a blanket filter, so warnings raised by the cells below are hidden as well.
warnings.filterwarnings('ignore')

<h2>Importing dataset</h2>

In [None]:
# Read the crimes CSV into the frame that every later cell works on.
df = pd.read_csv(filepath_or_buffer='dataset//Chicago_Crimes.csv')

In [None]:
# Show the freshly loaded frame as the cell output.
df

<h2>Checking and filling up null values</h2>

In [None]:
# Label missing location descriptions explicitly as 'Unknown'.
df['Location Description'] = df['Location Description'].mask(df['Location Description'].isna(), 'Unknown')

In [None]:
# Number of missing values remaining in each column.
df.isna().sum()

<h2>Changing datatypes</h2>

In [None]:
# Column dtypes as loaded, before any conversion.
df.dtypes

In [None]:
# Parse both timestamp columns; values that cannot be parsed become NaT instead of raising.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df['Updated On'] = pd.to_datetime(df['Updated On'], errors='coerce')

# Missing community areas receive the placeholder -1 so the column can be cast to int.
df['Community Area'] = df['Community Area'].fillna(-1).astype(int)

# Cast the identifier codes to text while keeping missing entries missing.
# A bare astype(str) converts NaN into the literal string 'nan', which would make
# later isnull()/notnull() checks on these columns treat the gaps as real values.
df['Case Number'] = df['Case Number'].astype(str).where(df['Case Number'].notna())
df['IUCR'] = df['IUCR'].astype(str).where(df['IUCR'].notna())

# Store the coordinates in single precision.
df['Latitude'] = df['Latitude'].astype('float32')
df['Longitude'] = df['Longitude'].astype('float32')

In [None]:
# Column dtypes after the conversions above, for comparison.
df.dtypes

<h2>Insights</h2>

In [None]:
# Keep only the rows that can be placed on a map. The explicit copy makes the result an
# independent frame, so the column assignments below do not write into a slice of the
# unfiltered data (the situation pandas reports as SettingWithCopyWarning).
df = df.dropna(subset=['Latitude', 'Longitude']).copy()

# Calendar features derived from the incident timestamp. 'Date' was already converted
# with pd.to_datetime in the dtype-conversion cell, so it is not parsed again here.
df['Month'] = df['Date'].dt.month_name()
df['Day'] = df['Date'].dt.day_name()
df['Year'] = df['Date'].dt.year
df['Hour'] = df['Date'].dt.hour

<h3>Where are theft incidents most concentrated in the city?</h3>

In [None]:
# Theft records that carry both coordinates.
filtered_df = df.loc[
    df['Primary Type'].eq('THEFT')
    & df['Latitude'].notna()
    & df['Longitude'].notna()
]
print(f"Filtered data size: {filtered_df.shape}")

# One row per distinct coordinate pair with the number of thefts recorded there.
aggregated_df = (
    filtered_df
    .groupby(['Latitude', 'Longitude'])
    .size()
    .rename('incident_count')
    .reset_index()
)

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = aggregated_df['incident_count'].pipe(
    lambda counts: (counts - counts.min()) / (counts.max() - counts.min())
)

total_thefts = aggregated_df['incident_count'].sum()
print(f"Total number of theft incidents: {total_thefts}")

# Row of the coordinate pair with the largest theft count.
max_incident_location = aggregated_df.loc[aggregated_df['incident_count'].idxmax()]
print(f"Location with the highest number of theft incidents: Latitude {max_incident_location['Latitude']}, Longitude {max_incident_location['Longitude']}")
print(f"Number of theft incidents in this location: {max_incident_location['incident_count']}")

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].to_numpy().tolist()
theft_map = folium.Map(
    location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()],
    zoom_start=11,
)
HeatMap(heat_data, radius=10, blur=15).add_to(theft_map)
theft_map

<h2>Where are Assault cases concentrated the most?</h2>

In [None]:
# Assault records that carry both coordinates.
filtered_df = df.loc[
    df['Primary Type'].eq('ASSAULT')
    & df['Latitude'].notna()
    & df['Longitude'].notna()
]
print(f"Filtered data size: {filtered_df.shape}")

# One row per distinct coordinate pair with the number of assaults recorded there.
aggregated_df = (
    filtered_df
    .groupby(['Latitude', 'Longitude'])
    .size()
    .rename('incident_count')
    .reset_index()
)

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = aggregated_df['incident_count'].pipe(
    lambda counts: (counts - counts.min()) / (counts.max() - counts.min())
)

total_assaults = aggregated_df['incident_count'].sum()
print(f"Total number of assault incidents: {total_assaults}")

# Row of the coordinate pair with the largest assault count.
max_incident_location = aggregated_df.loc[aggregated_df['incident_count'].idxmax()]
print(f"Location with the highest number of assault incidents: Latitude {max_incident_location['Latitude']}, Longitude {max_incident_location['Longitude']}")
print(f"Number of assault incidents in this location: {max_incident_location['incident_count']}")

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].to_numpy().tolist()
assault_map = folium.Map(
    location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()],
    zoom_start=11,
)
HeatMap(heat_data, radius=10, blur=15).add_to(assault_map)
assault_map


<h2>How do crime incidents vary with Domestic vs Non-Domestic crimes?</h2>

In [None]:
# Domestic incidents that carry both coordinates. The comparison is parenthesised
# because `&` binds more tightly than `==`: written bare, the expression compares
# 'Domestic' against the combined coordinate mask instead of AND-ing three conditions.
filtered_df = df[(df['Domestic'] == True) & df['Latitude'].notnull() & df['Longitude'].notnull()]

print(f"Filtered data size: {filtered_df.shape}")

# One row per distinct coordinate pair with the number of domestic incidents recorded there.
aggregated_df = filtered_df.groupby(['Latitude', 'Longitude']).size().reset_index(name='incident_count')

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = (
    (aggregated_df['incident_count'] - aggregated_df['incident_count'].min()) /
    (aggregated_df['incident_count'].max() - aggregated_df['incident_count'].min())
)

total_domestic_incidents = aggregated_df['incident_count'].sum()
print(f"Total number of domestic incidents: {total_domestic_incidents}")

# Row of the coordinate pair with the largest domestic-incident count.
max_incident_location = aggregated_df.loc[aggregated_df['incident_count'].idxmax()]
print(f"Location with the highest number of domestic incidents: Latitude {max_incident_location['Latitude']}, Longitude {max_incident_location['Longitude']}")
print(f"Number of domestic incidents in this location: {max_incident_location['incident_count']}")

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].values.tolist()
domestic_map = folium.Map(location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()], zoom_start=11)
HeatMap(heat_data, radius=10, blur=15).add_to(domestic_map)
domestic_map


<h2>What is the relationship between crime type and the district?</h2>

In [None]:
# Theft records with a known district and both coordinates.
filtered_df = df.loc[
    df['Primary Type'].eq('THEFT')
    & df['District'].notna()
    & df['Latitude'].notna()
    & df['Longitude'].notna()
]
print(f"Filtered data size: {filtered_df.shape}")

# Theft count per (coordinate pair, district) combination.
aggregated_df = (
    filtered_df
    .groupby(['Latitude', 'Longitude', 'District'])
    .size()
    .rename('incident_count')
    .reset_index()
)

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = aggregated_df['incident_count'].pipe(
    lambda counts: (counts - counts.min()) / (counts.max() - counts.min())
)

# Overall total and the row holding the single largest count.
total_theft_incidents = aggregated_df['incident_count'].sum()
max_incident_location = aggregated_df.loc[aggregated_df['incident_count'].idxmax()]

total_theft_answer = f"Across the dataset, there were a total of {total_theft_incidents} theft incidents recorded."
max_incident_answer = f"The location with the highest number of theft incidents is at Latitude {max_incident_location['Latitude']}, Longitude {max_incident_location['Longitude']}, with {max_incident_location['incident_count']} incidents."
print(total_theft_answer)
print(max_incident_answer)

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].to_numpy().tolist()
district_map = folium.Map(
    location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()],
    zoom_start=11,
)
HeatMap(heat_data, radius=10, blur=15).add_to(district_map)
district_map

<h2>Crime frequency in different wards based on domestic violence</h2>

In [None]:
# Domestic incidents in wards 3, 7 and 10 that carry both coordinates.
filtered_df = df.loc[
    df['Domestic'].eq(True)
    & df['Ward'].isin([3, 7, 10])
    & df['Latitude'].notna()
    & df['Longitude'].notna()
]
print(f"Filtered data size: {filtered_df.shape}")

# One row per distinct coordinate pair with its incident count.
aggregated_df = (
    filtered_df
    .groupby(['Latitude', 'Longitude'])
    .size()
    .rename('incident_count')
    .reset_index()
)

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = aggregated_df['incident_count'].pipe(
    lambda counts: (counts - counts.min()) / (counts.max() - counts.min())
)

# Overall total and the row holding the single largest count.
total_domestic_incidents = aggregated_df['incident_count'].sum()
max_incident_location = aggregated_df.loc[aggregated_df['incident_count'].idxmax()]

total_domestic_answer = f"Across the dataset, there were a total of {total_domestic_incidents} domestic violence incidents recorded in wards 3, 7, and 10."
max_incident_answer = f"The location with the highest number of domestic violence incidents is at Latitude {max_incident_location['Latitude']}, Longitude {max_incident_location['Longitude']}, with {max_incident_location['incident_count']} incidents."
print(total_domestic_answer)
print(max_incident_answer)

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].to_numpy().tolist()
ward_map = folium.Map(
    location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()],
    zoom_start=11,
)
HeatMap(heat_data, radius=10, blur=15).add_to(ward_map)
ward_map

<h2>Domestic Violence by Beat</h2>

In [None]:
# Domestic incidents with a known beat and both coordinates.
filtered_df = df.loc[
    df['Domestic'].eq(True)
    & df['Beat'].notna()
    & df['Latitude'].notna()
    & df['Longitude'].notna()
]
print(f"Filtered data size: {filtered_df.shape}")

# Incident count per (coordinate pair, beat) combination.
aggregated_df = (
    filtered_df
    .groupby(['Latitude', 'Longitude', 'Beat'])
    .size()
    .rename('incident_count')
    .reset_index()
)

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = aggregated_df['incident_count'].pipe(
    lambda counts: (counts - counts.min()) / (counts.max() - counts.min())
)

total_domestic_incidents = aggregated_df['incident_count'].sum()
total_domestic_answer = f"Across the dataset, there were a total of {total_domestic_incidents} domestic violence incidents recorded by Beat."
print(total_domestic_answer)

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].to_numpy().tolist()
beat_map = folium.Map(
    location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()],
    zoom_start=11,
)
HeatMap(heat_data, radius=10, blur=15).add_to(beat_map)
beat_map

<h2>Crime Type vs. Arrest Status</h2>

In [None]:
# Theft and assault records that ended in an arrest and carry both coordinates.
filtered_df = df.loc[
    df['Arrest'].eq(True)
    & df['Primary Type'].isin(['THEFT', 'ASSAULT'])
    & df['Latitude'].notna()
    & df['Longitude'].notna()
]
print(f"Filtered data size: {filtered_df.shape}")

# Arrest count per (coordinate pair, crime type) combination.
aggregated_df = (
    filtered_df
    .groupby(['Latitude', 'Longitude', 'Primary Type'])
    .size()
    .rename('incident_count')
    .reset_index()
)

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = aggregated_df['incident_count'].pipe(
    lambda counts: (counts - counts.min()) / (counts.max() - counts.min())
)

total_arrested_incidents = aggregated_df['incident_count'].sum()
total_arrested_answer = f"Across the dataset, there were a total of {total_arrested_incidents} incidents resulting in arrest."
print(total_arrested_answer)

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].to_numpy().tolist()
arrest_map = folium.Map(
    location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()],
    zoom_start=11,
)
HeatMap(heat_data, radius=10, blur=15).add_to(arrest_map)
arrest_map

<h2>Yearly Distribution of Crime</h2>

In [None]:
# Theft and assault records from 2015 through 2020 that carry both coordinates.
# The year range is bounded on both sides so the data matches the "2015 to 2020"
# total reported below; an open-ended `>= 2015` would also count every later year.
filtered_df = df[
    df['Year'].between(2015, 2020)
    & df['Primary Type'].isin(['THEFT', 'ASSAULT'])
    & df['Latitude'].notnull()
    & df['Longitude'].notnull()
]

print(f"Filtered data size: {filtered_df.shape}")

# Incident count per (coordinate pair, year) combination.
aggregated_df = filtered_df.groupby(['Latitude', 'Longitude', 'Year']).size().reset_index(name='incident_count')

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = (
    (aggregated_df['incident_count'] - aggregated_df['incident_count'].min()) /
    (aggregated_df['incident_count'].max() - aggregated_df['incident_count'].min())
)

total_crimes_2015_2020 = aggregated_df['incident_count'].sum()
total_crimes_answer = f"Across the dataset from 2015 to 2020, there were a total of {total_crimes_2015_2020} incidents."

print(total_crimes_answer)

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].values.tolist()
year_map = folium.Map(location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()], zoom_start=11)
HeatMap(heat_data, radius=10, blur=15).add_to(year_map)
year_map

<h2>Crime by Community Area and Domestic Violence</h2>

In [None]:
# Domestic incidents in community areas 1, 3 and 5 that carry both coordinates.
filtered_df = df.loc[
    df['Domestic'].eq(True)
    & df['Community Area'].isin([1, 3, 5])
    & df['Latitude'].notna()
    & df['Longitude'].notna()
]
print(f"Filtered data size: {filtered_df.shape}")

# Incident count per (coordinate pair, community area) combination.
aggregated_df = (
    filtered_df
    .groupby(['Latitude', 'Longitude', 'Community Area'])
    .size()
    .rename('incident_count')
    .reset_index()
)

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = aggregated_df['incident_count'].pipe(
    lambda counts: (counts - counts.min()) / (counts.max() - counts.min())
)

total_domestic_incidents = aggregated_df['incident_count'].sum()
total_domestic_answer = f"Across the dataset, there were a total of {total_domestic_incidents} domestic violence incidents in the selected community areas."
print(total_domestic_answer)

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].to_numpy().tolist()
community_map = folium.Map(
    location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()],
    zoom_start=11,
)
HeatMap(heat_data, radius=10, blur=15).add_to(community_map)
community_map


<h2>Crime Description vs. Location Description</h2>

In [None]:
# Records that have a description, a location description and both coordinates.
filtered_df = df.loc[
    df['Description'].notna()
    & df['Location Description'].notna()
    & df['Latitude'].notna()
    & df['Longitude'].notna()
]
print(f"Filtered data size: {filtered_df.shape}")

# Incident count per (coordinate pair, description, location description) combination.
aggregated_df = (
    filtered_df
    .groupby(['Latitude', 'Longitude', 'Description', 'Location Description'])
    .size()
    .rename('incident_count')
    .reset_index()
)

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = aggregated_df['incident_count'].pipe(
    lambda counts: (counts - counts.min()) / (counts.max() - counts.min())
)

total_crimes_description_location = aggregated_df['incident_count'].sum()
total_crimes_description_location_answer = f"Across the dataset, there were a total of {total_crimes_description_location} incidents with the given descriptions and locations."
print(total_crimes_description_location_answer)

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].to_numpy().tolist()
description_location_map = folium.Map(
    location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()],
    zoom_start=11,
)
HeatMap(heat_data, radius=10, blur=15).add_to(description_location_map)
description_location_map


<h2>Crime Frequency by District and Year</h2>

In [None]:
# Records from 2010 onward with a known district and both coordinates.
filtered_df = df.loc[
    df['Year'].ge(2010)
    & df['District'].notna()
    & df['Latitude'].notna()
    & df['Longitude'].notna()
]
print(f"Filtered data size: {filtered_df.shape}")

# Incident count per (coordinate pair, district, year) combination.
aggregated_df = (
    filtered_df
    .groupby(['Latitude', 'Longitude', 'District', 'Year'])
    .size()
    .rename('incident_count')
    .reset_index()
)

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = aggregated_df['incident_count'].pipe(
    lambda counts: (counts - counts.min()) / (counts.max() - counts.min())
)

total_crimes_by_district = aggregated_df['incident_count'].sum()
total_crimes_answer = f"Across the dataset, there were a total of {total_crimes_by_district} crimes recorded in the selected districts during the given period."
print(total_crimes_answer)

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].to_numpy().tolist()
district_year_map = folium.Map(
    location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()],
    zoom_start=11,
)
HeatMap(heat_data, radius=10, blur=15).add_to(district_year_map)
district_year_map


<h2>Crime Frequency by Arrest and Year</h2>

In [None]:
# Records from 2015 onward that ended in an arrest and carry both coordinates.
# The year comparison is parenthesised because `&` binds more tightly than `>=`:
# written bare, both sides of `>=` are first collapsed by `&`, which is not the
# intended four-way AND.
filtered_df = df[
    (df['Arrest'] == True)
    & (df['Year'] >= 2015)
    & df['Latitude'].notnull()
    & df['Longitude'].notnull()
]

print(f"Filtered data size: {filtered_df.shape}")

# Arrest count per (coordinate pair, year) combination.
aggregated_df = filtered_df.groupby(['Latitude', 'Longitude', 'Year']).size().reset_index(name='incident_count')

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = (
    (aggregated_df['incident_count'] - aggregated_df['incident_count'].min()) /
    (aggregated_df['incident_count'].max() - aggregated_df['incident_count'].min())
)

total_arrested_crimes_by_year = aggregated_df['incident_count'].sum()
total_arrested_crimes_answer = f"Across the dataset, there were a total of {total_arrested_crimes_by_year} arrested crimes between the years 2015 and the present."

print(total_arrested_crimes_answer)

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].values.tolist()
arrest_year_map = folium.Map(location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()], zoom_start=11)
HeatMap(heat_data, radius=10, blur=15).add_to(arrest_year_map)
arrest_year_map

<h2>Theft Incidents in Apartments</h2>

In [None]:
# Theft records whose location description is 'APARTMENT' and that carry both coordinates.
filtered_df = df.loc[
    df['Primary Type'].eq('THEFT')
    & df['Location Description'].eq('APARTMENT')
    & df['Latitude'].notna()
    & df['Longitude'].notna()
]
print(f"Filtered data size: {filtered_df.shape}")

# One row per distinct coordinate pair with its theft count.
aggregated_df = (
    filtered_df
    .groupby(['Latitude', 'Longitude'])
    .size()
    .rename('incident_count')
    .reset_index()
)

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = aggregated_df['incident_count'].pipe(
    lambda counts: (counts - counts.min()) / (counts.max() - counts.min())
)

total_thefts_in_apartments = aggregated_df['incident_count'].sum()
total_thefts_answer = f"Across the dataset, there were a total of {total_thefts_in_apartments} theft incidents reported in apartments."
print(total_thefts_answer)

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].to_numpy().tolist()
theft_apartment_map = folium.Map(
    location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()],
    zoom_start=12,
)
HeatMap(heat_data, radius=10, blur=15).add_to(theft_apartment_map)
theft_apartment_map


<h2>Top 5 months with highest crime recorded</h2>

In [None]:
# Incident totals per month name, then the five months with the largest totals.
monthly_incidents = df.groupby('Month').size().rename('incident_count').reset_index()
top_5_months = monthly_incidents.sort_values(by='incident_count', ascending=False).iloc[:5]
print(top_5_months)

# Records from those five months that carry both coordinates.
filtered_df = df.loc[
    df['Month'].isin(top_5_months['Month'])
    & df['Latitude'].notna()
    & df['Longitude'].notna()
]

# Incident count per (coordinate pair, month) combination.
aggregated_df = (
    filtered_df
    .groupby(['Latitude', 'Longitude', 'Month'])
    .size()
    .rename('incident_count')
    .reset_index()
)

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = aggregated_df['incident_count'].pipe(
    lambda counts: (counts - counts.min()) / (counts.max() - counts.min())
)

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].to_numpy().tolist()
incident_map = folium.Map(
    location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()],
    zoom_start=12,
)
HeatMap(heat_data, radius=10, blur=15).add_to(incident_map)
incident_map

<h2>Seasonal Crime Patterns — Day</h2>

In [None]:
# Incident totals per day name, then the five days with the largest totals.
daily_incidents = df.groupby('Day').size().rename('incident_count').reset_index()
top_5_days = daily_incidents.sort_values(by='incident_count', ascending=False).iloc[:5]
print(top_5_days)

# Records from those five days that carry both coordinates.
filtered_df = df.loc[
    df['Day'].isin(top_5_days['Day'])
    & df['Latitude'].notna()
    & df['Longitude'].notna()
]
print(f"Filtered data size: {filtered_df.shape}")

# Incident count per (coordinate pair, day name) combination.
aggregated_df = (
    filtered_df
    .groupby(['Latitude', 'Longitude', 'Day'])
    .size()
    .rename('incident_count')
    .reset_index()
)

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = aggregated_df['incident_count'].pipe(
    lambda counts: (counts - counts.min()) / (counts.max() - counts.min())
)

# Row with the highest scaled weight.
max_loc = aggregated_df.loc[aggregated_df['normalized_weight'].idxmax()]
print(f"Peak day location at Latitude {max_loc['Latitude']}, Longitude {max_loc['Longitude']}.")
print(f"Total incidents on those days: {aggregated_df['incident_count'].sum()}.")

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].to_numpy().tolist()
seasonal_crime = folium.Map(
    location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()],
    zoom_start=11,
)
HeatMap(heat_data, radius=10, blur=15).add_to(seasonal_crime)
seasonal_crime

<h2>Most Arrested Crime Types at Night (8 PM – 4 AM)</h2>

In [None]:
# Arrests made during night hours. The two hour windows are OR-ed inside one pair of
# parentheses before the arrest condition is applied: `&` binds more tightly than `|`,
# so without that grouping every 20:00-23:59 record is kept whether or not it led to
# an arrest.
# NOTE(review): between(0, 4) includes hour 4 (04:00-04:59) — confirm this matches the
# intended "8 PM – 4 AM" window.
night_df = df[
    (df['Hour'].between(20, 23) | df['Hour'].between(0, 4))
    & (df['Arrest'] == True)
]

# Night-time arrest totals per crime type, then the five largest.
crime_arrest_counts = night_df.groupby('Primary Type').size().reset_index(name='arrest_count')
top_5_crimes = crime_arrest_counts.sort_values(by='arrest_count', ascending=False).head(5)
print(top_5_crimes)

# Night-time arrests for those five crime types that carry both coordinates.
filtered_df = night_df[night_df['Primary Type'].isin(top_5_crimes['Primary Type']) &
                       night_df['Latitude'].notnull() &
                       night_df['Longitude'].notnull()]
print(f"Filtered data size: {filtered_df.shape}")

# Arrest count per (coordinate pair, crime type) combination.
aggregated_df = filtered_df.groupby(['Latitude', 'Longitude', 'Primary Type']).size().reset_index(name='arrest_count')

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = (
    (aggregated_df['arrest_count'] - aggregated_df['arrest_count'].min()) /
    (aggregated_df['arrest_count'].max() - aggregated_df['arrest_count'].min())
)

# Row with the highest scaled weight.
max_loc = aggregated_df.loc[aggregated_df['normalized_weight'].idxmax()]
print(f"Peak arrest location at night: {max_loc['Latitude']}, {max_loc['Longitude']}")
print(f"Total nighttime arrests: {aggregated_df['arrest_count'].sum()}.")

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].values.tolist()
crime_night = folium.Map(location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()], zoom_start=11)
HeatMap(heat_data, radius=10, blur=15).add_to(crime_night)
crime_night

<h2>Domestic Incidents in Residences</h2>

In [None]:
# Domestic incidents whose location description is 'RESIDENCE'.
domestic_df = df.loc[df['Domestic'].eq(True) & df['Location Description'].eq('RESIDENCE')]

# Totals per day name, then the five days with the largest totals.
domestic_count = domestic_df.groupby('Day').size().rename('incident_count').reset_index()
top_days = domestic_count.sort_values(by='incident_count', ascending=False).iloc[:5]
print(top_days)

# Incidents from those days that carry both coordinates.
filtered_df = domestic_df.loc[
    domestic_df['Day'].isin(top_days['Day'])
    & domestic_df['Latitude'].notna()
    & domestic_df['Longitude'].notna()
]
print(f"Filtered data size: {filtered_df.shape}")

# Incident count per (coordinate pair, day name) combination.
aggregated_df = (
    filtered_df
    .groupby(['Latitude', 'Longitude', 'Day'])
    .size()
    .rename('incident_count')
    .reset_index()
)

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = aggregated_df['incident_count'].pipe(
    lambda counts: (counts - counts.min()) / (counts.max() - counts.min())
)

# Row with the highest scaled weight.
max_loc = aggregated_df.loc[aggregated_df['normalized_weight'].idxmax()]
print(f"Domestic peak at Latitude {max_loc['Latitude']}, Longitude {max_loc['Longitude']}")
print(f"Total incidents: {aggregated_df['incident_count'].sum()}")

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].to_numpy().tolist()
domestice_residence = folium.Map(
    location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()],
    zoom_start=11,
)
HeatMap(heat_data, radius=10, blur=15).add_to(domestice_residence)
domestice_residence

<h2>Theft/Narcotics in Top 5 Community Areas on Weekdays</h2>

In [None]:
# Theft and narcotics records that occurred Monday through Friday.
weekday_df = df[df['Day'].isin(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'])]
target_crimes = weekday_df[weekday_df['Primary Type'].isin(['THEFT', 'NARCOTICS'])]

# Five community areas with the most such records. The value -1 is the placeholder
# that the dtype-conversion cell assigns to missing community areas, so it is left
# out of the ranking rather than being reported as a real area.
top_areas = (
    target_crimes.loc[target_crimes['Community Area'] != -1, 'Community Area']
    .value_counts()
    .head(5)
    .index
    .tolist()
)

# Records in those areas between hours 12 and 18 (inclusive) that carry both coordinates.
filtered_df = target_crimes[target_crimes['Community Area'].isin(top_areas) &
                            target_crimes['Hour'].between(12, 18) &
                            target_crimes['Latitude'].notnull() &
                            target_crimes['Longitude'].notnull()]
print(f"Filtered data size: {filtered_df.shape}")

# Incident count per (coordinate pair, community area) combination.
aggregated_df = filtered_df.groupby(['Latitude', 'Longitude', 'Community Area']).size().reset_index(name='incident_count')

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = (
    (aggregated_df['incident_count'] - aggregated_df['incident_count'].min()) /
    (aggregated_df['incident_count'].max() - aggregated_df['incident_count'].min())
)

# Row with the highest scaled weight.
max_loc = aggregated_df.loc[aggregated_df['normalized_weight'].idxmax()]
print(f"Peak weekday afternoon hotspot: {max_loc['Latitude']}, {max_loc['Longitude']}")
print(f"Total weekday drug/theft cases: {aggregated_df['incident_count'].sum()}")

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].values.tolist()
theft_narcotics = folium.Map(location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()], zoom_start=11)
HeatMap(heat_data, radius=10, blur=15).add_to(theft_narcotics)
theft_narcotics

<h2>Criminal Damage in High-Numbered Wards with Reporting Delay</h2>

In [None]:
# Whole days between the incident timestamp and the record's 'Updated On' timestamp.
df['Reporting Delay'] = df['Updated On'].sub(df['Date']).dt.days

# Criminal-damage records in wards numbered above 30 ...
damage_df = df.loc[df['Primary Type'].eq('CRIMINAL DAMAGE') & df['Ward'].gt(30)]
# ... restricted to those whose delay exceeds the median delay of that subset.
high_delay_df = damage_df.loc[damage_df['Reporting Delay'].gt(damage_df['Reporting Delay'].median())]
print(f"Delayed reports: {high_delay_df.shape}")

# Keep rows with both coordinates and count them per distinct coordinate pair.
filtered_df = high_delay_df.loc[high_delay_df['Latitude'].notna() & high_delay_df['Longitude'].notna()]
aggregated_df = (
    filtered_df
    .groupby(['Latitude', 'Longitude'])
    .size()
    .rename('incident_count')
    .reset_index()
)

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = aggregated_df['incident_count'].pipe(
    lambda counts: (counts - counts.min()) / (counts.max() - counts.min())
)

# Row with the highest scaled weight.
max_loc = aggregated_df.loc[aggregated_df['normalized_weight'].idxmax()]
print(f"Peak delay damage hotspot: {max_loc['Latitude']}, {max_loc['Longitude']}")
print(f"Total delayed damage reports: {aggregated_df['incident_count'].sum()}")

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].to_numpy().tolist()
high_numbered_wards = folium.Map(
    location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()],
    zoom_start=11,
)
HeatMap(heat_data, radius=10, blur=15).add_to(high_numbered_wards)
high_numbered_wards

<h2>Crime Classification Comparison Based on Primary Type, FBI Code, and IUCR</h2>

In [None]:
# Theft, battery and criminal-damage records that have both classification codes.
filtered_class_df = df[
    (df['Primary Type'].isin(['THEFT', 'BATTERY', 'CRIMINAL DAMAGE'])) &
    (df['FBI Code'].notnull()) & (df['IUCR'].notnull())
]

# Record count per (primary type, FBI code, IUCR) combination; keep the combinations
# whose count is above the median count.
combo_counts = filtered_class_df.groupby(['Primary Type', 'FBI Code', 'IUCR']).size().reset_index(name='incident_count')
high_freq_combos = combo_counts[combo_counts['incident_count'] > combo_counts['incident_count'].median()]
print(f"High frequency classification combinations: {high_freq_combos.shape}")

# Attach the combination counts to the matching records. The join runs on the already
# filtered subset: every combination was derived from it, so no row outside it can
# match, and joining the full frame would only add work for the same result.
refined_df = filtered_class_df.merge(high_freq_combos, on=['Primary Type', 'FBI Code', 'IUCR'])
filtered_df = refined_df[refined_df['Latitude'].notnull() & refined_df['Longitude'].notnull()]
print(f"Filtered mapping data size: {filtered_df.shape}")

# One row per distinct coordinate pair with its record count.
aggregated_df = filtered_df.groupby(['Latitude', 'Longitude']).size().reset_index(name='incident_count')

# Min-max scaling: the smallest count maps to 0 and the largest to 1.
aggregated_df['normalized_weight'] = (
    (aggregated_df['incident_count'] - aggregated_df['incident_count'].min()) /
    (aggregated_df['incident_count'].max() - aggregated_df['incident_count'].min())
)

# Row with the highest scaled weight.
max_loc = aggregated_df.loc[aggregated_df['normalized_weight'].idxmax()]
print(f"Classification hotspot: Latitude {max_loc['Latitude']}, Longitude {max_loc['Longitude']}")
print(f"Total incidents in selected classification groups: {aggregated_df['incident_count'].sum()}")

# [lat, lon, weight] triples for the heat layer; the map is centred on the mean coordinate.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].values.tolist()
crime_classification = folium.Map(location=[filtered_df['Latitude'].mean(), filtered_df['Longitude'].mean()], zoom_start=11)
HeatMap(heat_data, radius=10, blur=15).add_to(crime_classification)
crime_classification