In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook
from bokeh.palettes import HighContrast3
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.embed import components
from bokeh.resources import CDN
import folium
from folium.plugins import HeatMap
from folium import plugins


In [15]:


def dataframes(focuscrimes=None):
    '''
    This function reads the data from the two csv files and combines them into one dataframe.
    It then filters the data based on the focus crimes and returns the filtered dataframe.
    If no focus crimes are provided, the function returns the entire dataframe including all crimes. 
    '''
    df_present = pd.read_csv('PD_SF_2018_2024.csv', usecols=["Incident Category", "Incident Datetime", "Latitude", "Longitude"])
    df_past = pd.read_csv("PD_SF_2003_2018.csv", usecols=["Category", "Date", "Time", "Y", "X"])
    df_present['Incident Datetime'] = pd.to_datetime(df_present['Incident Datetime'])
    # df_past['Incident Datetime'] = df_past['Date'] + ' ' + df_past['Time']
    # df_past['Incident Datetime'] = pd.to_datetime(df_past['Incident Datetime'])
    df_past['Incident Datetime'] = pd.to_datetime(df_past['Date'] + ' ' + df_past['Time'],
    errors='coerce')

   
    df_past.rename(columns={"Category": "Incident Category", 'X': 'Longitude', 'Y': 'Latitude'}, inplace=True)
    df_past.index = df_past['Incident Datetime']
    df_present.index = df_present['Incident Datetime']


    df_past.sort_index(inplace=True)
    df_present.sort_index(inplace=True)

    df_past = df_past.loc[:'2018-01-01']
    df_present = df_present.loc['2018-01-01':]
    #To create consistency with the focus crimes, all crimes are set the upper case letters
    df_present['Incident Category'] = df_present['Incident Category'].str.upper()
    
    #Combining the two data set vertically 
    df = pd.concat([df_past, df_present], axis=0)
    df = df.drop(columns=['Date', 'Time'])
    # Standardize crime categories
    crime_rename_map = {
        "LARCENY THEFT": "LARCENY/THEFT",
        "MOTOR VEHICLE THEFT": "VEHICLE THEFT",
        "MOTOR VEHICLE THEFT": "VEHICLE THEFT",
        "MALICIOUS MISCHIEF": "VANDALISM",
        "WEAPONS OFFENCE": "WEAPON LAWS", 
        "WEAPONS OFFENSE": "WEAPON LAWS", 
        "WEAPONS CARRYING ET": "WEAPON LAWS", 
        "DRUG NARCOTIC": "DRUG/NARCOTIC",
        "DRUG VIOLATION": "DRUG/NARCOTIC",
        "DRUG OFFENSE": "DRUG/NARCOTIC",
        'DISORDERLY CONDUCT': 'DRUNKENNESS'
    }
    df["Incident Category"] = df["Incident Category"].replace(crime_rename_map)
    

    #Providing columns with different times will be used in the tasks
    df['Year'] = df.index.year
    df['Month'] = df.index.month
    df['Day'] = df.index.day
    df['Hour'] = df.index.hour
    df['Minute'] = df.index.minute
    df['YearMonth'] = df.index.to_period('M')
    # make  df['YearMonth'] datetime
    df['YearMonth'] = df['YearMonth'].dt.to_timestamp()
    

    if focuscrimes:

        df_focuscrimes = df[df['Incident Category'].isin(focuscrimes)]
        return df_focuscrimes
    else:
        return df



In [16]:
focuscrimes = set(['OFFENCES AGAINST THE FAMILY AND CHILDREN', 'DRUG/NARCOTIC', 'DRUNKENNESS'])

df = dataframes() #full dataset
df_focuscrimes = dataframes(focuscrimes) #dataset with only focuscrimes

# Filter data for years 2014 to 2024
df_filtered = df_focuscrimes[(df_focuscrimes['Year'] >= 2018) & (df_focuscrimes['Year'] <= 2024)]


In [17]:
df_cleaned = df_filtered.dropna(subset=['Latitude', 'Longitude'])

Folium heatmap plot OFFENCE

In [40]:

# Assuming 'Incident Date' is already a datetime object
# Create Weight column based on the month from 'Incident Date'
df_abuse = df_cleaned[df_cleaned["Incident Category"] == "OFFENCES AGAINST THE FAMILY AND CHILDREN"]

heat_df = df_abuse.dropna(axis=0, subset=['Latitude','Longitude'])
heat_df['Weight'] = df_abuse.index.year
lat=37.77919
lon=-122.41914
map_hooray=folium.Map([lat, lon], zoom_start=13)
# Drop rows with missing Latitude, Longitude, or Weight
heat_df = heat_df.dropna(axis=0, subset=['Latitude', 'Longitude', 'Weight'])

# List comprehension to create list of lists for heatmap
heat_data = [
    [[row['Latitude'], row['Longitude']] for index, row in heat_df[heat_df['Weight'] == i].iterrows()]
    for i in range(df_cleaned['Year'][0], df_cleaned['Year'][-1]+1)  
]

loc = 'OFFENCES AGAINST THE FAMILY AND CHILDREN'
title_html = '''
             <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(loc) 
map_hooray.get_root().html.add_child(folium.Element(title_html))
# Plot HeatMapWithTime
year_range = list(range(df_cleaned['Year'].min(), df_cleaned['Year'].max() + 1))


hm = plugins.HeatMapWithTime(
    heat_data,
    index=year_range,  # <- this makes the time slider show actual years
    auto_play=True,
    max_opacity=0.8
)

hm.add_to(map_hooray)# Inject custom CSS to make the time slider smaller
# Inject custom CSS to make the time slider smaller
# Custom CSS to style the time slider
custom_css = """
    /* Resize the time slider */
    .leaflet-control-heatmap-time-slider {
        width: 50px !important;  /* Set the width of the time slider */
        height: 5px !important;  /* Set the height of the time slider */
        font-size: 1px !important;  /* Reduce the font size of the slider labels */
        background-color: rgba(0, 0, 0, 0.3) !important; /* Optional: Adjust background color */
        border-radius: 5px !important;  /* Optional: Add rounded corners */
    }

    /* Adjust the slider's button size */
    .leaflet-control-heatmap-time-slider button {
        width: 40px !important;  /* Adjust the width of the play/pause button */
        height: 20px !important;  /* Adjust the height of the play/pause button */
        font-size: 10px !important;  /* Reduce the font size of the play/pause button */
    }

    /* Adjust the position of the time slider (optional) */
    .leaflet-control-heatmap-time-slider {
        bottom: 20px !important;  /* Move the slider upwards */
        left: 10px !important;    /* Move the slider towards left */
    }
"""

# Inject custom CSS into the map
map_hooray.get_root().html.add_child(folium.Element(f"<style>{custom_css}</style>"))


# Display the map
map_hooray.save("map_hooray_offences.html")

map_hooray


  for i in range(df_cleaned['Year'][0], df_cleaned['Year'][-1]+1)


DRUNKENNESS

In [25]:
# Assuming 'Incident Date' is already a datetime object
# Create Weight column based on the month from 'Incident Date'
df_drunk= df_cleaned[df_cleaned["Incident Category"] == "DRUNKENNESS"]

heat_df = df_drunk.dropna(axis=0, subset=['Latitude','Longitude'])
heat_df['Weight'] = df_drunk.index.year
lat=37.77919
lon=-122.41914
map_hooray=folium.Map([lat, lon], zoom_start=13)
# Drop rows with missing Latitude, Longitude, or Weight
heat_df = heat_df.dropna(axis=0, subset=['Latitude', 'Longitude', 'Weight'])

# List comprehension to create list of lists for heatmap
heat_data = [
    [[row['Latitude'], row['Longitude']] for index, row in heat_df[heat_df['Weight'] == i].iterrows()]
    for i in range(df_cleaned['Year'][0], df_cleaned['Year'][-1]+1)  
]
loc = 'DRUNKENNESS'
title_html = '''
             <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(loc) 
map_hooray.get_root().html.add_child(folium.Element(title_html))
# Plot HeatMapWithTime
year_range = list(range(df_cleaned['Year'].min(), df_cleaned['Year'].max() + 1))

hm = plugins.HeatMapWithTime(
    heat_data,
    index=year_range,  # <- this makes the time slider show actual years
    auto_play=True,
    max_opacity=0.8
)

hm.add_to(map_hooray)

# Display the map
map_hooray.save("map_hooray_drunk.html")

map_hooray


  for i in range(df_cleaned['Year'][0], df_cleaned['Year'][-1]+1)


In [27]:
# Assuming 'Incident Date' is already a datetime object
# Create Weight column based on the month from 'Incident Date'
df_drug= df_cleaned[df_cleaned["Incident Category"] == "DRUG/NARCOTIC"]

heat_df = df_drug.dropna(axis=0, subset=['Latitude','Longitude'])
heat_df['Weight'] = df_drug.index.year
lat=37.77919
lon=-122.41914
map_hooray=folium.Map([lat, lon], zoom_start=13)
# Drop rows with missing Latitude, Longitude, or Weight
heat_df = heat_df.dropna(axis=0, subset=['Latitude', 'Longitude', 'Weight'])

# List comprehension to create list of lists for heatmap
heat_data = [
    [[row['Latitude'], row['Longitude']] for index, row in heat_df[heat_df['Weight'] == i].iterrows()]
    for i in range(df_cleaned['Year'][0], df_cleaned['Year'][-1]+1)  
]
loc = 'DRUG/NARCOTIC'
title_html = '''
             <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(loc) 
map_hooray.get_root().html.add_child(folium.Element(title_html))
# Plot HeatMapWithTime
year_range = list(range(df_cleaned['Year'].min(), df_cleaned['Year'].max() + 1))

hm = plugins.HeatMapWithTime(
    heat_data,
    index=year_range,  # <- this makes the time slider show actual years
    auto_play=True,
    max_opacity=0.8
)

hm.add_to(map_hooray)

# Display the map
map_hooray.save("map_hooray_drug.html")

map_hooray


  for i in range(df_cleaned['Year'][0], df_cleaned['Year'][-1]+1)
