In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook
from bokeh.palettes import HighContrast3
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.embed import components
from bokeh.resources import CDN

In [None]:


def dataframes(focuscrimes=None, present_path='PD_SF_2018_2024.csv', past_path="PD_SF_2003_2018.csv"):
    '''
    Read the two incident CSV exports, merge them into one time-indexed dataframe
    and optionally keep only a chosen set of crime categories.

    Parameters
    ----------
    focuscrimes : collection of str, optional
        Upper-case category names to keep (matched after the renaming below).
        If None or empty, every category is returned.
    present_path : str, optional
        CSV with the columns "Incident Category", "Incident Datetime",
        "Latitude" and "Longitude".
    past_path : str, optional
        CSV with the columns "Category", "Date", "Time", "Y" and "X".

    Returns
    -------
    pandas.DataFrame
        Rows of both files indexed by (and sorted on) 'Incident Datetime', with
        harmonised 'Incident Category' names and the helper columns
        'Year', 'Month', 'Day', 'Hour' and 'Minute'. Rows whose timestamp is
        missing are dropped because they cannot be assigned to either side of
        the cutoff.
    '''
    # Boundary between the two exports: the older file contributes everything
    # strictly before it, the newer file everything from it onwards.
    cutoff = pd.Timestamp('2018-01-01')

    df_present = pd.read_csv(present_path, usecols=["Incident Category", "Incident Datetime", "Latitude", "Longitude"])
    df_past = pd.read_csv(past_path, usecols=["Category", "Date", "Time", "Y", "X"])

    df_present['Incident Datetime'] = pd.to_datetime(df_present['Incident Datetime'])
    # The older export stores date and time separately; join them before parsing.
    df_past['Incident Datetime'] = pd.to_datetime(df_past['Date'] + ' ' + df_past['Time'])

    # Align the older export's column names with the newer one.
    df_past = df_past.rename(columns={"Category": "Incident Category", 'X': 'Longitude', 'Y': 'Latitude'})
    df_past.index = df_past['Incident Datetime']
    df_present.index = df_present['Incident Datetime']

    df_past = df_past.sort_index()
    df_present = df_present.sort_index()

    # Half-open split. Slicing both frames with the string '2018-01-01' would
    # include that whole day on BOTH sides (date-string slice endpoints are
    # inclusive), double-counting every incident reported in both files.
    df_past = df_past[df_past.index < cutoff]
    df_present = df_present[df_present.index >= cutoff]

    # Combining the two data sets vertically
    df = pd.concat([df_past, df_present], axis=0)

    # To create consistency with the focus crimes, all categories are upper-cased.
    # Done on the combined frame so no slice of the inputs is mutated.
    df["Incident Category"] = df["Incident Category"].str.upper()

    # Standardize crime categories across the two naming schemes
    crime_rename_map = {
        "LARCENY THEFT": "LARCENY/THEFT",
        "MOTOR VEHICLE THEFT": "VEHICLE THEFT",
        "MALICIOUS MISCHIEF": "VANDALISM",
        "WEAPONS OFFENCE": "WEAPON LAWS",
        "WEAPONS OFFENSE": "WEAPON LAWS",
        "WEAPONS CARRYING ET": "WEAPON LAWS",
        "DRUG NARCOTIC": "DRUG/NARCOTIC",
        "DRUG VIOLATION": "DRUG/NARCOTIC",
        "DRUG OFFENSE": "DRUG/NARCOTIC",
        'DISORDERLY CONDUCT': 'DRUNKENNESS'
    }
    df["Incident Category"] = df["Incident Category"].replace(crime_rename_map)

    # Providing columns with different time resolutions used in the tasks
    df['Year'] = df.index.year
    df['Month'] = df.index.month
    df['Day'] = df.index.day
    df['Hour'] = df.index.hour
    df['Minute'] = df.index.minute

    if focuscrimes:
        # list() keeps isin() happy for any iterable (set, tuple, generator, ...)
        return df[df['Incident Category'].isin(list(focuscrimes))]
    return df



In [None]:
# Crime categories the analysis concentrates on (names as produced by dataframes()).
focuscrimes = {'WEAPON LAWS', 'PROSTITUTION', 'ROBBERY', 'BURGLARY', 'ASSAULT', 'DRUG/NARCOTIC', 'LARCENY/THEFT', 'VANDALISM', 'VEHICLE THEFT', 'STOLEN PROPERTY'}

df = dataframes()  # full dataset

# Dataset with only focuscrimes. Filtering the frame that is already in memory
# gives the same rows as dataframes(focuscrimes) without reading and parsing
# both CSV files a second time. copy() keeps it independent of `df`.
df_focuscrimes = df[df['Incident Category'].isin(focuscrimes)].copy()

# Filter data for years 2014 to 2024 (both bounds inclusive)
in_window = (df_focuscrimes['Year'] >= 2014) & (df_focuscrimes['Year'] <= 2024)
df_filtered = df_focuscrimes[in_window]


In [None]:
def plot_crime_by_day(df, frequency, crimes=None):
    """Draw a bar chart of each crime category's distribution over a time bucket.

    For every category, incidents are counted per value of ``frequency`` and
    divided by that category's total, so each category's bars sum to 1.
    The chart is shown, written to "crime_by_hour.html" via ``output_file`` and
    additionally saved as a standalone page in "bokeh_plot.html".

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Incident Category' and ``frequency``.
    frequency : str
        Name of the bucket column to put on the x-axis (e.g. 'Hour').
    crimes : iterable of str, optional
        Categories to draw. Defaults to the module-level ``focuscrimes``.
        Categories absent from ``df`` are skipped.

    Raises
    ------
    ValueError
        If none of the requested categories occurs in ``df``.
    """
    if crimes is None:
        crimes = focuscrimes

    # Group by frequency and normalize within each category
    df_grouped = df.groupby(['Incident Category', frequency]).size().reset_index(name='count')
    category_totals = df_grouped.groupby('Incident Category')['count'].transform('sum')
    df_grouped['normalized'] = df_grouped['count'] / category_totals

    # Pivot the dataframe: one row per bucket, one column per category
    df_pivot = df_grouped.pivot(index=frequency, columns='Incident Category', values='normalized').fillna(0)
    df_pivot = df_pivot.reset_index()

    # The x-axis is categorical, so the bucket values must be the factor
    # strings themselves. Numbers on a categorical axis are treated as raw
    # coordinates and would draw each bar half a slot left of its label.
    buckets = df_pivot[frequency]
    if pd.api.types.is_float_dtype(buckets) and (buckets % 1 == 0).all():
        buckets = buckets.astype(int)  # avoid labels such as '10.0'
    df_pivot[frequency] = buckets.astype(str)

    if frequency == 'Hour':
        # Always show the full day, even if some hours have no incidents
        factors = [str(i) for i in range(24)]
        title, x_label = "Crime by Hour of the Day", "Hour of the Day"
    else:
        factors = df_pivot[frequency].tolist()
        title, x_label = f"Crime by {frequency}", str(frequency)

    # Sorted for a reproducible legend/colour order (sets iterate in an
    # arbitrary order); restricted to columns that actually exist.
    plotted = [crime for crime in sorted(crimes) if crime in df_pivot.columns]
    if not plotted:
        raise ValueError("None of the requested crime categories is present in the data")

    # Prepare data for Bokeh
    source = ColumnDataSource(df_pivot)

    # Create a figure with increased width
    p = figure(x_range=FactorRange(factors=factors), title=title,
               toolbar_location=None, tools="", width=600, height=300)

    # Add bars for each crime category. Every series starts muted; clicking its
    # legend entry toggles it (see click_policy below).
    colors = ['#c9d9d3', '#718dbf', '#e84d60', '#ddb7b1', '#ffbf00', '#ff8000', '#ff4000', '#ff0000', '#800000', '#400000']
    for indx, crime in enumerate(plotted):
        p.vbar(x=frequency, top=crime, source=source, width=0.9, legend_label=crime,
               line_color='white', fill_color=colors[indx % len(colors)], muted_alpha=0.01, muted=True)

    # Customize the plot
    p.xgrid.grid_line_color = None
    p.y_range.start = 0
    p.legend.orientation = "vertical"
    p.legend.label_text_font_size = '8pt'  # try 6pt, 8pt, or 10pt
    p.legend.location = "top_left"
    # Move the legend out of the plot area so it does not cover the bars
    p.add_layout(p.legend[0], 'left')

    p.legend.click_policy = "mute"
    p.xaxis.axis_label = x_label
    p.yaxis.axis_label = "Normalized Crime Count"

    # Output to a static HTML file
    output_file("crime_by_hour.html")

    show(p)
    # Get standalone components
    script, div = components(p)
    # Save components to HTML (explicit encoding so the result does not depend
    # on the platform default)
    with open("bokeh_plot.html", "w", encoding="utf-8") as f:
        f.write(f"""
        <!DOCTYPE html>
        <html>
        <head>
        {CDN.render()}
        </head>
        <body>
        {div}
        {script}
        </body>
        </html>
        """)

# Plot the hourly distribution for the focus crimes restricted to 2014-2024 (df_filtered)
plot_crime_by_day(df_filtered, 'Hour')