In [1]:
import os

import pandas as pd
from bokeh.models import ColumnDataSource, NumeralTickFormatter, Legend, LegendItem
from bokeh.palettes import Category10
from bokeh.plotting import figure, show

In [2]:
# Location of the pre-processed SF police incident CSV.
# To run this notebook on another machine, set the SF_CRIME_CSV environment
# variable to your local copy instead of editing this cell; when the variable
# is not set, the default path below is used.
file_path = os.environ.get(
    "SF_CRIME_CSV",
    r"C:\Users\Bruger\OneDrive - Danmarks Tekniske Universitet\Kandidat\Social Data Analysis and Visualization\Police_Department_Incident_Reports_Historical_2003_to_Feb_2025_20250204NEWNEW.csv",
)

# Read data from the specified file
df = pd.read_csv(file_path)

# Preview the first 5 lines of the loaded data
df.head()

Unnamed: 0,Category,PdDistrict,Longitude,Latitude,TimeOfDay,DayOfWeek,DayOfMonth,Month,Year
0,ROBBERY,INGLESIDE,-122.420084,37.708311,17,Monday,22,November,2004
1,VEHICLE THEFT,PARK,-120.5,90.0,20,Tuesday,18,October,2005
2,VEHICLE THEFT,SOUTHERN,-120.5,90.0,2,Sunday,15,February,2004
3,ASSAULT,SOUTHERN,-122.410541,37.770913,17,Sunday,21,November,2010
4,ASSAULT,TARAVAL,-122.470366,37.745158,15,Tuesday,2,April,2013


In [3]:
def create_hourly_crime_comparison(df, crimes_to_show=None):
    """Plot each crime category's hour-of-day distribution as a Bokeh line.

    For every value of ``Category`` the incident counts per ``TimeOfDay`` are
    divided by that category's total, so each line shows the share of that
    category's incidents falling in each hour. This lets categories of very
    different volume be compared on one axis.

    Args:
        df: DataFrame with a ``Category`` column and a ``TimeOfDay`` column
            holding the hour as a whole number (int, or float/numeric string
            that converts to int).
        crimes_to_show: Category name, or iterable of category names, whose
            lines start un-muted. Every other category starts muted and can
            be toggled by clicking its legend entry. Defaults to
            ``['LARCENY/THEFT']``.

    Returns:
        The configured Bokeh figure (one line per category, legend on the
        left); pass it to ``show`` to render.
    """
    if crimes_to_show is None:
        crimes_to_show = ['LARCENY/THEFT']
    elif isinstance(crimes_to_show, str):
        # A bare string would otherwise be split into single characters.
        crimes_to_show = [crimes_to_show]
    visible_crimes = set(crimes_to_show)

    # Rows: crime category, columns: hour, values: incident count
    hourly_counts = df.groupby(['Category', 'TimeOfDay']).size().unstack(fill_value=0)

    # Hours may arrive as floats (e.g. when the CSV column has gaps); coerce to
    # int so the labels read "17" rather than "17.0" and match the x_range
    # factors below, and keep them in chronological order.
    hourly_counts.columns = hourly_counts.columns.astype(int)
    hourly_counts = hourly_counts.sort_index(axis=1)

    # Each row is divided by its own total, so every category sums to 1
    normalized = hourly_counts.div(hourly_counts.sum(axis=1), axis=0)
    hour_labels = [str(hour) for hour in normalized.columns]

    # Create figure with wider dimensions
    p = figure(
        x_range=[str(i) for i in range(24)],
        title="Hourly San Francisco Crime Patterns",
        width=1200,
        height=600,
        tools="hover,pan,box_zoom,reset",
        tooltips="@Crime: @Frequency{0.00%} at @Hour:00"
    )

    # One line (and one legend entry) per crime category
    legend_items = []
    for i, (crime, frequencies) in enumerate(normalized.iterrows()):
        source = ColumnDataSource({
            'Hour': hour_labels,
            'Frequency': frequencies.tolist(),
            'Crime': [crime] * len(hour_labels)
        })

        r = p.line(
            x='Hour',
            y='Frequency',
            source=source,
            line_width=2,
            # The palette has 10 colours; categories beyond that reuse them
            color=Category10[10][i % 10],
            alpha=0.6,
            muted_alpha=0.1,
            # Only the categories in crimes_to_show start un-muted
            muted=crime not in visible_crimes,
            name=crime
        )
        legend_items.append(LegendItem(label=crime, renderers=[r]))

    # Create separate legend and add it to the plot's left side
    legend = Legend(items=legend_items, click_policy="mute", location="center")
    p.add_layout(legend, 'left')

    # Formatting
    p.xaxis.axis_label = "Hour of Day"
    p.yaxis.axis_label = "Normalized Crime Frequency"
    p.yaxis.formatter = NumeralTickFormatter(format="0%")
    p.xgrid.grid_line_color = None

    # Adjust plot margins to make room for the legend
    p.min_border_left = 250

    return p

# Usage: build the figure from the loaded DataFrame, then render it with Bokeh's show()
crime_comparison = create_hourly_crime_comparison(df)
show(crime_comparison)