In [6]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Select, Div
from bokeh.layouts import column
from bokeh.palettes import Spectral6
from bokeh.transform import factor_cmap

In [4]:
# Specify the full path to the CSV file
file_path = r"C:\Users\poltr\OneDrive - udl.cat\Desktop\Police_Department_Incident_Reports_Historical_2003_to_Feb_2025_20250204.csv"

# Read data from the specified file
df = pd.read_csv(file_path)

# Preview the first 5 lines of the loaded data
df.head()

Unnamed: 0,Category,PdDistrict,Longitude,Latitude,TimeOfDay,DayOfWeek,DayOfMonth,Month,Year
0,ROBBERY,INGLESIDE,-122.420084,37.708311,17,Monday,22,November,2004
1,VEHICLE THEFT,PARK,-120.5,90.0,20,Tuesday,18,October,2005
2,VEHICLE THEFT,SOUTHERN,-120.5,90.0,2,Sunday,15,February,2004
3,ASSAULT,SOUTHERN,-122.410541,37.770913,17,Sunday,21,November,2010
4,ASSAULT,TARAVAL,-122.470366,37.745158,15,Tuesday,2,April,2013


In [12]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Select, Div, Label
from bokeh.layouts import column, row
from bokeh.io import output_notebook
from bokeh.palettes import Category10

output_notebook()

# Sample data processing
df['Year'] = df['Year'].astype(int)  # Ensure Year is integer

# Categorize crimes
def categorize_crime(category):
    category = str(category).upper()
    violent = ['ASSAULT', 'ROBBERY', 'WEAPON LAWS']
    qol = ['DRUG/NARCOTIC', 'VANDALISM', 'PUBLIC INTOXICATION']
    
    if any(v in category for v in violent):
        return 'Violent Crime'
    elif any(q in category for q in qol):
        return 'QOL Crime'
    elif 'LARCENY' in category or 'THEFT' in category:
        return 'Larceny'
    return 'Other'

df['Category'] = df['Category'].apply(categorize_crime)

# Prepare data
tenderloin = df[df['PdDistrict'] == 'TENDERLOIN']
citywide = df[df['PdDistrict'] != 'TENDERLOIN']

def prepare_comparison_data(main_df, compare_df, crime_type):
    # Process Tenderloin data
    tl_data = main_df[main_df['Category'] == crime_type]
    tl_counts = tl_data.groupby('Year').size().reset_index(name='Count')
    tl_counts['District'] = 'TENDERLOIN'
    
    # Process comparison data
    if crime_type == 'Violent Crime':
        compare_data = compare_df[compare_df['PdDistrict'] == 'SOUTHERN']
        label = 'SOUTH_OF_MARKET'
    elif crime_type == 'QOL Crime':
        compare_data = compare_df
        label = 'CITY_AVG'
    else:  # Larceny
        compare_data = compare_df[compare_df['PdDistrict'] == 'CENTRAL']
        label = 'UNION_SQUARE'
    
    comp_counts = compare_data[compare_data['Category'] == crime_type]
    comp_counts = comp_counts.groupby('Year').size().reset_index(name='Count')
    comp_counts['District'] = label
    
    # Combine data
    all_years = pd.DataFrame({'Year': range(2003, 2026)})
    combined = pd.concat([tl_counts, comp_counts])
    combined = pd.merge(all_years, combined, on='Year', how='left').fillna(0)
    return combined

# Initial data
current_data = prepare_comparison_data(tenderloin, citywide, 'Violent Crime')
source = ColumnDataSource(current_data)

# Create plot
p = figure(title="Tenderloin Crime Trends: Incident Count Comparison (2003-2025)", 
           x_axis_label='Year', y_axis_label='Number of Reported Incidents',
           width=900, height=550, x_range=(2003, 2025),
           tools="pan,wheel_zoom,box_zoom,reset,hover,save")

# Define color mapping
districts = ['TENDERLOIN', 'SOUTH_OF_MARKET', 'CITY_AVG', 'UNION_SQUARE']
color_map = {districts[i]: Category10[4][i] for i in range(len(districts))}

# Create renderers with explicit colors
renderers = {}
for district in districts:
    renderers[district] = p.line(x='Year', y='Count', source=source,
                               line_width=2, color=color_map[district], 
                               legend_label=district,
                               visible=(district in ['TENDERLOIN', 'SOUTH_OF_MARKET']))
    p.circle(x='Year', y='Count', source=source, size=6, 
            color=color_map[district], legend_label=district,
            visible=(district in ['TENDERLOIN', 'SOUTH_OF_MARKET']))

# Store annotations for later removal
current_annotations = []

def add_annotations(crime_type):
    # Remove previous annotations
    for ann in current_annotations:
        p.renderers.remove(ann)
    current_annotations.clear()
    
    # Add new annotations
    if crime_type == 'Violent Crime':
        ann = Label(x=2010, y=200, text="Fewer violent incidents than South of Market",
                   text_font_size='10pt', x_units='data', y_units='data')
    elif crime_type == 'QOL Crime':
        ann = Label(x=2010, y=200, text="More visible quality-of-life incidents",
                   text_font_size='10pt', x_units='data', y_units='data')
    else:
        ann = Label(x=2010, y=200, text="Lower larceny counts than shopping districts",
                   text_font_size='10pt', x_units='data', y_units='data')
    
    p.add_layout(ann)
    current_annotations.append(ann)

add_annotations('Violent Crime')

# Create selector
crime_select = Select(title="Select Crime Category:", 
                     value="Violent Crime", 
                     options=["Violent Crime", "QOL Crime", "Larceny"])

# Update callback
def update_plot(attr, old, new):
    crime_type = crime_select.value
    new_data = prepare_comparison_data(tenderloin, citywide, crime_type)
    source.data = ColumnDataSource.from_df(new_data)
    
    if crime_type == "Violent Crime":
        for district in districts:
            renderers[district].visible = (district in ['TENDERLOIN', 'SOUTH_OF_MARKET'])
    elif crime_type == "QOL Crime":
        for district in districts:
            renderers[district].visible = (district in ['TENDERLOIN', 'CITY_AVG'])
    else:
        for district in districts:
            renderers[district].visible = (district in ['TENDERLOIN', 'UNION_SQUARE'])
    
    add_annotations(crime_type)

crime_select.on_change('value', update_plot)

# Info panel
info_panel = Div(text="""
<h2>The Crime Visibility Paradox</h2>
<p><b>Toggle between crime categories:</b></p>
<ul>
    <li><b>Violent crimes:</b> Compare with South of Market</li>
    <li><b>Quality-of-life offens:</b> Compare with city average</li>
    <li><b>Larceny/Theft:</b> Compare with shopping districts</li>
</ul>
<p><b>Key Insight:</b> Visible QOL incidents create disproportionate perception of danger.</p>
""", width=900)

# Layout
layout = column(
    info_panel,
    crime_select,
    p
)

show(layout)

  combined = pd.merge(all_years, combined, on='Year', how='left').fillna(0)
You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/interaction/js_callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/server.html



In [None]:
from IPython.display import display, HTML

with open("bokeh_plot.html", "r", encoding="utf-8") as f:
    html_content = f.read()

display(HTML(html_content))