# Part 3: One interactive visualization in Bokeh

### Data Loading

In [1]:
# Imports
import matplotlib.pyplot as plt
import pandas as pd
import os

In [2]:
# Locate the incident-report CSV inside the "data" directory one level up
# from the current working directory.
data_path = os.path.abspath(os.path.join(os.pardir, "data"))
cleaned_data_path = os.path.join(
    data_path, "Police_Department_Incident_Reports_Complete.csv"
)

# Read the file, then keep every row whose Year is not 2025.
df = pd.read_csv(cleaned_data_path)
df = df.loc[df['Year'].ne(2025)]

# Independent copy of the DRUG/NARCOTIC rows; the plotting cells work on this.
df_drug = df.loc[df['Category'].eq('DRUG/NARCOTIC')].copy()

### Bokeh plot

In [7]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool

# Render Bokeh output inline in the notebook
output_notebook()

# Number of drug/narcotic incidents recorded for each hour value
incidents_per_hour = df_drug.groupby('Hour').size()
hourly_counts = incidents_per_hour.reset_index(name='Count')

# Wrap the table so glyphs and tooltips can refer to its columns by name
source = ColumnDataSource(hourly_counts)

# Canvas configuration; the x range pads half a unit around hours 0-23
figure_options = dict(
    title="Drug/Narcotic Crimes by Hour of the Day",
    width=700,
    height=400,
    x_range=(-0.5, 23.5),
    tools="pan,wheel_zoom,box_zoom,reset",
    toolbar_location="above",
)
p = figure(**figure_options)

# One bar per hour; bar height is taken from the Count column
bar_style = dict(width=0.9, fill_color='lavender', line_color='purple')
p.vbar(x='Hour', top='Count', source=source, **bar_style)

# A tick for every hour, plus axis titles
p.xaxis.ticker = list(range(24))
p.xaxis.axis_label = "Hour (24-hour format)"
p.yaxis.axis_label = "Number of Crimes"

# Tooltip listing the hovered bar's hour and count
hover = HoverTool(tooltips=[("Hour", "@Hour"), ("Count", "@Count")])
p.add_tools(hover)

show(p)


In [12]:
from bokeh.models import ColumnDataSource, CustomJS, MultiSelect
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import column
import pandas as pd

# Enable Bokeh output in the notebook
output_notebook()

# Days in display order; this is also the option order of the widget
days_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Count incidents per (hour, day) in a single grouped pass instead of
# filtering the whole frame once for each of the 24 x 7 combinations.
# Reindexing guarantees a row for every hour 0-23 and a column for every day,
# with 0 wherever the data has no matching incidents.
hour_day_counts = (
    df_drug.groupby(['Hour', 'Day of Week'])
    .size()
    .unstack(fill_value=0)
    .reindex(index=range(24), columns=days_order, fill_value=0)
)

# Column-oriented dictionary: "Hour" plus one list of 24 counts per day
data_dict = {"Hour": list(range(24))}
for day in days_order:
    data_dict[day] = hour_day_counts[day].tolist()

# Full data source holding counts for every day and hour; the JS callback
# reads from it but it is never plotted directly
full_source = ColumnDataSource(data=data_dict)

# Initial aggregated counts: the sum across all days (default: all selected)
initial_counts = hour_day_counts.sum(axis=1).tolist()

# Aggregated data source that the bars are actually drawn from
agg_source = ColumnDataSource(data={"Hour": list(range(24)), "Count": initial_counts})

# MultiSelect widget for day selection, defaulting to all days
day_options = [(day, day) for day in days_order]
multi_select = MultiSelect(title="Select Days", value=days_order, options=day_options)

# Browser-side callback: for every hour, re-sum the counts of the currently
# selected days and push the result into the plotted source
callback = CustomJS(args=dict(full_source=full_source, source=agg_source, multi_select=multi_select), code="""
    var selected_days = multi_select.value;
    var full_data = full_source.data;
    var new_counts = [];
    for (var i = 0; i < full_data['Hour'].length; i++) {
        var sum_val = 0;
        for (var j = 0; j < selected_days.length; j++) {
            var day = selected_days[j];
            sum_val += full_data[day][i];
        }
        new_counts.push(sum_val);
    }
    source.data['Count'] = new_counts;
    source.change.emit();
""")
multi_select.js_on_change('value', callback)

# Bokeh figure for the vbar plot
p = figure(
    width=700,
    height=400,
    title="Drug/Narcotic Crimes by Hour (Selected Days)",
    x_range=(-0.5, 23.5),
    toolbar_location="above",
    tools="pan,wheel_zoom,box_zoom,reset"
)

p.vbar(x='Hour', top='Count', width=0.9, source=agg_source, fill_color='lavender',
    line_color='purple')
p.xaxis.axis_label = "Hour (24-hour format)"
p.yaxis.axis_label = "Number of Crimes"
p.xaxis.ticker = list(range(24))

# Stack the widget above the plot and display both
layout = column(multi_select, p)
show(layout)


In [17]:
import pandas as pd
import os
from bokeh.models import ColumnDataSource, FactorRange, HoverTool
from bokeh.plotting import figure, show, output_notebook
from bokeh.transform import dodge
from bokeh.palettes import Category10

# Grouped bar chart: one bar per weekday, side by side, for every hour.
# Relies on df_drug (the DRUG/NARCOTIC rows) created in the data-loading cell.

# Define the desired order for days
days_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Create a pivot table: rows=Hour, columns=Day of Week, values=count of crimes
df_pivot = df_drug.groupby(['Hour', 'Day of Week']).size().unstack(fill_value=0)

# Reorder columns to follow days_order (only include days present in the data)
df_pivot = df_pivot[[day for day in days_order if day in df_pivot.columns]]
df_pivot = df_pivot.reset_index()

# Convert Hour to string so that Bokeh treats it as a categorical factor.
df_pivot['Hour'] = df_pivot['Hour'].astype(str)

# Create a ColumnDataSource from the pivot DataFrame.
source = ColumnDataSource(df_pivot)

# Get the list of hours (as strings) to be used as the x-axis categories.
hours_as_str = list(df_pivot['Hour'])

# Create a figure with a FactorRange for the x-axis.
p = figure(
    x_range=FactorRange(*hours_as_str),
    width=900,
    height=500,
    title="Drug/Narcotic Crimes by Hour for Each Day",
    toolbar_location='right'
)

# Identify the day columns (all columns except "Hour")
day_columns = [col for col in df_pivot.columns if col != 'Hour']

# Slice the 10-colour palette rather than indexing Category10 by size:
# Category10 only has entries for 3-10 colours, so fewer than three day
# columns would otherwise raise KeyError.
colors = Category10[10][:len(day_columns)]

# Width of a single bar so that the whole group spans 0.8 of a category
bar_width = 0.8 / len(day_columns)

# Collect the renderers so the hover tool can be restricted to the bars
renderers = []
for i, day in enumerate(day_columns):
    # dodge() shifts the bar *centre*, so add half a bar width; without it the
    # group would sit half a bar to the left of its hour tick.
    offset = -0.4 + (i + 0.5) * bar_width
    r = p.vbar(
        x=dodge('Hour', offset, range=p.x_range),
        top=day,
        width=bar_width,
        source=source,
        legend_label=day,
        color=colors[i % len(colors)],
        fill_alpha=0.5,
        muted_alpha=0.3,
        alpha=0.6,
        name=day  # Glyph name = column name, looked up by the hover tool
    )
    renderers.append(r)

# Hover tool: "$name" is the glyph's name (the day) and "@$name" reads the
# value of the column carrying that name, i.e. the count for the hovered bar.
hover = HoverTool(renderers=renderers, tooltips=[
    ("Day", "$name"),
    ("Hour", "@Hour"),
    ("Count", "@$name")
])
p.add_tools(hover)

# Format the axes
p.xaxis.axis_label = "Hour of the Day"
p.yaxis.axis_label = "Number of Crimes"

# Make the legend items clickable to hide/show the corresponding day's bars.
p.legend.click_policy = "hide"

# Display the plot in the notebook.
output_notebook()
show(p)


In [24]:
import pandas as pd
from bokeh.models import ColumnDataSource, FactorRange, HoverTool
from bokeh.plotting import figure, show, output_notebook
from bokeh.palettes import Category10, Category20

# Removing "OUT OF SF" police district
# NOTE(review): this rebinds df_drug, so any cell executed afterwards (including
# a re-run of an earlier plot) sees the filtered frame — confirm that is intended.
df_drug = df_drug[df_drug['Police District'] != 'OUT OF SF']

# 1. Create a pivot table with raw counts:
#    Rows = Hour, Columns = Police District
crime_counts = df_drug.groupby(['Hour', 'Police District']).size().unstack(fill_value=0)

# 2. Reset the index so that "Hour" becomes a column.
df_bokeh = crime_counts.reset_index()

# 3. Convert Hour to string so that Bokeh treats it as a categorical factor.
df_bokeh['Hour'] = df_bokeh['Hour'].astype(str)

# 4. Create a ColumnDataSource for Bokeh.
source = ColumnDataSource(df_bokeh)

# 5. Define the categorical x-axis using the Hour values.
hours_as_str = list(df_bokeh['Hour'])

# 6. Create the figure with a FactorRange for x-axis.
p = figure(
    x_range=FactorRange(*hours_as_str),
    width=900,
    height=500,
    title="Drug/Narcotic Crime Counts by Hour for Police Districts",
    toolbar_location='right'
)

# 7. Identify the police district columns (all columns except "Hour")
district_columns = df_bokeh.columns.drop("Hour")
n_districts = len(district_columns)

# Choose an appropriate color palette.
# The largest palette of each family is sliced instead of indexing by size:
# Category10/Category20 have no entries below 3 colours, so one or two
# districts would otherwise raise KeyError.
if n_districts <= 10:
    palette = Category10[10][:n_districts]
elif n_districts <= 20:
    palette = Category20[20][:n_districts]
else:
    from bokeh.palettes import viridis
    palette = viridis(n_districts)

# 8. Overlay vbars for each police district.
#    Using the same x coordinate (no dodge) so that bars overlap.
renderers = []
for i, district in enumerate(district_columns):
    r = p.vbar(
        x='Hour',
        top=district,
        width=0.8,  # Full width bars so they overlay
        source=source,
        legend_label=district,
        color=palette[i],
        fill_alpha=0.5,  # Partial transparency to see overlapping bars
        muted_alpha=0.3,
        alpha=0.6,
        name=district  # Glyph name = column name, looked up by the hover tool
    )
    renderers.append(r)

# 9. Add a hover tool that shows the police district, hour, and raw count.
#    "@$name" reads the column named after the hovered glyph, i.e. the actual
#    count; "$y" would only report the cursor's y-coordinate in data space.
hover = HoverTool(renderers=renderers, tooltips=[
    ("District", "$name"),
    ("Hour", "@Hour"),
    ("Count", "@$name")
])
p.add_tools(hover)

# 10. Set the legend to be clickable (hiding/showing bars).
p.legend.click_policy = "hide"

# 11. Format the axes.
p.xaxis.axis_label = "Hour of the Day"
p.yaxis.axis_label = "Crime Count"

# 12. Display the plot.
output_notebook()
show(p)
