# Part 3: One interactive visualization in Bokeh

### Data Loading

In [10]:
# Imports
import matplotlib.pyplot as plt
import pandas as pd
import os

In [11]:
# Resolve the CSV location: the "data" folder that sits next to the parent directory
data_path = os.path.abspath(os.path.join(os.pardir, "data"))
cleaned_data_path = os.path.join(data_path, "Police_Department_Incident_Reports_Complete.csv")

# Read the incident reports and keep every row whose Year is not 2025
df = pd.read_csv(cleaned_data_path).loc[lambda reports: reports['Year'] != 2025]

# Independent copy containing only the DRUG/NARCOTIC category
df_drug = df.loc[df['Category'].eq('DRUG/NARCOTIC')].copy()

### Bokeh plot

In [13]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool

output_notebook()

# Number of drug/narcotic incidents recorded for each hour of the day
hourly_counts = (
    df_drug
    .groupby('Hour')
    .size()
    .reset_index(name='Count')
)

# Bokeh glyphs read their columns ("Hour", "Count") from a ColumnDataSource
source = ColumnDataSource(hourly_counts)

# Figure whose x-range covers hours 0-23 with half a unit of padding per side
p = figure(
    title="Drug/Narcotic Crimes by Hour of the Day",
    width=700,
    height=400,
    x_range=(-0.5, 23.5),
    tools="pan,wheel_zoom,box_zoom,reset",
    toolbar_location="above",
)

# One vertical bar per hour
p.vbar(x='Hour', top='Count', width=0.9, source=source,
       fill_color='#9ecae1', line_color='#2171b5')

# Axis labels, plus an explicit tick for each of the 24 hours
p.xaxis.axis_label = "Hour (24-hour format)"
p.yaxis.axis_label = "Number of Crimes"
p.xaxis.ticker = list(range(24))

# Tooltip showing the hour and its count for the hovered bar
hover = HoverTool(tooltips=[("Hour", "@Hour"), ("Count", "@Count")])
p.add_tools(hover)

show(p)


In [12]:
from bokeh.models import ColumnDataSource, CustomJS, MultiSelect
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import column
import pandas as pd

# Enable Bokeh output in the notebook
output_notebook()

# Weekday order used both for the widget options and for the data columns
days_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
hours = list(range(24))

# Count Drug/Narcotic crimes per (day, hour) in a single grouped pass rather
# than filtering the whole DataFrame once per day/hour combination.
# Rows = day, columns = hour; combinations absent from the data become 0.
day_hour_counts = (
    df_drug
    .groupby(['Day of Week', 'Hour'])
    .size()
    .unstack(fill_value=0)
    .reindex(index=days_order, columns=hours, fill_value=0)
)

# Column-oriented dictionary: "Hour" plus one list of 24 counts per weekday
data_dict = {"Hour": list(hours)}
for day in days_order:
    data_dict[day] = day_hour_counts.loc[day].tolist()

# Full data source holding the counts for every day and hour; the JS callback
# reads from it but never modifies it
full_source = ColumnDataSource(data=data_dict)

# Initial bar heights: the sum over all days (every day is selected by default)
initial_counts = day_hour_counts.sum(axis=0).tolist()

# Aggregated data source that the bars are actually drawn from
agg_source = ColumnDataSource(data={"Hour": list(hours), "Count": initial_counts})

# MultiSelect widget for day selection, defaulting to all days
day_options = [(day, day) for day in days_order]
multi_select = MultiSelect(title="Select Days", value=days_order, options=day_options)

# CustomJS callback: whenever the selection changes, re-sum the per-day counts
# of the selected days for each hour and push the result into agg_source
callback = CustomJS(args=dict(full_source=full_source, source=agg_source, multi_select=multi_select), code="""
    var selected_days = multi_select.value;
    var full_data = full_source.data;
    var new_counts = [];
    for (var i = 0; i < full_data['Hour'].length; i++) {
        var sum_val = 0;
        for (var j = 0; j < selected_days.length; j++) {
            var day = selected_days[j];
            sum_val += full_data[day][i];
        }
        new_counts.push(sum_val);
    }
    source.data['Count'] = new_counts;
    source.change.emit();
""")
multi_select.js_on_change('value', callback)

# Bokeh figure for the vbar plot
p = figure(
    width=700,
    height=400,
    title="Drug/Narcotic Crimes by Hour (Selected Days)",
    x_range=(-0.5, 23.5),
    toolbar_location="above",
    tools="pan,wheel_zoom,box_zoom,reset"
)

p.vbar(x='Hour', top='Count', width=0.9, source=agg_source, fill_color='lavender',
    line_color='purple')
p.xaxis.axis_label = "Hour (24-hour format)"
p.yaxis.axis_label = "Number of Crimes"
p.xaxis.ticker = list(range(24))

# Combine the widget and plot into a layout and display them
layout = column(multi_select, p)
show(layout)


In [17]:
import pandas as pd
import os
from bokeh.models import ColumnDataSource, FactorRange, HoverTool
from bokeh.plotting import figure, show, output_notebook
from bokeh.transform import dodge
from bokeh.palettes import Category10

# NOTE: this cell relies on `df_drug` created by the data-loading cell above.

# Define the desired order for days
days_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Create a pivot table: rows=Hour, columns=Day of Week, values=count of crimes
df_pivot = df_drug.groupby(['Hour', 'Day of Week']).size().unstack(fill_value=0)

# Reorder columns to follow days_order (only include days present in the data)
df_pivot = df_pivot[[day for day in days_order if day in df_pivot.columns]]
df_pivot = df_pivot.reset_index()

# Convert Hour to string so that Bokeh treats it as a categorical factor.
df_pivot['Hour'] = df_pivot['Hour'].astype(str)

# Create a ColumnDataSource from the pivot DataFrame.
source = ColumnDataSource(df_pivot)

# Get the list of hours (as strings) to be used as the x-axis categories.
hours_as_str = list(df_pivot['Hour'])

# Create a figure with a FactorRange for the x-axis.
p = figure(
    x_range=FactorRange(*hours_as_str),
    width=900,
    height=500,
    title="Drug/Narcotic Crimes by Hour for Each Day",
    toolbar_location='right'
)

# Identify the day columns (all columns except "Hour")
day_columns = [col for col in df_pivot.columns if col != 'Hour']

# Category10 only provides palettes for 3 to 10 colours, so indexing it with a
# smaller count raises KeyError. Slicing the 10-colour palette yields the same
# leading colours and also works for one or two day columns.
colors = Category10[10][:len(day_columns)]

# Total share of each hour slot taken up by the group of bars, and the width
# of a single bar so that all days fit side by side inside that share
group_width = 0.8
bar_width = group_width / len(day_columns)

# One vbar renderer per day; the list is handed to the hover tool below
renderers = []
for i, day in enumerate(day_columns):
    # vbar's x is the bar *centre*, so shift by half a bar width: this keeps
    # the whole group centred on the hour tick instead of skewed to the left.
    offset = -group_width / 2 + (i + 0.5) * bar_width
    r = p.vbar(
        x=dodge('Hour', offset, range=p.x_range),
        top=day,
        width=bar_width,
        source=source,
        legend_label=day,
        color=colors[i],
        fill_alpha=0.5,
        muted_alpha=0.3,
        alpha=0.6,
        name=day  # Renderer name doubles as the column name for the hover tool
    )
    renderers.append(r)

# Single hover tool for all day renderers: "$name" is the hovered renderer's
# name (the day) and "@$name" reads the column carrying that same name.
p.add_tools(HoverTool(renderers=renderers, tooltips=[
    ("Day", "$name"),
    ("Hour", "@Hour"),
    ("Count", "@$name"),
]))

# Format the axes
p.xaxis.axis_label = "Hour of the Day"
p.yaxis.axis_label = "Number of Crimes"

# Make the legend items clickable to hide/show the corresponding day's bars.
p.legend.click_policy = "hide"

# Display the plot in the notebook.
output_notebook()
show(p)


In [6]:
import pandas as pd
from bokeh.models import ColumnDataSource, FactorRange, HoverTool
from bokeh.plotting import figure, show, output_notebook
from bokeh.palettes import Category10, Category20

# Removing "OUT OF SF" police district.
# NOTE: this rebinds `df_drug`, so any cell executed after this one sees the
# filtered frame.
df_drug = df_drug[df_drug['Police District'] != 'OUT OF SF']

# 1. Create a pivot table with raw counts:
#    Rows = Hour, Columns = Police District
crime_counts = df_drug.groupby(['Hour', 'Police District']).size().unstack(fill_value=0)

# 2. Reset the index so that "Hour" becomes a column.
df_bokeh = crime_counts.reset_index()

# 3. Convert Hour to string so that Bokeh treats it as a categorical factor.
df_bokeh['Hour'] = df_bokeh['Hour'].astype(str)

# 4. Create a ColumnDataSource for Bokeh.
source = ColumnDataSource(df_bokeh)

# 5. Define the categorical x-axis using the Hour values.
hours_as_str = list(df_bokeh['Hour'])

# 6. Create the figure with a FactorRange for the x-axis.
p = figure(
    x_range=FactorRange(*hours_as_str),
    width=900,
    height=500,
    title="Drug/Narcotic Crime Counts by Hour for Police Districts",
    toolbar_location='right'
)

# 7. Identify the police district columns (all columns except "Hour")
district_columns = df_bokeh.columns.drop("Hour")
n_districts = len(district_columns)

# Choose an appropriate color palette.
# Category10 only provides palettes for 3 to 10 colours, so slice the
# 10-colour palette instead of indexing by count (which raises KeyError for
# one or two districts).
if n_districts <= 10:
    palette = Category10[10][:n_districts]
elif n_districts <= 20:
    palette = Category20[n_districts]
else:
    from bokeh.palettes import viridis
    palette = viridis(n_districts)

# 8. Overlay vbars for each police district.
#    All bars are drawn at the same x position (no dodge) so that they overlay.
renderers = []
for i, district in enumerate(district_columns):
    r = p.vbar(
        x='Hour',
        top=district,
        width=0.8,  # Same width and x for every district so bars overlay exactly
        source=source,
        legend_label=district,
        color=palette[i],
        fill_alpha=0.5,  # Partial transparency allows overlaps to be seen
        muted_alpha=0.3,
        alpha=0.6,
        name=district  # Renderer name doubles as the column name for the hover tool
    )
    renderers.append(r)

# One hover tool shared by all district renderers. "$name" is the hovered
# renderer's name (the district) and "@$name" reads the column with that name,
# which also works for district names containing spaces (a plain "@" + name
# field reference would not). A single tool also avoids adding one hover
# button per district to the toolbar.
hover = HoverTool(renderers=renderers, tooltips=[
    ("District", "$name"),
    ("Hour", "@Hour"),
    ("Hourly Count", "@$name")
])
p.add_tools(hover)

# 9. Enable clickable legend items to hide/show individual district bars.
p.legend.click_policy = "hide"

# 10. Format the axes.
p.xaxis.axis_label = "Hour of the Day"
p.yaxis.axis_label = "Crime Count"

# 11. Display the plot.
output_notebook()
show(p)


In [20]:
import pandas as pd
from bokeh.models import ColumnDataSource, FactorRange, HoverTool, MultiSelect, CustomJS
from bokeh.plotting import figure
from bokeh.palettes import Category10, Category20
from bokeh.layouts import column
from bokeh.io import output_file, save

# --- Data Preparation ---
# Work on a filtered copy of df_drug without the "OUT OF SF" district.
df_drug_filtered = df_drug[df_drug['Police District'] != 'OUT OF SF'].copy()

# All hours present in the data (sorted), plus their string form for the
# categorical x-axis.
all_hours = sorted(df_drug_filtered['Hour'].unique())
all_hours_str = [str(h) for h in all_hours]

# Pivot over the full data; its columns define the complete set of districts.
full_pivot = df_drug_filtered.groupby(['Hour', 'Police District']).size().unstack(fill_value=0)
districts = list(full_pivot.columns)

# Unique years, sorted.
years = sorted(df_drug_filtered['Year'].unique())


def _hour_by_district(frame):
    """Return crime counts of `frame` as a table with one row per hour.

    The result has an "Hour" column (as strings) followed by one column per
    entry of `districts`. Rows and columns are reindexed against `all_hours`
    and `districts`, so every table has the same shape; missing combinations
    are filled with 0.
    """
    pivot = frame.groupby(['Hour', 'Police District']).size().unstack(fill_value=0)
    pivot = pivot.reindex(index=all_hours, columns=districts, fill_value=0)
    pivot = pivot.reset_index()
    pivot['Hour'] = pivot['Hour'].astype(str)
    return pivot


# Per-year tables, stored as {year string: {column name: list of values}} so
# they can be handed to the JavaScript callback.
pivot_dict = {
    str(yr): _hour_by_district(df_drug_filtered[df_drug_filtered['Year'] == yr]).to_dict(orient='list')
    for yr in years
}

# Initial table: all years summed together (matches the default selection).
initial_pivot = _hour_by_district(df_drug_filtered)

# Create the ColumnDataSource from the initial pivot.
source = ColumnDataSource(initial_pivot)

# --- Create the Figure ---
p = figure(
    x_range=FactorRange(*all_hours_str),
    width=900,
    height=500,
    title="Drug/Narcotic Crime Counts by Hour for Police Districts",
    toolbar_location='above'
)

# Choose a color palette based on the number of districts.
# Category10 only provides palettes for 3 to 10 colours, so slice the
# 10-colour palette instead of indexing by count (which raises KeyError for
# one or two districts).
n_districts = len(districts)
if n_districts <= 10:
    palette = Category10[10][:n_districts]
elif n_districts <= 20:
    palette = Category20[n_districts]
else:
    from bokeh.palettes import viridis
    palette = viridis(n_districts)

# Overlay vbars for each police district (bars overlap at each hour).
renderers = []
for i, district in enumerate(districts):
    r = p.vbar(
        x='Hour',
        top=district,
        width=0.8,  # same width and x for every district so that bars overlay
        source=source,
        legend_label=district,
        color=palette[i],
        fill_alpha=0.5,
        muted_alpha=0.3,
        alpha=0.6,
        name=district  # renderer name doubles as the column name for the hover tool
    )
    renderers.append(r)

# One hover tool shared by all district renderers. "$name" is the hovered
# renderer's name (the district) and "@$name" reads the column with that name,
# which also works for district names containing spaces.
hover = HoverTool(renderers=renderers, tooltips=[
    ("District", "$name"),
    ("Hour", "@Hour"),
    ("Count", "@$name")
])
p.add_tools(hover)

p.legend.click_policy = "hide"
p.xaxis.axis_label = "Hour of the Day"
p.yaxis.axis_label = "Crime Count"

# --- Create MultiSelect Widget with a CustomJS Callback ---
# The MultiSelect lists all years (as strings); all are selected initially.
year_options = [(str(yr), str(yr)) for yr in years]
multi_select = MultiSelect(title="Select Year(s)", value=[str(yr) for yr in years], options=year_options)

# The callback sums the precomputed per-year tables over the selected years.
# It always rebuilds every district column for every hour, so an empty
# selection yields all-zero bars; the columns referenced by the glyphs are
# never removed from the data source and the x-axis keeps its factors.
callback = CustomJS(args=dict(source=source, pivot_dict=pivot_dict,
                              hours=all_hours_str, districts=districts), code="""
    // Years currently selected in the widget (as strings).
    const selected_years = cb_obj.value;
    const n_hours = hours.length;

    // Start from an all-zero table covering every hour and district.
    const new_data = {"Hour": hours.slice()};
    for (const district of districts) {
        new_data[district] = new Array(n_hours).fill(0);
    }

    // Add the counts of each selected year.
    for (const yr of selected_years) {
        const yearly = pivot_dict[yr];
        if (!yearly) {
            continue;  // no table for this year; nothing to add
        }
        for (const district of districts) {
            const counts = yearly[district];
            for (let j = 0; j < n_hours; j++) {
                new_data[district][j] += counts[j];
            }
        }
    }

    // Assigning a new data object makes the plot redraw.
    source.data = new_data;
""")
multi_select.js_on_change('value', callback)

# --- Layout ---
layout = column(multi_select, p)

# --- Save to HTML file ---
output_path = "../plots/drug_crime_plot.html"
# Create the target directory if it is missing so that save() does not fail.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
output_file(output_path, title="Drug/Narcotic Crime Counts by Hour")
save(layout)

'/Users/paulabarho/Desktop/DTU/24_25/Term_2/SocDA/repo/02806_Social_Data/plots/drug_crime_plot.html'