# **Part 2: Interactive visualizations with Bokeh**

### **Exercise: Recreate a new version of the results from Week 2 (with updated dates) as an interactive visualisation (shown in the gif). To complete the exercise, follow the steps below to create your own version of the dataviz.**

### Data prep

In [120]:
import pandas as pd

# Read the cleaned SF crime data; "Incident Date" is parsed into datetimes
df = pd.read_csv('../Assignment-1/Data/SF_Crime_Data_Focus_Cleaned.csv', parse_dates=["Incident Date"])

# Restrict to incidents dated 2014 through 2024 (both years inclusive)
incident_year = df["Incident Date"].dt.year
df = df[incident_year.between(2014, 2024)]

# The 10 focus crime categories used throughout the rest of the notebook
focus_crimes = [
    "ASSAULT", "BURGLARY", "DRUG/NARCOTIC", "LARCENY/THEFT", "PROSTITUTION",
    "ROBBERY", "STOLEN PROPERTY", "VANDALISM", "VEHICLE THEFT", "WEAPON LAWS",
]
df = df[df["Incident Category"].isin(focus_crimes)]

# Count incidents for every ("Incident Time", "Incident Category") pair
# NOTE(review): "Incident Time" is presumably already an hour-of-day value in
# the cleaned file (the displayed output shows 0, 1, 2, ...) - confirm.
crime_hourly = (
    df.groupby(["Incident Time", "Incident Category"])
    .size()
    .reset_index(name="Crime Count")
)

# Normalize within each category: each count is divided by that category's
# total, so the values of one category sum to 1 across all time slots
category_totals = crime_hourly.groupby("Incident Category")["Crime Count"].transform("sum")
crime_hourly["Normalized Count"] = crime_hourly["Crime Count"] / category_totals

# Wide format: one row per time slot, one column per crime category;
# (time, category) combinations with no incidents become 0.
# The index is labelled "Hour".
crime_pivot = (
    crime_hourly.pivot(index="Incident Time", columns="Incident Category", values="Normalized Count")
    .fillna(0)
    .rename_axis(index="Hour")
)

# Display the first rows of the final DataFrame
crime_pivot.head()


Incident Category,ASSAULT,BURGLARY,DRUG/NARCOTIC,LARCENY/THEFT,PROSTITUTION,ROBBERY,STOLEN PROPERTY,VANDALISM,VEHICLE THEFT,WEAPON LAWS
Hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.050554,0.054347,0.032283,0.043302,0.106449,0.047013,0.047853,0.054022,0.039672,0.053834
1,0.043975,0.036507,0.019766,0.023869,0.064103,0.047767,0.02783,0.034026,0.022168,0.041225
2,0.037867,0.047186,0.017192,0.015882,0.052059,0.045537,0.02783,0.031693,0.01834,0.035647
3,0.021019,0.056375,0.013493,0.012271,0.037685,0.028298,0.022541,0.027979,0.014722,0.027402
4,0.015047,0.05592,0.00867,0.009108,0.017483,0.019619,0.020904,0.022632,0.012083,0.019036


## Step 1: Convert Data for Bokeh

In [121]:
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category10
from bokeh.io import output_notebook

# Pair each focus crime with one of the 10 Category10 palette colors, in list order
colors = Category10[10]
color_map = dict(zip(focus_crimes, colors))

# Wrap the pivoted DataFrame in a ColumnDataSource for the Bokeh glyphs to read
source = ColumnDataSource(crime_pivot)

# Render Bokeh output inline in the Jupyter notebook
output_notebook()



## Step 2: Create Bokeh Figure

In [122]:
from bokeh.plotting import figure, show
from bokeh.models import FactorRange, HoverTool, Legend, LegendItem
# Categorical x-axis factors: the hours 0-23 as strings
hours = list(map(str, range(24)))

# One palette color per focus crime, keyed by crime name
colors = Category10[len(focus_crimes)]
color_map = {crime: colors[idx] for idx, crime in enumerate(focus_crimes)}

# Empty figure with a categorical hour axis; glyphs are added in a later cell
p = figure(
    title="Hour-by-Hour Probabilities",
    x_axis_label="Hour of Day (0-23)",
    y_axis_label="Relative Frequency",
    x_range=FactorRange(factors=hours),
    width=800,
    height=500,
    tools="pan,wheel_zoom,box_zoom,reset",
)


## Step 3: Display plot

In [152]:
from bokeh.palettes import Category10
from bokeh.transform import dodge


# Categorical x-axis factors: the hours 0-23 as strings
hours = [str(i) for i in range(24)]

# One distinct Category10 color per focus crime, keyed by crime name
colors = Category10[len(focus_crimes)]
color_map = {crime: colors[i] for i, crime in enumerate(focus_crimes)}

# A new figure is created in this cell, so the bars below are added to an
# empty plot rather than to the figure built in an earlier cell.
p = figure(
    title="Hour-by-Hour Probabilities",
    x_range=FactorRange(*hours),  # categorical x-axis
    y_axis_label="Relative Frequency",
    x_axis_label="Hour of Day (0-23)",
    height=500, width=900,
    tools="pan,wheel_zoom,box_zoom,reset"
)

# Add one vbar renderer per crime category and collect its legend entry.
bars = {}
legend_items = []
for crime in focus_crimes:
    if crime not in crime_pivot.columns:
        # Category has no column in the pivoted data; nothing to draw
        continue
    bar = p.vbar(
        # NOTE(review): every category uses the same 0.5 offset, so all bars
        # for an hour are drawn on top of each other. The offset presumably
        # shifts the "Hour" values onto the centre of the matching
        # categorical factor - confirm against the rendered plot.
        x=dodge("Hour", 0.5, range=p.x_range),
        top=crime,  # bar height comes from this crime's column in `source`
        source=source,
        width=0.8,
        color=color_map[crime],
        name=crime,  # lets the hover tool resolve $name / @$name per renderer
        muted=True,  # bars start muted; clicking the legend entry toggles them
        muted_alpha=0.1,  # opacity used while a bar is muted
    )
    bars[crime] = bar
    legend_items.append(LegendItem(label=crime, renderers=[bar]))

# Build the legend explicitly and attach it outside the plot area, on the left
legend = Legend(items=legend_items, location="center")
p.add_layout(legend, "left")

# Hover tool: report the hovered bar's own category and value.
# "$y" would show the cursor's y coordinate instead of the bar height;
# "@$name" reads the column whose name equals the hovered renderer's `name`.
hover = HoverTool(
    tooltips=[
        ("Crime", "$name"),
        ("Hour", "@Hour"),
        ("Crime Probability", "@$name{0.0000}"),
    ]
)
p.add_tools(hover)

# Clicking a legend entry toggles the muted state of its bars
p.legend.click_policy = "mute"

# Render the plot
show(p)
