In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook
from bokeh.palettes import HighContrast3
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.embed import components
from bokeh.resources import CDN
import folium
from folium.plugins import HeatMap
from folium import plugins


In [2]:


def dataframes(focuscrimes=None):
    '''
    Read the two incident CSV exports and combine them into one dataframe.

    The 2003-2018 export supplies every incident strictly before 2018-01-01 and
    the 2018-2024 export supplies every incident from 2018-01-01 onwards, so no
    day is taken from both files. Rows whose timestamp could not be parsed
    (NaT) cannot be assigned to either period and are left out.

    Parameters
    ----------
    focuscrimes : iterable of str, optional
        Incident categories (upper case, after renaming) to keep. If empty or
        None, all categories are returned.

    Returns
    -------
    pandas.DataFrame
        Indexed by (and sorted on) 'Incident Datetime', which is also kept as a
        column, with 'Incident Category', 'Latitude', 'Longitude' and the derived
        'Year', 'Month', 'Day', 'Hour', 'Minute' and 'YearMonth' columns.
    '''
    # First instant served by the 2018-2024 export.
    cutoff = pd.Timestamp('2018-01-01')

    df_present = pd.read_csv('PD_SF_2018_2024.csv', usecols=["Incident Category", "Incident Datetime", "Latitude", "Longitude"])
    df_past = pd.read_csv("PD_SF_2003_2018.csv", usecols=["Category", "Date", "Time", "Y", "X"])

    df_present['Incident Datetime'] = pd.to_datetime(df_present['Incident Datetime'])
    # The older export stores date and time separately; unparseable rows become NaT.
    df_past['Incident Datetime'] = pd.to_datetime(df_past['Date'] + ' ' + df_past['Time'],
                                                  errors='coerce')

    # Bring the older export onto the newer export's column names.
    df_past = (
        df_past
        .rename(columns={"Category": "Incident Category", 'X': 'Longitude', 'Y': 'Latitude'})
        .drop(columns=['Date', 'Time'])
    )

    # Boolean masks give a half-open split at the cutoff (past < cutoff <= present).
    # Comparisons against NaT are False, so rows without a valid timestamp are
    # dropped here as well.
    df_past = df_past[df_past['Incident Datetime'] < cutoff]
    df_present = df_present[df_present['Incident Datetime'] >= cutoff].copy()

    # To create consistency with the focus crimes, all crimes are set to upper case letters
    df_present['Incident Category'] = df_present['Incident Category'].str.upper()

    # Combining the two data sets vertically, indexed and ordered by time
    df = pd.concat([df_past, df_present], axis=0)
    df = df.set_index('Incident Datetime', drop=False).sort_index()

    # Standardize crime categories
    # NOTE(review): 'DISORDERLY CONDUCT' is folded into 'DRUNKENNESS' -- confirm
    # that this merge is intended for the analysis.
    crime_rename_map = {
        "LARCENY THEFT": "LARCENY/THEFT",
        "MOTOR VEHICLE THEFT": "VEHICLE THEFT",
        "MALICIOUS MISCHIEF": "VANDALISM",
        "WEAPONS OFFENCE": "WEAPON LAWS",
        "WEAPONS OFFENSE": "WEAPON LAWS",
        "WEAPONS CARRYING ET": "WEAPON LAWS",
        "DRUG NARCOTIC": "DRUG/NARCOTIC",
        "DRUG VIOLATION": "DRUG/NARCOTIC",
        "DRUG OFFENSE": "DRUG/NARCOTIC",
        'DISORDERLY CONDUCT': 'DRUNKENNESS'
    }
    df["Incident Category"] = df["Incident Category"].replace(crime_rename_map)

    # Providing columns with different time resolutions that will be used in the tasks
    df['Year'] = df.index.year
    df['Month'] = df.index.month
    df['Day'] = df.index.day
    df['Hour'] = df.index.hour
    df['Minute'] = df.index.minute
    # First day of the incident's month, as a timestamp
    df['YearMonth'] = df.index.to_period('M').to_timestamp()

    if focuscrimes:
        return df[df['Incident Category'].isin(focuscrimes)]
    return df



In [3]:
focuscrimes = set(['OFFENCES AGAINST THE FAMILY AND CHILDREN', 'DRUG/NARCOTIC', 'DRUNKENNESS'])

df = dataframes() #full dataset
# Dataset with only the focus crimes. It is derived from the frame that is
# already loaded, so the two CSV files are not read and parsed a second time.
df_focuscrimes = df[df['Incident Category'].isin(focuscrimes)]

# Filter data for years 2018 to 2024 (both inclusive)
df_filtered = df_focuscrimes[df_focuscrimes['Year'].between(2018, 2024)]


In [4]:
# Keep only incidents that have both coordinates recorded; the map and the
# per-location chart further down are built from this frame.
has_coordinates = df_filtered[['Latitude', 'Longitude']].notna().all(axis=1)
df_cleaned = df_filtered[has_coordinates]

In [7]:
from plotly.subplots import make_subplots
# One frame per focus crime, selected from the 2018-2024 focus-crime data.
incident_category = df_filtered['Incident Category']
DRUNKENNESS_df = df_filtered.loc[incident_category.eq('DRUNKENNESS')]
DRUG_NARCOTIC_df = df_filtered.loc[incident_category.eq('DRUG/NARCOTIC')]
OFFENCES_AGAINST_THE_FAMILY_AND_CHILDREN_df = df_filtered.loc[
    incident_category.eq('OFFENCES AGAINST THE FAMILY AND CHILDREN')
]

# A single row of three polar subplots, one per focus crime.
fig = make_subplots(
    rows=1,
    cols=3,
    subplot_titles=['DRUNKENNESS', 'DRUG/NARCOTIC', 'OFFENCES AGAINST THE FAMILY AND CHILDREN'],
    specs=[[{'type': 'polar'} for _ in range(3)]],
)

# Function to create polar bar plot for a given dataframe and subplot position
def add_polar_bar(df, row, col, title, target_fig=None):
    """Add a 24-hour polar bar chart of incident counts to one subplot.

    Parameters
    ----------
    df : pandas.DataFrame
        Incidents to count; the hour is taken from ``df.index.hour``, so the
        index must be datetime-like.
    row, col : int
        Subplot position passed on to plotly.
    title : str
        Trace name.
    target_fig : plotly figure, optional
        Figure to draw on. Defaults to the notebook-level ``fig`` so existing
        calls keep working; pass it explicitly to avoid depending on whichever
        object ``fig`` currently refers to.
    """
    figure_to_update = fig if target_fig is None else target_fig

    n_hours = 24
    sector_width = 360 / n_hours  # degrees covered by one hour (15)

    hourly_counts = df.groupby(df.index.hour).size()
    values = hourly_counts.reindex(range(n_hours), fill_value=0)  # Ensure all 24 hours are included
    angles = np.linspace(0, 360, n_hours, endpoint=False)  # hour tick positions in degrees

    figure_to_update.add_trace(go.Barpolar(
        r=values,
        # Centre each bar in the middle of its hour sector so it spans exactly
        # from its own hour tick to the next one.
        theta=angles + sector_width / 2,
        width=[sector_width] * n_hours,
        marker_color='lightblue',  # Lighter color
        marker_line_color='darkblue',
        marker_line_width=0.5,
        opacity=1,
        name=title
    ), row=row, col=col)
    figure_to_update.update_polars(
        radialaxis=dict(
            visible=True,
            # Leave some headroom above the tallest bar.
            range=[0, values.max() + 50],
            # NOTE(review): plotly documents tickangle as the label angle relative
            # to horizontal; confirm 90 gives the intended label orientation.
            tickangle=90,
            tickfont=dict(size=14, family='Times New Roman black')
        ),
        angularaxis=dict(
            # Label every hour tick, with 0:00 at the top and time running clockwise.
            tickmode='array',
            tickvals=angles,
            ticktext=[f"{h}:00" for h in range(n_hours)],
            direction="clockwise",
            rotation=90
        ),
        row=row,
        col=col
    )

# Draw one hourly polar chart per focus crime, left to right.
polar_panels = [
    (DRUNKENNESS_df, 'DRUNKENNESS'),
    (DRUG_NARCOTIC_df, 'DRUG/NARCOTIC'),
    (OFFENCES_AGAINST_THE_FAMILY_AND_CHILDREN_df, 'OFFENCES AGAINST THE FAMILY AND CHILDREN'),
]
for panel_col, (panel_df, panel_title) in enumerate(polar_panels, start=1):
    add_polar_bar(panel_df, 1, panel_col, panel_title)

# Panel headings, positioned in paper coordinates just above each subplot.
title_positions = [
    ('DRUNKENNESS', 0.15),
    ('DRUG/NARCOTIC', 0.51),
    ('OFFENCES AGAINST THE FAMILY AND CHILDREN', 0.87),
]
title_annotations = [
    {
        'text': heading,
        'x': x_pos,
        'y': 1.05,
        'xref': 'paper',
        'yref': 'paper',
        'showarrow': False,
        'font': {'size': 16},
    }
    for heading, x_pos in title_positions
]

# Customize layout: no legend, a shared font, the headings above, and an
# explicit horizontal slice of the canvas for each polar subplot.
fig.update_layout(
    showlegend=False,
    font={'family': "Times New Roman"},
    annotations=title_annotations,
    polar={'domain': {'x': [0.0, 0.3], 'y': [0.0, 1.0]}},
    polar2={'domain': {'x': [0.35, 0.65], 'y': [0.0, 1.0]}},
    polar3={'domain': {'x': [0.7, 1.0], 'y': [0.0, 1.0]}},
)

# Show the interactive plot, then export a static image of it.
fig.show()
fig.write_image("polar_subplots.png", width=1200, height=500)


Map

In [21]:
import plotly.express as px
import pandas as pd

# Step 1: Select the incidents to plot
categories = [
    "OFFENCES AGAINST THE FAMILY AND CHILDREN",
    "DRUNKENNESS",
    "DRUG/NARCOTIC"
]

# Restrict to the three categories above and work on a copy.
# Note that this rebinds the notebook-level name `df_filtered`.
is_map_category = df_cleaned["Incident Category"].isin(categories)
df_filtered = df_cleaned.loc[is_map_category].copy()
# Incidents without coordinates cannot be placed on the map.
df_filtered = df_filtered.dropna(subset=["Latitude", "Longitude"])

# Step 2: Create the animated scatter map (one animation frame per value of "Year")
map_center = dict(lat=37.77919, lon=-122.41914)
fig = px.scatter_mapbox(
    df_filtered,
    lat="Latitude",
    lon="Longitude",
    color="Incident Category",
    hover_name="Incident Category",
    animation_frame="Year",
    center=map_center,
    zoom=11,
    height=700,
    title="San Francisco Crime Incidents Over Time",
)

# Step 3: Style the map and legend
legend_style = {
    "title": "Crime Category",
    "orientation": "v",
    # Anchor the legend's top-right corner near the top-right of the plot.
    "x": 0.99,
    "y": 0.99,
    "xanchor": "right",
    "yanchor": "top",
    "bgcolor": "rgba(0,0,0,0)",      # Transparent background
    "bordercolor": "rgba(0,0,0,0)",  # Transparent border
    "font": {"size": 12},
}
fig.update_layout(
    mapbox_style="carto-positron",
    margin=dict(r=0, t=40, l=0, b=0),
    title_x=0.5,
    legend=legend_style,
)

# Step 4: Export and show
fig.write_html("crime_animation_map.html")
fig.show()


In [18]:
import pandas as pd
from bokeh.plotting import figure, show, output_file
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.palettes import Category20
from bokeh.embed import components
from bokeh.resources import CDN
from bokeh.io import output_notebook
output_notebook()

# --- Step 1: Prepare data ---
# Note: this rebinds the notebook-level name `df` to a copy of the cleaned
# focus-crime data.
df = df_cleaned.copy()

# The ten coordinate pairs with the most incidents
location_totals = (
    df.groupby(["Latitude", "Longitude"])
    .size()
    .reset_index(name="Total")
)
top_locations = location_totals.sort_values("Total", ascending=False).head(10)

# Keep only the incidents that happened at one of those coordinate pairs
df_top = df.merge(top_locations[["Latitude", "Longitude"]], on=["Latitude", "Longitude"])
# Human-readable "lat, lon" label (5 decimals) used as the categorical x value
df_top["Location"] = [
    f"{lat:.5f}, {lon:.5f}"
    for lat, lon in zip(df_top["Latitude"], df_top["Longitude"])
]

# Incident counts per Location (rows) and Year (columns); missing pairs count as 0
counts_by_year = df_top.groupby(["Location", "Year"]).size().unstack(fill_value=0)
years = counts_by_year.columns.tolist()
# The year columns are given string names for use as Bokeh column names
year_strs = list(map(str, years))
grouped = counts_by_year.set_axis(year_strs, axis=1).reset_index()

locations = grouped["Location"].tolist()
source = ColumnDataSource(grouped)

# --- Step 2: Create the figure ---
# Category20 only offers palettes for 3 to 20 colours: clamp the requested size
# so fewer than 3 years does not raise a KeyError, and cycle through the palette
# so more than 20 years does not run past its end.
palette = Category20[min(max(len(years), 3), 20)]
colors = [palette[i % len(palette)] for i in range(len(years))]

p = figure(
    x_range=locations,
    height=500,
    width=1000,
    title="Top 10 Repeated Locations (Overlayed by Year)",
    toolbar_location=None,
    tools="hover",
)

# Hover tool: show the location followed by the count for every year
hover = p.select_one(HoverTool)
hover.tooltips = [("Location", "@Location")] + [(y, f"@{y}") for y in year_strs]

# --- Overlay bars: each year starts at 0 ---
# Every year is drawn on top of the others from the same baseline. All series
# start muted (nearly transparent); clicking a legend entry toggles its muting.
for i, year in enumerate(year_strs):
    p.vbar(
        x="Location",
        top=year,
        width=0.8,
        fill_color=colors[i],
        line_color="white",
        source=source,
        legend_label=year,
        muted_alpha=0.05,
        muted=True,
        name=year
    )

# --- Styling ---
p.xaxis.axis_label = "Location (Latitude, Longitude)"
p.yaxis.axis_label = "Incident Count"
p.xaxis.major_label_orientation = 1.0  # rotate the long coordinate labels (radians)
p.y_range.start = 0
p.xgrid.grid_line_color = None

p.legend.location = "top_left"
p.legend.orientation = "vertical"
p.legend.click_policy = "mute"
p.legend.label_text_font_size = "8pt"

# --- Output ---
# Register a standalone HTML file as an output target, then display the plot.
output_file("overlay_crime_by_location.html")
show(p)

# Save as embeddable HTML: components() returns the <script> and <div>
# fragments, which are wrapped in a minimal page that loads BokehJS from the CDN.
script, div = components(p)
# Write with an explicit encoding (and declare it in the page) so the file does
# not depend on the platform's default text encoding.
with open("bokeh_overlay_plot.html", "w", encoding="utf-8") as f:
    f.write(f"""
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset="utf-8">
    {CDN.render()}
    </head>
    <body>
    {div}
    {script}
    </body>
    </html>
    """)
