## Bar chart of prostitution incidents by year

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import bokeh

# Load the incident reports. low_memory=False makes pandas infer every
# column's dtype from the whole file instead of chunk by chunk.
data = pd.read_csv(
    "Police_Department_Incident_Reports__Historical_2003_to_May_2018_20240130.csv",
    low_memory=False,
)
focuscrimes = ['WEAPON LAWS', 'PROSTITUTION', 'DRIVING UNDER THE INFLUENCE', 'ROBBERY', 
               'BURGLARY', 'ASSAULT', 'DRUNKENNESS', 'TRESPASS', 'LARCENY/THEFT', 
               'VANDALISM', 'VEHICLE THEFT', 'FRAUD', 'DRUG/NARCOTIC', 'DISORDERLY CONDUCT']

# Inclusive analysis window, written as ISO 8601 dates so the bounds parse
# the same way regardless of day-first / month-first guessing.
ANALYSIS_START = "2010-01-01"
ANALYSIS_END = "2017-12-31"

data["Date"] = pd.to_datetime(data["Date"])

# Keep only rows inside the window, then derive the helper columns on the
# filtered frame in one step (assign returns a new frame).
# NOTE(review): "Time" is parsed without an explicit format, so pandas has to
# infer one; pass format=... once the column layout has been confirmed.
data = data[data["Date"].between(ANALYSIS_START, ANALYSIS_END)].assign(
    Time=lambda df: pd.to_datetime(df["Time"]),
    Hour=lambda df: df["Time"].dt.hour,
    Year=lambda df: df["Date"].dt.year,
)

# Incident counts per (Year, Category) as a Series with a MultiIndex.
data_grouped = data.groupby(by=["Year", "Category"]).size()

# Same counts as a flat table: one row per (Category, Year) with a 'Counts' column.
category_grouped_counts = data.groupby(['Category', 'Year']).size().reset_index(name='Counts')

# Total number of incidents per category over the whole window.
total_counts_by_category = category_grouped_counts.groupby('Category')['Counts'].sum().reset_index(name='TotalCounts')

# Attach each category's total to its yearly rows.
merged_data = category_grouped_counts.merge(total_counts_by_category, on='Category')

# Yearly share of a category's incidents: yearly count / category total.
merged_data['Normalized'] = merged_data['Counts'] / merged_data['TotalCounts']

# Years as rows, categories as columns; (year, category) pairs with no
# incidents become 0 instead of NaN.
normalized_pivot = merged_data.pivot(index='Year', columns='Category', values='Normalized').fillna(0)


  data = pd.read_csv("Police_Department_Incident_Reports__Historical_2003_to_May_2018_20240130.csv")
  data["Time"] = pd.to_datetime(data["Time"])


In [4]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, FactorRange, Legend
from bokeh.palettes import Spectral11
from bokeh.transform import linear_cmap, dodge
from bokeh.io import output_notebook


# Alphabetical category order; the Year index is turned back into a regular
# column so it can be selected by name below.
sorted_names = sorted(normalized_pivot.columns)
sorted_categories = normalized_pivot.reindex(sorted_names, axis=1)
sorted_categories = sorted_categories.reset_index()

# Two-column frame for the chart. Years are converted to strings because they
# are used as categorical factors on the x axis.
prostitution_data = sorted_categories[['Year', 'PROSTITUTION']].astype({'Year': str})

# Data source shared by the bars and the overlaid line.
source = ColumnDataSource(data=prostitution_data)

# Colour each bar by its value, spanning the palette from the smallest to the
# largest yearly value.
yearly_share = prostitution_data['PROSTITUTION']
color_mapper = linear_cmap(
    field_name='PROSTITUTION',
    palette=Spectral11,
    low=yearly_share.min(),
    high=yearly_share.max(),
)

p = figure(
    x_range=FactorRange(factors=prostitution_data['Year'].tolist()),
    height=350,
    title="Prostitution Incidents by Year",
)
p.xaxis.axis_label = 'Year'
p.yaxis.axis_label = 'Normalized Incidents'

# Bars plus a line through the same points.
p.vbar(x='Year', top='PROSTITUTION', source=source, width=0.8, color=color_mapper)
p.line(x='Year', y='PROSTITUTION', source=source, line_color='blue', line_width=2)

# Render inline in the notebook.
output_notebook()
show(p)


## Jitter plot of prostitution incidents by time of day and day of week

In [5]:
import pandas as pd
import matplotlib.pyplot as plt
from pandas.api.types import CategoricalDtype
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, FactorRange, Legend
from bokeh.palettes import Spectral11
from bokeh.transform import linear_cmap, jitter
from bokeh.io import output_notebook


# Load the incident reports. low_memory=False makes pandas infer every
# column's dtype from the whole file instead of chunk by chunk.
data = pd.read_csv(
    "Police_Department_Incident_Reports__Historical_2003_to_May_2018_20240130.csv",
    low_memory=False,
)
focuscrimes = ['WEAPON LAWS', 'PROSTITUTION', 'DRIVING UNDER THE INFLUENCE', 'ROBBERY', 
               'BURGLARY', 'ASSAULT', 'DRUNKENNESS', 'TRESPASS', 'LARCENY/THEFT', 
               'VANDALISM', 'VEHICLE THEFT', 'FRAUD', 'DRUG/NARCOTIC', 'DISORDERLY CONDUCT']

# Inclusive analysis window, written as ISO 8601 dates so the bounds parse
# the same way regardless of day-first / month-first guessing.
ANALYSIS_START = "2010-01-01"
ANALYSIS_END = "2017-12-31"

# Weekday order used both for the categorical dtype and for the y axis.
day_order = ["Monday","Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
cat_type = CategoricalDtype(categories=day_order, ordered=True)

data["Date"] = pd.to_datetime(data["Date"])

# Keep only rows inside the window, then convert the columns the plot needs
# on the filtered frame in one step (assign returns a new frame).
# NOTE(review): "Time" is parsed without an explicit format, so pandas has to
# infer one; pass format=... once the column layout has been confirmed.
data = data[data["Date"].between(ANALYSIS_START, ANALYSIS_END)].assign(
    Time=lambda df: pd.to_datetime(df["Time"]),
    DayOfWeek=lambda df: df["DayOfWeek"].astype(cat_type),
)

# Despite the name this is a plain row filter, not a groupby result.
data_grouped = data[data["Category"] == "PROSTITUTION"]

output_notebook()

# Only the two columns the glyph reads are handed to Bokeh, so the remaining
# CSV columns are not serialised into the notebook output.
source = ColumnDataSource(data_grouped[["Time", "DayOfWeek"]])

p = figure(width=800, height=300, y_range=day_order, x_axis_type='datetime',
           title="Incidents by DayOfWeek - SF 2010-2017")

# One point per incident; jitter spreads the points vertically within each
# weekday band so overlapping times remain visible.
p.scatter(x='Time', y=jitter('DayOfWeek', width=0.6, range=p.y_range), source=source, alpha=0.3)

# Label x ticks as hours of the day.
p.xaxis.formatter.days = '%Hh'
p.x_range.range_padding = 0
p.ygrid.grid_line_color = None

show(p)


  data = pd.read_csv("Police_Department_Incident_Reports__Historical_2003_to_May_2018_20240130.csv")
  data["Time"] = pd.to_datetime(data["Time"])


## Time-series range plot of the number of prostitution incidents by date

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas.api.types import CategoricalDtype
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, FactorRange, Legend,RangeTool
from bokeh.palettes import Spectral11
from bokeh.transform import linear_cmap, jitter
from bokeh.io import output_notebook
from bokeh.layouts import column

# Load the incident reports. low_memory=False makes pandas infer every
# column's dtype from the whole file instead of chunk by chunk.
data = pd.read_csv(
    "Police_Department_Incident_Reports__Historical_2003_to_May_2018_20240130.csv",
    low_memory=False,
)
focuscrimes = ['WEAPON LAWS', 'PROSTITUTION', 'DRIVING UNDER THE INFLUENCE', 'ROBBERY', 
               'BURGLARY', 'ASSAULT', 'DRUNKENNESS', 'TRESPASS', 'LARCENY/THEFT', 
               'VANDALISM', 'VEHICLE THEFT', 'FRAUD', 'DRUG/NARCOTIC', 'DISORDERLY CONDUCT']

# Inclusive analysis window, written as ISO 8601 dates so the bounds parse
# the same way regardless of day-first / month-first guessing.
ANALYSIS_START = "2010-01-01"
ANALYSIS_END = "2017-12-31"

# Positions (into the sorted array of dates) of the window that the detail
# plot shows initially; they are clamped below to the dates actually present.
INITIAL_WINDOW_START = 1500
INITIAL_WINDOW_END = 2500

data["Date"] = pd.to_datetime(data["Date"], yearfirst = True)

# Keep only rows inside the window and parse the time column on the filtered
# frame (assign returns a new frame).
# NOTE(review): "Time" is parsed without an explicit format, so pandas has to
# infer one; pass format=... once the column layout has been confirmed.
data = data[data["Date"].between(ANALYSIS_START, ANALYSIS_END)].assign(
    Time=lambda df: pd.to_datetime(df["Time"]),
)

# Incident counts per (Date, Category) as a Series with a MultiIndex.
data_grouped = data.groupby(by=["Date", "Category"]).size()

# Same counts as a flat table: one row per (Category, Date) with a 'Counts' column.
category_grouped_counts = data.groupby(['Category', 'Date']).size().reset_index(name='Counts')

# Total number of incidents per category over the whole window.
total_counts_by_category = category_grouped_counts.groupby('Category')['Counts'].sum().reset_index(name='TotalCounts')

# Attach each category's total to its daily rows.
merged_data = category_grouped_counts.merge(total_counts_by_category, on='Category')

# Daily share of a category's incidents: daily count / category total.
merged_data['Normalized'] = merged_data['Counts'] / merged_data['TotalCounts']

# Dates as rows, categories as columns (alphabetical); (date, category) pairs
# with no incidents become 0 instead of NaN.
normalized_pivot = merged_data.pivot(index='Date', columns='Category', values='Normalized').fillna(0)
sorted_categories = normalized_pivot.reindex(sorted(normalized_pivot.columns), axis=1)

# Grab the dates before the index is turned back into a regular column.
dates = sorted_categories.index.values
sorted_categories = sorted_categories.reset_index()

source = ColumnDataSource(data=dict(date=dates, count=sorted_categories["PROSTITUTION"]))

# Clamp the initial window so a shorter date range does not raise IndexError;
# if the preferred start would not lie before the end, start from the first date.
window_end = min(INITIAL_WINDOW_END, len(dates) - 1)
window_start = INITIAL_WINDOW_START if INITIAL_WINDOW_START < window_end else 0

# Detail plot: shows only the currently selected date window.
p = figure(height=300, width=800, tools="xpan", toolbar_location=None,
           x_axis_type="datetime", x_axis_location="above",
           background_fill_color="#efefef", x_range=(dates[window_start], dates[window_end]))

p.line('date', 'count', source=source)
p.yaxis.axis_label = 'Normalized Incidents'

# Overview plot: shows the full series and hosts the draggable selection box.
select = figure(title="Drag the middle and edges of the selection box to change the range above",
                height=130, width=800, y_range=p.y_range,
                x_axis_type="datetime", y_axis_type=None,
                tools="", toolbar_location=None, background_fill_color="#efefef")

# The range tool is bound to the detail plot's x range, so moving the box
# changes what the detail plot displays.
range_tool = RangeTool(x_range=p.x_range)
range_tool.overlay.fill_color = "navy"
range_tool.overlay.fill_alpha = 0.2

select.line('date', 'count', source=source)
select.ygrid.grid_line_color = None
select.add_tools(range_tool)
output_notebook()
show(column(p, select))

  data = pd.read_csv("Police_Department_Incident_Reports__Historical_2003_to_May_2018_20240130.csv")
  data["Time"] = pd.to_datetime(data["Time"])
