# Week 8 exercise

In [None]:
import json
import math
from pathlib import Path

import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
from folium import plugins
from folium.plugins import HeatMap
from folium.plugins import HeatMapWithTime
from plotly.subplots import make_subplots


In [None]:
# Read the SFPD incident reports (2003 to May 2018) into a DataFrame.
# NOTE(review): this is a machine-specific absolute path; the notebook will only
# run elsewhere after pointing csv_path at a local copy of the file.
csv_path = "/Users/Bruger/Desktop/Social Data Visualizations/socialdata2024/Police_Department_Incident_Reports__Historical_2003_to_May_2018_20240204.csv"
df = pd.read_csv(csv_path)

In [None]:
# Describe the data
#
# A notebook cell only renders its *last* bare expression, so each summary is
# passed to display() explicitly; otherwise head/shape/describe are computed
# and silently thrown away. `display` is provided by the Jupyter/IPython kernel.
display(df.head())
display(df.shape)
df.info()  # prints its report itself and returns None
display(df.describe())
display(df.isnull().sum())

In [None]:
# Extracting the year
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y')
# Remove 2018 as this year is not complete.
# .copy() makes the filtered frame independent of the original, so adding the
# 'Year' column below does not trigger pandas' SettingWithCopyWarning.
df = df[df['Date'].dt.year != 2018].copy()
df['Year'] = df['Date'].dt.year

In [None]:
# One bar chart per crime category: number of incidents per year.
NCategories = df["Category"].unique()
ncols = 2

# Calculate the number of rows needed to fit all categories in the grid
nrows = math.ceil(len(NCategories) / ncols)

fig = make_subplots(rows=nrows, cols=ncols, subplot_titles=list(NCategories))

for i, category in enumerate(NCategories):
    # Subplot titles are assigned left-to-right, top-to-bottom, so the traces
    # must be placed in the same row-major order (1-based row/col) to end up
    # under their own title.
    row, col = divmod(i, ncols)
    row, col = row + 1, col + 1

    # Index = year, values = number of incidents. Reading index/values directly
    # avoids depending on the column names produced by
    # value_counts().reset_index(), which changed in pandas 2.0.
    yearly_counts = (
        df.loc[df['Category'] == category, 'Year']
        .value_counts()
        .sort_index()
    )

    fig.add_trace(
        go.Bar(x=yearly_counts.index, y=yearly_counts.values, name=category),
        row=row, col=col,
    )
    fig.update_xaxes(title_text="Year", row=row, col=col)
    fig.update_yaxes(title_text="Occurrences", row=row, col=col)

# Titles already identify each panel, so the legend is redundant.
fig.update_layout(height=300*nrows, showlegend=False)

fig.show()

We want to look at forgery/counterfeiting because this category seems to have decreased rapidly in recent years. This could be because documents are increasingly digitized and are therefore not as easily forged as they were in the past.

We make a map of SF to show where the forgery/counterfeiting takes place.

In [None]:
# One float per police-district name.
# NOTE(review): presumably placeholder values for trying out a district map;
# nothing in the visible cells reads this mapping.
randomdata = dict([
    ('CENTRAL', 0.283805288999638),
    ('SOUTHERN', 0.8882636532075772),
    ('BAYVIEW', 0.45059924801053985),
    ('MISSION', 0.6000904430914474),
    ('PARK', 0.6362552416309091),
    ('RICHMOND', 0.3371857964893169),
    ('INGLESIDE', 0.09876749056377487),
    ('TARAVAL', 0.009436215026031758),
    ('NORTHERN', 0.44884916837512767),
    ('TENDERLOIN', 0.06616710190569974),
])

In [None]:
# Fetch the police-district boundaries (GeoJSON) used by the choropleth map.
url = "https://raw.githubusercontent.com/suneman/socialdata2022/main/files/sfpd.geojson"
geojson_path = Path("sfpd.geojson")

try:
    # A timeout keeps the cell from hanging indefinitely on a dead connection.
    response = requests.get(url, timeout=30)
    # Treat any non-2xx status as a failed download.
    response.raise_for_status()
except requests.RequestException as err:
    # Fall back to a previously downloaded copy if there is one; otherwise stop
    # here with a clear error instead of failing later on a missing file.
    if not geojson_path.exists():
        raise RuntimeError(f"Failed to download the file from {url}") from err
    print("Failed to download the file. Using the existing local copy.")
else:
    geojson_path.write_bytes(response.content)

# Open the GeoJSON file and load it into a GeoJSON object
with geojson_path.open(encoding="utf-8") as f:
    geojson = json.load(f)

# Flatten the GeoJSON features into a DataFrame (one row per feature)
dfgeo = pd.json_normalize(geojson['features'])


In [None]:
# Crime category: keep only the forgery/counterfeiting incidents
forgery = df.loc[df['Category'] == 'FORGERY/COUNTERFEITING']

# Count the incidents per police district and flatten the result into a
# two-column frame.
# NOTE(review): after the rename, the column called 'Count' holds the district
# names and the column called 'Category' holds the number of incidents. The
# choropleth cell refers to the columns by exactly these names.
district_df = (
    forgery.groupby(['PdDistrict'])['Category']
    .count()
    .to_frame()
    .reset_index()
    .rename(columns={'PdDistrict': 'Count'})
)
print(district_df)

In [None]:
# Choropleth of forgery/counterfeiting incidents per police district.
# In district_df the 'Count' column holds the district names and 'Category'
# holds the number of incidents, so the labels map those two column names to
# readable text for the hover box and colour bar. (The previous 'PdDistrict'
# key matched no column in use and therefore had no effect.)
fig = px.choropleth_mapbox(
    district_df,
    geojson=geojson,
    locations='Count',
    color='Category',
    color_continuous_scale="Bluered",
    mapbox_style="carto-positron",
    zoom=11,
    center={"lat": 37.77, "lon": -122.4},
    opacity=0.5,
    labels={'Count': 'District', 'Category': 'Incidents'},
)

# Remove the default margins so the map fills the output area.
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

fig.show()

Here we see that most forgeries take place in the Southern district.

In [None]:
# Make a HeatMapWithTime for 'FORGERY/COUNTERFEITING': one heat-map frame per
# calendar month.

# Delete rows where location or time is NaN. .copy() makes the result an
# independent frame so the column assignments below don't write into a slice.
df = df.dropna(subset=['X', 'Y', 'Time']).copy()

# Convert 'Time' column to datetime format
df['Time'] = pd.to_datetime(df['Time'])

# Group by year and month.
# NOTE(review): 'Time' presumably holds only the time of day, so a period
# derived from it puts every row in the same month and the animation collapses
# to a single frame. The year/month is therefore taken from 'Date', which an
# earlier cell parses to datetime.
df['YearMonth'] = df['Date'].dt.to_period('M')

# Filter the 'FORGERY/COUNTERFEITING' category
forgery_df = df[df['Category'] == 'FORGERY/COUNTERFEITING']

# Prepare data for HeatMapWithTime: a list of frames, each a list of [lat, lon].
# groupby yields the periods in sorted order and touches each row once, instead
# of re-filtering the whole frame and iterating row by row for every month.
time_index = []
data = []
for period, month_rows in forgery_df.groupby('YearMonth'):
    time_index.append(str(period))
    data.append(month_rows[['Y', 'X']].values.tolist())

# Create map centred on the mean incident location
m = folium.Map([forgery_df['Y'].mean(), forgery_df['X'].mean()], zoom_start=13)

# Create HeatMapWithTime and add it to the map; `index` labels each frame with
# its year-month on the time slider.
hmwt = HeatMapWithTime(data, index=time_index)
hmwt.add_to(m)

# Display map
m
