In [139]:
from pathlib import Path

import folium
import geopandas as gpd
import matplotlib as mpl
import pandas as pd
from folium.plugins import FastMarkerCluster

%matplotlib inline

In [140]:
mpl.style.use('ggplot')

# Load the 2016 San Francisco police incident export (one row per incident).
df_san = pd.read_csv('Police_Department_Incidents_-_Previous_Year__2016_.csv')

# Number of incidents per category, then the three least frequent ones.
category_counts = df_san['Category'].value_counts()
rarest_categories = category_counts.nsmallest(3)

print("Three most rare crime categories:")
print(rarest_categories)

# Keep only the incidents that belong to one of the three rarest categories.
rarest_data = df_san[df_san['Category'].isin(rarest_categories.index)]

# Create a map centered on San Francisco
sf_map = folium.Map(location=[37.7749, -122.4194], zoom_start=12)

# One marker glyph per rare category, assigned in rarest-first order.
# The mapping is derived from the data, so no category name is hard-coded;
# in the recorded run the order was TREA, PORNOGRAPHY/OBSCENE MAT, GAMBLING.
icons = dict(zip(rarest_categories.index, ['cloud', 'flag', 'star']))

# Add one marker per incident. Y holds the latitude and X the longitude,
# and folium expects locations as [lat, lon].
for lat, lon, category in zip(rarest_data['Y'], rarest_data['X'], rarest_data['Category']):
    folium.Marker(
        [lat, lon],
        icon=folium.Icon(icon=icons[category]),
    ).add_to(sf_map)

# Save the map to an HTML file; create the output folder first so the cell
# also works on a fresh checkout where 'map/' does not exist yet.
Path('map').mkdir(parents=True, exist_ok=True)
sf_map.save('map/sf_crime_map.html')

Three most rare crime categories:
Category
TREA                        3
PORNOGRAPHY/OBSCENE MAT     4
GAMBLING                   20
Name: count, dtype: int64


In [141]:
# Number of incidents recorded on each weekday.
day_counts = df_san['DayOfWeek'].value_counts()

# Find the day with the highest number of crimes
most_crime_day = day_counts.idxmax()
print("Day with the most crimes recorded in the city:", most_crime_day)

# Filter data for the day with the most crimes
most_crime_data = df_san[df_san['DayOfWeek'] == most_crime_day]

# Create a map centered on San Francisco
sf_map = folium.Map(location=[37.7749, -122.4194], zoom_start=12)

# The busiest weekday holds tens of thousands of incidents. Emitting one
# folium.Marker per row produces a huge HTML file that browsers struggle to
# render, so the points are handed to FastMarkerCluster as a plain list of
# [lat, lon] pairs and clustered client-side instead.
# Rows without coordinates are dropped because they cannot be placed on a map.
marker_locations = most_crime_data[['Y', 'X']].dropna().values.tolist()
FastMarkerCluster(marker_locations).add_to(sf_map)

# Save the map to an HTML file; create the output folder first so the cell
# also works on a fresh checkout where 'map/' does not exist yet.
Path('map').mkdir(parents=True, exist_ok=True)
sf_map.save('map/most_crime_day_map.html')

Day with the most crimes recorded in the city: Friday


In [142]:
# Combine the separate date and time columns into a single timestamp.
df_san['Datetime'] = pd.to_datetime(df_san['Date'] + ' ' + df_san['Time'], format='mixed')

# Extract day and hour from the datetime column
df_san['Day'] = df_san['Datetime'].dt.day_name()
df_san['Hour'] = df_san['Datetime'].dt.hour

# Find the day and hour with the most incidents.
# NOTE(review): the day and the hour are each the most frequent on their own;
# the busiest joint (day, hour) slot could differ - confirm which is intended.
most_common_day = df_san['Day'].value_counts().idxmax()
most_common_hour = df_san['Hour'].value_counts().idxmax()
print("Most common day and hour when incidents occur:", most_common_day, most_common_hour)
print('categories', df_san['Category'].value_counts())

# Filter data for the most common day and hour
most_common_data = df_san[(df_san['Day'] == most_common_day) & (df_san['Hour'] == most_common_hour)]

# Create a map centered on San Francisco
sf_map = folium.Map(location=[37.7749, -122.4194], zoom_start=12)

# Assign a distinct marker colour to the first ten categories that occur in
# the filtered data; any further category falls back to a neutral colour.
# Colours (not folium.Icon objects) are stored because an Icon instance is
# attached to a single marker and must not be shared between markers.
palette = ['cadetblue', 'gray', 'darkblue', 'red', 'blue',
           'green', 'beige', 'orange', 'pink', 'purple']
fallback_color = 'lightgray'
keys = most_common_data['Category'].unique()[:10]
category_colors = dict(zip(keys, palette))

print("icons: ", category_colors.keys())

# Add one marker per incident, coloured by its category. Y holds the latitude
# and X the longitude, and folium expects locations as [lat, lon].
for lat, lon, category in zip(most_common_data['Y'],
                              most_common_data['X'],
                              most_common_data['Category']):
    folium.Marker(
        [lat, lon],
        icon=folium.Icon(color=category_colors.get(category, fallback_color)),
    ).add_to(sf_map)

# Save the map to an HTML file; create the output folder first so the cell
# also works on a fresh checkout where 'map/' does not exist yet.
Path('map').mkdir(parents=True, exist_ok=True)
sf_map.save('map/most_common_day_hour_map.html')

Most common day and hour when incidents occur: Friday 18
categories Category
LARCENY/THEFT                  40409
OTHER OFFENSES                 19599
NON-CRIMINAL                   17866
ASSAULT                        13577
VANDALISM                       8589
VEHICLE THEFT                   6419
WARRANTS                        5914
BURGLARY                        5802
SUSPICIOUS OCC                  5782
MISSING PERSON                  4338
DRUG/NARCOTIC                   4243
ROBBERY                         3299
FRAUD                           2635
SECONDARY CODES                 1841
TRESPASS                        1812
WEAPON LAWS                     1658
SEX OFFENSES, FORCIBLE           940
STOLEN PROPERTY                  882
RECOVERED VEHICLE                736
DISORDERLY CONDUCT               658
PROSTITUTION                     641
FORGERY/COUNTERFEITING           619
DRUNKENNESS                      465
DRIVING UNDER THE INFLUENCE      378
ARSON                            28

In [143]:
# Incidents per police district. value_counts() orders the rows by frequency,
# so the positional index of this frame says nothing about shapefile row order.
district_counts = df_san['PdDistrict'].value_counts().reset_index()
district_counts.columns = ['PdDistrict', 'Incidents']

# Read the shapefile into a GeoDataFrame
district_shapes = gpd.read_file('geo_bounds.shp')

# folium draws in WGS84 (EPSG:4326). Only *declare* that CRS when the file
# carries none; if the file declares a CRS, reproject instead of overwriting
# the label, which would leave the coordinates themselves untransformed.
if district_shapes.crs is None:
    district_shapes = district_shapes.set_crs('EPSG:4326')
else:
    district_shapes = district_shapes.to_crs('EPSG:4326')


def _normalise_names(names):
    """Return district names as stripped, upper-case strings for matching."""
    return names.astype(str).str.strip().str.upper()


# Incident totals keyed by normalised district name.
incidents_by_district = (
    district_counts
    .assign(PdDistrict=_normalise_names(district_counts['PdDistrict']))
    .groupby('PdDistrict')['Incidents']
    .sum()
)

# Locate the shapefile attribute that holds the district name: the column
# whose values match the most district names found in the incident data.
attribute_columns = district_shapes.columns.drop(district_shapes.geometry.name)
match_counts = {
    column: int(_normalise_names(district_shapes[column])
                .isin(incidents_by_district.index).sum())
    for column in attribute_columns
}
name_column = max(match_counts, key=match_counts.get, default=None)
if name_column is None or match_counts[name_column] == 0:
    raise ValueError(
        "No attribute column in 'geo_bounds.shp' matches the PdDistrict values; "
        "cannot attach incident counts to district polygons."
    )

# Attach counts to polygons by district name rather than by row position,
# which would pair each polygon with an unrelated district's total.
shape_names = _normalise_names(district_shapes[name_column])
district_data = district_shapes.assign(
    PdDistrict=shape_names,
    Incidents=shape_names.map(incidents_by_district),
)

# Create a choropleth map
sf_map = folium.Map(location=[37.7749, -122.4194], zoom_start=12)

folium.Choropleth(
    geo_data=district_data,
    name='choropleth',
    data=incidents_by_district.reset_index(),
    columns=['PdDistrict', 'Incidents'],
    key_on='feature.properties.PdDistrict',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of Incidents'
).add_to(sf_map)

# Add a layer control to the map
folium.LayerControl().add_to(sf_map)

# Save the map to an HTML file; create the output folder first so the cell
# also works on a fresh checkout where 'map/' does not exist yet.
Path('map').mkdir(parents=True, exist_ok=True)
sf_map.save('map/police_districts_map.html')