## Visualization of San Francisco Crime Dataset

In [11]:
import numpy as np
import pandas as pd 
import folium
# Folium is a Python library that wraps the Leaflet.js JavaScript library.
# It builds interactive maps with several kinds of background tiles.


In [12]:
# Load the incident table from the CSV file in the working directory.
df = pd.read_csv('san.csv')
# Show the first rows as this cell's output.
df.head()

Unnamed: 0,IncidntNum,Category,Descript,DayOfWeek,Date,Time,PdDistrict,Resolution,Address,X,Y,Location,PdId
0,120058272,WEAPON LAWS,POSS OF PROHIBITED WEAPON,Friday,01/29/2016 12:00:00 AM,11:00,SOUTHERN,"ARREST, BOOKED",800 Block of BRYANT ST,-122.403405,37.775421,"(37.775420706711, -122.403404791479)",12005827212120
1,120058272,WEAPON LAWS,"FIREARM, LOADED, IN VEHICLE, POSSESSION OR USE",Friday,01/29/2016 12:00:00 AM,11:00,SOUTHERN,"ARREST, BOOKED",800 Block of BRYANT ST,-122.403405,37.775421,"(37.775420706711, -122.403404791479)",12005827212168
2,141059263,WARRANTS,WARRANT ARREST,Monday,04/25/2016 12:00:00 AM,14:59,BAYVIEW,"ARREST, BOOKED",KEITH ST / SHAFTER AV,-122.388856,37.729981,"(37.7299809672996, -122.388856204292)",14105926363010
3,160013662,NON-CRIMINAL,LOST PROPERTY,Tuesday,01/05/2016 12:00:00 AM,23:50,TENDERLOIN,NONE,JONES ST / OFARRELL ST,-122.412971,37.785788,"(37.7857883766888, -122.412970537591)",16001366271000
4,160002740,NON-CRIMINAL,LOST PROPERTY,Friday,01/01/2016 12:00:00 AM,00:30,MISSION,NONE,16TH ST / MISSION ST,-122.419672,37.76505,"(37.7650501214668, -122.419671780296)",16000274071000


In [13]:
# (number of rows, number of columns) of the loaded table
df.shape

(150500, 13)

In [17]:
import ipywidgets as widgets # a slider widgets

# 'initial' description width keeps the long slider label from being truncated.
style = {'description_width': 'initial'}

# Slider that selects how many rows (cases) of the CSV are used downstream.
limit_case = widgets.IntSlider(
    value=1000,  # default number of rows
    min=100,     # smallest selectable row count
    max=5000,    # largest selectable row count
    step=1,      # change per slider tick
    description='Max Number of Case:',
    disabled=False,
    # Report a new value only when the handle is released. With the default
    # (continuous updates) every intermediate position while dragging would
    # re-run the bound callbacks, which re-read the CSV and rebuild the map.
    continuous_update=False,
    style=style)

In [18]:
def update_df_length(limit):
    """Load the first ``limit`` rows of ``san.csv`` and print how many were read.

    Parameters
    ----------
    limit : int
        Maximum number of data rows to load; expected to be non-negative
        (the slider bound to this callback supplies values >= 100).

    Returns
    -------
    None
        The loaded frame is local to this function; only the row count is
        printed.
    """
    # nrows stops parsing after `limit` rows, instead of reading the entire
    # file and discarding everything past the first `limit` rows afterwards.
    df = pd.read_csv('san.csv', nrows=limit)

    print("Number of rows in the dataset that have been successfully loaded:"+str(len(df)))


In [19]:
# Bind the row-limit slider to the loader; the trailing expression renders the control.
row_limit_ui = widgets.interactive(update_df_length, limit=limit_case)
row_limit_ui

interactive(children=(IntSlider(value=1000, description='Max Number of Case:', max=5000, min=100, style=Slider…

In [29]:
from ipywidgets import Layout 

df = pd.read_csv('san.csv')

# Distinct police districts in order of first appearance. Missing values are
# dropped first, because unique() would otherwise keep NaN and the widget
# would offer it as a selectable entry.
unique_district = df.PdDistrict.dropna().unique()
district_options = unique_district.tolist()

# Multi-select list: more than one district can be picked at a time.
district = widgets.SelectMultiple(
    options = district_options,
    # Pre-select the defaults only when they are present in the data; a value
    # that is not among the options is rejected by the widget.
    value = [name for name in ('BAYVIEW', 'NORTHERN') if name in district_options],
    description='District',
    disabled=False,
    layout = Layout(width='50%', height='80px')
)
# Hold Ctrl (or Shift) while clicking to select several entries.
district

SelectMultiple(description='District', index=(1, 4), layout=Layout(height='80px', width='50%'), options=('SOUT…

In [30]:
# similarly, multiple-select widget for category of crime: df.Category

# Distinct crime categories in order of first appearance. Missing values are
# dropped first, because unique() would otherwise keep NaN and the widget
# would offer it as a selectable entry.
unique_cat = df.Category.dropna().unique()
category_options = unique_cat.tolist()

# Multi-select list: more than one crime category can be picked at a time.
category = widgets.SelectMultiple(
    options = category_options,
    # Pre-select the defaults only when they are present in the data; a value
    # that is not among the options is rejected by the widget.
    value = [name for name in ('VANDALISM', 'ASSAULT', 'ROBBERY') if name in category_options],
    description='Criminal Case',
    disabled=False,
    layout = Layout(width='50%', height='80px', display='flex')
)
# Hold Ctrl (or Shift) while clicking to select several entries.
category

SelectMultiple(description='Criminal Case', index=(14, 3, 9), layout=Layout(display='flex', height='80px', wid…

In [33]:
import matplotlib.pyplot as plt
from folium import plugins

def update_map(district, category, limit):
    """Show a clustered incident map and two summary bar charts.

    Parameters
    ----------
    district : iterable of str
        ``PdDistrict`` values to keep.
    category : iterable of str
        ``Category`` values to keep.
    limit : int
        Number of leading rows of ``san.csv`` to consider before filtering;
        expected to be non-negative.

    Returns
    -------
    None
        Displays a folium map with one clustered marker per matching
        incident, then a matplotlib figure with the number of matching
        incidents per category (left) and per district (right).
    """
    # nrows stops parsing after `limit` rows instead of loading the whole
    # file on every widget change and slicing it afterwards.
    df = pd.read_csv('san.csv', nrows=limit)

    # Initial center of the map.
    latitude = 37.77
    longitude = -122.42

    # Keep only incidents matching both the selected districts and categories.
    selected = (df['PdDistrict'].isin(list(district))
                & df['Category'].isin(list(category)))
    df_category = df.loc[selected]

    # Use the value_counts() Series directly (index = labels, values = counts).
    # Going through reset_index() and then looking up the 'index' column
    # breaks on pandas >= 2.0, where the resulting columns are named after
    # the original column and 'count', so ['index'] raises KeyError.
    cat_counts = df_category['Category'].value_counts()
    dist_counts = df_category['PdDistrict'].value_counts()

    # Create the map and a marker cluster layer attached to it.
    sanfran_map = folium.Map(location=[latitude, longitude],
                             zoom_start=12)
    incidents = plugins.MarkerCluster().add_to(sanfran_map)

    # Add one marker per incident; Y holds the latitude and X the longitude.
    for lat, lng, label in zip(df_category.Y,
                               df_category.X,
                               df_category.Category):
        folium.Marker(
            location=[lat, lng],
            icon=None,
            popup=label,
        ).add_to(incidents)

    # Show the map first, then the charts below it.
    display(sanfran_map)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))

    ax1.bar(cat_counts.index, cat_counts.values)
    ax1.set_title('Amount of Criminal Case Based on Category')

    ax2.bar(dist_counts.index, dist_counts.values)
    ax2.set_title('Amount of Criminal Case in Selected District')

    plt.show()

In [32]:
# Bind the district list, category list and row-limit slider to update_map;
# the trailing expression renders the combined controls and their output.
crime_dashboard = widgets.interactive(
    update_map,
    district=district,
    category=category,
    limit=limit_case,
)
crime_dashboard

interactive(children=(SelectMultiple(description='District', index=(1, 4), layout=Layout(height='80px', width=…

This notebook can also be served as a standalone dashboard with Voilà, which turns Jupyter notebooks into standalone web applications.