# 1 Housekeeping

## 1.1 Load libraries

In [23]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import geojson
import mapbox
from shapely.geometry import MultiPoint, MultiLineString, MultiPolygon
from branca.element import Figure
import folium
import missingno as msno
from datetime import datetime

In [24]:
# Let wide and long DataFrames render in full: up to 100 columns and 100 rows.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

## 1.2 Load data

### CBS theft

In [82]:
# Load the CBS registered-theft extract (semicolon separated; the padded '.' token marks
# missing values) and reduce it to the columns used in the rest of the notebook.
# Everything is done in one chain so no column is assigned on a filtered slice
# (which triggered a SettingWithCopyWarning in the step-by-step version).
# NOTE(review): the absolute Windows path only resolves on the original author's machine —
# consider a configurable data directory.
df = (
    pd.read_csv(r'D:\Jupyter Notebooks\cbs-diefstal\data\cbs_diefstallen.csv', sep=';', na_values='       .')
    # keep only rows with 'GebruikVanGeweld' code T001540, then drop the now-constant column
    .loc[lambda theft_rows: theft_rows['GebruikVanGeweld'] == 'T001540']
    .drop(columns='GebruikVanGeweld')
    # exclude theft category CRI1134
    .loc[lambda theft_rows: theft_rows['SoortDiefstal'] != 'CRI1134']
    # the period code carries a 'JJ00' suffix; keep the part before it as an integer year
    .assign(Perioden=lambda theft_rows: theft_rows['Perioden'].str.split('JJ00').str[0].astype('int'))
    # the per-1000-inhabitants figure is not used, so drop it directly instead of renaming it first
    .drop(columns='GeregistreerdeDiefstallenPer1000Inw_3')
    .rename(columns={'TotaalGeregistreerdeDiefstallen_1': 'count', 'Perioden': 'year'})
)

In [83]:
# Translate the CBS theft-category codes in 'SoortDiefstal' into readable English labels.
# A single mapping replaces the former chain of one np.where call per code; codes that
# are not listed are left unchanged.
theft_labels = {
    'CRI1122': 'bike',
    'CRI1123': 'moped',
    'CRI1124': 'motorcycle/scooter',
    'CRI1125': 'car',
    'CRI1127': 'boat',
    'CRI1131': 'animal',
    'CRI1132': 'street robbery',
    'CRI1133': 'pickpocketing',
    'CRI1135': 'home burglary',
    'CRI1137': 'shoplifting',
    'CRI1144': 'heist',
}
df = df.assign(SoortDiefstal=df['SoortDiefstal'].replace(theft_labels))

# CRI1128 deliberately gets no label: those rows are removed from the analysis.
df = df[df['SoortDiefstal'] != 'CRI1128']

In [84]:
df.head()

Unnamed: 0,ID,SoortDiefstal,RegioS,year,count
0,6116,bike,GM1680,2018,25.0
1,6117,bike,GM1680,2019,50.0
2,6118,bike,GM1680,2020,20.0
3,6119,bike,GM1680,2021,30.0
4,6128,bike,GM0738,2018,5.0


### Gemeentes

In [85]:
# Read the municipality reference table for each year.
# NOTE(review): absolute Windows paths only resolve on the original author's machine.
gemeentes_2018 = pd.read_csv(r'D:\Jupyter Notebooks\cbs-diefstal\data\gemeente_2018.csv')
gemeentes_2019 = pd.read_csv(r'D:\Jupyter Notebooks\cbs-diefstal\data\gemeente_2019.csv')
gemeentes_2020 = pd.read_csv(r'D:\Jupyter Notebooks\cbs-diefstal\data\gemeente_2020.csv')
gemeentes_2021 = pd.read_csv(r'D:\Jupyter Notebooks\cbs-diefstal\data\gemeente_2021.csv')

# Stack the yearly tables in chronological order. pd.concat replaces the deprecated
# DataFrame.append and, like append, keeps each table's original row index.
gemeentes = pd.concat([gemeentes_2018, gemeentes_2019, gemeentes_2020, gemeentes_2021])

# One row per (code, GM code, name) combination; keep='last' retains the row from the
# most recent year in which that combination appears.
gemeentes = gemeentes.drop_duplicates(subset=['Gemeentecode', 'GemeentecodeGM', 'Gemeentenaam'], keep='last')
gemeentes.head()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Unnamed: 0,Gemeentecode,GemeentecodeGM,Gemeentenaam,Provinciecode,ProvinciecodePV,Provincienaam
1,738,GM0738,Aalburg,30,PV30,Noord-Brabant
25,5,GM0005,Bedum,20,PV20,Groningen
40,585,GM0585,Binnenmaas,28,PV28,Zuid-Holland
46,9,GM0009,Ten Boer,20,PV20,Groningen
64,611,GM0611,Cromstrijen,28,PV28,Zuid-Holland


### Geojson

In [86]:
# Online location of the 2020 municipality boundaries (kept for reference; the local
# copy below is what gets loaded). Plain string: there is nothing to interpolate.
gemeente_json_html = "https://gis.vng.nl/v2/assets/geojson/g2020.geojson"

# Load the local GeoJSON copy; the context manager closes the file handle afterwards.
with open('geojson_gemeente_2020.geojson', 'r') as geojson_file:
    gemeente_json = geojson.load(geojson_file)

# Show one feature to inspect its structure (geometry + properties).
gemeente_json['features'][1]


{"geometry": {"coordinates": [[[4.14489, 51.931409], [4.135763, 51.906156], [4.155279, 51.881603], [4.117245, 51.857958], [4.067286, 51.845403], [4.030059, 51.884463], [4.05306, 51.914742], [4.038961, 51.923734], [3.992016, 51.915495], [4.065845, 51.932619], [4.14489, 51.931409]]], "type": "Polygon"}, "properties": {"code": "0614", "fid": 2, "gemeentena": "Westvoorne"}, "type": "Feature"}

In [87]:
# Copy each feature's municipality code (properties.code) to a top-level 'id' key,
# which the choropleth later matches against the 'id' column of the data.
for municipality_feature in gemeente_json['features']:
    municipality_code = municipality_feature['properties']['code']
    municipality_feature['id'] = municipality_code

### Merge and pivot

In [88]:
# Attach municipality and province names to every theft record. The inner join keeps
# only records whose 'RegioS' code occurs in the municipality table.
data = pd.merge(
    df,
    gemeentes.loc[:, ['GemeentecodeGM', 'Gemeentenaam', 'Provincienaam']],
    left_on='RegioS',
    right_on='GemeentecodeGM',
    how='inner',
)

# Keep and order the columns used downstream.
data = data.loc[:, ['ID', 'Provincienaam', 'GemeentecodeGM', 'Gemeentenaam', 'SoortDiefstal', 'year', 'count']]
data.head()

Unnamed: 0,ID,Provincienaam,GemeentecodeGM,Gemeentenaam,SoortDiefstal,year,count
0,6116,Drenthe,GM1680,Aa en Hunze,bike,2018,25.0
1,6117,Drenthe,GM1680,Aa en Hunze,bike,2019,50.0
2,6118,Drenthe,GM1680,Aa en Hunze,bike,2020,20.0
3,6119,Drenthe,GM1680,Aa en Hunze,bike,2021,30.0
4,11996,Drenthe,GM1680,Aa en Hunze,moped,2018,0.0


In [89]:
# Pivot to one row per municipality and year, with one column per theft type.
# aggfunc is given by name ('mean'); passing np.mean is deprecated in recent pandas.
data_pivot = pd.pivot_table(
    data.drop(columns='ID'),
    index=['Provincienaam', 'GemeentecodeGM', 'Gemeentenaam', 'year'],
    columns='SoortDiefstal',
    aggfunc='mean',
    fill_value=np.nan,
)

# Flatten the two-level column index by joining both levels with '_',
# e.g. ('count', 'bike') -> 'count_bike'.
data_pivot.columns = pd.Index(['_'.join(levels) for levels in data_pivot.columns.tolist()])

# Turn the row index levels back into ordinary columns.
data_pivot = data_pivot.reset_index()

# Create the join key for the GeoJSON features: the part of the code after the 'GM' prefix.
data_pivot['id'] = data_pivot['GemeentecodeGM'].str.split('GM').str[1]

# Lowercase all column names, then switch the Dutch names to English.
data_pivot.columns = [column.lower() for column in data_pivot.columns]
data_pivot = data_pivot.rename(columns={
    'provincienaam': 'province',
    'gemeentecodegm': 'municipality_id',
    'gemeentenaam': 'municipality',
})

data_pivot.head()

Unnamed: 0,province,municipality_id,municipality,year,count_animal,count_bike,count_boat,count_car,count_heist,count_moped,count_motorcycle/scooter,count_pickpocketing,count_shoplifting,count_street robbery,id
0,Drenthe,GM0106,Assen,2018,5,375,0,15,5,35,5,20,135,15,106
1,Drenthe,GM0106,Assen,2019,0,410,0,10,0,55,15,25,235,5,106
2,Drenthe,GM0106,Assen,2020,5,275,0,5,0,55,10,15,170,10,106
3,Drenthe,GM0106,Assen,2021,0,385,0,5,0,40,5,20,110,5,106
4,Drenthe,GM0109,Coevorden,2018,0,110,0,15,0,15,0,5,20,0,109


# 2 EDA

In [90]:
# Names of the ten municipalities with the largest 'count_boat' values in 2020.
data_pivot.loc[data_pivot['year'].eq(2020)].nlargest(10, 'count_boat').loc[:, 'municipality']


795            Amsterdam
787              Alkmaar
1040              Zwolle
93            Leeuwarden
109       Smallingerland
133      Súdwest-Fryslân
137     De Fryske Marren
458             Roermond
779               Altena
783             Aalsmeer
Name: municipality, dtype: object

#### Barchart

In [91]:
# Sum bike thefts per year. The year is converted to text first so that Plotly
# treats it as a discrete category when colouring the bars.
bar = (
    data_pivot.astype({'year': 'str'})
    .groupby('year')['count_bike']
    .sum()
    .reset_index()
)

# Moment of rendering, shown in the annotation above the plot.
timestamp = datetime.now()

# Only 2018 and 2019 get an explicit colour in the map below.
fig = px.bar(
    bar,
    x='year',
    y='count_bike',
    color='year',
    color_discrete_map={'2018': 'rgb(0,0,128)', '2019': 'rgb(235,207,52)'},
    title='Total bike theft by year',
    height=550,
)

# The layout update overrides the title given to px.bar, labels the x axis and adds
# the generation timestamp in paper coordinates (centred, just above the plot area).
fig.update_layout(
    title={'text': 'Total bike thefts in The Netherlands by year'},
    xaxis={'title': {'text': 'Year'}},
    annotations=[{"text": f"This graph was generated at {timestamp}",
                  'showarrow': False, 'x': 0.5, 'y': 1.1, 'xref': 'paper', 'yref': 'paper'}],
)

# Axis titles can also be set through the dedicated axis helpers.
fig.update_yaxes(title_text='Bike thefts')

fig.show()

#### Button to switch plottypes

In [92]:
# Two buttons that switch the trace type of the current figure between a bar plot
# and a marker-only scatter plot.
my_buttons = [
    dict(label="Bar plot", method="update", args=[{"type": "bar"}]),
    dict(label="scatterplot", method="update", args=[{"type": "scatter", 'mode': 'markers'}]),
]

# Attach the buttons as a vertical menu to the right of the plot; the first button
# starts out highlighted as the active one.
button_menu = dict(
    type="buttons",
    direction='down',
    x=1.3,
    y=0.5,
    showactive=True,
    active=0,
    buttons=my_buttons,
)
fig.update_layout(updatemenus=[button_menu])
fig.show()



In [93]:
# Yearly bike-theft totals, with the year stored as text.
bar = (
    data_pivot.astype({'year': 'str'})
    .groupby('year')['count_bike']
    .sum()
    .reset_index()
)

# Custom two-stop colour scale running from red to green.
my_scale = ['rgb(255, 0, 0)', 'rgb(3, 252, 40)']

# Colouring by the numeric total makes Plotly use a continuous scale;
# a named scale such as 'inferno' could be passed instead of the custom list.
fig = px.bar(
    bar,
    x='year',
    y='count_bike',
    color='count_bike',
    color_continuous_scale=my_scale,
    title='Total bike theft by year',
)

# Replace the title passed to px.bar.
fig.update_layout(title={'text': 'Total bike thefts in The Netherlands by year'})
fig.show()

In [95]:
# Start from an empty figure and add one bar trace per theft type.
fig = go.Figure()
for theft_column, trace_label in (("count_bike", 'bike'), ("count_car", 'car')):
    fig.add_trace(go.Bar(x=data_pivot["year"], y=data_pivot[theft_column], name=trace_label))

# One annotation per theft type, both pointing at 2021 but at different heights.
bike_annotations = [dict(text='2021 was the best', showarrow=True, x=2021, y=60000)]
car_annotations = [dict(text='2021 was the best', showarrow=True, x=2021, y=10000)]

# Each button leaves the traces untouched (empty first argument) and only swaps
# the annotations in the layout.
my_buttons = [
    dict(label=button_label, method="update", args=[{}, {"annotations": button_annotations}])
    for button_label, button_annotations in (("Bike", bike_annotations), ("Car", car_annotations))
]

# Vertical button menu placed to the right of the plot.
fig.update_layout(
    updatemenus=[dict(type="buttons", direction='down', x=1.3, y=0.5, buttons=my_buttons)]
)

fig.show()

#### Histogram

In [97]:
# Working copy for the histogram cells; 'year' stays numeric here because the
# subplot cell further down filters on integer years.
hist = data_pivot.copy()

# Colour per year. The keys are strings, so the column used for colouring must
# hold strings as well — otherwise no key matches and the map is ignored.
ind_color_map = {'2018': 'rgb(124, 250, 120)', '2019': 'rgb(112, 128, 144)',
                 '2020': 'rgb(137, 109, 247)', '2021': 'rgb(255, 0, 0)'}

fig = px.histogram(
    # cast the year to text only for this plot so the colour map keys match
    data_frame=hist.assign(year=hist['year'].astype('str')),
    x='count_bike',
    nbins=10,
    orientation='v',
    log_y=True,  # logarithmic y axis
    color='year',  # one histogram series per year
    color_discrete_map=ind_color_map,
    histfunc='avg',  # bin aggregation
)

fig.show()

In [98]:
# One subplot row per year, all sharing the x axis.
# The years could be added one by one; looping over them avoids the repetition.
fig = make_subplots(rows=4, cols=1, shared_xaxes=True)

# enumerate supplies the 1-based row number. The loop variable is called `plot_year`
# so it does not overwrite the notebook-level `year` that later cells rely on.
# NOTE(review): x and y both use 'count_bike', so every panel shows a diagonal — confirm intent.
for row_num, plot_year in enumerate([2018, 2019, 2020, 2021], start=1):
    hist_sub = hist[hist['year'] == plot_year]
    fig.add_trace(
        go.Scatter(x=hist_sub['count_bike'],
                   y=hist_sub['count_bike'],
                   name=str(plot_year), mode='markers'),
        row=row_num, col=1)

fig.show()

#### Boxplot

In [99]:
# Distribution of bike thefts, drawn as one box per province side by side;
# hovering a point also shows its municipality and year.
box = data_pivot.copy()

fig = px.box(
    box,
    y='count_bike',
    color='province',
    hover_data=['municipality', 'year'],
)

fig.show()

#### Scatterplot

In [100]:
# Bike versus car thefts per municipality and year; the year is stored as text so
# each year becomes its own discrete colour.
scatter = data_pivot.astype({'year': 'str'})

fig = px.scatter(
    scatter,
    x='count_bike',
    y='count_car',
    color='year',
    hover_name='municipality',
    # hide the year in the hover box, show the province
    hover_data={'year': False, 'province': True},
)

# Two example annotations placed at fixed data coordinates.
loss_annotation = dict(x=1000, y=400, showarrow=True, arrowhead=4,
                       font={'color': 'black'}, text="Annotation 1")
gain_annotation = dict(x=2000, y=2500, showarrow=True, arrowhead=4,
                       font={'color': 'black'}, text="Annotation 2")

# Legend position is expressed as a fraction of the plot area; the x axis is
# clipped to the range 0-6000.
fig.update_layout(
    showlegend=True,
    legend={'title': 'years', 'x': 0.5, 'y': 1.1,
            'bgcolor': 'white',
            'borderwidth': 3},
    annotations=[loss_annotation, gain_annotation],
    xaxis={'range': [0, 6000]},
)

fig.show()

### Lineplot

In [101]:
# Amsterdam rows only, with the year converted to a datetime so the x axis is a
# date axis. Using .assign on the filtered frame avoids assigning a column on a
# slice, which raised a SettingWithCopyWarning.
line = (
    data_pivot.loc[data_pivot['municipality'] == 'Amsterdam']
    .assign(year=lambda amsterdam: pd.to_datetime(amsterdam['year'], format='%Y'))
)

fig = px.line(data_frame=line,
              x='year',
              y='count_bike',
              title='Bike theft in Amsterdam over time')

# Text box in paper coordinates: horizontally centred, near the top of the plot area.
message_annotation = {
    'x': 0.5, 'y': 0.95, 'xref': 'paper', 'yref': 'paper',
    'text': 'text',
    'font': {'size': 20, 'color': 'white'},
    'bgcolor': 'rgb(237, 64, 200)', 'showarrow': False}

# Range-selector button on the date axis (two-year span, 'todate' step mode).
date_buttons = [
    {'count': 2, 'label': "YTD", 'step': "year", 'stepmode': "todate"}]

# Add the annotation and the range selector to the layout.
fig.update_layout({'annotations': [message_annotation],
                   'xaxis': {'rangeselector': {'buttons': date_buttons}}
                   })

fig.show()

#### Heatmap

In [102]:
# Pearson correlation between the numeric columns. The text columns (province,
# municipality, ids) are excluded explicitly: calling corr() on the full frame
# warns in pandas 1.5 and raises in pandas 2.
cr = data_pivot.select_dtypes(include='number').corr(method='pearson')

# Heatmap of the correlation matrix on a fixed -1..1 colour range.
fig = go.Figure(go.Heatmap(
               x = cr.columns,
               y = cr.columns,
               z = cr.values.tolist(),
               colorscale = 'rdylgn', zmin = -1, zmax=1))

fig.show()

#### Subplots

In [104]:
# Subplots are built with graph_objects traces: each trace is created first and
# then placed in a grid cell of the figure.
fig = make_subplots(rows=1, cols=2, subplot_titles=['Histogram 1', 'Histogram 2'])

# Bike thefts on the left, car thefts on the right (the latter hidden from the legend).
bike_histogram = go.Histogram(x=hist['count_bike'], nbinsx=5, name='Bike')
car_histogram = go.Histogram(x=hist['count_car'], nbinsx=5, name='Car', showlegend=False)
fig.add_trace(bike_histogram, row=1, col=1)
fig.add_trace(car_histogram, row=1, col=2)

# Centred overall title.
fig.update_layout(title={'text': 'Distribution of Theft', 'x': 0.5, 'y': 0.9})

fig.show()

### Overlaying plots

In [108]:
# Amsterdam rows, used for the line overlay.
line = data_pivot.copy()
line = line.loc[line['municipality'].eq('Amsterdam')]

# Bars: yearly bike-theft totals from the `bar` frame built in the bar-chart cells above.
yearly_bars = go.Bar(x=bar['year'],
                     y=bar['count_bike'],
                     name='Yearly bike theft')

# Line: graph_objects draws line charts through go.Scatter with a line mode.
amsterdam_line = go.Scatter(x=line['year'],
                            y=line['count_bike'],
                            name='Amsterdam',
                            mode='lines+markers')

# Both traces share one set of axes, bars first.
fig = go.Figure(data=[yearly_bars, amsterdam_line])

fig.show()

# 3 Plotting


## 3.1 Where do the crimes occur?

In [132]:
# Names of the theft-count columns that can be chosen for the map.
options = ['count_' + theft_kind for theft_kind in (
    'animal', 'bike', 'boat',
    'car', 'heist', 'moped',
    'motorcycle/scooter', 'pickpocketing',
    'shoplifting', 'street robbery',
)]

print(options)

['count_animal', 'count_bike', 'count_boat', 'count_car', 'count_heist', 'count_moped', 'count_motorcycle/scooter', 'count_pickpocketing', 'count_shoplifting', 'count_street robbery']


**Select a crime**

In [151]:
# Ask which theft column to map. The answer is used as a column name further on,
# so keep asking until it matches one of the entries in `options`
# (an unknown name would otherwise fail with a KeyError when the map data is built).
theft = input('Select a type of theft:').strip()
while theft not in options:
    print('Please pick one of: ' + ', '.join(options))
    theft = input('Select a type of theft:').strip()

print()
print('You choose ' + theft)

Select a type of threft:count_bike

You choose count_bike


**Select a year**

In [112]:
# Ask for the year to map and keep asking until it is one of the years present in
# the data (2018-2021), in line with the year prompts later in the notebook.
# The value stays a string because it is compared with the text version of 'year'.
valid_years = ['2018', '2019', '2020', '2021']
year = input('Select a year:').strip()
while year not in valid_years:
    print('Please pick a year between 2018 and 2021')
    year = input('Select a year:').strip()

print()
print('You chose ' + year)

Select a year:2019

You choose 2019


**Create map**

In [152]:
# Frame behind the map: year as text, plus a log10(count + 1) column of the chosen
# theft type for colouring (the +1 keeps zero counts defined), limited to the chosen year.
data_plotly = data_pivot.astype({'year': 'str'})
data_plotly['log scale'] = np.log10(data_plotly[theft] + 1)
data_plotly = data_plotly.loc[data_plotly['year'].eq(year)]

In [169]:
# Readable hover label for each selectable theft column. A lookup table replaces the
# if/elif chain, which listed 'count_shoplifting' twice and misspelled the moped label.
theft_labels = {
    'count_animal': 'Total animal theft',
    'count_bike': 'Total bike theft',
    'count_boat': 'Total boat theft',
    'count_car': 'Total car theft',
    'count_heist': 'Total heist',
    'count_moped': 'Total moped theft',
    'count_motorcycle/scooter': 'Total motorcycle and scooter theft',
    'count_pickpocketing': 'Total pickpocketing',
    'count_shoplifting': 'Total shoplifting',
    'count_street robbery': 'Total robbery',
}
# Unknown selections fall back to the generic label 'other'.
label = theft_labels.get(theft, 'other')

# Date of rendering (only needed by the optional annotation below).
timestamp = datetime.now().date()

# Choropleth of municipalities: the 'id' column is matched against the ids of the
# GeoJSON features, and the fill colour comes from the 'log scale' column.
fig = px.choropleth_mapbox(data_frame = data_plotly,
              locations= "id",
              geojson = gemeente_json,
              mapbox_style = 'carto-positron',
              center = {'lat': 52.153, 'lon':5.3842 },
             zoom = 6.5,
             height=800 ,
             width=800,
             # colour settings: range runs from 0 to the largest value on display
             color = 'log scale',
             opacity  = 0.4,
             color_continuous_scale="Purples",
            range_color=(0, data_plotly['log scale'].max()),
            # hover box: municipality name plus the raw count; id and log value hidden
            hover_name = 'municipality',
            hover_data = {'id': False, 'log scale': False, theft: True},
            labels={theft: label}
                          )

# Centred title naming the selected year.
fig.update_layout({ 'title': {'text': 'Registered thefts in The Netherlands in '+ year, 'x': 0.5},
                  # optional: show when the map was generated
                  # 'annotations': [{"text": f"This graph was generated at {timestamp}",
                  #                'showarrow': False, 'x': 0.5, 'y' : 1.05, 'xref' : 'paper', 'yref': 'paper'}],
                     'legend': {'title': 'log scale'},
                  })

fig.show()

## 3.2 What is the most common type of theft?

In [305]:
# National total per year and theft type, ordered from most to least frequent
# within each year.
common_crime = (
    data.groupby(['year', 'SoortDiefstal'])['count']
    .sum()
    .reset_index()
    .sort_values(['year', 'count'], ascending=[True, False])
)

# Rank within each year; rank 1 is the theft type with the highest total.
common_crime['rank'] = common_crime.groupby('year')['count'].rank(ascending=False)

# English column name, and year as text so it plots as a category.
common_crime = (
    common_crime
    .rename(columns={'SoortDiefstal': 'type_theft'})
    .astype({'year': 'str'})
)
common_crime.head()

Unnamed: 0,year,type_theft,count,rank
1,2018,bike,68130.0,1.0
8,2018,shoplifting,37995.0,2.0
7,2018,pickpocketing,15490.0,3.0
5,2018,moped,14045.0,4.0
3,2018,car,7590.0,5.0


In [388]:
#year= input('Select a year:')
#if year in [str(2018), str(2019), str(2020), str(2021)]:
    #print('You choose ' + year) 
#else: 
    #print( 'Please pick a year between 2018 and 2021')
    #year= input('Select a year:')

In [389]:
# Ask how many top-ranked theft types to show. Keep asking until the answer is a
# whole number from 1 to 10, so the int() conversion in the next cell cannot fail
# (the earlier version re-asked only once and accepted whatever came second).
valid_top = [str(number) for number in range(1, 11)]
top = input('I want to see the top :').strip()
while top not in valid_top:
    print('Please pick a number between 1 and 10')
    top = input('I want to see the top :').strip()

print('You want to see the top ' + top)

I want to see the top :
Please pick a number between 1 and 10
I want to see the top :2


In [390]:
# Theft types ranked 1..top in the selected year.
top = int(top)
top_rank = list(range(1, top + 1))
common_crime_short = common_crime[(common_crime['rank'].isin(top_rank)) & (common_crime['year'] == year)]

# Bar per theft type. The title now describes this plot (it was copied from the
# bike chart), and the per-year colour map was dropped because it has no effect
# without a `color` column.
fig = px.bar(data_frame = common_crime_short,
             x = 'type_theft',
             y = 'count',
             title = 'Top ' + str(top) + ' theft crimes in ' + year,
             height = 550)
# The figure is built but intentionally not displayed here.
#fig.show()

In [460]:
#print(px.colors.qualitative.Prism)

['rgb(95, 70, 144)', 'rgb(29, 105, 150)', 'rgb(56, 166, 165)', 'rgb(15, 133, 84)', 'rgb(115, 175, 72)', 'rgb(237, 173, 8)', 'rgb(225, 124, 5)', 'rgb(204, 80, 62)', 'rgb(148, 52, 110)', 'rgb(111, 64, 112)', 'rgb(102, 102, 102)']


In [392]:
# Theft types ranked 1..top, for every year.
top = int(top)
top_rank = list(range(1, top + 1))
common_crime_year_short = common_crime.loc[common_crime['rank'].isin(top_rank)]

# Bars per year, split and coloured by theft type using the qualitative Prism
# palette; the hover box is headed by the theft type and shows only the count.
fig = px.bar(
    common_crime_year_short,
    x='year',
    y='count',
    color='type_theft',
    color_discrete_sequence=px.colors.qualitative.Prism,
    hover_name='type_theft',
    hover_data={'type_theft': False, 'year': False, 'count': True},
    height=500,
)

# Centred title and a readable legend title.
fig.update_layout(
    title={'text': 'Top ' + str(top) + ' theft crimes by year', 'x': 0.5},
    legend={'title': 'Type of theft'},
)

fig.show()

### 3.3 Crime in your municipality

In [393]:
# Total per year, municipality and theft type, ordered by municipality, year and
# ascending count.
municipality_crime = (
    data.groupby(['year', 'Gemeentenaam', 'SoortDiefstal'])['count']
    .sum()
    .reset_index()
    .sort_values(['Gemeentenaam', 'year', 'count'], ascending=[True, True, True])
)

# Rank within each municipality and year; rank 1 is the most frequent theft type.
municipality_crime['rank'] = (
    municipality_crime.groupby(['Gemeentenaam', 'year'])['count'].rank(ascending=False)
)

# English column names, and year as text for comparison with the typed-in year.
municipality_crime = (
    municipality_crime
    .rename(columns={'SoortDiefstal': 'type_theft', 'Gemeentenaam': 'municipality'})
    .astype({'year': 'str'})
)
municipality_crime.head()

Unnamed: 0,year,municipality,type_theft,count,rank
0,2018,'s-Gravenhage,animal,0.0,10.0
2,2018,'s-Gravenhage,boat,10.0,9.0
4,2018,'s-Gravenhage,heist,50.0,8.0
6,2018,'s-Gravenhage,motorcycle/scooter,150.0,7.0
9,2018,'s-Gravenhage,street robbery,215.0,6.0


In [394]:
# Ask for a year and keep asking until it is one of 2018-2021 (the earlier version
# re-asked only once and accepted whatever came second). The value stays a string
# because it is compared with the text version of the 'year' column.
valid_years = ['2018', '2019', '2020', '2021']
year = input('Select a year: ').strip()
while year not in valid_years:
    print('Please pick a year between 2018 and 2021')
    year = input('Select a year:').strip()

print('You choose ' + year)

Select a year: 2021
You choose 2021


In [397]:
# Ask which municipality to inspect and echo the choice back.
municipality = input('Please select a municipality: ')
print(f'You choose {municipality}')

Please select a municipality: Groningen
You choose Groningen


In [497]:
# Rows for the chosen municipality and year, most frequent theft type first.
municipality_crime_short = (
    municipality_crime
    .loc[municipality_crime['year'].eq(year) & municipality_crime['municipality'].eq(municipality)]
    .sort_values('rank')
)

# One bar per theft type with the count printed on the bar; the hover box hides
# both fields since the bar label already shows the value.
fig = px.bar(
    municipality_crime_short,
    x='type_theft',
    y='count',
    text_auto=True,
    color_discrete_sequence=px.colors.qualitative.Prism,
    hover_data={'type_theft': False, 'count': False},
    height=500,
)

# Centred title naming municipality and year; the x-axis title is blanked.
fig.update_layout(
    title={'text': f'Most common theft crimes in {municipality} in {year}', 'x': 0.5},
    xaxis={'title': {'text': ''}},
)

fig.show()

### Crime trends

In [482]:
# Total per year, municipality and theft type, ordered so that each
# municipality/theft-type series runs chronologically.
municipality_trend = (
    data.groupby(['year', 'Gemeentenaam', 'SoortDiefstal'])['count']
    .sum()
    .reset_index()
    .sort_values(['Gemeentenaam', 'SoortDiefstal', 'year', 'count'], ascending=[True, True, True, True])
    .rename(columns={'SoortDiefstal': 'type_theft', 'Gemeentenaam': 'municipality'})
)

# Year as text, taken from this frame's own column. It was previously read from
# `municipality_crime`, which only worked through index alignment and made this cell
# depend on an unrelated one.
municipality_trend['year'] = municipality_trend['year'].astype('str')


In [480]:
# Ask which municipality's trend to plot and echo the choice back.
municipality = input('Please select a municipality: ')
print(f'You choose {municipality}')

Please select a municipality: Groningen
You choose Groningen


In [481]:
# Ask for the theft type (a plain label such as 'bike') and echo it after a blank line.
theft = input('Select a type of threft:')
print('\nYou choose ' + theft)

Select a type of threft:bike

You choose bike


In [506]:
# Series for the chosen municipality and theft type.
municipality_trend_short = municipality_trend.loc[
    municipality_trend['municipality'].eq(municipality) & municipality_trend['type_theft'].eq(theft)
]

# Count per year as a single-colour line; the hover box shows the count only.
fig = px.line(
    municipality_trend_short,
    x='year',
    y='count',
    color_discrete_sequence=['rgb(56, 166, 165)'],
    hover_data={'year': False, 'count': True},
)

# Title text for each theft type; anything unrecognised becomes 'other'.
trend_labels = {
    'street robbery': 'Street robbery',
    'shoplifting': 'Shoplifting',
    'pickpocketing': 'Pickpocketing',
    'motorcycle/scooter': 'Motorcycle/scooter theft',
    'moped': 'Moped theft',
    'heist': 'Heist',
    'car': 'Car theft',
    'boat': 'Boat theft',
    'bike': 'Bike theft',
    'animal': 'Animal theft',
}
label = trend_labels.get(theft, 'other')

# Centred title combining the label and the municipality.
fig.update_layout(title={'text': f'{label} trend in {municipality}', 'x': 0.5})

fig.show()

### 3.4 Worst place to park your bike

In [456]:
# Total per year, municipality and theft type, ordered so that within each year and
# theft type the municipality with the highest count comes first.
worst_crime = (
    data.groupby(['year', 'Gemeentenaam', 'SoortDiefstal'])['count']
    .sum()
    .reset_index()
    .sort_values(['year', 'SoortDiefstal', 'count'], ascending=[True, False, False])
)

# Rank municipalities within each year and theft type; rank 1 has the highest count.
worst_crime['rank'] = worst_crime.groupby(['year', 'SoortDiefstal'])['count'].rank(ascending=False)

# English column names, a fixed column order, and year as text.
worst_crime = (
    worst_crime
    .rename(columns={'SoortDiefstal': 'type_theft', 'Gemeentenaam': 'municipality'})
    .loc[:, ['year', 'type_theft', 'rank', 'count', 'municipality']]
    .astype({'year': 'str'})
)

In [457]:
# Ask for the theft type (a plain label such as 'bike') and echo it after a blank line.
theft = input('Select a type of threft:')
print('\nYou choose ' + theft)

Select a type of threft:bike

You choose bike


In [451]:
# Ask for a year and keep asking until it is one of 2018-2021 (the earlier version
# re-asked only once and accepted whatever came second). The value stays a string
# because it is compared with the text version of the 'year' column.
valid_years = ['2018', '2019', '2020', '2021']
year = input('Select a year:').strip()
while year not in valid_years:
    print('Please pick a year between 2018 and 2021')
    year = input('Select a year:').strip()

print('You choose ' + year)

Select a year:2020
You choose 2020


In [500]:
# Preview the ten highest-ranked municipalities for the selected theft type and year.
# Filtering `worst_crime` directly (already ordered by descending count within each
# year and theft type) keeps this cell runnable on a fresh kernel: `worst_crime_short`
# itself is only created in the plotting cell further down.
worst_crime[(worst_crime['type_theft'] == theft) & (worst_crime['year'] == year)].head(10)

Unnamed: 0,year,type_theft,rank,count,municipality
7981,2020,bike,1.0,8330.0,Amsterdam
10591,2020,bike,2.0,4295.0,Rotterdam
7801,2020,bike,3.0,3085.0,'s-Gravenhage
11031,2020,bike,4.0,3075.0,Utrecht
10081,2020,bike,5.0,1830.0,Nijmegen
8811,2020,bike,6.0,1510.0,Eindhoven
10941,2020,bike,7.0,1485.0,Tilburg
8851,2020,bike,8.0,1475.0,Enschede
9041,2020,bike,9.0,1465.0,Groningen
9091,2020,bike,10.0,875.0,Haarlem


In [504]:
# First ten rows for the chosen theft type and year (the frame is already ordered by
# descending count), then reversed by rank so the horizontal bar chart lists rank 1 on top.
worst_crime_short = (
    worst_crime
    .loc[worst_crime['type_theft'].eq(theft) & worst_crime['year'].eq(year)]
    .iloc[:10]
    .sort_values('rank', ascending=False)
)

# Horizontal bars, one per municipality, with the count printed on the bar; the hover
# box hides both fields since the bar label already shows the value.
fig = px.bar(
    worst_crime_short,
    x='count',
    y='municipality',
    orientation='h',
    text_auto=True,
    color_discrete_sequence=['rgb(29, 105, 150)'],
    hover_data={'municipality': False, 'count': False},
    height=500,
)

# Lower-case wording for the title; anything unrecognised becomes 'other'.
title_labels = {
    'street robbery': 'street robbery',
    'shoplifting': 'shoplifting',
    'pickpocketing': 'pickpocketing',
    'motorcycle/scooter': 'motorcycle/scooter theft',
    'moped': 'moped theft',
    'heist': 'heists',
    'car': 'car theft',
    'boat': 'boat theft',
    'bike': 'bike theft',
    'animal': 'animal theft',
}
label = title_labels.get(theft, 'other')

# Centred title; the x-axis title is blanked.
fig.update_layout(
    title={'text': f'Top 10 municipalities with the highest rate of {label} in {year}', 'x': 0.5},
    xaxis={'title': {'text': ''}},
)

fig.show()
