In [196]:
import geopandas as gpd
import pandas as pd
import fiona
import os
import matplotlib.pyplot as plt
import folium
from zipfile import ZipFile
from folium.plugins import MarkerCluster, HeatMap, BeautifyIcon
from folium.map import LayerControl, Layer, FeatureGroup
import seaborn as sns
from shapely.geometry import Point, LineString, MultiPoint
import numpy as np
import contextily as ctx
import requests
from io import StringIO, BytesIO
import json
import datetime as dt
from ast import literal_eval
from shapely.wkt import loads
import plotly.express as px
import re

# Data

In [218]:
root = 'raw_data/police_complaints/'
files = os.listdir(root)

dfs = {}
for file in files:
    if '.csv' in file:
        df = pd.read_csv(f'{root}{file}')
        dfs[f'{file}{df.shape[1]}'] = df
    else:
        df = pd.read_excel(f'{root}{file}')
        dfs[f'{file}{df.shape[1]}'] = df
        
dfs['2019_2.csv1'] = pd.read_csv(f'{root}2019_2.csv', sep=';') #fuckup

In [219]:
# concatenateing all complaints files
all_complaints = pd.concat(dfs.values())
all_complaints['Data'] = pd.to_datetime(all_complaints['Data'], format="%d/%m/%Y")
all_complaints['Sottocategoria Criminologica'].value_counts()

Altro                         3977
Decoro e degrado urbano       2439
Disturbi da locali             768
Rumori molesti                 639
Comportamenti molesti          377
Disturbi Cani                  275
Veicoli abbandonati             72
Uso improprio parti comuni      45
Aggregazioni giovanili          24
Atti di vandalismo              20
Disturbi altri animali          10
Name: Sottocategoria Criminologica, dtype: int64

In [220]:
all_complaints.head()

Unnamed: 0,Categoria criminologa,Sottocategoria Criminologica,Circoscrizione,Localita,Area Verde,Data,Ora
0,Allarme Sociale,Altro,8.0,D'AZEGLIO/MASSIMO (CORSO) ...,,2018-07-16,
1,Allarme Sociale,Altro,1.0,REGINA MARGHERITA/(CORSO) ...,,2018-07-17,
2,Allarme Sociale,Altro,10.0,DUINO/(VIA) ...,,2018-09-14,
3,Allarme Sociale,Altro,,,,2018-10-02,9.4
4,Allarme Sociale,Altro,9.0,CARDUCCI/GIOSUE' (PIAZZA) ...,,2018-11-27,11.53


In [224]:
# filter by noisy categories
all_complaints_noise = all_complaints[(all_complaints['Sottocategoria Criminologica'] == 'Rumori molesti')\
                                     | (all_complaints['Sottocategoria Criminologica'] == 'Aggregazioni giovanili')\
                                     | (all_complaints['Sottocategoria Criminologica'] == 'Disturbi da locali')\
                                     | (all_complaints['Sottocategoria Criminologica'] == 'Uso improprio parti comuni')\
                                     | (all_complaints['Sottocategoria Criminologica'] == 'Altro')\
                                     ]
all_complaints_noise.shape


(5453, 7)

In [225]:
# filter to 2018
complaints_2018 = all_complaints_noise[(all_complaints_noise['Data'] > '2018-01-01') \
                                       & (all_complaints_noise['Data'] < '2018-12-31')].drop(columns=['Categoria criminologa', 
                                                                                              'Circoscrizione', 
                                                                                              'Localita', 
                                                                                              'Area Verde',
                                                                                             'Ora'])
# only use entries with hours
# complaints_2018 = complaints_2018.dropna(subset=['Ora'])

In [226]:
complaints_2018.to_csv('raw_data/complaints_filtered.csv')

In [213]:
complaints_2018.columns

Index(['Categoria criminologa', 'Sottocategoria Criminologica',
       'Circoscrizione', 'Localita', 'Area Verde', 'Data', 'Ora'],
      dtype='object')

In [203]:
# concat date with hour
complaints_2018['date_time'] = complaints_2018.apply(lambda x: pd.to_datetime(f'{str(x.Data)[:10]} {x.Ora}', 
                                                                              format='%Y-%m-%d %H.%M',
                                                                             errors='coerce'), axis=1)
# only get the hour (remove minute)
complaints_2018['final_date'] = complaints_2018.date_time.apply(lambda x: x.floor('H'))

complaints_with_hour = complaints_2018.drop(columns=['Categoria criminologa', 
                                                        'Sottocategoria Criminologica',
                                                        'Circoscrizione', 
                                                        'Localita', 
                                                        'Area Verde', 
                                                        'Data', 
                                                        'Ora',
                                                        'date_time'])

complaints_with_hour['count_complaint'] = 1
complaints_with_hour_grouped = complaints_with_hour.groupby('final_date').count()

In [206]:
# put in 2018 time series
r = pd.date_range('2018-01-01', '2018-12-31', freq='h')
complaints_fin = complaints_with_hour_grouped.reindex(r).rename_axis('final_date').reset_index()
complaints_fin['count_complaint'] = complaints_fin.count_complaint.fillna(0)


In [207]:
complaints_fin.to_csv('raw_data/complaints_2018.csv')

In [209]:
# rechecking
complaints_fin.sort_values(by='count_complaint')

Unnamed: 0,final_date,count_complaint
0,2018-01-01 00:00:00,0.0
5773,2018-08-29 13:00:00,0.0
5772,2018-08-29 12:00:00,0.0
5771,2018-08-29 11:00:00,0.0
5770,2018-08-29 10:00:00,0.0
...,...,...
1715,2018-03-13 11:00:00,2.0
5962,2018-09-06 10:00:00,2.0
1692,2018-03-12 12:00:00,3.0
5314,2018-08-10 10:00:00,3.0


# Merging with zones

In [30]:
zones = gpd.read_file('raw_data/zone_statistiche_geo')
zones['ZONASTAT'] = zones['ZONASTAT'].apply(lambda x: re.sub("[^0-9]", "", x)).astype(int)

In [31]:
complaints_date = all_complaints_noise.groupby(['Data', 'Circoscrizione']).count()[['Localita']].reset_index()
complaints_shp = complaints_date.merge(zones, 
                                       left_on='Circoscrizione',
                                       right_on='ZONASTAT')

In [32]:
mapable = gpd.GeoDataFrame(complaints_shp.groupby('ZONASTAT').agg({'Localita':'mean',
                                               'geometry':'first'}))
mapable = mapable.set_crs('EPSG:3003')
mapable = mapable.to_crs('EPSG:3857')

In [33]:
mapable

Unnamed: 0_level_0,Localita,geometry
ZONASTAT,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1.25,"POLYGON ((854420.153 5632875.411, 854470.197 5..."
2,1.037037,"POLYGON ((855400.704 5632952.711, 855446.466 5..."
3,1.133333,"POLYGON ((855189.904 5632505.860, 855233.672 5..."
4,1.096154,"POLYGON ((854345.767 5631838.946, 854395.435 5..."
5,1.04902,"POLYGON ((853633.229 5633395.257, 853646.291 5..."
6,1.110169,"POLYGON ((856059.578 5631808.449, 856095.141 5..."
7,1.135417,"POLYGON ((855106.531 5631252.773, 855231.830 5..."
8,1.153846,"POLYGON ((853012.386 5632317.806, 853087.993 5..."
9,1.017241,"POLYGON ((854593.661 5630534.173, 854610.146 5..."
10,1.074074,"POLYGON ((853728.403 5630611.333, 853805.263 5..."


In [34]:
# get sensors
sensors = pd.read_csv('raw_data/noise_sensor_list.csv',
                     sep=';',
                     decimal=',')

sensors['geometry'] = sensors.apply(lambda x: Point(x.Lat, x.Long), axis=1)
sensors_geo = gpd.GeoDataFrame(sensors)
sensors_geo = sensors_geo.set_crs(epsg=3857)
# sensors_geo = sensors_geo.to_crs('EPSG:3003')

In [35]:
sensors_geo

Unnamed: 0,code,address,Lat,Long,streaming,geometry
0,s_01,"Via Saluzzo, 26 Torino",45.059172,7.678986,https://userportal.smartdatanet.it/userportal/...,POINT (45.05917 7.67899)
1,s_02,"Via Principe Tommaso, 18bis Torino",45.057837,7.681555,https://userportal.smartdatanet.it/userportal/...,POINT (45.05784 7.68156)
2,s_03,Largo Saluzzo Torino,45.058518,7.678854,https://userportal.smartdatanet.it/userportal/...,POINT (45.05852 7.67885)
3,s_05,Via Principe Tommaso angolo via Baretti Torino,45.057603,7.681348,https://userportal.smartdatanet.it/userportal/...,POINT (45.05760 7.68135)
4,s_06,"Corso Marconi, 27 Torino",45.055554,7.68259,https://userportal.smartdatanet.it/userportal/...,POINT (45.05555 7.68259)


In [36]:
# map zones
m = folium.Map([45.072433300397435, 7.668045740145775], zoom_start=13)

folium.Choropleth(mapable,
#                  data=mapable,
                 toolip).add_to(m)

for i, row in sensors.iterrows():
    folium.Marker(location=[row.Lat, row.Long]).add_to(m)

m

NameError: name 'toolip' is not defined