In [1]:
import geopandas
from matplotlib import pyplot
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# The bare 'seaborn' style name was deprecated in Matplotlib 3.6 (renamed to
# 'seaborn-v0_8') and later removed; prefer the new name when it exists and
# fall back to the old one on older Matplotlib versions.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')


---
## Filtering data

In [2]:
# Load the 2015 street-level crime records and keep only the rows whose
# 'Falls within' value mentions South Wales.
# 'document.txt' holds the directory that contains the yearly feather files.
with open('document.txt', 'r') as path_file:
    path = path_file.read().strip()

year = 2015
df_data = pd.read_feather(f"{path}/{year}_street.feather")
df_data = df_data.drop(['Context', 'Crime ID'], axis=1)
# na=False: a missing 'Falls within' value counts as "not South Wales"
wales = df_data['Falls within'].str.contains('South Wales', na=False)
df_data = df_data[wales].sort_values(by=['Month'])
# drop records that carry no LSOA name
df_data = df_data[df_data['LSOA name'].notna()]
# number of remaining South Wales records; used later to normalise the scores
total = len(df_data)

names = ['Bridgend', 'Cardiff', 'Merthyr Tydfil',
         'Neath Port Talbot', 'Rhondda Cynon Taf', 'Swansea', 'Glamorgan']

# Keep rows whose LSOA name mentions any of the listed areas. A single
# combined mask avoids growing a DataFrame with pd.concat inside a loop.
# The names contain no regex metacharacters, so joining with '|' is safe.
in_named_area = df_data['LSOA name'].str.contains('|'.join(names))
df_wales = df_data[in_named_area].copy()
del df_data

# Ileana weights: weight assigned to each crime type
weights = {'Criminal damage and arson': 4,
           'Burglary': 3,
           'Anti-social behaviour': 1,
           'Violence and sexual offences': 5,
           'Public order': 3,
           'Other theft': 1,
           'Vehicle crime': 2,
           'Drugs': 3,
           'Other crime': 1,
           'Bicycle theft': 1,
           'Shoplifting': 1,
           'Theft from the person': 2,
           'Robbery': 3,
           'Possession of weapons': 3}

# Replace each crime type with its weight so that summing the column gives a
# weighted total. Crime types missing from `weights` become NaN.
df_wales['Crime type'] = df_wales['Crime type'].map(weights)


---
## Map

In [3]:
# Load the LSOA boundary geometries and keep only the LSOAs that appear in
# the crime data. 'geopandas.txt' holds the location of the boundary file.
with open('geopandas.txt', 'r') as path_file:
    path = path_file.read().strip()

lsoa = df_wales['LSOA code'].unique()
df_boundaries = geopandas.read_file(path)
# .copy() so that later cells can add columns to the filtered frame without
# triggering SettingWithCopyWarning
df_boundaries = df_boundaries[df_boundaries['LSOA11CD'].isin(lsoa)].copy()
# summed crime weights per LSOA code: {LSOA code: weighted total}
count = df_wales.groupby('LSOA code')['Crime type'].sum().to_dict()


In [4]:
# Score per LSOA: summed crime weights divided by the polygon area
# ('Shape__Area') and by `total`, then log2-transformed.
# Note: `total` is the number of records left after the South Wales filter,
# not a UK-wide crime count.
weighted_sum = df_boundaries['LSOA11CD'].map(count)
density = weighted_sum / df_boundaries['Shape__Area'] / total
df_boundaries['score'] = np.log2(density)


In [5]:
from sklearn.preprocessing import MinMaxScaler

# Linearly rescale the log scores onto 0-10 (smallest -> 0, largest -> 10).
scaler = MinMaxScaler(feature_range=(0, 10))
scaled = scaler.fit_transform(
    df_boundaries['score'].to_numpy().reshape(-1, 1))
# fit_transform returns an (n, 1) array; flatten it before storing as a column
df_boundaries['score'] = scaled.ravel()


In [7]:
# Interactive choropleth coloured by 'score'; the tooltip shows the score and
# the 'LSOA11NM' column for the hovered area.
crime_map = df_boundaries.explore(
    column='score',
    tooltip=["score", 'LSOA11NM'],
    legend=True,
    cmap='Purples',
)
crime_map
