In [None]:
import plotly.express as px
import pandas as pd
import numpy as np
import geopandas as gpd
import json

import seaborn as sns
import matplotlib.pyplot as plt

from libpysal.weights import Queen, W
from spreg import ML_Lag

from esda.moran import Moran, Moran_Local
from splot.esda import moran_scatterplot, plot_moran, lisa_cluster

In [None]:
# Raw 2019 crime records, one row per recorded crime.
# The next cell narrows this to Metropolitan Police burglaries.
crimes = pd.read_csv(
    'data/2019-data.csv'
)

In [None]:
# Clean data: keep only burglaries recorded by the Metropolitan Police, drop
# the columns that are not needed downstream, and discard rows with any
# remaining missing value (e.g. no coordinates).
#
# `crimes` is only read here, never reassigned, so this cell can be re-run
# safely. Overwriting `crimes` with the filtered/column-dropped frame makes a
# second run fail with KeyError: 'Falls within'.
is_met_police = crimes['Falls within'] == 'Metropolitan Police Service'
is_burglary = crimes['Crime type'] == 'Burglary'

burglaries = (
    crimes
    .loc[is_met_police & is_burglary]
    .drop(columns=['Context', 'Crime type', 'Crime ID', 'Reported by', 'Falls within',
                   'Last outcome category', 'LSOA name', 'LSOA code', 'Location'])
    .dropna()
    .reset_index(drop=True)
)

In [None]:
# Visual check of the cleaned burglary records before the spatial join
burglaries

In [None]:
# Ward polygons, reprojected to EPSG:4326 so they share a CRS with the
# longitude/latitude burglary points created below.
wards = gpd.read_file('geodata/wards2019.geojson').to_crs(epsg=4326)

# Turn each burglary record into a point geometry from its coordinates.
gdf_burg = gpd.GeoDataFrame(
    burglaries,
    crs="EPSG:4326",
    geometry=gpd.points_from_xy(burglaries['Longitude'], burglaries['Latitude']),
)

# Tag every point with the NAME / GSS_CODE of the ward polygon it lies within.
# A left join keeps unmatched points (with null ward fields) so they can be
# removed explicitly in the next step.
burg_gdf_wards = gpd.sjoin(
    gdf_burg,
    wards[['geometry', 'NAME', 'GSS_CODE']],
    predicate='within',
    how='left',
)

# Keep only points that matched a ward, excluding the one named 'City of London'.
has_ward = burg_gdf_wards['GSS_CODE'].notna()
not_city = burg_gdf_wards['NAME'].ne('City of London')
burg_gdf_wards = burg_gdf_wards.loc[not_city & has_ward].reset_index(drop=True)

# Number of burglaries per ward code, as a two-column table (GSS_CODE, Count).
wards_burglary_counts = (
    burg_gdf_wards
    .groupby('GSS_CODE')
    .size()
    .rename('Count')
    .reset_index()
)

# Attach the counts to the ward polygons:
#   * wards absent from the counts table get NaN from the left merge -> set to 0
#   * remove the City of London district row
#   * drop attribute columns that are not used afterwards
wards = (
    wards
    .merge(wards_burglary_counts, how='left', on='GSS_CODE')
    .assign(Count=lambda df: df['Count'].fillna(0))
    .loc[lambda df: df['DISTRICT'].ne('City and County of the City of London')]
    .drop(columns=['DISTRICT', 'LAGSSCODE', 'HECTARES', 'NONLD_AREA'])
)

In [None]:
# Ward-level population table. 'Ward Code' is renamed to GSS_CODE so it shares
# a join key with `wards`; City of London rows are excluded to match `wards`.
deprivation = pd.read_csv('data/wards-deprivation-2019.csv')
deprivation = deprivation.rename(columns={'Ward Code': 'GSS_CODE'})
deprivation = deprivation[deprivation['Borough'] != 'City of London']

indep_vars = ['Child population', '16-59 population']
population_cols = ['Population'] + indep_vars

# Drop copies of the population columns left behind by an earlier run of this
# cell before merging. Without this, a second run yields 'Population_x' /
# 'Population_y' suffixes and the rate calculations below raise KeyError.
# On a first run the drop is a no-op.
wards = (
    wards
    .drop(columns=population_cols, errors='ignore')
    .merge(deprivation[['GSS_CODE'] + population_cols], on='GSS_CODE', how='left')
)

# NOTE(review): because this is a left merge, a ward with no row in the
# deprivation table gets NaN Population (and NaN rates); a zero Population
# would give an infinite rate. Confirm neither occurs before fitting the model.

# Burglaries per 1,000 residents
wards['CrimeRate'] = (wards['Count'] / wards['Population']) * 1000
# Age-group shares of the ward population, in percent (1 decimal place)
wards['Child population %'] = (wards['Child population'] / wards['Population'] * 100).round(1)
wards['16-59 population %'] = (wards['16-59 population'] / wards['Population'] * 100).round(1)

In [None]:
# Spatial weights: under Queen contiguity two wards are neighbours when their
# boundaries touch. use_index=False is passed through to libpysal unchanged.
w = Queen.from_dataframe(wards, use_index=False)

# 'r' = row-standardised weights
w.transform = 'r'

In [None]:
# Dependent variable: burglary rate as an (n, 1) column vector
y = wards['CrimeRate'].to_numpy().reshape(-1, 1)

# Independent variables: age-group percentage shares, one column each
X = wards[['Child population %', '16-59 population %']].to_numpy()

In [None]:
# Maximum-likelihood spatial lag model of the burglary rate on the age-group
# shares, using the Queen weights `w`.
#
# name_x must describe the columns that are actually in X. X is built from
# the percentage columns, so labelling them with `indep_vars` (the raw count
# column names) would make the summary table misreport what was regressed.
model = ML_Lag(
    y,
    X,
    w=w,
    name_y='CrimeRate',
    name_x=['Child population %', '16-59 population %'],
)
print(model.summary)

In [None]:
# Global Moran's I of the burglary rate under the Queen weights.
#
# p_sim comes from random permutations. Seed NumPy's global RNG (which esda's
# Moran draws from) immediately before the call so the pseudo p-value is
# reproducible on every run.
np.random.seed(42)
mi = Moran(y, w)

# (Moran's I statistic, permutation-based pseudo p-value)
mi.I, mi.p_sim

In [None]:
# Moran scatterplot for the global statistic `mi` computed above,
# drawn with equal axis scaling (aspect_equal=True)
fig, ax = moran_scatterplot(mi, aspect_equal=True)
plt.show()

In [None]:
# Local Moran's I (LISA) for each ward. Significance is permutation-based, so
# a fixed seed keeps the set of wards flagged at p < 0.05 stable between runs.
lisa = Moran_Local(y, w, seed=42)

# Plot cluster map: wards coloured by LISA cluster type, significant at p = 0.05
fig, ax = plt.subplots(1, figsize=(10, 6))
lisa_cluster(lisa, wards, p=0.05, ax=ax)
plt.title('')
plt.show()

In [None]:
# Interactive ward-level map of the child population share.
#
# Rows of `wards` are matched to polygons through GSS_CODE, so the geojson
# only needs that property plus the geometry. Serialising the whole frame
# would embed every attribute column in the figure for no benefit.
wards_geojson = json.loads(wards[['GSS_CODE', 'geometry']].to_json())

fig = px.choropleth_map(
    wards,
    geojson=wards_geojson,
    locations='GSS_CODE',
    featureidkey="properties.GSS_CODE",
    color='Child population %',
    range_color=(0, 40),  # fixed colour scale: 0-40 % of ward population
    color_continuous_scale="OrRd",
    map_style="open-street-map",
    zoom=9,
    center={"lat": 51.5072, "lon": -0.1276},
    opacity=0.6,
    height=600
)

fig.update_layout(title='Child Population % Heatmap by London Ward')
fig.show()