In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import folium
import requests
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

In [2]:
# Load the street-level crime records.
# NOTE: pandas needs an optional parquet engine ('pyarrow' or 'fastparquet')
# installed in the kernel environment, otherwise read_parquet raises ImportError.
# A forward slash keeps the path portable; the previous "data\street.parquet"
# only worked because "\s" is an invalid escape sequence, which newer Python
# versions warn about.
df = pd.read_parquet("data/street.parquet")

# Keep only the rows whose "Crime type" is "Burglary". The .copy() makes this an
# independent frame, so columns can be added to it later without triggering a
# SettingWithCopyWarning.
df_burglary = df.query("`Crime type` == 'Burglary'").copy()
df_burglary.head()


ImportError: Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:
 - Missing optional dependency 'pyarrow'. pyarrow is required for parquet support. Use pip or conda to install pyarrow.
 - Missing optional dependency 'fastparquet'. fastparquet is required for parquet support. Use pip or conda to install fastparquet.

In [3]:
# Ward names used to filter the police neighbourhood list down to the wards of
# interest (one name per line so additions/removals show up cleanly in diffs).
wards_barnet = [
    "High Barnet",
    "Underhill",
    "Barnet Vale",
    "East Barnet",
    "Friern Barnet",
    "Woodhouse",
    "Whetstone",
    "Brunswick Park",
    "Totteridge and Woodside",
    "Mill Hill",
    "Cricklewood",
    "Edgwarebury",
    "Burnt Oak",
    "Edgware",
    "Colindale South",
    "West Hendon",
    "Colindale North",
    "Hendon",
    "West Finchley",
    "East Finchley",
    "Garden Suburb",
    "Finchley Church End",
    "Golders Green",
    "Childs Hill",
]

In [None]:

# Police API endpoint that lists the Metropolitan Police neighbourhoods
url = "https://data.police.uk/api/metropolitan/neighbourhoods"

# Fetch the list. The timeout stops the cell from hanging forever on a stalled
# connection, and raise_for_status() surfaces HTTP errors here instead of as a
# confusing JSON/DataFrame failure further down.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Decode the JSON payload
data = response.json()

# Build a DataFrame, keep only the neighbourhoods whose name is in wards_barnet,
# and index it by neighbourhood id (chained instead of inplace=True so the cell
# produces a fresh frame each time it runs)
df_wards = pd.DataFrame(data).query("name in @wards_barnet").set_index("id")

df_wards["name"].values

In [None]:
ids = df_wards.index.tolist()

long = []
lat = []

# Download the boundary of every neighbourhood. The loop variable is not called
# `id` (that shadows the builtin) and the per-ward frame is not called `df`
# (that would overwrite the crime DataFrame loaded earlier in the notebook).
for ward_id in ids:
    # set the url
    url = f"https://data.police.uk/api/metropolitan/{ward_id}/boundary"
    # make a get request to the url; fail fast on timeouts and HTTP errors
    boundary_response = requests.get(url, timeout=30)
    boundary_response.raise_for_status()
    boundary_points = pd.DataFrame(boundary_response.json())
    # append the longitude and latitude to the lists as float arrays
    long.append(boundary_points["longitude"].to_numpy().astype(float))
    lat.append(boundary_points["latitude"].to_numpy().astype(float))

df_wards["longitude"] = long
df_wards["latitude"] = lat

# remove the first row because it was a duplicate
# NOTE(review): this is positional and not idempotent - re-running this cell
# without re-running the previous one drops another ward. Confirm which row is
# the duplicate and consider filtering it by id instead.
# .copy() gives an independent frame so the column assignment below does not
# raise SettingWithCopyWarning.
df_wards = df_wards.iloc[1:].copy()

# new column "boundaries": for each ward, a list of (longitude, latitude) tuples
df_wards["boundaries"] = [
    list(zip(ward_long, ward_lat))
    for ward_long, ward_lat in zip(df_wards["longitude"], df_wards["latitude"])
]

df_wards.head()

In [None]:
# Centre the map on the mean coordinate of the first ward's boundary points
first_ward = df_wards.iloc[0]
m = folium.Map(
    location=[first_ward["latitude"].mean(), first_ward["longitude"].mean()],
    zoom_start=12,
)
colors = ["red", "blue", "black"]

# Draw every ward outline, cycling through the colour list
for idx, (lg, lt) in enumerate(zip(df_wards["longitude"], df_wards["latitude"])):
    # folium expects (latitude, longitude) pairs, one row per boundary point
    np_array = np.column_stack((lt, lg))
    ward_color = colors[idx % len(colors)]

    folium.Polygon(
        locations=np_array,
        fill=True,
        color=ward_color,
        fill_color=ward_color,
    ).add_to(m)

display(m)

In [None]:

def _build_ward_polygons(wards):
    """Return a list of (ward name, shapely Polygon) pairs, one per row of `wards`.

    `wards` must have a "name" column and a "boundaries" column holding, per row,
    a sequence of (longitude, latitude) tuples.
    """
    return [
        (name, Polygon(boundary))
        for name, boundary in zip(wards["name"], wards["boundaries"])
    ]


def find_ward(row, ward_polygons=None):
    """
    Finds the ward where a crime took place.

    Parameters
    ----------
    row : a row of the df_burglary dataframe; its "Longitude" and "Latitude"
        values are used as the crime location.
    ward_polygons : optional list of (ward name, Polygon) pairs, as produced by
        _build_ward_polygons. When omitted, the polygons are built from the
        global df_wards on every call (the original, slower behaviour).

    Returns
    -------
    The name of the first ward whose polygon contains the point, or None when
    no ward contains it.
    """
    if ward_polygons is None:
        ward_polygons = _build_ward_polygons(df_wards)

    point = Point(row["Longitude"], row["Latitude"])
    for name, polygon in ward_polygons:
        # check if the burglary point is in the ward polygon
        if polygon.contains(point):
            return name
    return None


# Build the ward polygons once instead of once per ward for every burglary row
ward_polygons = _build_ward_polygons(df_wards)

# Month strings are parsed with the format %Y-%m
MONTH_FORMAT = "%Y-%m"

# Add the ward name and convert Month to datetime. assign() returns a new frame,
# which avoids SettingWithCopyWarning if df_burglary is a filtered view.
df_burglary = df_burglary.assign(
    ward=df_burglary.apply(find_ward, axis=1, ward_polygons=ward_polygons),
    Month=pd.to_datetime(df_burglary["Month"], format=MONTH_FORMAT),
)

# save the burglary dataframe as a parquet file (forward slash keeps the path
# working on non-Windows systems as well)
df_burglary.to_parquet("data/burglary.parquet")