## Before running any cells
### Download files and setup directory
1. In the same directory as this file, make a folder called 'Data'
2. Download the following files and put them in the 'Data' folder:
   - Full burglary data (extract Data.zip in here)
   - Code Lookup.csv
   - Ethnic Diversity 2021.csv
   - Property Prices.csv
   - Income Estimates 2020.csv
   - LSOA Boundaries 2021.geojson

### Install necessary packages
These 3 cells are originally commented out. Uncomment them by highlighting the lines and pressing ctrl + /

If you have run them once on your computer, you shouldn't have to run these lines again. Comment the blocks out again using ctrl + /

In [1]:
# !pip install osmnx==1.3.0

In [2]:
# !pip install folium

In [3]:
# !pip install osmnx geopandas

### Import necessary libraries

In [4]:
import random
import warnings
from collections import deque
from pathlib import Path

import folium
import geopandas as gpd
import ipywidgets as widgets
import matplotlib.colors as mcolors
import networkx as nx
import numpy as np
import osmnx as ox
import osmnx.folium as ox_folium
import pandas as pd
from IPython.display import display, clear_output
from shapely.geometry import Point, LineString, MultiPolygon

### Prepare burglary dataframe

In [5]:
# The 32 London boroughs; "City of London" is intentionally left out.
# Each LSOA name begins with the name of the borough that contains it.
boroughs = [
    "Westminster",
    "Kensington and Chelsea",
    "Hammersmith and Fulham",
    "Wandsworth",
    "Lambeth",
    "Southwark",
    "Tower Hamlets",
    "Hackney",
    "Islington",
    "Camden",
    "Brent",
    "Ealing",
    "Hounslow",
    "Richmond upon Thames",
    "Kingston upon Thames",
    "Merton",
    "Sutton",
    "Croydon",
    "Bromley",
    "Lewisham",
    "Greenwich",
    "Bexley",
    "Havering",
    "Barking and Dagenham",
    "Redbridge",
    "Newham",
    "Waltham Forest",
    "Haringey",
    "Enfield",
    "Barnet",
    "Harrow",
    "Hillingdon",
]

In [6]:
# Build a dictionary keyed by "<mon>_<yy>" (e.g. "jan_21") for every month
# between start_date and end_date inclusive. The values are placeholders
# that are later replaced by the matching monthly CSV.
from datetime import datetime, timedelta

# First and last month covered by the data
start_date = datetime(2021, 1, 1)
end_date = datetime(2025, 2, 1)

# Three-letter month abbreviations, indexed by month number minus one
month_abbrs = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
               'jul', 'aug', 'sep', 'oct', 'nov', 'dec']

# Number every month as year * 12 + zero-based month, so stepping one month
# forward is a plain +1 and the year rollover is handled by divmod.
first_month_index = start_date.year * 12 + (start_date.month - 1)
last_month_index = end_date.year * 12 + (end_date.month - 1)

month_vars = {}
for month_index in range(first_month_index, last_month_index + 1):
    year, zero_based_month = divmod(month_index, 12)
    month_vars[f"{month_abbrs[zero_based_month]}_{str(year)[-2:]}"] = None

In [7]:
# Load the police CSV for each month key, walking forward from start_date.
# `counter` is the zero-based month offset from January of the start year.
start_year = int(start_date.strftime("%y"))
counter = int(start_date.strftime("%m")) - 1
for key in month_vars:
    years_elapsed, zero_based_month = divmod(counter, 12)
    two_digit_year = start_year + years_elapsed
    month_number = zero_based_month + 1
    file_name = "Data/20%02d-%02d/20%02d-%02d-metropolitan-street.csv" % (two_digit_year, month_number, two_digit_year, month_number)
    month_vars[key] = pd.read_csv(file_name)
    counter += 1

In [8]:
# Discard, in place, every record that has no LSOA name
for monthly_df in month_vars.values():
    monthly_df.dropna(subset=['LSOA name'], inplace=True)

In [9]:
# Month keys in insertion (chronological) order
key_list = list(month_vars)

In [10]:
# This cell takes lots of computation power. Only has to be run once.
# Split every month's records into one dataframe per London borough.
# df_list is ordered month by month, and within each month in the order of
# the boroughs list, so entry i belongs to month i // b and borough i % b.
b = len(boroughs)
months_x_boroughs = len(month_vars) * len(boroughs)
df_list = []
for month_key in key_list:
    month_df = month_vars[month_key]
    # An LSOA name is "<district> <code>", e.g. "Sutton 017B". Compare the
    # district part exactly: a plain prefix test would also put
    # "Brentwood 001A" into the "Brent" bucket.
    district = month_df['LSOA name'].str.rsplit(' ', n=1).str[0]
    for borough_name in boroughs:
        df_list.append(month_df[district == borough_name])

In [11]:
# Find (month, borough) entries without any crime and take them out of df_list
no_crime = [position for position, frame in enumerate(df_list) if frame.empty]
if len(no_crime) == 0:
    sentence = 'Every borough had at least one crime in every month from %s %s to %s %s' % (start_date.strftime("%B"), start_date.strftime("%Y"), end_date.strftime("%B"), end_date.strftime("%Y"))
    print(sentence)
else:
    # Delete from the back: removing an item shifts every later position down
    # by one, so deleting in ascending order would remove the wrong frames.
    for position in reversed(no_crime):
        del df_list[position]

Every borough had at least one crime in every month from January 2021 to February 2025


In [12]:
# Stack every per-month, per-borough frame into one table with a fresh index
merged_df = pd.concat(objs=df_list, axis=0, ignore_index=True)

In [13]:
# Keep only the burglary records and renumber the rows
is_burglary = merged_df['Crime type'].eq('Burglary')
burg_df = merged_df.loc[is_burglary].reset_index(drop=True)

### Prepare code lookup dataframe

In [14]:
# Lookup table holding the LSOA and MSOA codes; the 2021 code/name columns
# are renamed to the column names used in the rest of the notebook.
codes_df = pd.read_csv('Data/Code Lookup.csv', encoding="latin1", low_memory=False)
codes_df.rename(
    columns={
        'lsoa21cd': 'LSOA code',
        'msoa21cd': 'MSOA code',
        'msoa21nm': 'MSOA name',
    },
    inplace=True,
)

In [15]:
# Lookup rows whose local authority is one of the 32 boroughs (no City of London)
in_london_borough = codes_df['ladnm'].isin(boroughs)
districts_df = codes_df.loc[in_london_borough]

In [16]:
# Independent copy of just the LSOA/MSOA identifier columns
msoa_lsoa = districts_df.loc[:, ['LSOA code', 'MSOA code', 'MSOA name']].copy()

In [17]:
# One row per LSOA code: keep the first occurrence, drop the repeats
repeated_lsoa = msoa_lsoa.duplicated(subset=['LSOA code'], keep='first')
msoa_to_lsoa = msoa_lsoa[~repeated_lsoa]

In [18]:
# Attach the MSOA code and name to every burglary through its LSOA code.
# A left join keeps burglaries whose LSOA code is missing from the lookup.
burglar_df = burg_df.merge(msoa_to_lsoa, how='left', on='LSOA code')

#### burglar_df
Main dataframe for burglary counts

In [19]:
# Store each record's position as a (longitude, latitude) tuple
burglar_df['Coordinate'] = [
    (longitude, latitude)
    for longitude, latitude in zip(burglar_df['Longitude'], burglar_df['Latitude'])
]

In [20]:
# Show one random burglary record as a sanity check
burglar_df.sample(n=1)

Unnamed: 0,Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context,MSOA code,MSOA name,Coordinate
75984,ea8188dcb4ee8a2363b900937c460a832fb9eab47c16c8...,2022-06,Metropolitan Police Service,Metropolitan Police Service,-0.150729,51.360833,On or near Railway Approach,E01004170,Sutton 017B,Burglary,Status update unavailable,,E02000856,Sutton 017,"(-0.150729, 51.360833)"


### Load Geographical Data
Use geopandas (gpd) to read geojson file of geographical data per LSOA

*`lsoas.gpkg` is a faster binary copy of the GeoJSON file. If it does not exist in this directory yet, uncomment and run the cell below once to create it, then comment the cell out again.*

In [21]:
# # Load the huge GeoJSON once (may take a while)
# lsoa_gdf = gpd.read_file("Data/LSOA Boundaries 2021.geojson")

# # Save it as a much faster binary format
# lsoa_gdf.to_file("lsoas.gpkg", driver="GPKG")

In [22]:
# Read the LSOA boundaries from the fast GeoPackage cache. When the cache is
# missing (e.g. on a first run) it is created from the large GeoJSON, which
# may take a while.
lsoa_cache_path = Path("lsoas.gpkg")
if not lsoa_cache_path.exists():
    gpd.read_file("Data/LSOA Boundaries 2021.geojson").to_file(lsoa_cache_path, driver="GPKG")

lsoa_gdf = gpd.read_file(lsoa_cache_path)
# Use the same column names as the burglary and lookup dataframes
lsoa_gdf.rename(columns={'LSOA21CD': 'LSOA code', 'LSOA21NM': 'LSOA name'}, inplace=True)

In [23]:
# Show one random LSOA boundary row as a sanity check
lsoa_gdf.sample(n=1)

Unnamed: 0,LSOA code,LSOA name,LSOA21NMW,BNG_E,BNG_N,LAT,LONG,Shape__Area,Shape__Length,GlobalID,geometry
11798,E01012431,Halton 012F,,351412,382634,53.33838,-2.73117,783466.300911,8043.406248,76e860b4-d325-433b-98f8-b31d8aa2c9c2,"MULTIPOLYGON (((-2.72285 53.34449, -2.72246 53..."


### Create GeoDataFrame per MSOA

In [24]:
# All unique MSOA codes, in order of first appearance
msoa_list = msoa_to_lsoa['MSOA code'].unique().tolist()

# For each MSOA (same order as msoa_list): the unique LSOA codes inside it
lsoa_per_msoa = [
    msoa_to_lsoa[msoa_to_lsoa["MSOA code"] == msoa_code]["LSOA code"].unique()
    for msoa_code in msoa_list
]

# For each MSOA (same order as msoa_list): the boundary rows of its LSOAs
msoa_gdf = [
    lsoa_gdf[lsoa_gdf["LSOA code"].isin(lsoa_codes)]
    for lsoa_codes in lsoa_per_msoa
]

## Route Making Algorithms

In [25]:
def find_max_weight_routes(G, edges, nodes, k=5, m=1000, M=5000, max_iterations=1000):
    """
    Depth-first search for up to k closed routes with a high total edge weight.

    Parameters:
    - G: not read by this function
    - edges: dataframe indexed by (u, v, key); the 'length' and 'weight'
      values of each row are used ('length' defaults to 0, 'weight' defaults
      to the length when missing)
    - nodes: not read by this function
    - k: maximum number of routes to return
    - m: minimum total length of an accepted route
    - M: maximum total length of an accepted route
    - max_iterations: not read by this function; the search is not capped

    Returns:
    - (routes, working_G): routes is a list of (node_list, total_weight)
      tuples sorted by total_weight in descending order, where node_list
      starts and ends at the same node; working_G is the directed graph the
      search ran on
    """
    # Build a directed graph holding every edge in both directions.
    # Parallel edges between the same two nodes overwrite each other.
    working_G = nx.DiGraph()
    for (u, v, key), row in edges.iterrows():
        length = row.get('length', 0)
        weight = row.get('weight', length)
        working_G.add_edge(u, v, length=length, weight=weight, key=key)
        working_G.add_edge(v, u, length=length, weight=weight, key=key)
    
    routes_with_weights = []
    used_start_edges = set()
    
    # All directed edges, heaviest first; each one is tried as a route start
    all_edges = sorted(
        [(u, v) for u, v in working_G.edges()],
        key=lambda e: -working_G.edges[e]['weight']
    )
    
    for start_edge in all_edges:
        if len(routes_with_weights) >= k:
            break
            
        if start_edge in used_start_edges:
            continue
            
        start_node, next_node = start_edge
        best_route, best_weight = None, 0
        # Each stack entry is (current node, route so far, length so far,
        # weight so far, set of directed edges already used). The deque is
        # popped from the right, i.e. used as a LIFO stack.
        stack = deque([(
            next_node, 
            [start_node, next_node], 
            working_G.edges[start_edge]['length'],
            working_G.edges[start_edge]['weight'],
            {start_edge}
        )])
        
        while stack:
            node, route, length, weight, visited = stack.pop()
            
            # Can the route be closed by stepping straight back to the start?
            if working_G.has_edge(node, start_node):
                return_edge = (node, start_node)
                if return_edge not in visited:
                    total_length = length + working_G.edges[return_edge]['length']
                    total_weight = weight + working_G.edges[return_edge]['weight']
                    
                    # Only a strictly heavier cycle within [m, M] replaces the best
                    if m <= total_length <= M and total_weight > best_weight:
                        best_route = route + [start_node]
                        best_weight = total_weight
                        # This state is not extended further; the search goes
                        # on with the remaining stack entries.
                        continue
            
            # Push the neighbours sorted by weight (descending), so the
            # lightest neighbour ends up on top of the stack
            for neighbor in sorted(
                working_G.neighbors(node),
                key=lambda n: -working_G.edges[node, n]['weight']
            ):
                edge = (node, neighbor)
                if edge not in visited:
                    edge_data = working_G.edges[edge]
                    new_length = length + edge_data['length']
                    new_weight = weight + edge_data['weight']
                    
                    # Drop any extension that is already longer than M
                    if new_length <= M:
                        new_visited = visited.copy()
                        new_visited.add(edge)
                        stack.append((neighbor, route + [neighbor], new_length, new_weight, new_visited))
        
        if best_route:
            # Skip a route whose node list equals one that was already found
            if not any(r == best_route for r, _ in routes_with_weights):
                routes_with_weights.append((best_route, best_weight))
                used_start_edges.add(start_edge)
    
    return sorted(routes_with_weights, key=lambda x: -x[1]), working_G

In [26]:
def find_max_weight_routes_fast(G, edges, nodes, k=5, m=1000, M=5000, max_iterations=100, beam_width=3, neighbor_sample=5, max_overlap=5):
    """
    Beam search for up to k closed routes with a high total edge weight.

    Parameters:
    - G: not read by this function (kept so existing callers keep working)
    - edges: dataframe indexed by (u, v, key); the 'length' and 'weight'
      values of each row are used ('length' defaults to 0, 'weight' defaults
      to the length when missing)
    - nodes: not read by this function
    - k: maximum number of routes to return
    - m: minimum total length of an accepted route
    - M: maximum total length of an accepted route
    - max_iterations: not read by this function
    - beam_width: number of partial routes kept after every expansion step
    - neighbor_sample: number of heaviest neighbours tried per partial route
    - max_overlap: largest number of directed edges a new route may share
      with any single earlier route. Both directions of a street are
      recorded, so one shared street counts as two.

    Returns:
    - (routes, working_G): routes is a list of (node_list, total_weight)
      tuples sorted by total_weight in descending order, where node_list
      starts and ends at the same node; working_G is the directed graph the
      search ran on
    """
    # Build a directed graph holding every edge in both directions with the
    # same attributes. Parallel edges between two nodes overwrite each other.
    working_G = nx.DiGraph()
    for (u, v, key), row in edges.iterrows():
        length = row.get('length', 0)
        weight = row.get('weight', length)
        working_G.add_edge(u, v, length=length, weight=weight, key=key)
        working_G.add_edge(v, u, length=length, weight=weight, key=key)

    routes_with_weights = []
    used_start_edges = set()
    all_used_edges = []  # One set of directed edges per accepted route

    # All directed edges, heaviest first; each one is tried as a route start
    all_edges = sorted(working_G.edges(), key=lambda e: -working_G.edges[e]['weight'])

    for start_edge in all_edges:
        if len(routes_with_weights) >= k:
            break
        if start_edge in used_start_edges:
            continue

        start_node, next_node = start_edge
        best_route, best_weight, best_edges = None, 0, None

        # Shortest way home from every node, computed once per start edge
        # instead of once per beam state. Both directions of each edge carry
        # the same length, so the distance from start_node equals the distance
        # back to it. Nodes farther away than M are left out of the result.
        dist_to_start = nx.single_source_dijkstra_path_length(
            working_G, start_node, cutoff=M, weight='length'
        )

        # A beam state is (current node, route so far, length so far,
        # weight so far, set of directed edges used in either direction)
        beam = [(
            next_node,
            [start_node, next_node],
            working_G.edges[start_edge]['length'],
            working_G.edges[start_edge]['weight'],
            {start_edge, (next_node, start_node)}
        )]

        while beam:
            new_beam = []

            for node, route, length, weight, visited in beam:
                # Can the route be closed by stepping straight back to the start?
                return_edge = (node, start_node)
                if working_G.has_edge(node, start_node) and return_edge not in visited:
                    total_length = length + working_G.edges[return_edge]['length']
                    total_weight = weight + working_G.edges[return_edge]['weight']

                    if m <= total_length <= M and total_weight > best_weight:
                        candidate_edges = visited.union({return_edge, (start_node, node)})

                        # Reject candidates that share too many edges with
                        # any route that was already accepted
                        valid = all(
                            len(candidate_edges.intersection(used_edges)) <= max_overlap
                            for used_edges in all_used_edges
                        )
                        if valid:
                            best_route = route + [start_node]
                            best_weight = total_weight
                            best_edges = candidate_edges

                # Give up on this state when even the shortest way back to
                # the start would push the route over the maximum length M
                if length + dist_to_start.get(node, float('inf')) > M:
                    continue

                # Only the heaviest neighbor_sample neighbours are expanded
                neighbors = sorted(
                    working_G.neighbors(node),
                    key=lambda n: -working_G.edges[node, n]['weight']
                )[:neighbor_sample]

                for neighbor in neighbors:
                    edge = (node, neighbor)
                    reverse_edge = (neighbor, node)
                    if edge in visited or reverse_edge in visited:
                        continue
                    edge_data = working_G.edges[edge]
                    new_length = length + edge_data['length']
                    if new_length > M:
                        continue
                    new_visited = visited.copy()
                    new_visited.add(edge)
                    new_visited.add(reverse_edge)
                    new_beam.append((
                        neighbor,
                        route + [neighbor],
                        new_length,
                        weight + edge_data['weight'],
                        new_visited
                    ))

            # Keep only the beam_width heaviest partial routes
            beam = sorted(new_beam, key=lambda x: -x[3])[:beam_width]

        if best_route:
            routes_with_weights.append((best_route, best_weight))
            all_used_edges.append(best_edges)
            # Block this street as a start in both directions
            used_start_edges.add(start_edge)
            used_start_edges.add((start_edge[1], start_edge[0]))

    return sorted(routes_with_weights, key=lambda x: -x[1]), working_G

### Find k closest streets to a coordinate

In [27]:
def get_k_closest_streets(G, coord, k=5):
    """
    Get the k closest street segments to a given (lon, lat) coordinate.
    
    Parameters:
    - G: street network graph (from osmnx)
    - coord: tuple of (longitude, latitude) in EPSG:4326
    - k: number of closest street segments to return

    Returns:
    - GeoDataFrame with the 'osmid', 'geometry' and 'distance' (meters)
      columns of the k closest edges. The geometry is in the projected CRS
      that was used to measure the distances.
    """
    # Convert graph edges to GeoDataFrame
    edges = ox.graph_to_gdfs(G, nodes=False, edges=True)

    # The query point always arrives as lon/lat
    point = gpd.GeoSeries([Point(coord)], crs="EPSG:4326")

    # Measure in a local UTM zone when the edges are in degrees. Web Mercator
    # (EPSG:3857) is not used because it stretches distances by 1/cos(latitude),
    # which is roughly 1.6x at London's latitude.
    if edges.crs.is_geographic:
        edges = edges.to_crs(edges.estimate_utm_crs())

    # Bring the point into the same CRS as the edges, also when the edges
    # were already projected
    point_proj = point.to_crs(edges.crs).iloc[0]

    # Compute distance from point to each street (edge)
    edges["distance"] = edges.geometry.distance(point_proj)

    # Get k closest
    closest_edges = edges.nsmallest(k, "distance")

    return closest_edges[["osmid", "geometry", "distance"]]

In [37]:
# An edge's osmid can be a list of ids instead of a single id
def explode_osmid(df):
    """
    Give every osmid its own row.

    When at least one 'osmid' value is a list, the frame is exploded on that
    column (one row per id, index values repeated). Otherwise the very same
    frame object is handed back untouched.
    """
    has_list_osmid = any(isinstance(value, list) for value in df["osmid"])
    if has_list_osmid:
        return df.explode("osmid")
    return df

def top_close_streets_gdf(G, coordinate, distance=500, k=1000):
    """
    Streets closer than `distance` to `coordinate`.

    Looks at the k closest edges (use a large k so nothing within reach is
    cut off), gives every osmid its own row, keeps the nearest row per osmid,
    then the first row per distinct distance value, and finally only the rows
    whose distance is below `distance`.
    """
    candidates = explode_osmid(get_k_closest_streets(G, coordinate, k))

    # Nearest row for every osmid
    nearest_per_osmid = candidates.sort_values("distance").drop_duplicates("osmid", keep="first")

    # One row for every distinct distance value
    one_per_distance = nearest_per_osmid.sort_values("distance").drop_duplicates("distance", keep="first")

    within_reach = one_per_distance['distance'] < distance
    return one_per_distance[within_reach]

### Show coordinate and closest streets on a map

In [29]:
def visualize_streets_and_point(coord, street_gdf):
    """
    Draw a query point and a set of street segments on a Folium map.

    Parameters:
    - coord: tuple (lon, lat)
    - street_gdf: GeoDataFrame with 'geometry' and 'osmid' columns; it is
      converted to lat/lon (EPSG:4326) when it is in another CRS

    Returns:
    - folium.Map centered on the query point
    """
    # Folium works in lat/lon
    if street_gdf.crs != "EPSG:4326":
        street_gdf = street_gdf.to_crs(epsg=4326)

    # Folium expects (lat, lon) while coord is (lon, lat)
    query_location = (coord[1], coord[0])
    m = folium.Map(location=query_location, zoom_start=16)

    # Red marker on the query point
    query_marker = folium.Marker(
        location=query_location,
        popup="Query Point",
        icon=folium.Icon(color='red')
    )
    query_marker.add_to(m)

    # One blue polyline per street segment
    for _, row in street_gdf.iterrows():
        lat_lon_path = [(lat, lon) for lon, lat in row.geometry.coords]
        street_line = folium.PolyLine(
            locations=lat_lon_path,
            tooltip=f"osmid: {row.osmid}",
            color="blue",
            weight=5,
            opacity=0.7
        )
        street_line.add_to(m)

    return m

### Burglary Count per Unique Coordinate
At an MSOA level

In [30]:
# Keep only the burglaries that received an MSOA name in the lookup merge;
# everything else is discarded here (Discuss with the group)
burglar_df = burglar_df[burglar_df['MSOA name'].notna()]

In [31]:
def coords_per_msoa(msoa_input):
    """
    Count the burglaries at every unique coordinate of one MSOA.

    Parameters:
    - msoa_input: MSOA name to look up in burglar_df

    Returns:
    - dict mapping each (longitude, latitude) tuple to its burglary count,
      ordered from the highest to the lowest count. Empty when the MSOA has
      no located burglaries.
    """
    in_msoa = burglar_df["MSOA name"] == msoa_input

    # 'Coordinate' holds (longitude, latitude) tuples, so a record without a
    # location shows up as a tuple of NaNs rather than as a missing or empty
    # value. Check the two source columns to leave those records out.
    has_location = (
        burglar_df["Coordinate"].notna()
        & burglar_df["Longitude"].notna()
        & burglar_df["Latitude"].notna()
    )
    located_df = burglar_df[in_msoa & has_location]

    # Count crimes by MSOA and coordinate, most frequent first
    location_counts = (
        located_df.groupby(["MSOA code", "Coordinate"])
        .size()
        .reset_index(name="Crime Count")
        .sort_values(by=["MSOA code", "Crime Count"], ascending=[True, False])
    )

    return dict(zip(location_counts["Coordinate"], location_counts["Crime Count"]))

### Add weights to an MSOA's street network
For each coordinate:  
    &nbsp;&nbsp;&nbsp;&nbsp;Find all streets within a given distance  
    &nbsp;&nbsp;&nbsp;&nbsp;For each close street:  
        &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Add coord_count[coordinate] * (1 - (current_distance / 500)) to its current weight, where 500 is the search distance in meters  

coord_count[coordinate] is the burglary count for that unique coordinate  
current_distance is the distance of the edge to the unique coordinate  

**Note: ignore the PerformanceWarnings.**

In [32]:
# Ensure the LSOA code column in codes_df matches the casing of lsoa_gdf
codes_df['LSOA code'] = codes_df['LSOA code'].str.upper()

# The lookup may hold several rows per LSOA code (the MSOA/LSOA table built
# earlier is de-duplicated for that reason). Reduce it to one row per LSOA
# first, otherwise every boundary polygon is repeated once per lookup row.
lsoa_msoa_names = codes_df[['LSOA code', 'MSOA name']].drop_duplicates(subset='LSOA code', keep='first')

# Merge MSOA names into the LSOA geometries
merged = lsoa_gdf.merge(lsoa_msoa_names, on='LSOA code', validate='many_to_one')

In [33]:
# Function that gets all edges from a MSOA and assigns them weights
warnings.filterwarnings("ignore")

def edges_weights_msoa(msoa_input, distance=500):
    """
    Build the drivable street graph of one MSOA and weight its edges by
    nearby burglaries.

    For every unique burglary coordinate in the MSOA, each street closer than
    `distance` receives count * (1 - street_distance / distance) on top of
    its current weight, where count is the number of burglaries at that
    coordinate.

    Parameters:
    - msoa_input: MSOA name, looked up in the 'MSOA name' column of `merged`
    - distance: search radius around each burglary coordinate, also used to
      scale the weights (default 500)

    Returns:
    - (G_final, nodes_final, edges_final, subset, simplified_polygon):
      the weighted graph, its node and edge GeoDataFrames, the LSOA rows of
      the MSOA, and the simplified boundary polygon used for the download

    Raises:
    - ValueError: when `merged` holds no LSOA for msoa_input
    """
    ### Creating the MSOA street graph

    # Filter by the MSOA name input
    subset = merged[merged['MSOA name'] == msoa_input]
    if subset.empty:
        raise ValueError(f"No LSOAs found for MSOA '{msoa_input}'")

    # Combine all LSOA geometries into one polygon and simplify its outline
    combined_polygon = subset.unary_union
    simplified_polygon = combined_polygon.simplify(0.001)

    # Get the street network within that area
    G = ox.graph_from_polygon(simplified_polygon, network_type="drive")

    ### Assigning the weights

    # One row per (edge, osmid) pair. The weight column starts as a float
    # because fractional values are added to it below.
    G_copy = explode_osmid(ox.graph_to_gdfs(G, nodes=False, edges=True))
    G_copy['weight'] = 0.0

    coord_count = coords_per_msoa(msoa_input)

    for coord, burglary_count in coord_count.items():
        close_streets = top_close_streets_gdf(G, coord, distance=distance, k=1000)

        for _, street in close_streets.iterrows():
            # Streets right next to the coordinate get (almost) the full
            # count, streets at the edge of the radius get (almost) nothing
            added_weight = burglary_count * (1 - (street['distance'] / distance))

            current_streets = G_copy.loc[G_copy['osmid'] == street['osmid']].copy()
            for idx, edge in current_streets.iterrows():
                G_copy.loc[idx, 'weight'] = edge['weight'] + added_weight

    if not G_copy.index.names == ['u', 'v', 'key']:
        G_copy = G_copy.set_index(['u', 'v', 'key'])

    # Take the node coordinates from the end points of the edge geometries:
    # the first point belongs to the u node, the last point to the v node
    coords = {}
    for _, row in G_copy.iterrows():
        line = row['geometry']
        coords[row.name[0]] = line.coords[0]
        coords[row.name[1]] = line.coords[-1]

    # Create node GeoDataFrame
    nodes_data = {
        'x': [coords[n][0] for n in coords],
        'y': [coords[n][1] for n in coords],
        'geometry': [Point(coords[n]) for n in coords],
    }
    nodes_gdf = gpd.GeoDataFrame(nodes_data, index=list(coords.keys()), crs=G_copy.crs)

    # Rebuild the graph now that the weights are assigned
    G_final = ox.utils_graph.graph_from_gdfs(nodes_gdf, G_copy)

    # Get nodes and edges from the rebuilt graph
    nodes_final, edges_final = ox.graph_to_gdfs(G_final)

    return G_final, nodes_final, edges_final, subset, simplified_polygon

### Visualize Graph with Routes

In [34]:
warnings.filterwarnings("ignore")

def routes_visualize(msoa_input, k=5, m=1000, M=5000, max_iterations=100, beam_width=3, neighbor_sample=5, max_overlap=5):
    """
    Compute the weighted street graph of an MSOA, search it for routes and
    draw everything on a Folium map.

    Parameters:
    - msoa_input: MSOA name
    - k, m, M, max_iterations, beam_width, neighbor_sample, max_overlap:
      handed on unchanged to find_max_weight_routes_fast

    Returns:
    - folium map showing the street graph, the LSOA boundaries (blue) and one
      layer per route (red), plus a layer control
    """
    # Weighted graph and boundary data for the MSOA
    G_final, nodes_final, edges_final, subset, simplified_polygon = edges_weights_msoa(msoa_input)

    # Route search with the parameters given by the caller
    search_settings = dict(
        k=k,
        m=m,
        M=M,
        max_iterations=max_iterations,
        beam_width=beam_width,
        neighbor_sample=neighbor_sample,
        max_overlap=max_overlap,
    )
    routes, working_G = find_max_weight_routes_fast(G_final, edges_final, nodes_final, **search_settings)

    def boundary_style(feature):
        return {
            "fillColor": "none",
            "color": "blue",
            "weight": 2,
        }

    def route_style(feature):
        return {
            "color": "red",
            "weight": 5,
            "opacity": 1,
        }

    def route_segments(route):
        """Yield one geometry per consecutive node pair that has a key-0 edge."""
        for u, v in zip(route, route[1:]):
            # The search graph stores both directions; the street graph may
            # only have one of them
            if G_final.has_edge(u, v, 0):
                data = G_final.edges[u, v, 0]
            elif G_final.has_edge(v, u, 0):
                data = G_final.edges[v, u, 0]
            else:
                continue

            if 'geometry' in data:
                yield data['geometry']
            else:
                # No stored geometry: connect the two nodes with a straight line
                u_pt = Point(G_final.nodes[u]['x'], G_final.nodes[u]['y'])
                v_pt = Point(G_final.nodes[v]['x'], G_final.nodes[v]['y'])
                yield LineString([u_pt, v_pt])

    # Base map with the street graph
    map_final = ox.folium.plot_graph_folium(G_final, tiles="cartodbpositron")

    # LSOA boundary overlay
    boundary_layer = folium.GeoJson(
        subset,
        name=f"{msoa_input} LSOAs",
        style_function=boundary_style,
    )
    boundary_layer.add_to(map_final)

    # One red layer per route, numbered from 1
    for i, (route, _) in enumerate(routes, start=1):
        route_geoms = list(route_segments(route))
        if not route_geoms:
            continue
        gdf_route = gpd.GeoDataFrame(geometry=route_geoms, crs="EPSG:4326")
        route_layer = folium.GeoJson(
            gdf_route,
            name=f"Route {i}",
            style_function=route_style,
        )
        route_layer.add_to(map_final)

    folium.LayerControl().add_to(map_final)
    return map_final

## User Friendly Menu Section

In [35]:
# Alphabetical list of every distinct MSOA name in the lookup (missing
# names left out), ready to be used as dropdown options
all_msoas = sorted(set(codes_df['MSOA name'].dropna()))

In [36]:
warnings.filterwarnings("ignore")

# Step 1: Dropdowns for borough and MSOA

# Loading label
loading_label = widgets.Label(value="")  # Initially blank

# Borough choices
borough_dropdown = widgets.Dropdown(options=boroughs, description='Borough:')

# MSOA choices (filled in once a borough is selected)
msoa_dropdown = widgets.Dropdown(description='MSOA:')
output_map = widgets.Output()

# Widgets for the route parameters. The default label width cuts off long
# descriptions, so the labels are allowed to take the width they need.
param_style = {'description_width': 'initial'}
k_widget = widgets.IntSlider(value=5, min=1, max=20, step=1, description='Number of routes (k):', style=param_style)
m_widget = widgets.IntText(value=1000, description='Minimum route distance (m):', style=param_style)
M_widget = widgets.IntText(value=5000, description='Maximum route distance (M):', style=param_style)
max_iter_widget = widgets.IntText(value=1000, description='Maximum iterations (max_iter):', style=param_style)
beam_width_widget = widgets.IntText(value=3, description='Beam width:', style=param_style)
neighbor_sample_widget = widgets.IntText(value=5, description='Neighbor sample:', style=param_style)
max_overlap_widget = widgets.IntText(value=5, description='Max overlap:', style=param_style)

# Step 2: Refresh the MSOA dropdown whenever another borough is picked
def update_msoa_options(*args):
    """
    Fill the MSOA dropdown with the sorted, distinct MSOA names of the
    currently selected borough. Arguments passed by the widget observer
    are ignored.
    """
    in_selected_borough = codes_df['ladnm'] == borough_dropdown.value
    borough_msoa_names = codes_df[in_selected_borough]['MSOA name'].unique()
    msoa_dropdown.options = sorted(borough_msoa_names)

borough_dropdown.observe(update_msoa_options, names='value')

# Step 3: Build and show the route map for the selected MSOA
def show_map(button):
    """
    Click handler of the run button.

    Shows a loading label in the output area, computes the route map for the
    selected MSOA with the current widget values and displays it. Any
    exception is printed in the output area instead of being raised. Nothing
    is computed when no MSOA is selected.
    """
    with output_map:
        clear_output(wait=True)
        loading_label.value = "⏳ Loading map..."
        display(loading_label)
        try:
            selected_msoa = msoa_dropdown.value
            if not selected_msoa:
                return

            print(f"Parameters: k={k_widget.value}, m={m_widget.value}, M={M_widget.value}, "
                  f"max_iterations={max_iter_widget.value}, beam_width={beam_width_widget.value}, "
                  f"neighbor_sample={neighbor_sample_widget.value}, max_overlap={max_overlap_widget.value}")

            route_settings = {
                "k": k_widget.value,
                "m": m_widget.value,
                "M": M_widget.value,
                "max_iterations": max_iter_widget.value,
                "beam_width": beam_width_widget.value,
                "neighbor_sample": neighbor_sample_widget.value,
                "max_overlap": max_overlap_widget.value,
            }
            map_obj = routes_visualize(selected_msoa, **route_settings)

            # Replace the loading label and parameter line with the map
            clear_output(wait=True)
            display(map_obj)
        except Exception as e:
            clear_output(wait=True)
            print(f"Error: {e}")
        finally:
            # Always blank the loading label again, also on early exit
            loading_label.value = ""

# Step 4: Button that starts the visualization
run_button = widgets.Button(description="Show Routes", button_style='success')
run_button.on_click(show_map)

# Fill the MSOA dropdown for the borough that is selected at start-up
update_msoa_options()

# Step 5: Stack all controls vertically, with the map output at the bottom
controls = [
    borough_dropdown,
    msoa_dropdown,
    k_widget,
    m_widget,
    M_widget,
    max_iter_widget,
    beam_width_widget,
    neighbor_sample_widget,
    max_overlap_widget,
    run_button,
    output_map,
]
ui = widgets.VBox(controls)

display(ui)

VBox(children=(Dropdown(description='Borough:', options=('Westminster', 'Kensington and Chelsea', 'Hammersmith…