In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import osmnx as ox
import geopandas as gpd
from shapely.geometry import Point, box
from pyproj import Transformer
from libpysal.weights import KNN
from esda.moran import Moran
import spaghetti as spgh
import glob

In [None]:
# Task 2 Part A: Road Network Analysis in Leeds with Accident Data

# Load every yearly accident file that matches the pattern and stack them into one table.
# glob returns paths in a filesystem-dependent order, so sort them to keep the
# concatenated row order reproducible between runs and machines.
accident_file_pattern = "data/Accidents_*_Leeds.csv"
accident_files = sorted(glob.glob(accident_file_pattern))
if not accident_files:
    # Fail with an actionable message instead of pandas' "No objects to concatenate".
    raise FileNotFoundError(
        f"No accident files match '{accident_file_pattern}'; check the working directory and the data folder."
    )
accident_data_list = [pd.read_csv(file, encoding='ISO-8859-1') for file in accident_files]
accident_data = pd.concat(accident_data_list, ignore_index=True)

# Turn the accident table into a GeoDataFrame: one point per row, built from the
# easting/northing columns and tagged as British National Grid (EPSG:27700).
accident_points = gpd.points_from_xy(
    x=accident_data["Grid Ref: Easting"],
    y=accident_data["Grid Ref: Northing"],
)
accident_gdf = gpd.GeoDataFrame(accident_data, geometry=accident_points, crs="EPSG:27700")
# Leeds city center as an (x, y) pair in British National Grid coordinates
leeds_center = (430000, 434000)

# Study extent and cell size used to lay a regular square grid over Leeds
xmin, ymin, xmax, ymax = 425000, 430000, 435000, 440000
grid_size = 1000  # 1 km
cols = int((xmax - xmin) / grid_size)
rows = int((ymax - ymin) / grid_size)

# One square polygon per cell, generated column by column (x outer, y inner)
grid_cells = [
    box(
        xmin + i * grid_size,
        ymin + j * grid_size,
        xmin + (i + 1) * grid_size,
        ymin + (j + 1) * grid_size,
    )
    for i in range(cols)
    for j in range(rows)
]

grid = gpd.GeoDataFrame(geometry=grid_cells, crs="EPSG:27700")

In [None]:
# Count the accident points that lie within each grid cell (one vectorised
# point-in-polygon test per cell, in grid row order).
grid["accident_count"] = [
    accident_gdf.within(cell_polygon).sum() for cell_polygon in grid.geometry
]

In [None]:
# Pick the cell with the highest accident count; ties are broken by the
# distance from the cell centroid to the city center (nearest first).
city_center_point = Point(leeds_center)
grid["centroid_distance"] = grid.geometry.centroid.distance(city_center_point)
max_accident_count = grid["accident_count"].max()
print(f"Maximum accident count in a single grid cell: {max_accident_count}")
grid_sorted = grid.sort_values(by=["accident_count", "centroid_distance"], ascending=[False, True])
# After the descending sort the first row already holds the maximum count.
selected_grid = grid_sorted.iloc[0]

In [None]:
# Keep only the accidents that lie within the selected cell
selected_cell_polygon = selected_grid.geometry
in_selected_cell = accident_gdf.within(selected_cell_polygon)
selected_accidents = accident_gdf[in_selected_cell]
# (x, y) coordinates of the selected cell's centroid
selected_center = selected_cell_polygon.centroid.coords[0]

In [None]:
# Convert selected center to WGS84 for OSMnx
# always_xy=True fixes the axis order to (x, y) on both sides: the transformer
# takes (easting, northing) and returns (longitude, latitude).
transformer_to_wgs84 = Transformer.from_crs("EPSG:27700", "EPSG:4326", always_xy=True)
# selected_center is (x, y) = (easting, northing), so pass it in that order.
center_lon, center_lat = transformer_to_wgs84.transform(selected_center[0], selected_center[1])
# OSMnx point queries expect (lat, lon), so store the pair in that order.
selected_center_wgs84 = (center_lat, center_lon)
print(f"Selected center (WGS84): {selected_center_wgs84}")

In [None]:
# Download road network using WGS84 coordinates
# Centre the download on the selected high-accident cell rather than on a
# hard-coded point, so the network matches the cell that is reported and plotted.
# The transformer was built with always_xy=True: (easting, northing) in, (lon, lat) out.
center_lon, center_lat = transformer_to_wgs84.transform(selected_center[0], selected_center[1])
selected_center_wgs84 = (center_lat, center_lon)  # ox.graph_from_point expects (lat, lon)
network_type = "drive"
G = ox.graph_from_point(selected_center_wgs84, dist=1000, network_type=network_type, retain_all=True, simplify=True)

In [None]:
# Convert road network back to British National Grid (EPSG:27700)
# Point-level lon/lat -> BNG transformer, kept defined for converting individual coordinates.
transformer_to_bng = Transformer.from_crs("EPSG:4326", "EPSG:27700", always_xy=True)
# Reproject the whole graph with OSMnx instead of rewriting node x/y by hand:
# project_graph converts node coordinates and edge geometries together and
# records the new CRS in G.graph["crs"], so the coordinates and the CRS
# metadata stay in agreement for any later CRS check or reprojection.
G = ox.project_graph(G, to_crs='epsg:27700')

In [None]:
# Build a simple undirected graph from G. A plain nx.Graph holds at most one
# edge per node pair, so edge direction and any parallel edges are merged.
G_undirected = nx.Graph(incoming_graph_data=G)

In [None]:
# Basic size statistics of the undirected road graph
num_nodes = G_undirected.number_of_nodes()
num_edges = G_undirected.number_of_edges()
# "density" here is the ratio of edges to nodes
density = num_edges / num_nodes
# Mean of the 'length' attribute over all edges
edge_lengths = [attrs['length'] for _, _, attrs in G_undirected.edges(data=True)]
avg_street_length = np.mean(edge_lengths)

In [None]:
# Intersections are nodes with degree greater than 2; the "density" is their
# share of all nodes in the graph.
intersection_nodes = [
    node_id for node_id, node_degree in G_undirected.degree() if node_degree > 2
]
intersection_density = len(intersection_nodes) / num_nodes

In [None]:
# Network diameter: the longest shortest path, counted in edges (hops),
# measured inside the largest connected component only.
components = nx.connected_components(G_undirected)
largest_cc = max(components, key=len)
G_largest = G_undirected.subgraph(largest_cc)
# Hop counts from every node to every other node of the component
sp_length = {source: hops for source, hops in nx.all_pairs_shortest_path_length(G_largest)}
# The farthest node from each source gives that source's eccentricity;
# the diameter is the largest of those.
eccentricities = (max(hops.values()) for hops in sp_length.values())
network_diameter = max(eccentricities)

In [None]:
# Circuitry: number of graph edges divided by the number of edges in its
# minimum spanning tree.
mst = nx.minimum_spanning_tree(G_undirected)
mst_edge_count = mst.number_of_edges()
circuitry = num_edges / mst_edge_count

In [None]:
# Planarity test: check_planarity returns a (flag, certificate) pair and only
# the boolean flag is needed here.
planarity_result = nx.check_planarity(G_undirected)
is_planar = planarity_result[0]

In [None]:
# Summary report: one line per metric, printed in a fixed order
report_lines = [
    "\n--- Leeds High Accident Area Road Network Analysis ---",
    f"Selected area center coordinates: {selected_center}",
    f"Total accidents: {selected_grid['accident_count']}",
    f"Number of road network nodes: {num_nodes}",
    f"Number of road network edges: {num_edges}",
    f"Network density: {density:.4f}",
    f"Average street length: {avg_street_length:.2f} meters",
    f"Intersection density: {intersection_density:.4f}",
    f"Network diameter: {network_diameter}",
    f"Road network circuitry: {circuitry:.4f}",
    f"Is the road network planar? {is_planar}",
]
for report_line in report_lines:
    print(report_line)

In [None]:
# Draw the selected grid cell, its accidents and the road network on one axis
fig, ax = plt.subplots(figsize=(10, 8))

# Outline of the selected grid cell
selected_grid_gdf = gpd.GeoDataFrame(geometry=[selected_grid.geometry], crs="EPSG:27700")
selected_grid_gdf.boundary.plot(ax=ax, color="red", linewidth=2)

# Accident points inside the selected cell
selected_accidents.plot(ax=ax, color="blue", markersize=5, alpha=0.7, label="Accidents")

# Road network: reproject first when the CRS recorded on the graph is not 'epsg:27700'
graph_crs = G.graph.get('crs')
if graph_crs != 'epsg:27700':
    G = ox.project_graph(G, to_crs='epsg:27700')

# show=False / close=False keep the figure open so the legend and title can still be added
graph_plot_options = dict(
    node_size=10,
    edge_linewidth=0.5,
    bgcolor='white',
    show=False,
    close=False,
)
ox.plot_graph(G, ax=ax, **graph_plot_options)

# Legend, title and layout
ax.legend()
ax.set_title("Leeds High Accident Area & Road Network")
# Equal aspect ratio so distances look the same along both axes
ax.set_aspect('equal')
fig.tight_layout()
plt.show()
