In [16]:
import pandas as pd
import numpy as np
import geopandas as gpd
import json
from shapely.geometry import Point
import numpy as np


# Cities

In [36]:
def filter_cities(df, min_distance_km):
    # Define an appropriate projection
    df['geometry'] = gpd.points_from_xy(df.lng, df.lat)
    df = df.set_geometry('geometry')
    df = df.set_crs(epsg=4326)  # WGS 84
    df = df.to_crs(epsg=32633)  # Change to a suitable UTM zone for your data

    # Sort cities by population in descending order
    #df = df.sort_values(by='population', ascending=False).reset_index(drop=True)

    # Buffer distance in meters
    buffer_distance = min_distance_km * 1000

    # Prepare to keep track of non-overlapping geometries
    selected_geometries = []

    # Initialize an empty GeoDataFrame to store cities that are kept
    final_cities = gpd.GeoDataFrame(columns=df.columns, crs=df.crs)

    # Iterate through sorted cities and select cities without overlap
    for index, city in df.iterrows():
        print(f'Processing {city.city} with population {city.population}...')
        city_buffer = city.geometry.buffer(buffer_distance)
        city_union = gpd.GeoSeries(selected_geometries).unary_union if selected_geometries else None
        
        if city_union is None or city_buffer.disjoint(city_union):
            final_cities = pd.concat([final_cities, gpd.GeoDataFrame([city])], ignore_index=True)
            selected_geometries.append(city_buffer)

    # Return to original projection if needed
    final_cities = final_cities.to_crs(epsg=4326)

    return final_cities


In [55]:
gdf = gpd.read_file('geoData/raw/cities_centroids.csv')
gdf = gdf[['city', 'lat', 'lng', 'population']]
gdf = gdf.head(70)
# add in a few missing cities

missing = pd.DataFrame([{
    'city' : "Edinburgh",
    'lat' : 55.9533,
    'lng' : -3.1883,
    'population' : "524930",
}])

gdf = pd.concat([gdf, missing], ignore_index=True)

non_gb = ["Belfast"]
gdf['gb'] = np.where(gdf['city'].isin(non_gb), 0, 1)

gdf = filter_cities(gdf, 35)

# drop Cambridge, sorry
gdf = gdf[gdf['city'] != "Cambridge"]

gdf['rank'] = gdf['population'].rank(ascending=False)


gdf.to_csv('geoData/cleaned/uk_cities.csv', index=False)

gdf

Processing London with population 11262000...
Processing Birmingham with population 2919600...
Processing Portsmouth with population 855679...
Processing Southampton with population 855569...
Processing Nottingham with population 729977...
Processing Bristol with population 707412...
Processing Manchester with population 547627...
Processing Liverpool with population 513441...
Processing Leicester with population 508916...
Processing Worthing with population 474485...
Processing Coventry with population 362690...
Processing Belfast with population 345006...
Processing Bradford with population 293277...
Processing Derby with population 270468...
Processing Plymouth with population 267918...
Processing Westminster with population 255324...
Processing Wolverhampton with population 250970...
Processing Northampton with population 245899...
Processing Norwich with population 213166...
Processing Luton with population 213052...
Processing Solihull with population 206674...
Processing Islingt



Unnamed: 0,city,lat,lng,population,gb,geometry,rank
0,London,51.5072,-0.1275,11262000,1,POINT (-0.12750 51.50720),15.0
1,Birmingham,52.48,-1.9025,2919600,1,POINT (-1.90250 52.48000),8.0
2,Portsmouth,50.8058,-1.0872,855679,1,POINT (-1.08720 50.80580),2.0
3,Nottingham,52.9561,-1.1512,729977,1,POINT (-1.15120 52.95610),3.0
4,Bristol,51.4536,-2.5975,707412,1,POINT (-2.59750 51.45360),4.0
5,Manchester,53.479,-2.2452,547627,1,POINT (-2.24520 53.47900),5.0
6,Belfast,54.5964,-5.93,345006,0,POINT (-5.93000 54.59640),7.0
7,Plymouth,50.3714,-4.1422,267918,1,POINT (-4.14220 50.37140),9.0
8,Northampton,52.2304,-0.8938,245899,1,POINT (-0.89380 52.23040),10.0
9,Norwich,52.6286,1.2928,213166,1,POINT (1.29280 52.62860),11.0


In [48]:
filtered_cities = filter_cities(gdf, min_distance_km=30)

Processing London with population 11262000...
Processing Birmingham with population 2919600...
Processing Portsmouth with population 855679...
Processing Southampton with population 855569...
Processing Nottingham with population 729977...
Processing Bristol with population 707412...
Processing Manchester with population 547627...
Processing Liverpool with population 513441...
Processing Leicester with population 508916...
Processing Worthing with population 474485...
Processing Coventry with population 362690...
Processing Belfast with population 345006...
Processing Bradford with population 293277...
Processing Derby with population 270468...
Processing Plymouth with population 267918...
Processing Westminster with population 255324...
Processing Wolverhampton with population 250970...
Processing Northampton with population 245899...
Processing Norwich with population 213166...
Processing Luton with population 213052...
Processing Solihull with population 206674...
Processing Islingt

  final_cities = pd.concat([final_cities, gpd.GeoDataFrame([city])], ignore_index=True)


In [49]:
gdf

Unnamed: 0,city,lat,lng,population,gb,rank,geometry
0,London,51.5072,-0.1275,11262000,1,63.0,POINT (-0.12750 51.50720)
1,Birmingham,52.4800,-1.9025,2919600,1,29.0,POINT (-1.90250 52.48000)
2,Portsmouth,50.8058,-1.0872,855679,1,15.0,POINT (-1.08720 50.80580)
3,Southampton,50.9025,-1.4042,855569,1,16.0,POINT (-1.40420 50.90250)
4,Nottingham,52.9561,-1.1512,729977,1,19.0,POINT (-1.15120 52.95610)
...,...,...,...,...,...,...,...
66,Fulham,51.4828,-0.1950,87161,1,13.0,POINT (-0.19500 51.48280)
67,Nuneaton,52.5230,-1.4680,86552,1,14.0,POINT (-1.46800 52.52300)
68,Ealing,51.5175,-0.2988,85014,1,17.0,POINT (-0.29880 51.51750)
69,Aylesbury,51.8168,-0.8124,83407,1,18.0,POINT (-0.81240 51.81680)


In [50]:
filtered_cities

Unnamed: 0,city,lat,lng,population,gb,rank,geometry
0,London,51.5072,-0.1275,11262000,1,63.0,POINT (-0.12750 51.50720)
1,Birmingham,52.48,-1.9025,2919600,1,29.0,POINT (-1.90250 52.48000)
2,Portsmouth,50.8058,-1.0872,855679,1,15.0,POINT (-1.08720 50.80580)
3,Nottingham,52.9561,-1.1512,729977,1,19.0,POINT (-1.15120 52.95610)
4,Bristol,51.4536,-2.5975,707412,1,20.0,POINT (-2.59750 51.45360)
5,Manchester,53.479,-2.2452,547627,1,21.0,POINT (-2.24520 53.47900)
6,Belfast,54.5964,-5.93,345006,0,27.0,POINT (-5.93000 54.59640)
7,Plymouth,50.3714,-4.1422,267918,1,31.0,POINT (-4.14220 50.37140)
8,Northampton,52.2304,-0.8938,245899,1,34.0,POINT (-0.89380 52.23040)
9,Norwich,52.6286,1.2928,213166,1,35.0,POINT (1.29280 52.62860)


# LADs

In [3]:
with open('geoData/raw/lad.geoJSON') as f:
    LADs = json.load(f)

LADs = LADs['features']
for LAD in LADs:
    # keep just the LAD21CD, LAD21NM, LAT, and LONG from the properties and rename to geo_name, geo_id, lat, and long
    LAD['properties'] = {
        'geo_name': LAD['properties']['LAD21NM'],
        'geo_id': LAD['properties']['LAD21CD'],
        'lat': LAD['properties']['LAT'],
        'long': LAD['properties']['LONG']
    }

with open('geoData/cleaned/lad.geoJSON', 'w') as f:
    json.dump(LADs, f)

In [6]:
LADs

[{'type': 'Feature',
  'geometry': {'type': 'Polygon',
   'coordinates': [[[-1.239631477999978, 54.723875089000046],
     [-1.180807560999938, 54.70240667100006],
     [-1.198188326999968, 54.68543699600008],
     [-1.180545990999974, 54.65991165600008],
     [-1.147540267999943, 54.64798683500004],
     [-1.157984666999937, 54.62948959600004],
     [-1.187314286999936, 54.63172272200006],
     [-1.212628622999944, 54.621726729000045],
     [-1.299014272999955, 54.62798188800008],
     [-1.380898315999957, 54.643917068000064],
     [-1.341375058999972, 54.65018898900007],
     [-1.346046617999946, 54.664470994000055],
     [-1.290477033999935, 54.71839258400007],
     [-1.270640929999956, 54.72702718800008],
     [-1.239631477999978, 54.723875089000046]]]},
  'properties': {'OBJECTID': 1,
   'LAD21CD': 'E06000001',
   'LAD21NM': 'Hartlepool',
   'BNG_E': 447160,
   'BNG_N': 531474,
   'LONG': -1.27018,
   'LAT': 54.67614,
   'SHAPE_Length': 0.8385582038104017,
   'SHAPE_Area': 0.013702