# Step 1 - Pre-process neighbourhood data
## Project: Growing Urban Bicycle Networks with an LTN twist

This notebook takes the outputs of the LTN-Detection project (https://github.com/Froguin99/LTN-Detection) and prepares them for later use within the bike network growth project.

Contact: Chris Larkin (c.larkin@ncl.ac.uk) 

Created: 2025-03-26  
Last modified: 2025-03-28

## Preliminaries

### Parameters

In [1]:
debug = False # If True, will produce plots and/or verbose output to double-check
%run -i "../parameters/parameters.py"

Loaded parameters.



### Setup

In [2]:
%run -i path.py
%run -i setup.py

%load_ext watermark
#%watermark -n -v -m -g -iv

Loaded PATH.

Setup finished.



### Functions

In [3]:
%run -i functions.py

Loaded functions.



## Read in neighbourhoods and extract "ltns" from here

TODO:
- Store the LTNs in a public location so the data does not have to be located manually. Tyne & Wear is currently hosted on GitHub as GeoPackages, which is not very flexible.
- Set up fuzzy matching between the place names in the file names and growbike's place names (e.g. Newcastle Upon Tyne --> newcastle).

### Get LTNs and Normal Neighbourhoods

In [4]:
## Old manual way of getting locations
# placename = "Gateshead"
# neighbourhoods = gpd.read_file(r'C:\Users\b8008458\OneDrive - Newcastle University\2022 to 2023\PhD\ltnDetection\LTN-Detection\data\scored_neighbourhoods\scored_neighbourhoods_{}.gpkg'.format(placename))

In [6]:
## Read in from Github
github_link = "https://raw.githubusercontent.com/Froguin99/LTN-Detection/main/data/Tyne%26Wear/"
raw_files = [
    "scored_neighbourhoods_Gateshead.gpkg",
    "scored_neighbourhoods_Newcastle Upon Tyne.gpkg",
    "scored_neighbourhoods_South Tyneside.gpkg",
    "scored_neighbourhoods_Sunderland.gpkg",
    "scored_neighbourhoods_North Tyneside.gpkg"] # just Tyne & Wear for now...

# Manual map: filename place → folder name. Tried fuzzy matching (lower down and commented out) but haven't got it quite right yet...
folder_map = {
    "Gateshead": "gateshead",
    "Newcastle Upon Tyne": "newcastle",
    "South Tyneside": "south_tyneside",
    "Sunderland": "sunderland",
    "North Tyneside": "north_tyneside"
}

# Columns that are coerced to numeric after loading (unparseable values become NaN).
columns_to_convert = [
    "rat_run_score", "mean_distance_diff_score",
    "filter_road_density_score", "overall_score", "cluster_label"
]

# Seconds to wait for GitHub before giving up, so a stalled connection cannot hang the notebook.
download_timeout = 60


def _download_scored_neighbourhoods(fname, download_path):
    """Download one scored-neighbourhoods GeoPackage and load it.

    Parameters
    ----------
    fname : str
        File name below ``github_link``.
    download_path : str
        Local path the raw download is written to (parent folders are created).

    Returns
    -------
    GeoDataFrame
        The downloaded layer with ``columns_to_convert`` coerced to numeric.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status. Without this check the
        error page would be written to disk as if it were a GeoPackage.
    """
    response = requests.get(github_link + fname, timeout=download_timeout)
    response.raise_for_status()

    os.makedirs(os.path.dirname(download_path), exist_ok=True)
    with open(download_path, "wb") as f:
        f.write(response.content)

    gdf = gpd.read_file(download_path)
    gdf[columns_to_convert] = gdf[columns_to_convert].apply(pd.to_numeric, errors="coerce")
    return gdf


def _save_geopackage(gdf, folder, out_name):
    """Write ``gdf`` to ``PATH["data"]/<folder>/<out_name>``, replacing any existing file.

    The layer is named after the file (``out_name`` without ".gpkg").
    Returns the path that was written.
    """
    output_path = os.path.join(PATH["data"], folder, out_name)
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    # Remove any previous file first so no stale layers survive in the GeoPackage.
    if os.path.exists(output_path):
        os.remove(output_path)
    gdf.to_file(output_path, layer=out_name.replace(".gpkg", ""), driver="GPKG", overwrite=True)
    print(f"Saved: {output_path}")
    return output_path


# Pass 1: download each file once and save just the LTN neighbourhoods, i.e. those
# whose overall_score is above ltn_plausiablity_score.
# The loaded data is kept in memory so pass 2 does not have to download it again.
neighbourhoods_by_folder = {}
for fname in raw_files:
    place = fname.replace("scored_neighbourhoods_", "").replace(".gpkg", "")
    folder = folder_map.get(place)
    if not folder:
        print(f"Skipping {place}: no folder mapping.")
        continue

    new_fname = f"scored_neighbourhoods_{folder}.gpkg"
    # The raw download lands on the final path and is then replaced by the filtered file.
    download_path = os.path.join(PATH["data"], folder, new_fname)
    gdf = _download_scored_neighbourhoods(fname, download_path)
    neighbourhoods_by_folder[folder] = gdf

    ltns = gdf[gdf["overall_score"] > ltn_plausiablity_score]
    _save_geopackage(ltns, folder, new_fname)


# Pass 2: save all neighbourhoods, regardless of how much traffic they have within them
for folder, gdf in neighbourhoods_by_folder.items():
    _save_geopackage(gdf, folder, f"neighbourhoods_{folder}.gpkg")

Saved: ../../bikenwgrowth_external/data/gateshead\scored_neighbourhoods_gateshead.gpkg
Saved: ../../bikenwgrowth_external/data/newcastle\scored_neighbourhoods_newcastle.gpkg
Saved: ../../bikenwgrowth_external/data/south_tyneside\scored_neighbourhoods_south_tyneside.gpkg
Saved: ../../bikenwgrowth_external/data/sunderland\scored_neighbourhoods_sunderland.gpkg
Saved: ../../bikenwgrowth_external/data/north_tyneside\scored_neighbourhoods_north_tyneside.gpkg
Saved: ../../bikenwgrowth_external/data/gateshead\neighbourhoods_gateshead.gpkg
Saved: ../../bikenwgrowth_external/data/newcastle\neighbourhoods_newcastle.gpkg
Saved: ../../bikenwgrowth_external/data/south_tyneside\neighbourhoods_south_tyneside.gpkg
Saved: ../../bikenwgrowth_external/data/sunderland\neighbourhoods_sunderland.gpkg
Saved: ../../bikenwgrowth_external/data/north_tyneside\neighbourhoods_north_tyneside.gpkg


In [None]:
# # fuzzy matching attempt
# from fuzzywuzzy import process


# columns_to_convert = ["rat_run_score", "mean_distance_diff_score", "filter_road_density_score", "overall_score", "cluster_label"]


# def place_from_filename(fname):
#     return fname.replace("scored_neighbourhoods_", "").replace(".gpkg", "").strip()

# # Function to fuzzy match the place to the folder names
# def get_matching_folder(place_name, folder_names):
#     match = process.extractOne(place_name, folder_names)
#     return match[0] if match[1] >= 80 else None  # threshold of 80% similarity

# # Get available folder names in PATH['data']
# available_folders = [folder for folder in os.listdir(PATH["data"]) if os.path.isdir(os.path.join(PATH["data"], folder))]

# # Process each file
# for fname in raw_files:
#     place_name = place_from_filename(fname)
#     matching_folder = get_matching_folder(place_name, available_folders)
    
#     if not matching_folder:
#         print(f"No match found for {place_name}. Skipping.")
#         continue

#     # Download the file
#     url = github_link + fname
#     local_tmp = fname  # Save temporarily with original name
#     with open(local_tmp, "wb") as f:
#         f.write(requests.get(url).content)

#     # Read and filter
#     gdf = gpd.read_file(local_tmp)
#     gdf[columns_to_convert] = gdf[columns_to_convert].apply(pd.to_numeric, errors="coerce")
#     ltns = gdf[gdf["overall_score"] > 50]

#     # Define the output path using the matched folder name
#     output_path = os.path.join(PATH["data"], matching_folder, fname)
#     os.makedirs(os.path.dirname(output_path), exist_ok=True)

#     # Save the file
#     ltns.to_file(output_path, driver="GPKG")
#     print(f"Saved: {output_path}")

In [None]:
# for placeid, placeinfo in tqdm(cities.items(), desc = "Cities"):
#     if placeinfo["nominatimstring"] != '':
#         location = ox.geocoder.geocode_to_gdf(placeinfo["nominatimstring"])
#         if location.geometry[0].geom_type == 'MultiPolygon':
#             location = location.explode(index_parts=False).reset_index(drop=True)
#         location = fill_holes(extract_relevant_polygon(placeid, shapely.geometry.shape(location['geometry'][0])))
#         if debug: # Draw location polygons and their holes
#             try:
#                 color = cm.rainbow(np.linspace(0,1,len(location)))
#                 for poly,c in zip(location, color):
#                     plt.plot(*poly.exterior.xy, c = c)
#                     for intr in poly.interiors:
#                         plt.plot(*intr.xy, c = "red")
#             except:
#                 plt.plot(*location.exterior.xy)
#             plt.show()
#     else:
#         # https://gis.stackexchange.com/questions/113799/how-to-read-a-shapefile-in-python
#         shp = fiona.open(PATH["data"] + placeid + "/" + placeid + ".shp")
#         first = next(iter(shp))
#         try:
#             location = Polygon(shapely.geometry.shape(first['geometry'])) # If shape file is given as linestring
#         except:
#             location = shapely.geometry.shape(first['geometry'])

#     Gs = {}
#     for parameterid, parameterinfo in tqdm(osmnxparameters.items(), desc = "Networks", leave = False):
#         for i in range(0,10): # retry
#             try:
#                 Gs[parameterid] = ox.graph_from_polygon(location, 
#                                        network_type = parameterinfo['network_type'],
#                                        custom_filter = (parameterinfo['custom_filter']),
#                                        retain_all = parameterinfo['retain_all'],
#                                        simplify = False)
#             except ValueError:
#                 Gs[parameterid] = nx.empty_graph(create_using = nx.MultiDiGraph)
#                 print(placeid + ": No OSM data for graph " + parameterid + ". Created empty graph.")
#                 break
#             except ConnectionError or UnboundLocalError:
#                 print("ConnectionError or UnboundLocalError. Retrying.")
#                 continue
#             except:
#                 print("Other error. Retrying.")
#                 continue
#             break
#         if parameterinfo['export']: ox_to_csv(Gs[parameterid], PATH["data"] + placeid + "/", placeid, parameterid), ox.save_graph_geopackage(Gs[parameterid], filepath = PATH["data"] + placeid + "/" + placeid + "_" + parameterid + ".gpkg", directed = False)

#     # if we have any LTNs, get the neighbourhood streets and save them to 

#     # if we have any LTNs, get the neighbourhood streets and save them to 
#     neighbourhoods = load_neighbourhoods(PATH["data"] + placeid + "/")
#     if not neighbourhoods:
#         print(placeid + ": No LTN dataset found.")
#     else:
#         neighbourhoods = prepare_neighbourhoods(neighbourhoods)
#         city_neighbourhood_streets = {}

#         for city_name, gdf in neighbourhoods.items():
#             if debug:
#                 print(f"Processing streets for {city_name}...")
#             nodes, edges, neighbourhood_graphs = get_neighbourhood_street_graph(gdf, debug)  # get streets within neighbourhoods
#             neighbourhood_graphs = process_lists(neighbourhood_graphs)
#             city_neighbourhood_streets[city_name] = {'nodes': nodes,'edges': edges, 'neighbourhood_graphs': neighbourhood_graphs}
            



#     # Compose special cases biketrack, bikeable, biketrackcarall, ltnstreets
#     parameterid = 'biketrack'
#     if city_name in city_neighbourhood_streets:
#         neighbourhood_graph = city_neighbourhood_streets[city_name]['neighbourhood_graphs']
#         Gs[parameterid] = nx.compose_all([
#             Gs['bike_cyclewaylefttrack'],
#             Gs['bike_cyclewaytrack'],
#             Gs['bike_highwaycycleway'],
#             Gs['bike_bicycleroad'],
#             Gs['bike_cyclewayrighttrack'],
#             Gs['bike_designatedpath'],
#             Gs['bike_cyclestreet']
#             ,neighbourhood_graph
#             ])
#     ox_to_csv(Gs[parameterid], PATH["data"] + placeid + "/", placeid, parameterid)
#     ox.save_graph_geopackage(Gs[parameterid], filepath = PATH["data"] + placeid + "/" + placeid + "_" + parameterid + ".gpkg", directed = False)

     
#     parameterid = 'bikeable'
#     Gs[parameterid] = nx.compose_all([Gs['biketrack'], Gs['car30'], Gs['bike_livingstreet']]) 
#     ox_to_csv(Gs[parameterid], PATH["data"] + placeid + "/", placeid, parameterid)
#     ox.save_graph_geopackage(Gs[parameterid], filepath = PATH["data"] + placeid + "/" + placeid + "_" + parameterid + ".gpkg", directed = False)

#     parameterid = 'biketrackcarall'
#     Gs[parameterid] = nx.compose(Gs['biketrack'], Gs['carall']) # Order is important
#     ox_to_csv(Gs[parameterid], PATH["data"] + placeid + "/", placeid, parameterid)
#     ox.save_graph_geopackage(Gs[parameterid], filepath = PATH["data"] + placeid + "/" + placeid + "_" + parameterid + ".gpkg", directed = False)

#     #parameterid = 'ltnstreets'
#     #Gs[parameterid] = neighbourhoods_G
#     #ox_to_csv(Gs[parameterid], PATH["data"] + placeid + "/", placeid, parameterid)

#     parameterid = 'biketrack_no_ltn'
#     Gs[parameterid] = nx.compose_all([
#         Gs['bike_cyclewaylefttrack'],
#         Gs['bike_cyclewaytrack'],
#         Gs['bike_highwaycycleway'],
#         Gs['bike_bicycleroad'],
#         Gs['bike_cyclewayrighttrack'],
#         Gs['bike_designatedpath'],
#         Gs['bike_cyclestreet']
#         ])
#     ox_to_csv(Gs[parameterid], PATH["data"] + placeid + "/", placeid, parameterid)
#     ox.save_graph_geopackage(Gs[parameterid], filepath = PATH["data"] + placeid + "/" + placeid + "_" + parameterid + ".gpkg", directed = False)


#     for parameterid in networktypes[:-2]:
#         #G_temp = nx.MultiDiGraph(ox.utils_graph.get_digraph(ox.simplify_graph(Gs[parameterid]))) # This doesnt work - cant get rid of multiedges
#         ox_to_csv(ox.simplify_graph(Gs[parameterid]), PATH["data"] + placeid + "/", placeid, parameterid, "_simplified")

#     # Handle unhashable list issue with LTN streets (source issue to do with OSM edges having lists as attributes, which shouldn't be the case!!)
#     # for u, v, data in neighbourhoods_G.edges(data=True):
#     #     for attr, value in data.items():
#     #         if isinstance(value, list):
#     #             data[attr] = str(value)
#     # ox_to_csv(ox.simplify_graph(neighbourhoods_G), PATH["data"] + placeid + "/", placeid, "ltnstreets", "_simplified")


In [None]:
# Play `sound_file` automatically when this cell runs (autoplay=True).
# NOTE(review): `Audio` and `sound_file` are not defined in this notebook;
# presumably they come from setup.py / parameters.py — confirm.
Audio(sound_file, autoplay=True)