# Step 1 - Pre-process neighbourhood data
## Project: Growing Urban Bicycle Networks with an LTN twist

This notebook takes the outputs of the LTN-Detection project (https://github.com/Froguin99/LTN-Detection) and prepares them for later use within the bike network growth project.

Contact: Chris Larkin (c.larkin@ncl.ac.uk) 

Created: 2025-03-26  
Last modified: 2025-03-28

## Preliminaries

### Parameters

In [None]:
# `debug` is set before the `%run -i` calls; scripts run with -i share this namespace and can read it.
debug = False # If True, will produce plots and/or verbose output to double-check
# `%run -i` executes the script inside the notebook's namespace, so the names it defines become notebook globals.
# Presumably this is where ltn_plausiablity_score and lower_ltn_plausiablity_score (used further down) are defined — confirm in parameters.py.
%run -i "../parameters/parameters.py"

### Setup

In [None]:
# Run the project's setup scripts inside the notebook's namespace (-i).
# Presumably path.py defines PATH and setup.py provides the imports used below (os, requests, gpd, pd, Audio) — confirm in those files.
%run -i path.py
%run -i setup.py

# Load the watermark extension; the version-report line below is left disabled.
%load_ext watermark
#%watermark -n -v -m -g -iv

### Functions

In [None]:
# Run functions.py inside the notebook's namespace (-i); presumably it defines the project's helper functions — confirm in that file.
%run -i functions.py

## Read in neighbourhoods and extract "ltns" from here

TODO:
- Store the LTN data in a public location so that it no longer has to be located manually. Tyne & Wear is currently hosted on GitHub as geopackages, which is not very flexible.
- Set up fuzzy matching between the source place names and growbike's place names (e.g. Newcastle Upon Tyne --> newcastle).

### Get LTNs and Normal Neighbourhoods

In [None]:
## Read in from Github
# Base URL of the raw geopackages in the LTN-Detection repository ("%26" is the URL-encoded "&").
github_link = "https://raw.githubusercontent.com/Froguin99/LTN-Detection/main/data/Tyne%26Wear/"
raw_files = [
    "scored_neighbourhoods_Gateshead.gpkg",
    "scored_neighbourhoods_Newcastle Upon Tyne.gpkg",
    "scored_neighbourhoods_South Tyneside.gpkg",
    "scored_neighbourhoods_Sunderland.gpkg",
    "scored_neighbourhoods_North Tyneside.gpkg"] # just Tyne & Wear for now...

# Manual map: filename place → folder name. Tried fuzzy matching (lower down and commented out) but haven't got it quite right yet...
folder_map = {
    "Gateshead": "gateshead",
    "Newcastle Upon Tyne": "newcastle",
    "South Tyneside": "south_tyneside",
    "Sunderland": "sunderland",
    "North Tyneside": "north_tyneside"
}

# Columns parsed as numbers after reading; values that cannot be parsed become NaN.
columns_to_convert = [
    "rat_run_score", "mean_distance_diff_score",
    "filter_road_density_score", "overall_score", "cluster_label"
]

# Seconds to wait for GitHub before giving up, so a stalled connection cannot hang the notebook.
download_timeout = 60

# (sub-folder, minimum overall_score (exclusive), label used in the log message) for each LTN scenario.
ltn_scenarios = [
    ("current_ltn_scenario", ltn_plausiablity_score, "current LTNs scenario"),
    ("more_ltn_scenario", lower_ltn_plausiablity_score, "more LTNs scenario"),
]


def _download_neighbourhood_file(url, destination, timeout=download_timeout):
    """Download `url` to `destination`, creating the parent folder if needed.

    The response is checked before anything is written, so an HTTP error
    (e.g. a 404 page) is never saved to disk as if it were a geopackage.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    directory = os.path.dirname(destination)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(destination, "wb") as f:
        f.write(response.content)


def _save_gpkg_layer(gdf, output_path):
    """Write `gdf` to `output_path` as a GeoPackage, replacing any existing file.

    The layer is named after the file (file name without the extension).
    """
    directory = os.path.dirname(output_path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    # Remove a previous output first so stale layers cannot survive inside the file.
    if os.path.exists(output_path):
        os.remove(output_path)
    layer = os.path.splitext(os.path.basename(output_path))[0]
    gdf.to_file(output_path, layer=layer, driver="GPKG", overwrite=True)


# For every place: download once, then save
#   1. the LTN-only subsets, selected by the plausibility score thresholds, and
#   2. all neighbourhoods, regardless of how much traffic they have within them.
for fname in raw_files:
    place = fname.replace("scored_neighbourhoods_", "").replace(".gpkg", "")
    folder = folder_map.get(place)
    if not folder:
        print(f"Skipping {place}: no folder mapping.")
        continue

    place_dir = os.path.join(PATH["data"], folder)
    scored_fname = f"scored_neighbourhoods_{folder}.gpkg"

    # Download (the raw file is kept next to the derived outputs)
    download_path = os.path.join(place_dir, scored_fname)
    _download_neighbourhood_file(github_link + fname, download_path)
    gdf = gpd.read_file(download_path)
    gdf[columns_to_convert] = gdf[columns_to_convert].apply(pd.to_numeric, errors="coerce")

    # NaN never compares greater than a threshold, so such rows drop out of every scenario below.
    n_unscored = int(gdf["overall_score"].isna().sum())
    if n_unscored:
        print(f"Warning: {n_unscored} neighbourhood(s) in {place} have no numeric overall_score and are left out of the LTN scenarios.")

    # Filter by scores
    for scenario_folder, min_score, label in ltn_scenarios:
        ltns = gdf[gdf["overall_score"] > min_score]
        output_path = os.path.join(place_dir, scenario_folder, scored_fname)
        _save_gpkg_layer(ltns, output_path)
        print(f"Saved: {output_path} ({label})")

    # All neighbourhoods
    output_path = os.path.join(place_dir, f"neighbourhoods_{folder}.gpkg")
    _save_gpkg_layer(gdf, output_path)
    print(f"Saved: {output_path}")

In [None]:
# # fuzzy matching attempt (disabled: not reliable yet, the manual `folder_map` in the cell above is used instead)
# from fuzzywuzzy import process


# columns_to_convert = ["rat_run_score", "mean_distance_diff_score", "filter_road_density_score", "overall_score", "cluster_label"]


# def place_from_filename(fname):
#     return fname.replace("scored_neighbourhoods_", "").replace(".gpkg", "").strip()

# # Function to fuzzy match the place to the folder names
# def get_matching_folder(place_name, folder_names):
#     match = process.extractOne(place_name, folder_names)
#     return match[0] if match[1] >= 80 else None  # threshold of 80% similarity

# # Get available folder names in PATH['data']
# available_folders = [folder for folder in os.listdir(PATH["data"]) if os.path.isdir(os.path.join(PATH["data"], folder))]

# # Process each file
# for fname in raw_files:
#     place_name = place_from_filename(fname)
#     matching_folder = get_matching_folder(place_name, available_folders)
    
#     if not matching_folder:
#         print(f"No match found for {place_name}. Skipping.")
#         continue

#     # Download the file
#     url = github_link + fname
#     local_tmp = fname  # Save temporarily with original name
#     with open(local_tmp, "wb") as f:
#         f.write(requests.get(url).content)

#     # Read and filter
#     gdf = gpd.read_file(local_tmp)
#     gdf[columns_to_convert] = gdf[columns_to_convert].apply(pd.to_numeric, errors="coerce")
#     ltns = gdf[gdf["overall_score"] > 50]

#     # Define the output path using the matched folder name
#     output_path = os.path.join(PATH["data"], matching_folder, fname)
#     os.makedirs(os.path.dirname(output_path), exist_ok=True)

#     # Save the file
#     ltns.to_file(output_path, driver="GPKG")
#     print(f"Saved: {output_path}")

In [None]:
# Plays a sound as soon as this cell's output is rendered (autoplay=True).
# Audio and sound_file are not defined in this notebook; presumably they come from the scripts run via %run -i above — confirm.
Audio(sound_file, autoplay=True)