# Step 5 - Analysis of bicycle network results
## Project: Growing Urban Bicycle Networks with LTNs

This notebook takes the existing infrastructure, the results from 03_poi_based_generation and calculates/analyzes a number of measures:
* cost (length)
* coverage  
* directness  
* efficiency
* overlap with existing networks

TODO

- find neighbourhoods where large amounts of residiental streets are used to potentially convert to LTNs
- Comparison between LTN prioirtised and "normal" growth
- only runs for one place at at time currently (my bad coding skills + getting stuck down rabbitholes!)

In [1]:
# import libraries
from src import utils
PATH = utils.PATH # shortening the var name so that we don't have to change it below

# System
import csv
import os
import dill as pickle
import itertools
import random
from collections import defaultdict
import pprint
pp = pprint.PrettyPrinter(indent=4)
from tqdm.notebook import tqdm
import glob
from concurrent.futures import ThreadPoolExecutor
from copy import deepcopy
import yaml
import json

# Math/Data
import numpy as np
import pandas as pd


# Network
import networkx as nx

# Plotting
import matplotlib.pyplot as plt
import matplotlib.animation as animation


# Geo
import osmnx as ox
ox.settings.log_file = True
ox.settings.requests_timeout = 300
ox.settings.logs_folder = PATH["logs"]
import geopandas as gpd
import json


## Preliminaries

### Parameters

In [2]:
debug = False # If True, will produce plots and/or verbose output to double-check
# if not debug: # Only do this if sure the code is bug-free!
#     warnings.filterwarnings('ignore')
rerun_existing = True # If True, will re-run the costly analysis of existing infra even if files already exist.
rerun = True # If True, recompute the analysis. If false, just re-make the plots

In [3]:
params = yaml.load(
    open("../parameters/parameters.yml"), 
    Loader=yaml.FullLoader)
osmnxparameters = json.load(open("../parameters/osmnxparameters.json", "r"))
plotparam = json.load(open("../parameters/plotparam.json", "r"))
plotparam_analysis = json.load(open("../parameters/plotparam_analysis.json", "r"))

### Network weighting by tags

In [4]:
tag_lts = json.load(open("../parameters/tag_lts.json", "r"))
distance_cost = json.load(open("../parameters/distance_cost.json", "r"))

### Load Cities

In [5]:
# load cities
cities = utils.load_cities(PATH, debug)

## Loading

### Load Results

In [6]:
# betweenness 
betweenness_results = {}
for scenario in params["scenarios"]:
    betweenness_results[scenario] = {}
    for placeid in cities:
        filename = (PATH["results"] + placeid + "/" + scenario + "/" + f"{placeid}_poi_{params['poi_source']}_betweenness_weighted_" + scenario + ".pickle")
        abs_path = os.path.abspath(filename)
        if os.path.exists(abs_path):
            with open(abs_path, "rb") as f:
                betweenness_results[scenario][placeid] = pickle.load(f)
        else:
            print(f"File {abs_path} does not exist.")
            print("Please run the betweenness analysis first.")
            print(f"No betweenness files found for {placeid} in scenario {scenario}.")

In [7]:
# random (many runs to get a distribution)
random_results = {}
for scenario in params["scenarios"]:
    random_results[scenario] = {}
    for placeid in cities:
        pattern = (PATH["results"] + placeid + "/" + scenario + "/" +
                   f"{placeid}_poi_{params['poi_source']}_random_weighted_{scenario}_run*.pickle")
        random_files = sorted(glob.glob(os.path.abspath(pattern)))[:3]  # only take the first 3 whilst we debug :D
        if random_files:
            random_results[scenario][placeid] = []
            for fn in random_files:
                abs_path = os.path.abspath(fn)
                with open(abs_path, "rb") as f:
                    res = pickle.load(f)
                random_results[scenario][placeid].append(res)
        else:
            print(f"No random files found for {placeid} in scenario {scenario}.")
            print("Please run the random growth analysis first.")


In [8]:
# demand
demand_results = {}
for scenario in params["scenarios"]:
    demand_results[scenario] = {}
    for placeid in cities:
        filename = (PATH["results"] + placeid + "/" + scenario + "/" + f"{placeid}_poi_{params['poi_source']}_demand_weighted_" + scenario + ".pickle")
        abs_path = os.path.abspath(filename)
        if os.path.exists(abs_path):
            with open(abs_path, "rb") as f:
                demand_results[scenario][placeid] = pickle.load(f)
        else:
            print(f"File {abs_path} does not exist.")
            print("Please run the demand analysis first.")
            print(f"No demand files found for {placeid} in scenario {scenario}.")


In [9]:
# demand LTN priority
demand_ltn_priority_results = {}
for scenario in params["scenarios"]:
    demand_ltn_priority_results[scenario] = {}
    for placeid in cities:
        filename = (PATH["results"] + placeid + "/" + scenario + "/" + f"{placeid}_poi_{params['poi_source']}_demand_ltn_priority_weighted_" + scenario + ".pickle")
        abs_path = os.path.abspath(filename)
        if os.path.exists(abs_path):
            with open(abs_path, "rb") as f:
                demand_ltn_priority_results[scenario][placeid] = pickle.load(f)
        else:
            print(f"File {abs_path} does not exist.")
            print("Please run the demand LTN priority analysis first.")
            print(f"No demand LTN priority files found for {placeid} in scenario {scenario}.")


In [10]:
# betweenness LTN priority
betweenness_ltn_priority_results = {}
for scenario in params["scenarios"]:
    betweenness_ltn_priority_results[scenario] = {}
    for placeid in cities:
        filename = (PATH["results"] + placeid + "/" + scenario + "/" + f"{placeid}_poi_{params['poi_source']}_betweenness_ltn_priority_weighted_" + scenario + ".pickle")
        abs_path = os.path.abspath(filename)
        if os.path.exists(abs_path):
            with open(abs_path, "rb") as f:
                betweenness_ltn_priority_results[scenario][placeid] = pickle.load(f)
        else:
            print(f"File {abs_path} does not exist.")
            print("Please run the betweenness LTN priority analysis first.")
            print(f"No betweenness LTN priority files found for {placeid} in scenario {scenario}.")


Find investment level, split results into GTs, GT_abstracts 

In [11]:
for scenario_name in params["scenarios"]:
    for placeid in cities:

        # Demand 
        if placeid in demand_results.get(scenario_name, {}):
            demand_dict = demand_results[scenario_name][placeid]
            investment_levels_demand = demand_dict["prune_quantiles"]
            GTs_demand               = demand_dict["GTs"]
            GT_abstracts_demand      = demand_dict["GT_abstracts"]
        else:
            print(f"No demand results for {placeid} in scenario '{scenario_name}'")
            investment_levels_demand = []
            GTs_demand               = []
            GT_abstracts_demand      = []


        # Betweenness‐LTN‐priority 
        if placeid in betweenness_ltn_priority_results.get(scenario_name, {}):
            betweenness_ltn_dict = betweenness_ltn_priority_results[scenario_name][placeid]
            investment_levels_betw = betweenness_ltn_dict["prune_quantiles"]
            GTs_betw               = betweenness_ltn_dict["GTs"]
            GT_abstracts_betw      = betweenness_ltn_dict["GT_abstracts"]
        else:
            # e.g. scenario == "no_ltn_scenario" has no betweenness‐LTN‐priority data
            investment_levels_betw = []
            GTs_betw               = []
            GT_abstracts_betw      = []

        # Betweenness
        if placeid in betweenness_results.get(scenario_name, {}):
            betweenness_dict = betweenness_results[scenario_name][placeid]
            investment_levels_betweenness = betweenness_dict["prune_quantiles"]
            GTs_betweenness               = betweenness_dict["GTs"]
            GT_abstracts_betweenness      = betweenness_dict["GT_abstracts"]
        else:
            investment_levels_betweenness = []
            GTs_betweenness               = []
            GT_abstracts_betweenness      = []

        # Demand‐LTN‐priority 
        if placeid in demand_ltn_priority_results.get(scenario_name, {}):
            dem_ltn_dict = demand_ltn_priority_results[scenario_name][placeid]
            investment_levels_dem_ltn = dem_ltn_dict["prune_quantiles"]
            GTs_dem_ltn               = dem_ltn_dict["GTs"]
            GT_abstracts_dem_ltn      = dem_ltn_dict["GT_abstracts"]
        else:
            investment_levels_dem_ltn = []
            GTs_dem_ltn               = []
            GT_abstracts_dem_ltn      = []

        # Random‐runs (loads all run*.pickle files)
        random_runs_list = random_results.get(scenario_name, {}).get(placeid, [])
        if random_runs_list:
            all_GTs_random       = [run_dict["GTs"]          for run_dict in random_runs_list]
            all_GTabs_random      = [run_dict["GT_abstracts"]  for run_dict in random_runs_list]
            investment_levels_random = random_runs_list[0]["prune_quantiles"]
        else:
            all_GTs_random          = []
            all_GTabs_random         = []
            investment_levels_random = []



### Load existing networks, nodes, GeoDataframe



In [12]:
G_biketracks_dict               = {}  # (placeid, scenario) → biketrack graph
G_biketrack_no_ltns_dict       = {}  # (placeid, scenario) → biketrack_no_ltn graph
G_biketrackcaralls_dict        = {}  # (placeid, scenario) → biketrackcarall graph
G_biketrackcarall_edges_dict    = {}  # (placeid, scenario) → GeoDataFrame of biketrackcarall edges
boundary_gdfs               = {}  # placeid → boundary GeoDataFrame (same for all scenarios)
tess_points_dict            = {}  # (placeid, scenario) → tessellation points GeoDataFrame
ltn_points_dict             = {}  # (placeid, scenario) → LTN points GeoDataFrame
combined_points_dict        = {}  # (placeid, scenario) → combined points GeoDataFrame

for scenario in params["scenarios"]:
    for placeid, placeinfo in cities.items():
        base_folder = os.path.join(PATH["data"], placeid, scenario)

        # Load biketrack graph
        biketrack_gpkg = os.path.join(base_folder, f"{placeid}_biketrack.gpkg")
        if os.path.exists(biketrack_gpkg):
            G_biketrack = utils.ox_gpkg_to_graph(biketrack_gpkg)
            G_biketrack.remove_nodes_from(list(nx.isolates(G_biketrack)))
            G_biketracks_dict[(placeid, scenario)] = G_biketrack
        else:
            print(f"Missing: {biketrack_gpkg}")
            G_biketracks_dict[(placeid, scenario)] = None

        # Load biketrack_no_ltn graph
        biketrack_no_ltn_gpkg = os.path.join(base_folder, f"{placeid}_biketrack_no_ltn.gpkg")
        if os.path.exists(biketrack_no_ltn_gpkg):
            G_no_ltn = utils.ox_gpkg_to_graph(biketrack_no_ltn_gpkg)
            G_no_ltn.remove_nodes_from(list(nx.isolates(G_no_ltn)))
            G_biketrack_no_ltns_dict[(placeid, scenario)] = G_no_ltn
        else:
            print(f"Missing: {biketrack_no_ltn_gpkg}")
            G_biketrack_no_ltns_dict[(placeid, scenario)] = None

        # Load biketrackcarall graph
        biketrackcarall_gpkg = os.path.join(base_folder, f"{placeid}_biketrackcarall.gpkg")
        if os.path.exists(biketrackcarall_gpkg):
            G_carall = utils.ox_gpkg_to_graph(biketrackcarall_gpkg)
            G_carall.remove_nodes_from(list(nx.isolates(G_carall)))
            G_biketrackcaralls_dict[(placeid, scenario)] = G_carall

            # also store edges GeoDataFrame
            edges_gdf = ox.graph_to_gdfs(G_carall, nodes=False)
            G_biketrackcarall_edges_dict[(placeid, scenario)] = edges_gdf
        else:
            print(f"Missing: {biketrackcarall_gpkg}")
            G_biketrackcaralls_dict[(placeid, scenario)] = None
            G_biketrackcarall_edges_dict[(placeid, scenario)] = None

        #  Load boundary once per placeid (it won’t change by scenario)
        if placeid not in boundary_gdfs:
            boundary_gdf = ox.geocode_to_gdf(placeinfo["nominatimstring"])
            boundary_gdfs[placeid] = boundary_gdf

        # get nodes
        tess_points_gpkg = os.path.join(base_folder, f"{placeid}_tessellation_points.gpkg")
        if os.path.exists(tess_points_gpkg):
            tess_points = gpd.read_file(tess_points_gpkg)
            tess_points_dict[(placeid, scenario)] = tess_points
        else:
            print(f"Missing: {tess_points_gpkg}")
            tess_points_dict[(placeid, scenario)] = None
        
        # get ltn points
        if scenario != "no_ltn_scenario":
            ltn_points_gpkg = os.path.join(base_folder, f"{placeid}_ltn_points.gpkg")
            if os.path.exists(ltn_points_gpkg):
                ltn_points = gpd.read_file(ltn_points_gpkg)
                ltn_points_dict[(placeid, scenario)] = ltn_points
            else:
                print(f"Missing: {ltn_points_gpkg}")
                ltn_points_dict[(placeid, scenario)] = None
        
        # get combined points
        combined_points_gpkg = os.path.join(base_folder, f"{placeid}_combined_points.gpkg")
        if os.path.exists(combined_points_gpkg):
            combined_points = gpd.read_file(combined_points_gpkg)
            combined_points_dict[(placeid, scenario)] = combined_points
        else:
            print(f"Missing: {combined_points_gpkg}")
            combined_points_dict[(placeid, scenario)] = None

        # get all neighbourhoods (ragardless of their low traffic status. This doesn't change by scenario)
        all_neighbourhoods = gpd.read_file(PATH["data"] + placeid + "/" + 'neighbourhoods_'+  placeid + '.gpkg')
        all_neighbourhoods_centroids = all_neighbourhoods.geometry.centroid
        all_neighbourhoods_centroids = gpd.GeoDataFrame(geometry= all_neighbourhoods_centroids, crs=all_neighbourhoods.crs)



Missing: ../../bikenwgrowth_external/data/newcastle\no_ltn_scenario\newcastle_tessellation_points.gpkg



  all_neighbourhoods_centroids = all_neighbourhoods.geometry.centroid


Missing: ../../bikenwgrowth_external/data/newcastle\current_ltn_scenario\newcastle_tessellation_points.gpkg



  all_neighbourhoods_centroids = all_neighbourhoods.geometry.centroid


Missing: ../../bikenwgrowth_external/data/newcastle\more_ltn_scenario\newcastle_tessellation_points.gpkg



  all_neighbourhoods_centroids = all_neighbourhoods.geometry.centroid


In [13]:
# setup

def csv_to_ox(p, placeid, parameterid):
    '''
    Load graph from csv files (nodes and edge)
    Include OSMID, length, highway, x, y attributes
    '''

    prefix = placeid + '_' + parameterid
    compress = utils.check_extract_zip(p, prefix)
    
    with open(p + prefix + '_edges.csv', 'r') as f:
        header = f.readline().strip().split(",")
        lines = []
        for line in csv.reader(f, quotechar='"', delimiter=',', quoting=csv.QUOTE_ALL, skipinitialspace=True):
            line_list = [c for c in line]
            osmid = str(eval(line_list[header.index("osmid")])[0]) if isinstance(eval(line_list[header.index("osmid")]), list) else line_list[header.index("osmid")]
            length = str(eval(line_list[header.index("length")])[0]) if isinstance(eval(line_list[header.index("length")]), list) else line_list[header.index("length")]
            highway = line_list[header.index("highway")]
            if highway.startswith("[") and highway.endswith("]"):
                highway = highway.strip("[]").split(",")[0].strip(" '")
            line_string = f"{line_list[header.index('u')]} {line_list[header.index('v')]} {osmid} {length} {highway}"
            lines.append(line_string)
        G = nx.parse_edgelist(lines, nodetype=int, data=(("osmid", int), ("length", float), ("highway", str)), create_using=nx.MultiDiGraph)
    
    with open(p + prefix + '_nodes.csv', 'r') as f:
        header = f.readline().strip().split(",")
        values_x = {}
        values_y = {}
        for line in csv.reader(f, quotechar='"', delimiter=',', quoting=csv.QUOTE_ALL, skipinitialspace=True):
            line_list = [c for c in line]
            osmid = int(line_list[header.index("osmid")])
            values_x[osmid] = float(line_list[header.index("x")])
            values_y[osmid] = float(line_list[header.index("y")])
        nx.set_node_attributes(G, values_x, "x")
        nx.set_node_attributes(G, values_y, "y")
    
    if compress:
        os.remove(p + prefix + '_nodes.csv')
        os.remove(p + prefix + '_edges.csv')
    return G



# Analyse

### Analysis saving setup

In [14]:
analysis_res_pickle_paths = {}  
analysis_res_json_paths    = {}  
analysis_results          = {}
for scenario in params["scenarios"]:
    analysis_res_pickle_paths[scenario] = os.path.join(PATH["results"] + placeid + "/" + scenario + "/" + f"{placeid}_{scenario}_analysis_results.pickle")
    analysis_res_json_paths[scenario] = os.path.join(PATH["results"], placeid + "/" + scenario + "/" + f"{placeid}_{scenario}_analysis_results.json")
    analysis_results[scenario] = {}


### Prelimiary Length

Length - finding the distance of the connected network, along with the investment distance (length - existing infrastructure)

In [15]:
for scenario in params["scenarios"]:
    G_biketrack        = G_biketracks_dict.get((placeid, scenario))
    G_biketrack_no_ltn = G_biketrack_no_ltns_dict.get((placeid, scenario))
    GTs                = demand_results.get(scenario, {}).get(placeid, {}).get("GTs", [])

    if not (G_biketrack and G_biketrack_no_ltn and GTs):
        print(f"Missing data for {placeid} - {scenario}. Skipping.")
        continue

    # File paths
    analysis_res_pickle = os.path.join(PATH["results"], placeid, scenario, f"{placeid}_{scenario}_analysis_results.pickle")
    analysis_res_csv    = os.path.join(PATH["results"], placeid, scenario, f"{placeid}_{scenario}_analysis_results.csv")
    output_path         = os.path.join(PATH["plots"], placeid, scenario, "allLengths.png")

    # Load existing results
    if os.path.exists(analysis_res_pickle):
        with open(analysis_res_pickle, 'rb') as f:
            analysis_results[scenario] = pickle.load(f)
    else:
        analysis_results[scenario] = {}

    # Calculations
    total_biketrack        = sum(nx.get_edge_attributes(G_biketrack, 'length').values())
    total_biketrack_no_ltn = sum(nx.get_edge_attributes(G_biketrack_no_ltn, 'length').values())
    total_network          = sum(nx.get_edge_attributes(GTs[-1], 'length').values())
    investment_length      = sum(
        data.get('length', 0) * distance_cost.get(data.get('highway', 'unclassified'), 1)
        for _, _, data in GTs[-1].edges(data=True))

    length_stats = {'length_comparison_labels': [ "Existing Cycle Infrastructure (Including LTNs)", "Existing Cycle Infrastructure (Excluding LTNs)", "LTNs", "Fully Connected Cycle Network", "Investment Distance"],
        'length_comparison_values': [total_biketrack, total_biketrack_no_ltn, abs(total_biketrack - total_biketrack_no_ltn), total_network, investment_length],
        'length_comparison_colors': ['deepskyblue'] * 5,
        'total_network_length': total_network,
        'total_biketrack_length': total_biketrack,
        'total_biketrack_no_ltn_length': total_biketrack_no_ltn,
        'length_difference': abs(total_biketrack - total_biketrack_no_ltn),
        'total_investment_length': investment_length}

    # Save to pickle & CSV
    analysis_results[scenario].update(length_stats)
    with open(analysis_res_pickle, 'wb') as f:
        pickle.dump(analysis_results[scenario], f)
    analysis_res_json = os.path.join(PATH["results"], placeid, scenario, f"{placeid}_{scenario}_analysis_results.json")
    with open(analysis_res_json, 'w') as f:
        json.dump(analysis_results[scenario], f, indent=2)
    # removed csv - can't take columns with different lengths
    #pd.DataFrame({k: [v] for k, v in analysis_results[scenario].items()}).to_csv(analysis_res_csv, index=False)

    # Plot
    plt.figure(figsize=(10, 6))
    plt.bar(
        analysis_results[scenario]['length_comparison_labels'],
        analysis_results[scenario]['length_comparison_values'],
        color=analysis_results[scenario]['length_comparison_colors']
    )
    plt.xlabel('Network Type')
    plt.ylabel('Total Length (meters)')
    plt.title(f'{placeid} - {scenario} - Lengths of Cycle Networks')
    plt.tight_layout()
    plt.savefig(output_path, dpi=300)
    plt.close()


    ltn_difference = abs(total_biketrack - total_biketrack_no_ltn)
    labels = ["Total Cycle Infrastructure", "Protected Cycle Infrastructure","LTNs"]
    values = [total_biketrack, total_biketrack_no_ltn, ltn_difference]
    plt.figure(figsize=(10, 6))
    plt.bar(labels, values, color=['deepskyblue'] * 3)
    plt.xlabel('Network Type')
    plt.ylabel('Total Length (meters)')
    plt.title(f'{placeid} - {scenario} - Total Lengths of Cycle Infrastructure')
    plt.tight_layout()
    output_path_total = os.path.join(PATH["plots"], placeid, scenario, "TotalLengthsCycleNet.png")
    plt.savefig(output_path_total, dpi=300)
    plt.close()
    
    print(f"Completed {placeid} - {scenario}")



Completed newcastle - no_ltn_scenario
Completed newcastle - current_ltn_scenario
Completed newcastle - more_ltn_scenario


#### Pure length - how is the budget used per iteration

In [16]:
for scenario in params["scenarios"]:
    # load scenario-specific results and data
    analysis_res_pickle = analysis_res_pickle_paths[scenario]
    analysis_res_json = analysis_res_json_paths[scenario]
    analysis_results[scenario] = utils.load_results(analysis_res_pickle)
    GTs_betweenness = betweenness_results[scenario].get(placeid, {}).get("GTs", [])
    GTs_demand = demand_results[scenario].get(placeid, {}).get("GTs", [])
    if scenario != "no_ltn_scenario":
        GTs_demand_ltn_priority = demand_ltn_priority_results[scenario].get(placeid, {}).get("GTs", [])
        GTs_betweenness_ltn_priority = betweenness_ltn_priority_results[scenario].get(placeid, {}).get("GTs", [])
    random_runs = random_results[scenario].get(placeid, [])

    if rerun or 'total_lengths' not in analysis_results[scenario]:
        results_list = []
        total_lengths_betweenness = utils.compute_total_lengths(GTs_betweenness)
        total_lengths_demand = utils.compute_total_lengths(GTs_demand)
        total_lengths_random_runs = [utils.compute_total_lengths(run["GTs"]) for run in random_runs]
        total_lengths_random_mean = np.mean(total_lengths_random_runs, axis=0).tolist()

        # save results
        results_list.append(("Betweenness Growth - Total Length", total_lengths_betweenness))
        results_list.append(("Demand Growth - Total Length", total_lengths_demand))
        for i, run_lengths in enumerate(total_lengths_random_runs):
            results_list.append((f"Random Run {i+1} - Total Length", run_lengths))
        results_list.append(("Random Growth (mean) - Total Length", total_lengths_random_mean))
        if scenario != "no_ltn_scenario":
            total_lengths_demand_ltn_priority = utils.compute_total_lengths(GTs_demand_ltn_priority)
            total_lengths_betweenness_ltn_priority = utils.compute_total_lengths(GTs_betweenness_ltn_priority)
            results_list.append(("Demand LTN Priority Growth - Total Length", total_lengths_demand_ltn_priority))
            results_list.append(("Betweenness LTN Priority Growth - Total Length", total_lengths_betweenness_ltn_priority))
        utils.save_results(results_list, analysis_res_pickle, analysis_res_json)
        analysis_results[scenario] = {label: data for label, data in results_list}
        print(f"Updated analysis results for {scenario} in {placeid}")

    plt.figure(figsize=(10, 6))
    for i in range(1, 100):
        key = f"Random Run {i} - Total Length"
        if key in analysis_results[scenario]:
            plt.plot(analysis_results[scenario][key], color='lightgray', linewidth=1, alpha=0.5)
        else:
            break
    plt.plot(analysis_results[scenario]['Random Growth (mean) - Total Length'], linestyle='--', linewidth=2, label='Random Growth (mean)')
    plt.plot(analysis_results[scenario]['Betweenness Growth - Total Length'], '-', label='Betweenness Growth', color='orange')
    plt.plot(analysis_results[scenario]['Demand Growth - Total Length'], '-.', label='Demand Growth', color='red')
    if scenario != "no_ltn_scenario":
        plt.plot(analysis_results[scenario]['Demand LTN Priority Growth - Total Length'], ':', label='Demand LTN Priority Growth', color='green')
        plt.plot(analysis_results[scenario]['Betweenness LTN Priority Growth - Total Length'], '-', label='Betweenness LTN Priority Growth', color='purple')
    plt.xlabel('Investment Iteration')
    plt.ylabel('Total Length (meters)')
    plt.title(f'Length of Invested Cycle Network for {scenario} - {placeid}')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    output_path = os.path.join(PATH["plots"], placeid, scenario, "L_of_Investment.png")
    plt.savefig(output_path, dpi=300)
    plt.close()
    print(f"Plots saved for {placeid} - {scenario}")

Updated analysis results for no_ltn_scenario in newcastle
Plots saved for newcastle - no_ltn_scenario
Updated analysis results for current_ltn_scenario in newcastle
Plots saved for newcastle - current_ltn_scenario
Updated analysis results for more_ltn_scenario in newcastle
Plots saved for newcastle - more_ltn_scenario


Deviation from random - pure length

In [17]:
for scenario in params["scenarios"]:
    analysis_res_pickle = analysis_res_pickle_paths[scenario]
    analysis_res_json = analysis_res_json_paths[scenario]
    analysis_results[scenario] = utils.load_results(analysis_res_pickle)

    if rerun or not any(k.endswith("Deviation from Random") for k in analysis_results[scenario]):
        baseline = analysis_results[scenario]['Random Growth (mean) - Total Length']
        results_list = []
        # Calculate deviation from random baseline
        results_list.append((
            "Betweenness Growth - Total Length Deviation from Random",
            utils.compute_abs_deviation(analysis_results[scenario]["Betweenness Growth - Total Length"], baseline)))
        results_list.append((
            "Demand Growth - Total Length Deviation from Random",
            utils.compute_abs_deviation(analysis_results[scenario]["Demand Growth - Total Length"], baseline)))

        # Calculate mean deviation for random runs
        random_runs_keys = [key for key in analysis_results[scenario] if key.startswith("Random Run") and "Total Length" in key]
        random_runs = [analysis_results[scenario][key] for key in random_runs_keys]
        random_runs_deviations = [utils.compute_abs_deviation(run, baseline) for run in random_runs]
        random_deviations_mean = np.mean(random_runs_deviations, axis=0).tolist()
        results_list.append(("Random Growth (mean) - Total Length Deviation from Random", random_deviations_mean))
        if scenario != "no_ltn_scenario":
            results_list.append(("Demand LTN Priority Growth - Total Length Deviation from Random",
                utils.compute_abs_deviation(analysis_results[scenario]["Demand LTN Priority Growth - Total Length"], baseline)))
            results_list.append(("Betweenness LTN Priority Growth - Total Length Deviation from Random",
                utils.compute_abs_deviation(analysis_results[scenario]["Betweenness LTN Priority Growth - Total Length"], baseline)))

        # Add random runs deviations
        for i, dev in enumerate(random_runs_deviations):
            results_list.append((f"Random Run {i+1} - Total Length Deviation from Random", dev))

        # Save all results as list of (label, data)
        utils.save_results(results_list, analysis_res_pickle, analysis_res_json)
        analysis_results[scenario] = {label: data for label, data in results_list}
        print(f"Saved absolute deviation results for {scenario} in {placeid}")





     # Plotting
    plt.figure(figsize=(10, 6))
    for key in analysis_results[scenario]:
        if key.startswith("Random Run") and "Deviation from Random" in key:
            plt.plot(analysis_results[scenario][key], color='lightgray', linewidth=1, alpha=0.4)
    plt.axhline(0, color='blue', linestyle='--', linewidth=2, label='Random Growth (mean)')
    plot_lines = [
        ("Betweenness Growth - Total Length Deviation from Random", '-', 'orange', 'Betweenness Growth'),
        ("Demand Growth - Total Length Deviation from Random", '-.', 'red', 'Demand Growth'),]
    if scenario != "no_ltn_scenario": plot_lines += [ ("Demand LTN Priority Growth - Total Length Deviation from Random", ':', 'green', 'Demand LTN Priority Growth'), ("Betweenness LTN Priority Growth - Total Length Deviation from Random", '-', 'purple', 'Betweenness LTN Priority Growth'),]
    for key, ls, color, label in plot_lines:
        plt.plot(analysis_results[scenario][key], linestyle=ls, color=color, label=label)
    plt.xlabel('Investment Iteration')
    plt.ylabel('Deviation from Random Growth Baseline (meters)')
    plt.title(f'Deviation from Random Growth Baseline for {scenario} - {placeid}')
    plt.legend(loc='upper right')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    output_path = os.path.join(PATH["plots"], placeid, scenario, "abs_dev_from_random_length.png")
    plt.savefig(output_path, dpi=300)
    plt.close()


Saved absolute deviation results for no_ltn_scenario in newcastle
Saved absolute deviation results for current_ltn_scenario in newcastle
Saved absolute deviation results for more_ltn_scenario in newcastle


#### "Actual" investment length - how much do we actually need to use to close gaps

Calculate length, minus the existing infrastructure

#### Find how much we actually need to invest

In [18]:
for scenario in params["scenarios"]:
    # Load results
    analysis_res_pickle = analysis_res_pickle_paths[scenario]
    analysis_res_json = analysis_res_json_paths[scenario]
    analysis_results[scenario] = utils.load_results(analysis_res_pickle)

    GTs_betweenness = betweenness_results[scenario].get(placeid, {}).get("GTs", [])
    GTs_demand = demand_results[scenario].get(placeid, {}).get("GTs", [])
    if scenario != "no_ltn_scenario":
        GTs_demand_ltn_priority = demand_ltn_priority_results[scenario].get(placeid, {}).get("GTs", [])
        GTs_betweenness_ltn_priority = betweenness_ltn_priority_results[scenario].get(placeid, {}).get("GTs", [])
    random_runs = random_results[scenario].get(placeid, [])

    if rerun or "Betweenness Growth - Total Investment Length" not in analysis_results[scenario]:
        results_list = []

        total_investment_betweenness = utils.compute_total_investment_lengths(GTs_betweenness, distance_cost)
        total_investment_demand = utils.compute_total_investment_lengths(GTs_demand, distance_cost)
        random_runs_investments = [utils.compute_total_investment_lengths(run["GTs"], distance_cost) for run in random_runs]
        random_investment_mean = np.mean(random_runs_investments, axis=0).tolist()

        results_list.append(("Betweenness Growth - Total Investment Length", total_investment_betweenness))
        results_list.append(("Demand Growth - Total Investment Length", total_investment_demand))
        for i, run_lengths in enumerate(random_runs_investments):
            results_list.append((f"Random Run {i+1} - Total Investment Length", run_lengths))
        results_list.append(("Random Growth (mean) - Total Investment Length", random_investment_mean))

        if scenario != "no_ltn_scenario":
            total_investment_demand_ltn_priority = utils.compute_total_investment_lengths(GTs_demand_ltn_priority, distance_cost)
            total_investment_betweenness_ltn_priority = utils.compute_total_investment_lengths(GTs_betweenness_ltn_priority, distance_cost)
            results_list.append(("Demand LTN Priority Growth - Total Investment Length", total_investment_demand_ltn_priority))
            results_list.append(("Betweenness LTN Priority Growth - Total Investment Length", total_investment_betweenness_ltn_priority))

        utils.save_results(results_list, analysis_res_pickle, analysis_res_json)
        analysis_results[scenario] = {label: data for label, data in results_list}
        print(f"Updated investment cost results for {scenario} in {placeid}")

    # Plotting
    plt.figure(figsize=(10, 6))
    for i in range(1, 100):
        key = f"Random Run {i} - Total Investment Length"
        if key in analysis_results[scenario]:
            plt.plot(analysis_results[scenario][key], color='lightgray', linewidth=1, alpha=0.5)
        else:
            break
    plt.plot(analysis_results[scenario]["Random Growth (mean) - Total Investment Length"], linestyle='--', linewidth=2, color='blue', label='Random Growth (mean)')
    plt.plot(analysis_results[scenario]["Betweenness Growth - Total Investment Length"], '-', color='orange', label='Betweenness Growth')
    plt.plot(analysis_results[scenario]["Demand Growth - Total Investment Length"], '-.', color='red', label='Demand Growth')
    if scenario != "no_ltn_scenario":
        plt.plot(analysis_results[scenario]["Demand LTN Priority Growth - Total Investment Length"], ':', color='green', label='Demand LTN Priority Growth')
        plt.plot(analysis_results[scenario]["Betweenness LTN Priority Growth - Total Investment Length"], '-', color='purple', label='Betweenness LTN Priority Growth')

    plt.xlabel('Investment Iteration')
    plt.ylabel('Total Investment Cost (Meters)')
    plt.title(f'Total Investment Cost per Growth Strategy for {scenario} - {placeid}')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    output_path = os.path.join(PATH["plots"], placeid, scenario, "total_investment_cost.png")
    plt.savefig(output_path, dpi=300)
    plt.close()


Updated investment cost results for no_ltn_scenario in newcastle
Updated investment cost results for current_ltn_scenario in newcastle
Updated investment cost results for more_ltn_scenario in newcastle


In [19]:
for scenario in params["scenarios"]:
    analysis_res_pickle = analysis_res_pickle_paths[scenario]
    analysis_res_json = analysis_res_json_paths[scenario]
    analysis_results[scenario] = utils.load_results(analysis_res_pickle)

    if rerun or not any(k.endswith("Deviation from Random - Total Investment Length") for k in analysis_results[scenario]):
        baseline = analysis_results[scenario]["Random Growth (mean) - Total Investment Length"]
        results_list = []

        # Compute deviations from random baseline
        results_list.append(("Betweenness Growth - Deviation from Random - Total Investment Length",
            utils.compute_abs_deviation(analysis_results[scenario]["Betweenness Growth - Total Investment Length"], baseline)))
        results_list.append(("Demand Growth - Deviation from Random - Total Investment Length",
            utils.compute_abs_deviation(analysis_results[scenario]["Demand Growth - Total Investment Length"], baseline)))
        random_keys = [key for key in analysis_results[scenario] if key.startswith("Random Run") and "Total Investment Length" in key]
        random_runs = [analysis_results[scenario][key] for key in random_keys]
        random_deviations = [utils.compute_abs_deviation(run, baseline) for run in random_runs]
        mean_random_dev = np.mean(random_deviations, axis=0).tolist()
        results_list.append(("Random Growth (mean) - Deviation from Random - Total Investment Length", mean_random_dev))

        if scenario != "no_ltn_scenario":
            results_list.append((
                "Demand LTN Priority Growth - Deviation from Random - Total Investment Length",
                utils.compute_abs_deviation(analysis_results[scenario]["Demand LTN Priority Growth - Total Investment Length"], baseline)))
            results_list.append((
                "Betweenness LTN Priority Growth - Deviation from Random - Total Investment Length",
                utils.compute_abs_deviation(analysis_results[scenario]["Betweenness LTN Priority Growth - Total Investment Length"], baseline)))
        for i, dev in enumerate(random_deviations):
            results_list.append((f"Random Run {i+1} - Deviation from Random - Total Investment Length", dev))
        utils.save_results(results_list, analysis_res_pickle, analysis_res_json)
        analysis_results[scenario].update({label: data for label, data in results_list})
        print(f"Saved deviation-from-random investment cost results for {scenario} in {placeid}")






    # Plotting
    plt.figure(figsize=(10, 6))
    for key in analysis_results[scenario]:
        if key.startswith("Random Run") and "Deviation from Random - Total Investment Length" in key:
            plt.plot(analysis_results[scenario][key], color='lightgray', linewidth=1, alpha=0.4)
    plt.axhline(0, color='blue', linestyle='--', linewidth=2, label='Random Growth (mean)')

    plot_lines = [("Betweenness Growth - Deviation from Random - Total Investment Length", '-', 'orange', 'Betweenness Growth'),
        ("Demand Growth - Deviation from Random - Total Investment Length", '-.', 'red', 'Demand Growth'),]
    if scenario != "no_ltn_scenario":
        plot_lines += [ ("Demand LTN Priority Growth - Deviation from Random - Total Investment Length", ':', 'green', 'Demand LTN Priority Growth'),
                       ("Betweenness LTN Priority Growth - Deviation from Random - Total Investment Length", '-', 'purple', 'Betweenness LTN Priority Growth')]

    for key, linestyle, color, label in plot_lines:
        plt.plot(analysis_results[scenario][key], linestyle=linestyle, color=color, label=label)

    plt.xlabel('Investment Iteration')
    plt.ylabel('Deviation from Random Growth Baseline (meters × weight)')
    plt.title(f'Deviation from Random Growth Baseline (Total Investment Cost) for {scenario} - {placeid}')
    plt.legend(loc='upper right')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()

    output_path = os.path.join(PATH["plots"], placeid, scenario, "abs_dev_from_random_investment_cost.png")
    plt.savefig(output_path, dpi=300)
    plt.close()


Saved deviation-from-random investment cost results for no_ltn_scenario in newcastle
Saved deviation-from-random investment cost results for current_ltn_scenario in newcastle
Saved deviation-from-random investment cost results for more_ltn_scenario in newcastle


Find comparison between how much we need against full route lengths

Find differance between network size and required investment

In [20]:
for scenario in params["scenarios"]:
    analysis_res_pickle = analysis_res_pickle_paths[scenario]
    analysis_res_json = analysis_res_json_paths[scenario]
    analysis_results[scenario] = utils.load_results(analysis_res_pickle)

    GTs_betweenness = betweenness_results[scenario].get(placeid, {}).get("GTs", [])
    GTs_demand = demand_results[scenario].get(placeid, {}).get("GTs", [])
    if scenario != "no_ltn_scenario":
        GTs_demand_ltn_priority = demand_ltn_priority_results[scenario].get(placeid, {}).get("GTs", [])
        GTs_betweenness_ltn_priority = betweenness_ltn_priority_results[scenario].get(placeid, {}).get("GTs", [])
    random_runs = random_results[scenario].get(placeid, [])

    if rerun or "Betweenness Growth - Length Difference" not in analysis_results[scenario]:
        results_list = []

        length_diff_betweenness = utils.compute_length_difference(GTs_betweenness)
        length_diff_demand = utils.compute_length_difference(GTs_demand)
        random_run_differences = [utils.compute_length_difference(run["GTs"]) for run in random_runs]
        random_diff_mean = np.mean(random_run_differences, axis=0).tolist()

        results_list.append(("Betweenness Growth - Length Difference", length_diff_betweenness))
        results_list.append(("Demand Growth - Length Difference", length_diff_demand))
        for i, run_diff in enumerate(random_run_differences):
            results_list.append((f"Random Run {i+1} - Length Difference", run_diff))
        results_list.append(("Random Growth (mean) - Length Difference", random_diff_mean))

        if scenario != "no_ltn_scenario":
            length_diff_demand_ltn = utils.compute_length_difference(GTs_demand_ltn_priority)
            length_diff_betweenness_ltn = utils.compute_length_difference(GTs_betweenness_ltn_priority)
            results_list.append(("Demand LTN Priority Growth - Length Difference", length_diff_demand_ltn))
            results_list.append(("Betweenness LTN Priority Growth - Length Difference", length_diff_betweenness_ltn))

        utils.save_results(results_list, analysis_res_pickle, analysis_res_json)
        analysis_results[scenario] = {label: data for label, data in results_list}
        print(f"Saved length difference results for {scenario} in {placeid}")





    # Plotting
    plt.figure(figsize=(10, 6))
    for i in range(1, 100):
        key = f"Random Run {i} - Length Difference"
        if key in analysis_results[scenario]:
            plt.plot(analysis_results[scenario][key], color='lightgray', linewidth=1, alpha=0.4)
        else:
            break

    plt.plot(analysis_results[scenario]["Random Growth (mean) - Length Difference"], linestyle='--', linewidth=2, color='blue', label='Random Growth (mean)')
    plt.plot(analysis_results[scenario]["Betweenness Growth - Length Difference"], '-', color='orange', label='Betweenness Growth')
    plt.plot(analysis_results[scenario]["Demand Growth - Length Difference"], '-.', color='red', label='Demand Growth')

    if scenario != "no_ltn_scenario":
        plt.plot(analysis_results[scenario]["Demand LTN Priority Growth - Length Difference"], ':', color='green', label='Demand LTN Priority Growth')
        plt.plot(analysis_results[scenario]["Betweenness LTN Priority Growth - Length Difference"], '-', color='purple', label='Betweenness LTN Priority Growth')

    plt.xlabel("Investment Iteration")
    plt.ylabel("Length Difference (meters)")
    plt.title(f"Difference Between Total Network Size and Investment Size for {scenario} - {placeid}")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    output_path = os.path.join(PATH["plots"], placeid, scenario, "length_difference.png")
    plt.savefig(output_path, dpi=300)
    plt.close()


Saved length difference results for no_ltn_scenario in newcastle
Saved length difference results for current_ltn_scenario in newcastle
Saved length difference results for more_ltn_scenario in newcastle


In [21]:
for scenario in params["scenarios"]:
    analysis_res_pickle = analysis_res_pickle_paths[scenario]
    analysis_res_json = analysis_res_json_paths[scenario]
    analysis_results[scenario] = utils.load_results(analysis_res_pickle)

    if rerun or "Betweenness Growth - Deviation from Random Length Difference" not in analysis_results[scenario]:
        results_list = []
        baseline = np.array(analysis_results[scenario]["Random Growth (mean) - Length Difference"])
        deviation_betweenness = (np.array(analysis_results[scenario]["Betweenness Growth - Length Difference"]) - baseline).tolist()
        deviation_demand = (np.array(analysis_results[scenario]["Demand Growth - Length Difference"]) - baseline).tolist()

        results_list.append(("Betweenness Growth - Deviation from Random Length Difference", deviation_betweenness))
        results_list.append(("Demand Growth - Deviation from Random Length Difference", deviation_demand))

        if scenario != "no_ltn_scenario":
            deviation_demand_ltn = (np.array(analysis_results[scenario]["Demand LTN Priority Growth - Length Difference"]) - baseline).tolist()
            deviation_betweenness_ltn = (np.array(analysis_results[scenario]["Betweenness LTN Priority Growth - Length Difference"]) - baseline).tolist()
            results_list.append(("Demand LTN Priority Growth - Deviation from Random Length Difference", deviation_demand_ltn))
            results_list.append(("Betweenness LTN Priority Growth - Deviation from Random Length Difference", deviation_betweenness_ltn))

        utils.save_results(results_list, analysis_res_pickle, analysis_res_json)
        analysis_results[scenario].update({k: v for k, v in results_list})
        print(f"Saved deviation-from-random length difference results for {scenario} in {placeid}")



    # Plotting
    plt.figure(figsize=(10, 6))
    for i in range(1, 100):
        key = f"Random Run {i} - Length Difference"
        if key in analysis_results[scenario]:
            run_dev = np.array(analysis_results[scenario][key]) - np.array(analysis_results[scenario]["Random Growth (mean) - Length Difference"])
            plt.plot(run_dev, color='lightgray', linewidth=1, alpha=0.4)
        else:
            break
    plt.axhline(0, color='blue', linestyle='--', linewidth=2, label='Random Growth (mean)')
    plt.plot(analysis_results[scenario]["Betweenness Growth - Deviation from Random Length Difference"], '-', color='orange', label='Betweenness Growth')
    plt.plot(analysis_results[scenario]["Demand Growth - Deviation from Random Length Difference"], '-.', color='red', label='Demand Growth')
    if scenario != "no_ltn_scenario":
        plt.plot(analysis_results[scenario]["Demand LTN Priority Growth - Deviation from Random Length Difference"], ':', color='green', label='Demand LTN Priority Growth')
        plt.plot(analysis_results[scenario]["Betweenness LTN Priority Growth - Deviation from Random Length Difference"], '-', color='purple', label='Betweenness LTN Priority Growth')
    plt.xlabel("Investment Iteration")
    plt.ylabel("Deviation from Random (meters)")
    plt.title(f"Deviation from Random Growth Strategy for {scenario} - {placeid}")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    output_path = os.path.join(PATH["plots"], placeid, scenario, "length_difference_deviation_from_random.png")
    plt.savefig(output_path, dpi=300)
    plt.close()

Saved deviation-from-random length difference results for no_ltn_scenario in newcastle
Saved deviation-from-random length difference results for current_ltn_scenario in newcastle
Saved deviation-from-random length difference results for more_ltn_scenario in newcastle


In [22]:
for scenario in params["scenarios"]:
    # Load paths and results for this scenario
    analysis_res_pickle = analysis_res_pickle_paths[scenario]
    analysis_res_json = analysis_res_json_paths[scenario]
    analysis_results[scenario] = utils.load_results(analysis_res_pickle)

    # Load GTs data per growth strategy
    GTs_betweenness = betweenness_results[scenario].get(placeid, {}).get("GTs", [])
    GTs_demand = demand_results[scenario].get(placeid, {}).get("GTs", [])
    if scenario != "no_ltn_scenario":
        GTs_demand_ltn_priority = demand_ltn_priority_results[scenario].get(placeid, {}).get("GTs", [])
        GTs_betweenness_ltn_priority = betweenness_ltn_priority_results[scenario].get(placeid, {}).get("GTs", [])
    random_runs = random_results[scenario].get(placeid, [])

    if rerun or 'total_lengths_vs_investment' not in analysis_results[scenario]:
        results_list = []

        # Compute total lengths and investment lengths for each growth type
        lengths_betweenness = utils.compute_total_lengths(GTs_betweenness)
        investment_betweenness = utils.compute_total_investment_lengths(GTs_betweenness, distance_cost)
        lengths_demand = utils.compute_total_lengths(GTs_demand)
        investment_demand = utils.compute_total_investment_lengths(GTs_demand, distance_cost)
        random_lengths_runs = [utils.compute_total_lengths(run["GTs"]) for run in random_runs]
        random_investment_runs = [utils.compute_total_investment_lengths(run["GTs"], distance_cost) for run in random_runs]
        random_lengths_mean = np.mean(random_lengths_runs, axis=0).tolist()
        random_investment_mean = np.mean(random_investment_runs, axis=0).tolist()
        # Append results 
        results_list.append(("Betweenness Growth - Total Length", lengths_betweenness))
        results_list.append(("Betweenness Growth - Total Investment Length", investment_betweenness))
        results_list.append(("Demand Growth - Total Length", lengths_demand))
        results_list.append(("Demand Growth - Total Investment Length", investment_demand))
        for i, (run_lengths, run_investment) in enumerate(zip(random_lengths_runs, random_investment_runs)):
            results_list.append((f"Random Run {i+1} - Total Length", run_lengths))
            results_list.append((f"Random Run {i+1} - Total Investment Length", run_investment))
        results_list.append(("Random Growth (mean) - Total Length", random_lengths_mean))
        results_list.append(("Random Growth (mean) - Total Investment Length", random_investment_mean))
        if scenario != "no_ltn_scenario":
            lengths_demand_ltn = utils.compute_total_lengths(GTs_demand_ltn_priority)
            investment_demand_ltn = utils.compute_total_investment_lengths(GTs_demand_ltn_priority, distance_cost)
            lengths_betweenness_ltn = utils.compute_total_lengths(GTs_betweenness_ltn_priority)
            investment_betweenness_ltn = utils.compute_total_investment_lengths(GTs_betweenness_ltn_priority, distance_cost)
            results_list.append(("Demand LTN Priority Growth - Total Length", lengths_demand_ltn))
            results_list.append(("Demand LTN Priority Growth - Total Investment Length", investment_demand_ltn))
            results_list.append(("Betweenness LTN Priority Growth - Total Length", lengths_betweenness_ltn))
            results_list.append(("Betweenness LTN Priority Growth - Total Investment Length", investment_betweenness_ltn))
        # Save all results
        utils.save_results(results_list, analysis_res_pickle, analysis_res_json)
        analysis_results[scenario] = {label: data for label, data in results_list}
        print(f"Updated analysis results for {scenario} in {placeid}")

    # Plotting: investment length vs total length
    plt.figure(figsize=(10, 6))
    for i in range(1, 100):
        len_key = f"Random Run {i} - Total Length"
        invest_key = f"Random Run {i} - Total Investment Length"
        if len_key in analysis_results[scenario] and invest_key in analysis_results[scenario]:
            plt.plot(analysis_results[scenario][len_key], analysis_results[scenario][invest_key], 
                     color='lightgray', linewidth=1, alpha=0.5)
        else:
            break
    plt.plot(analysis_results[scenario]['Random Growth (mean) - Total Length'],
             analysis_results[scenario]['Random Growth (mean) - Total Investment Length'],
             linestyle='--', linewidth=2, label='Random Growth (mean)', color='blue')
    plt.plot(analysis_results[scenario]['Betweenness Growth - Total Length'],
             analysis_results[scenario]['Betweenness Growth - Total Investment Length'],
             '-', label='Betweenness Growth', color='orange')
    plt.plot(analysis_results[scenario]['Demand Growth - Total Length'],
             analysis_results[scenario]['Demand Growth - Total Investment Length'],
             '-.', label='Demand Growth', color='red')
    if scenario != "no_ltn_scenario":
        # Plot LTN priority demand growth
        plt.plot(analysis_results[scenario]['Demand LTN Priority Growth - Total Length'],
                 analysis_results[scenario]['Demand LTN Priority Growth - Total Investment Length'],
                 ':', label='Demand LTN Priority Growth', color='green')

        # Plot LTN priority betweenness growth
        plt.plot(analysis_results[scenario]['Betweenness LTN Priority Growth - Total Length'],
                 analysis_results[scenario]['Betweenness LTN Priority Growth - Total Investment Length'],
                 '-', label='Betweenness LTN Priority Growth', color='purple')

    plt.xlabel('Total Length (meters)')
    plt.ylabel('Total Investment Length (meters)')
    plt.title(f'Investment Length vs Total Length for {scenario} - {placeid}')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    output_path = os.path.join(PATH["plots"], placeid, scenario, "Investment_vs_Length.png")
    plt.savefig(output_path, dpi=300)
    plt.close()


Updated analysis results for no_ltn_scenario in newcastle
Updated analysis results for current_ltn_scenario in newcastle
Updated analysis results for more_ltn_scenario in newcastle


In [23]:
for scenario in params["scenarios"]:
    # Load results
    analysis_res_pickle = analysis_res_pickle_paths[scenario]
    analysis_results = utils.load_results(analysis_res_pickle)

    # Use mean random runs as baseline
    random_lengths_mean = np.array(analysis_results[scenario]["Random Growth (mean) - Total Length"])
    random_investments_mean = np.array(analysis_results[scenario]["Random Growth (mean) - Total Investment Length"])


    # Define strategies
    strategies = {
        'Betweenness Growth': {
            'lengths': np.array(analysis_results[scenario]["Betweenness Growth - Total Length"]),
            'investments': np.array(analysis_results[scenario]["Betweenness Growth - Total Investment Length"]),
            'color': 'orange', 'marker': 'o'},
        'Demand Growth': {
            'lengths': np.array(analysis_results[scenario]["Demand Growth - Total Length"]),
            'investments': np.array(analysis_results[scenario]["Demand Growth - Total Investment Length"]),
            'color': 'red', 'marker': 's' },
        'Demand LTN Growth': {
            'lengths': np.array(analysis_results[scenario].get("Demand LTN Priority Growth - Total Length", [])),
            'investments': np.array(analysis_results[scenario].get("Demand LTN Priority Growth - Total Investment Length", [])),
            'color': 'green', 'marker': '^'},
        'Betweenness LTN Growth': {
            'lengths': np.array(analysis_results[scenario].get("Betweenness LTN Priority Growth - Total Length", [])),
            'investments': np.array(analysis_results[scenario].get("Betweenness LTN Priority Growth - Total Investment Length", [])),
            'color': 'purple', 'marker': 'D'}}

    plt.figure(figsize=(10, 6))

    # Plot individual random runs
    random_runs_lengths = analysis_results.get('random_runs_lengths_list', [])
    random_runs_investments = analysis_results.get('random_runs_investment_lengths_list', [])
    for i in range(len(random_runs_lengths)):
        run_lengths = np.array(random_runs_lengths[i])
        run_investments = np.array(random_runs_investments[i])
        plt.scatter(run_lengths - random_lengths_mean,
                    run_investments - random_investments_mean,
                    color='lightgray', alpha=0.3, s=10, label='_nolegend_')

    # Plot strategy deviations
    for label, data in strategies.items():
        if data['lengths'].size == 0 or data['investments'].size == 0:
            continue  # skip missing
        x_dev = data['lengths'] - random_lengths_mean
        y_dev = data['investments'] - random_investments_mean
        plt.scatter(x_dev, y_dev, label=label, color=data['color'], marker=data['marker'], alpha=0.8, s=50)

    # Reference lines
    plt.axhline(0, color='black', linestyle='--', linewidth=1)
    plt.axvline(0, color='black', linestyle='--', linewidth=1)

    plt.xlabel('Deviation in Total Length (m) from Random Growth (mean)')
    plt.ylabel('Deviation in Investment Length (m) from Random Growth (mean)')
    plt.title(f'Investment Cost vs Length: Deviation from Random ({scenario})')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    output_path = os.path.join(PATH["plots"], placeid, scenario, "investment_vs_length_deviation_scatter.png")
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    plt.savefig(output_path, dpi=300)
    plt.close()


KeyError: 'no_ltn_scenario'

### Distance gained

Here we are trying to find how much of the existing network is connected per iteration. 

total bike network - G_bikeall

G'investment_length' - investment size

G'length' - length of created network, not including netowrk size

need to do a compose of G_bikeall and G in GTs

but only compose where infrastucutre is connected to our generated network 

Find the length of infrastructure connected to generated network, along with the combined length. Thus we now know how much extra cycle network is connected per level of investment.

In [None]:
# Load or initialize results
analysis_results = utils.load_results(analysis_res_pickle)

if rerun or 'biketrack_lengths' not in analysis_results:
    results_list = []
    GT_lengths, biketrack_lengths, combined_lengths = utils.compute_biketrack_connected_lengths(GTs, G_biketrack)
    results_list.append(("GT Connected Lengths", GT_lengths))
    results_list.append(("Biketrack Connected Lengths", biketrack_lengths))
    results_list.append(("Combined Connected Lengths", combined_lengths))

    random_runs_GT_lengths = []
    random_runs_biketrack_lengths = []
    random_runs_combined_lengths = []
    for run in random_runs:
        gt, bike, combined = utils.compute_biketrack_connected_lengths(run["GTs"], G_biketrack)
        random_runs_GT_lengths.append(gt)
        random_runs_biketrack_lengths.append(bike)
        random_runs_combined_lengths.append(combined)
    # random runs
    for i, run_lengths in enumerate(random_runs_biketrack_lengths):
        results_list.append((f"Random Run {i+1} - Biketrack Connected Lengths", run_lengths))
    GT_random_mean = np.mean(random_runs_GT_lengths, axis=0).tolist()
    biketrack_random_mean = np.mean(random_runs_biketrack_lengths, axis=0).tolist()
    combined_random_mean = np.mean(random_runs_combined_lengths, axis=0).tolist()
    results_list.append(("GT Random Mean - Connected Lengths", GT_random_mean))
    results_list.append(("Biketrack Random Mean - Connected Lengths", biketrack_random_mean))
    results_list.append(("Combined Random Mean - Connected Lengths", combined_random_mean))

    gt, bike, combined = utils.compute_biketrack_connected_lengths(GTs_demand, G_biketrack)
    results_list.append(("GT Demand Connected Lengths", gt))
    results_list.append(("Biketrack Demand Connected Lengths", bike))
    results_list.append(("Combined Demand Connected Lengths", combined))

    gt, bike, combined = utils.compute_biketrack_connected_lengths(GTs_demand_ltn_priority, G_biketrack)
    results_list.append(("GT Demand LTN Priority Connected Lengths", gt))
    results_list.append(("Biketrack Demand LTN Priority Connected Lengths", bike))
    results_list.append(("Combined Demand LTN Priority Connected Lengths", combined))

    gt, bike, combined = utils.compute_biketrack_connected_lengths(GTs_betweenness_ltn_priority, G_biketrack)
    results_list.append(("GT Betweenness LTN Priority Connected Lengths", gt))
    results_list.append(("Biketrack Betweenness LTN Priority Connected Lengths", bike))
    results_list.append(("Combined Betweenness LTN Priority Connected Lengths", combined))

    # Save
    utils.save_results(results_list, analysis_res_pickle, analysis_res_csv)
    analysis_results = {label: data for label, data in results_list}
    print(f"Updated biketrack connected length analysis results for {placeid}")










# --- Plotting ---
plt.figure(figsize=(10, 6))

# Plot random runs
for i in range(1, 100):
    key = f"Random Run {i} - Biketrack Connected Lengths"
    if key in analysis_results:
        plt.plot(analysis_results[key], color='lightgray', linewidth=1, alpha=0.5)
    else:
        break

# Plot means and strategies
plt.plot(analysis_results["Biketrack Random Mean - Connected Lengths"], '--', color='blue', linewidth=2, label="Random Growth (mean)")
plt.plot(analysis_results["Biketrack Connected Lengths"], '-', color='orange', label="Betweenness")
plt.plot(analysis_results["Biketrack Demand Connected Lengths"], '-.', color='red', label="Demand")
plt.plot(analysis_results["Biketrack Demand LTN Priority Connected Lengths"], ':', color='green', label="Demand LTN Priority")
plt.plot(analysis_results["Biketrack Betweenness LTN Priority Connected Lengths"], '-', color='purple', label="Betweenness LTN Priority")

# Finalize plot
plt.xlabel("Investment Iteration")
plt.ylabel("Additional Cycle Infrastructure Connected Length (meters)")
plt.title("Additional Cycle Infrastructure Connected per Iteration")
plt.legend()
plt.grid(True)
plt.tight_layout()

output_path = os.path.join(PATH["plots"], placeid, "additional_cyclenet_connected.png")
plt.savefig(output_path, dpi=300)
plt.close()


In [None]:
analysis_results = load_results(analysis_res_pickle)

if rerun or 'biketrack_lengths' not in analysis_results:
    analysis_results.update({
        'GT_lengths': compute_biketrack_connected_lengths(GTs, G_biketrack)[0],
        'biketrack_lengths': compute_biketrack_connected_lengths(GTs, G_biketrack)[1],
        'combined_lengths': compute_biketrack_connected_lengths(GTs, G_biketrack)[2],

        'random_runs_GT_lengths': [compute_biketrack_connected_lengths(run["GTs"], G_biketrack)[0] for run in random_runs],
        'random_runs_biketrack_lengths': [compute_biketrack_connected_lengths(run["GTs"], G_biketrack)[1] for run in random_runs],
        'random_runs_combined_lengths': [compute_biketrack_connected_lengths(run["GTs"], G_biketrack)[2] for run in random_runs],

        'GT_random_lengths': np.mean(
            [compute_biketrack_connected_lengths(run["GTs"], G_biketrack)[0] for run in random_runs],
            axis=0
        ).tolist(),
        'biketrack_random_lengths': np.mean(
            [compute_biketrack_connected_lengths(run["GTs"], G_biketrack)[1] for run in random_runs],
            axis=0
        ).tolist(),
        'combined_random_lengths': np.mean(
            [compute_biketrack_connected_lengths(run["GTs"], G_biketrack)[2] for run in random_runs],
            axis=0
        ).tolist(),

        'GT_demand_lengths': compute_biketrack_connected_lengths(GTs_demand, G_biketrack)[0],
        'biketrack_demand_lengths': compute_biketrack_connected_lengths(GTs_demand, G_biketrack)[1],
        'combined_demand_lengths': compute_biketrack_connected_lengths(GTs_demand, G_biketrack)[2],

        'GT_demand_lengths_ltn_priority': compute_biketrack_connected_lengths(GTs_demand_ltn_priority, G_biketrack)[0],
        'biketrack_demand_lengths_ltn_priority': compute_biketrack_connected_lengths(GTs_demand_ltn_priority, G_biketrack)[1],
        'combined_demand_lengths_ltn_priority': compute_biketrack_connected_lengths(GTs_demand_ltn_priority, G_biketrack)[2],

        'GT_betweenness_lengths_ltn_priority': compute_biketrack_connected_lengths(GTs_betweenness_ltn_priority, G_biketrack)[0],
        'biketrack_betweenness_lengths_ltn_priority': compute_biketrack_connected_lengths(GTs_betweenness_ltn_priority, G_biketrack)[1],
        'combined_betweenness_lengths_ltn_priority': compute_biketrack_connected_lengths(GTs_betweenness_ltn_priority, G_biketrack)[2],
    })

    save_results(analysis_results, analysis_res_pickle, analysis_res_csv)

plt.figure(figsize=(10, 6))

# Plot individual random runs
for run_lengths in analysis_results['random_runs_biketrack_lengths']:
    plt.plot(run_lengths, color='lightgray', linewidth=1, alpha=0.5)

# Plot mean of random runs
plt.plot(analysis_results['biketrack_random_lengths'], '--', color='blue', linewidth=2, label="Random Growth (mean)")

# Other strategies
plt.plot(analysis_results['biketrack_lengths'], '-', color='orange', label="Betweenness")
plt.plot(analysis_results['biketrack_demand_lengths'], '-.', color='red', label="Demand")
plt.plot(analysis_results['biketrack_demand_lengths_ltn_priority'], ':', color='green', label="Demand LTN Priority")
plt.plot(analysis_results['biketrack_betweenness_lengths_ltn_priority'], '-', color='purple', label="Betweenness LTN Priority")

plt.xlabel("Investment Iteration")
plt.ylabel("Additional Cycle Infrastructure Connected Length (meters)")
plt.title("Additional Cycle Infrastructure Connected per Iteration")
plt.legend()
plt.grid(True)
plt.tight_layout()

output_path = os.path.join(PATH["plots"], placeid, "additional_cyclenet_connected.png")
plt.savefig(output_path, dpi=300)
plt.show()


In [None]:
analysis_results = load_results(analysis_res_pickle)
if rerun or 'biketrack_deviation_from_random' not in analysis_results:

    random_runs = analysis_results['random_runs_biketrack_lengths']
    random_mean = np.mean(random_runs, axis=0)

    # Deviation of each random run from mean
    random_runs_deviations = [np.array(run) - random_mean for run in random_runs]
    random_deviations_mean = np.mean(random_runs_deviations, axis=0).tolist()

    biketrack_dev_data = {
        'dev_betweenness': compute_abs_deviation(
            analysis_results['biketrack_lengths'], random_mean
        ),
        'dev_demand': compute_abs_deviation(
            analysis_results['biketrack_demand_lengths'], random_mean
        ),
        'dev_demand_ltn': compute_abs_deviation(
            analysis_results['biketrack_demand_lengths_ltn_priority'], random_mean
        ),
        'dev_betweenness_ltn': compute_abs_deviation(
            analysis_results['biketrack_betweenness_lengths_ltn_priority'], random_mean
        ),
        'random_runs_deviations': [d.tolist() for d in random_runs_deviations],
        'random_deviations_mean': random_deviations_mean
    }

    analysis_results['biketrack_deviation_from_random'] = biketrack_dev_data
    save_results(analysis_results, analysis_res_pickle, analysis_res_csv)



# Plot deviation from random for biketrack-connected lengths
plt.figure(figsize=(10, 6))
for dev in analysis_results['biketrack_deviation_from_random']['random_runs_deviations']:
    plt.plot(dev, color='lightgray', linewidth=1, alpha=0.4)
plt.axhline(0, color='blue', linestyle='--', linewidth=2, label='Random Growth (mean)')
for key, style in [
    ('dev_betweenness', ('-', 'orange', 'Betweenness Growth')),
    ('dev_demand', ('-.', 'red', 'Demand Growth')),
    ('dev_demand_ltn', (':', 'green', 'Demand LTN Priority Growth')),
    ('dev_betweenness_ltn', ('-', 'purple', 'Betweenness LTN Priority Growth')),
]:
    plt.plot(
        analysis_results['biketrack_deviation_from_random'][key],
        linestyle=style[0], color=style[1], label=style[2]
    )

plt.xlabel("Investment Iteration")
plt.ylabel("Deviation from Random Growth Baseline (meters)")
plt.title("Deviation in Connected Biketrack Length from Random Growth Baseline")
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()

output_path = os.path.join(PATH["plots"], placeid, "biketrack_connected__deviation_from_random.png")
plt.savefig(output_path, dpi=300)
plt.show()


In [None]:
# # Deviation from Random: Biketrack connected lengths
# random_baseline = np.array(analysis_results['biketrack_random_lengths'])

# # Prepare deviations from random for each strategy
# deviations = {
#     'Betweenness': {
#         'values': np.array(analysis_results['biketrack_lengths']) - random_baseline,
#         'color': 'orange',
#         'linestyle': '--'
#     },
#     'Demand': {
#         'values': np.array(analysis_results['biketrack_demand_lengths']) - random_baseline,
#         'color': 'red',
#         'linestyle': '-.'
#     },
#     'Demand LTN': {
#         'values': np.array(analysis_results['biketrack_demand_lengths_ltn_priority']) - random_baseline,
#         'color': 'green',
#         'linestyle': '-.'
#     },
#     'Betweenness LTN': {
#         'values': np.array(analysis_results['biketrack_betweenness_lengths_ltn_priority']) - random_baseline,
#         'color': 'purple',
#         'linestyle': '-'
#     }
# }

# # Plot
# plt.figure(figsize=(10, 6))
# for label, data in deviations.items():
#     plt.plot(
#         range(1, len(data['values']) + 1),
#         data['values'],
#         linestyle=data['linestyle'],
#         color=data['color'],
#         label=label
#     )

# plt.axhline(0, color='black', linestyle='--', linewidth=1)
# plt.xlabel("Iteration")
# plt.ylabel("Deviation in Connected Length vs Random (meters)")
# plt.title("Additional Cycle Infrastructure Connected — Deviation from Random Growth (Baseline)")
# plt.legend()
# plt.grid(True)
# plt.tight_layout()

# # Save
# output_path = PATH["plots"] + f"/{placeid}/additional_cyclenet_connected__deviation_from_random.png"
# plt.savefig(output_path, dpi=300)
# plt.show()


### Connected Components 

Find the length of the largest connected component, first a just our investment, then combined with existing network, then by combined but only where its connected

In [None]:
## this computes LCC in the "typical" way by measuring componet size by number of nodes

# if os.path.exists(analysis_res_pickle):
#     with open(analysis_res_pickle, 'rb') as f:
#         analysis_results = pickle.load(f)
# else:
#     analysis_results = {}

# if rerun or 'lcc_lengths_GTs' not in analysis_results:
#     # Compute LCC lengths
#     lcc_data = {
#         'lcc_lengths_GTs': [
#             sum(data['length'] for _, _, data in 
#                 G.subgraph(max(nx.weakly_connected_components(G), key=len)).edges(data=True))
#             for G in GTs
#         ],
#         'lcc_lengths_GTs_random': [
#             sum(data['length'] for _, _, data in 
#                 G.subgraph(max(nx.weakly_connected_components(G), key=len)).edges(data=True))
#             for G in GTs_random
#         ],
#         'lcc_lengths_GTs_demand': [
#             sum(data['length'] for _, _, data in 
#                 G.subgraph(max(nx.weakly_connected_components(G), key=len)).edges(data=True))
#             for G in GTs_demand
#         ],
#         'lcc_lengths_GTs_demand_ltn_priority': [
#             sum(data['length'] for _, _, data in 
#                 G.subgraph(max(nx.weakly_connected_components(G), key=len)).edges(data=True))
#             for G in GTs_demand_ltn_priority
#         ],
#         'lcc_lengths_GTs_betweenness_ltn_priority': [
#             sum(data['length'] for _, _, data in 
#                 G.subgraph(max(nx.weakly_connected_components(G), key=len)).edges(data=True))
#             for G in GTs_betweenness_ltn_priority
#         ]
#     }

#     analysis_results.update(lcc_data)

#     with open(analysis_res_pickle, 'wb') as f:
#         pickle.dump(analysis_results, f)
#     pd.DataFrame({k: pd.Series(v) for k,v in analysis_results.items()}) \
#       .to_csv(analysis_res_csv, index=False)

# # Plot LCC lengths
# plt.figure(figsize=(10, 6))
# plt.plot(
#     analysis_results['lcc_lengths_GTs'], 
#     '--', color='orange', label='Betweeness Growth'
# )
# plt.plot(
#     analysis_results['lcc_lengths_GTs_random'], 
#     '-', color='blue', label='Random Growth'
# )

# plt.plot(
#     analysis_results['lcc_lengths_GTs_demand'],
#     '-.', color='red', label='Demand Growth'
# )
# plt.plot(
#     analysis_results['lcc_lengths_GTs_demand_ltn_priority'],
#     '-.', color='green', label='Demand LTN Growth'
# )
# plt.plot(
#     analysis_results['lcc_lengths_GTs_betweenness_ltn_priority'],
#     '-', color='purple', label='Betweenness LTN Growth'
# )
# plt.xlabel('Investment Iteration')
# plt.ylabel('Length (meters)')
# plt.title('Size of Largest Connected Component per Iteration')
# plt.legend()
# plt.grid(True, alpha=0.3)
# plt.tight_layout()
# output_path = PATH["plots"] + "/" + placeid + "/size_of_lcc.png"
# plt.savefig(output_path, dpi=300)


# plt.show()


In [None]:
if rerun or 'lcc_lengths' not in analysis_results:
    lcc_data = {
        'lcc_lengths': [get_longest_connected_components(G) for G in GTs],
        'random_runs_lcc_lengths': [
            [get_longest_connected_components(G) for G in run["GTs"]] for run in random_runs
        ],
        'demand_lcc_lengths': [get_longest_connected_components(G) for G in GTs_demand],
        'demand_lcc_lengths_ltn_priority': [get_longest_connected_components(G) for G in GTs_demand_ltn_priority],
        'betweenness_lcc_lengths_ltn_priority': [get_longest_connected_components(G) for G in GTs_betweenness_ltn_priority]
    }

    # Compute mean random lcc lengths across runs and graphs
    lcc_data['random_lcc_lengths_mean'] = np.mean(
    lcc_data['random_runs_lcc_lengths'], axis=0
    ).tolist()

    analysis_results.update(lcc_data)
    save_results(analysis_results, analysis_res_pickle, analysis_res_csv)


plt.figure(figsize=(10, 6))

for run in analysis_results['random_runs_lcc_lengths']:
    plt.plot(run, color='lightgray', linewidth=1, alpha=0.4)

plt.plot(analysis_results['random_lcc_lengths_mean'], linestyle='--', linewidth=2, label='Random Growth (mean)', color='blue')
plt.plot(analysis_results['lcc_lengths'], '-', label='Betweenness Growth', color='orange')
plt.plot(analysis_results['demand_lcc_lengths'], '-.', label='Demand Growth', color='red')
plt.plot(analysis_results['demand_lcc_lengths_ltn_priority'], ':', label='Demand LTN Priority Growth', color='green')
plt.plot(analysis_results['betweenness_lcc_lengths_ltn_priority'], '-', label='Betweenness LTN Priority Growth', color='purple')

plt.xlabel('Investment Iteration')
plt.ylabel('LCC Length (meters)')
plt.title('Size of Largest Connected Component per Iteration')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()

output_path = os.path.join(PATH["plots"], placeid, "size_of_lcc.png")
plt.savefig(output_path, dpi=300)
plt.show()


In [None]:

# ## largest connected component is calculated as longest (length of edges) connected component
# # this is because we are interested in how far a cyclist can travel, rather than the numeber of nodes

# if os.path.exists(analysis_res_pickle):
#     with open(analysis_res_pickle, 'rb') as f:
#         analysis_results = pickle.load(f)
# else:
#     analysis_results = {}

# if rerun or 'lcc_lengths_GTs' not in analysis_results:
#     # Compute LCC lengths
#         lcc_data = {
#             'lcc_lengths_GTs': [
#                 get_longest_connected_components(G) for G in GTs
#             ],
#             'lcc_lengths_GTs_random': [
#                 get_longest_connected_components(G) for G in GTs_random
#             ],
#             'lcc_lengths_GTs_demand': [
#                 get_longest_connected_components(G) for G in GTs_demand
#             ],
#             'lcc_lengths_GTs_demand_ltn_priority': [
#                 get_longest_connected_components(G) for G in GTs_demand_ltn_priority
#             ],
#             'lcc_lengths_GTs_betweenness_ltn_priority': [
#                 get_longest_connected_components(G) for G in GTs_betweenness_ltn_priority
#             ]
#         }


# analysis_results.update(lcc_data)

# with open(analysis_res_pickle, 'wb') as f:
#     pickle.dump(analysis_results, f)
# pd.DataFrame({k: pd.Series(v) for k,v in analysis_results.items()}) \
#     .to_csv(analysis_res_csv, index=False)

# # Plot LCC lengths
# plt.figure(figsize=(10, 6))
# plt.plot(
#     analysis_results['lcc_lengths_GTs'], 
#     '--', color='orange', label='Betweeness Growth'
# )
# plt.plot(
#     analysis_results['lcc_lengths_GTs_random'], 
#     '-', color='blue', label='Random Growth'
# )

# plt.plot(
#     analysis_results['lcc_lengths_GTs_demand'],
#     '-.', color='red', label='Demand Growth'
# )
# plt.plot(
#     analysis_results['lcc_lengths_GTs_demand_ltn_priority'],
#     '-.', color='green', label='Demand LTN Growth'
# )
# plt.plot(
#     analysis_results['lcc_lengths_GTs_betweenness_ltn_priority'],
#     '-', color='purple', label='Betweenness LTN Growth'
# )
# plt.xlabel('Investment Iteration')
# plt.ylabel('Length (meters)')
# plt.title('Size of Largest Connected Component per Iteration')
# plt.legend()
# plt.grid(True, alpha=0.3)
# plt.tight_layout()
# output_path = PATH["plots"] + "/" + placeid + "/size_of_lcc.png"
# plt.savefig(output_path, dpi=300)


# plt.show()


In [None]:
analysis_results = load_results(analysis_res_pickle)
if rerun or 'lcc_deviation_from_random' not in analysis_results:
    random_runs = analysis_results['random_runs_lcc_lengths']
    random_mean = np.array(analysis_results['random_lcc_lengths_mean'])

    # Deviation of each random run from the mean
    random_runs_deviations = [np.array(run) - random_mean for run in random_runs]
    random_deviations_mean = np.mean(random_runs_deviations, axis=0).tolist()

    lcc_dev_data = {
        'dev_betweenness': np.array(analysis_results['lcc_lengths']) - random_mean,
        'dev_demand': np.array(analysis_results['demand_lcc_lengths']) - random_mean,
        'dev_demand_ltn': np.array(analysis_results['demand_lcc_lengths_ltn_priority']) - random_mean,
        'dev_betweenness_ltn': np.array(analysis_results['betweenness_lcc_lengths_ltn_priority']) - random_mean,
        'random_runs_deviations': [d.tolist() for d in random_runs_deviations],
        'random_deviations_mean': random_deviations_mean
    }

    analysis_results['lcc_deviation_from_random'] = lcc_dev_data
    save_results(analysis_results, analysis_res_pickle, analysis_res_csv)

# --- Plotting ---
plt.figure(figsize=(10, 6))

# Gray lines: deviation of each random run from the mean
for dev in analysis_results['lcc_deviation_from_random']['random_runs_deviations']:
    plt.plot(dev, color='lightgray', linewidth=1, alpha=0.4)

# Dashed blue line at 0 deviation
plt.axhline(0, color='blue', linestyle='--', linewidth=2, label='Random Growth (mean)')

# Plot deviations for each strategy
for key, style in [
    ('dev_betweenness', ('-', 'orange', 'Betweenness Growth')),
    ('dev_demand', ('-.', 'red', 'Demand Growth')),
    ('dev_demand_ltn', (':', 'green', 'Demand LTN Priority Growth')),
    ('dev_betweenness_ltn', ('-', 'purple', 'Betweenness LTN Priority Growth')),
]:
    plt.plot(
        analysis_results['lcc_deviation_from_random'][key],
        linestyle=style[0], color=style[1], label=style[2]
    )

plt.xlabel("Investment Iteration")
plt.ylabel("Deviation from Random Growth Baseline (meters)")
plt.title("Deviation in LCC Length from Random Growth Baseline")
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()

output_path = os.path.join(PATH["plots"], placeid, "lcc_length__deviation_from_random.png")
plt.savefig(output_path, dpi=300)
plt.show()


In [None]:
# ## compared to random baseline
# #  Load previous results
# if os.path.exists(analysis_res_pickle):
#     with open(analysis_res_pickle, 'rb') as f:
#         analysis_results = pickle.load(f)
# else:
#     analysis_results = {}

# # Recalculate if needed
# if rerun or 'lcc_lengths_GTs' not in analysis_results:
#     lcc_data = {
#         'lcc_lengths_GTs': [
#             get_longest_connected_components(G) for G in GTs
#         ],
#         'lcc_lengths_GTs_random': [
#             get_longest_connected_components(G) for G in GTs_random
#         ],
#         'lcc_lengths_GTs_demand': [
#             get_longest_connected_components(G) for G in GTs_demand
#         ],
#         'lcc_lengths_GTs_demand_ltn_priority': [
#             get_longest_connected_components(G) for G in GTs_demand_ltn_priority
#         ],
#         'lcc_lengths_GTs_betweenness_ltn_priority': [
#             get_longest_connected_components(G) for G in GTs_betweenness_ltn_priority
#         ]
#     }
#     analysis_results.update(lcc_data)
#     with open(analysis_res_pickle, 'wb') as f:
#         pickle.dump(analysis_results, f)
#     pd.DataFrame({k: pd.Series(v) for k,v in analysis_results.items()}) \
#         .to_csv(analysis_res_csv, index=False)

# # Calculate deviation from random
# random_lcc = np.array(analysis_results['lcc_lengths_GTs_random'])

# lcc_deviations = {
#     'Betweenness': {
#         'values': np.array(analysis_results['lcc_lengths_GTs']) - random_lcc,
#         'color': 'orange',
#         'linestyle': '--'
#     },
#     'Demand': {
#         'values': np.array(analysis_results['lcc_lengths_GTs_demand']) - random_lcc,
#         'color': 'red',
#         'linestyle': '-.'
#     },
#     'Demand LTN': {
#         'values': np.array(analysis_results['lcc_lengths_GTs_demand_ltn_priority']) - random_lcc,
#         'color': 'green',
#         'linestyle': '-.'
#     },
#     'Betweenness LTN': {
#         'values': np.array(analysis_results['lcc_lengths_GTs_betweenness_ltn_priority']) - random_lcc,
#         'color': 'purple',
#         'linestyle': '-'
#     }
# }

# # Plot
# plt.figure(figsize=(10, 6))
# for label, data in lcc_deviations.items():
#     plt.plot(
#         range(1, len(data['values']) + 1),
#         data['values'],
#         linestyle=data['linestyle'],
#         color=data['color'],
#         label=label
#     )

# plt.axhline(0, color='black', linestyle='--', linewidth=1)
# plt.xlabel('Investment Iteration')
# plt.ylabel('Deviation in LCC Length vs Random (meters)')
# plt.title('Size of Largest Connected Component — Deviation from Random Growth (Baseline)')
# plt.legend()
# plt.grid(True, alpha=0.3)
# plt.tight_layout()

# # Save
# output_path = PATH["plots"] + f"/{placeid}/size_of_lcc__deviation_from_random.png"
# plt.savefig(output_path, dpi=300)
# plt.show()


In [None]:
# # def compute_lcc_lengths(graph_list, G_biketrack):
# #     """Computes the total length of the largest connected component for each graph in the list."""
# #     total_lengths_lcc = []
    
# #     for G in graph_list:
# #         # Compose graphs and find largest connected component
# #         merged = nx.compose(G, G_biketrack)
        
# #         # Get weakly connected components (works for both directed/undirected graphs)
# #         components = list(nx.weakly_connected_components(merged))
        
# #         if not components:
# #             total_length = 0.0  # Handle empty graph case
# #         else:
# #             # Find largest component by node count
# #             largest_component_nodes = max(components, key=len)
# #             largest_component = merged.subgraph(largest_component_nodes)
            
# #             # Calculate total edge length in the largest component
# #             total_length = sum(data['length'] for u, v, data in largest_component.edges(data=True))
        
# #         total_lengths_lcc.append(total_length)
    
# #     return total_lengths_lcc

# # # Compute LCC lengths for GTs and GTs_random
# # total_lengths_lcc_GTs = compute_lcc_lengths(GTs, G_biketrack)
# # total_lengths_lcc_GTs_random = compute_lcc_lengths(GTs_random, G_biketrack)

# # # Create the plot
# # plt.figure(figsize=(10, 6))

# # # Plot GTs
# # plt.plot(
# #     range(len(total_lengths_lcc_GTs)), total_lengths_lcc_GTs, linestyle='-', color='blue', label="GTs"
# # )

# # # Plot GTs_random
# # plt.plot(
# #     range(len(total_lengths_lcc_GTs_random)), total_lengths_lcc_GTs_random, linestyle='--', color='orange', label="GTs_random"
# # )

# # # Labels and title
# # plt.title('Total Length of Largest Connected Component')
# # plt.xlabel('Graph Index')
# # plt.ylabel('Total Length (meters)')
# # plt.grid(True, axis='y', alpha=0.3)
# # plt.legend()
# # plt.tight_layout()
# # plt.show()

# if os.path.exists(analysis_res_pickle):
#     with open(analysis_res_pickle, 'rb') as f:
#         analysis_results = pickle.load(f)
# else:
#     analysis_results = {}

# if rerun or 'composite_lcc_GTs' not in analysis_results:
#     def compute_lcc_lengths(graph_list, G_biketrack):
#         total_lengths_lcc = []
#         for G in graph_list:
#             merged = nx.compose(G, G_biketrack)
#             components = list(nx.weakly_connected_components(merged))
#             max_length = 0.0
#             for comp in components:
#                 subgraph = merged.subgraph(comp)
#                 total_length = sum(data.get('length', 0) for _, _, data in subgraph.edges(data=True))
#                 if total_length > max_length:
#                     max_length = total_length
#             total_lengths_lcc.append(max_length)
#         return total_lengths_lcc

#     composite_data = {
#         'composite_lcc_GTs': compute_lcc_lengths(GTs, G_biketrack),
#         'composite_lcc_GTs_random': compute_lcc_lengths(GTs_random, G_biketrack),
#         'composite_lcc_GTs_demand': compute_lcc_lengths(GTs_demand, G_biketrack),
#         'composite_lcc_GTs_demand_ltn_priority': compute_lcc_lengths(GTs_demand_ltn_priority, G_biketrack),
#         'composite_lcc_GTs_betweenness_ltn_priority': compute_lcc_lengths(GTs_betweenness_ltn_priority, G_biketrack)
#     }

#     analysis_results.update(composite_data)
    
#     with open(analysis_res_pickle, 'wb') as f:
#         pickle.dump(analysis_results, f)
#     df = pd.DataFrame({k: pd.Series(v) for k, v in analysis_results.items()})
#     df.to_csv(analysis_res_csv, index=False)

# # Plot composite LCC results
# plt.figure(figsize=(10, 6))
# plt.plot(
#     analysis_results['composite_lcc_GTs'],
#     '--', color='orange',
#     label='Betweenness Growth (with Bike Track)'
# )
# plt.plot(
#     analysis_results['composite_lcc_GTs_random'],
#     '-', color='blue',
#     label='Random Growth (with Bike Track)'
# )
# plt.plot(
#     analysis_results['composite_lcc_GTs_demand'],
#     '-.', color='red',
#     label='Demand Growth (with Bike Track)'
# )
# plt.plot(
#     analysis_results['composite_lcc_GTs_demand_ltn_priority'],
#     '-.', color='green',
#     label='Demand LTN Growth (with Bike Track)'
# )
# plt.plot(
#     analysis_results['composite_lcc_GTs_betweenness_ltn_priority'],
#     '-', color='purple',
#     label='Betweenness LTN Growth (with Bike Track)'
# )

# plt.title('Largest Connected Component (Including Bike Network)')
# plt.xlabel('Investment Iteration')
# plt.ylabel('Total Length (meters)')
# plt.grid(True, axis='y', alpha=0.3)
# plt.legend()
# plt.tight_layout()
# output_path = PATH["plots"] + "/" + placeid + "/lengthof_lcc_inc_cyclenet.png"
# plt.savefig(output_path, dpi=300)


# plt.show()


In [None]:
# def compute_lcc_lengths(graph_list, G_biketrack):
#     """Computes the total length of the largest connected component for each graph in the list."""
#     total_lengths_lcc = []
    
#     for G in graph_list:
#         # Compose graphs and find largest connected component
#         merged = nx.compose(G, G_biketrack)
        
#         # Get weakly connected components (works for both directed/undirected graphs)
#         components = list(nx.weakly_connected_components(merged))
        
#         if not components:
#             total_length = 0.0  # Handle empty graph case
#         else:
#             # Find largest component by node count
#             largest_component_nodes = max(components, key=len)
#             largest_component = merged.subgraph(largest_component_nodes)
            
#             # Calculate total edge length in the largest component
#             total_length = sum(data['length'] for u, v, data in largest_component.edges(data=True))
        
#         total_lengths_lcc.append(total_length)
    
#     return total_lengths_lcc

# # Compute LCC lengths for GTs and GTs_random
# total_lengths_lcc_GTs = compute_lcc_lengths(GTs, G_biketrack)
# total_lengths_lcc_GTs_random = compute_lcc_lengths(GTs_random, G_biketrack)

# # Create the plot
# plt.figure(figsize=(10, 6))

# # Plot GTs
# plt.plot(
#     range(len(total_lengths_lcc_GTs)), total_lengths_lcc_GTs, linestyle='-', color='blue', label="GTs"
# )

# # Plot GTs_random
# plt.plot(
#     range(len(total_lengths_lcc_GTs_random)), total_lengths_lcc_GTs_random, linestyle='--', color='orange', label="GTs_random"
# )

# # Labels and title
# plt.title('Total Length of Largest Connected Component')
# plt.xlabel('Graph Index')
# plt.ylabel('Total Length (meters)')
# plt.grid(True, axis='y', alpha=0.3)
# plt.legend()
# plt.tight_layout()
# plt.show()



# --- Load or init results ---
analysis_results = load_results(analysis_res_pickle)
if rerun or 'composite_lcc_lengths' not in analysis_results:
    composite_lcc_data = {
    'composite_lcc_lengths': [get_composite_lcc_length(G, G_biketrack) for G in GTs],
    'random_runs_composite_lcc_lengths': [
        [get_composite_lcc_length(G, G_biketrack) for G in run['GTs']] for run in random_runs
    ],
    'composite_lcc_lengths_demand': [get_composite_lcc_length(G, G_biketrack) for G in GTs_demand],
    'composite_lcc_lengths_demand_ltn_priority': [get_composite_lcc_length(G, G_biketrack) for G in GTs_demand_ltn_priority],
    'composite_lcc_lengths_betweenness_ltn_priority': [get_composite_lcc_length(G, G_biketrack) for G in GTs_betweenness_ltn_priority]
    }

    composite_lcc_data['random_composite_lcc_lengths_mean'] = np.mean(
        composite_lcc_data['random_runs_composite_lcc_lengths'], axis=0
    ).tolist()

    analysis_results.update(composite_lcc_data)
    save_results(analysis_results, analysis_res_pickle, analysis_res_csv)


# --- Plot ---
plt.figure(figsize=(10, 6))

for run in analysis_results['random_runs_composite_lcc_lengths']:
    plt.plot(run, color='lightgray', linewidth=1, alpha=0.4)

plt.plot(
    analysis_results['random_composite_lcc_lengths_mean'],
    linestyle='--', linewidth=2, label='Random Growth (mean)', color='blue'
)
plt.plot(
    analysis_results['composite_lcc_lengths'],
    '-', label='Betweenness Growth', color='orange'
)
plt.plot(
    analysis_results['composite_lcc_lengths_demand'],
    '-.', label='Demand Growth', color='red'
)
plt.plot(
    analysis_results['composite_lcc_lengths_demand_ltn_priority'],
    ':', label='Demand LTN Priority Growth', color='green'
)
plt.plot(
    analysis_results['composite_lcc_lengths_betweenness_ltn_priority'],
    '-', label='Betweenness LTN Priority Growth', color='purple'
)
plt.xlabel('Investment Iteration')
plt.ylabel('LCC Length (meters)')
plt.title('Size of Largest Connected Component Including Bike Network per Iteration')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()

output_path = os.path.join(PATH["plots"], placeid, "size_of_composite_lcc.png")
plt.savefig(output_path, dpi=300)
plt.show()


In [None]:
analysis_results = load_results(analysis_res_pickle)

# Extract baseline and strategies
random_runs_array = np.array(analysis_results['random_runs_composite_lcc_lengths'])
random_lcc_mean = np.array(analysis_results['random_composite_lcc_lengths_mean'])

# Compute deviation of each strategy from mean random baseline
composite_deviations = {
    'Betweenness': np.array(analysis_results['composite_lcc_lengths']) - random_lcc_mean,
    'Demand': np.array(analysis_results['composite_lcc_lengths_demand']) - random_lcc_mean,
    'Demand LTN': np.array(analysis_results['composite_lcc_lengths_demand_ltn_priority']) - random_lcc_mean,
    'Betweenness LTN': np.array(analysis_results['composite_lcc_lengths_betweenness_ltn_priority']) - random_lcc_mean,
}

# Plotting
plt.figure(figsize=(10, 6))

# Plot baseline (mean of random runs)
plt.plot(random_lcc_mean, color='blue', linestyle='-', linewidth=2, label='Random Growth Mean Baseline')
for run in random_runs_array:
    plt.plot(run, color='lightgray', linewidth=1, alpha=0.3)

# Plot strategies relative to baseline
styles = {
    'Betweenness': ('--', 'orange'),
    'Demand': ('-.', 'red'),
    'Demand LTN': (':', 'green'),
    'Betweenness LTN': ('-', 'purple'),
}

for label, deviation in composite_deviations.items():
    absolute_values = deviation + random_lcc_mean  # Plot absolute LCC lengths
    linestyle, color = styles[label]
    plt.plot(range(1, len(absolute_values) + 1), absolute_values, linestyle=linestyle, color=color, label=label)

# Finalize plot
plt.title('Composite LCC Length Compared to Random Growth Baseline')
plt.xlabel('Investment Iteration')
plt.ylabel('Total Length (meters)')
plt.legend()
plt.grid(True, axis='y', alpha=0.3)
plt.tight_layout()

output_path = os.path.join(PATH["plots"], placeid, "lengthof_lcc_inc_cyclenet_vs_random_mean_baseline.png")
plt.savefig(output_path, dpi=300)
plt.show()


In [None]:
# if os.path.exists(analysis_res_pickle):
#     with open(analysis_res_pickle, 'rb') as f:
#         analysis_results = pickle.load(f)
# else:
#     analysis_results = {}

# # Recalculate composite LCCs if needed
# if rerun or 'composite_lcc_GTs' not in analysis_results:
#     def compute_lcc_lengths(graph_list, G_biketrack):
#         total_lengths_lcc = []
#         for G in graph_list:
#             merged = nx.compose(G, G_biketrack)
#             components = list(nx.weakly_connected_components(merged))
#             max_length = 0.0
#             for comp in components:
#                 subgraph = merged.subgraph(comp)
#                 total_length = sum(data.get('length', 0) for _, _, data in subgraph.edges(data=True))
#                 if total_length > max_length:
#                     max_length = total_length
#             total_lengths_lcc.append(max_length)
#         return total_lengths_lcc

#     composite_data = {
#         'composite_lcc_GTs': compute_lcc_lengths(GTs, G_biketrack),
#         'composite_lcc_GTs_random': compute_lcc_lengths(GTs_random, G_biketrack),
#         'composite_lcc_GTs_demand': compute_lcc_lengths(GTs_demand, G_biketrack),
#         'composite_lcc_GTs_demand_ltn_priority': compute_lcc_lengths(GTs_demand_ltn_priority, G_biketrack),
#         'composite_lcc_GTs_betweenness_ltn_priority': compute_lcc_lengths(GTs_betweenness_ltn_priority, G_biketrack)
#     }

#     analysis_results.update(composite_data)
#     with open(analysis_res_pickle, 'wb') as f:
#         pickle.dump(analysis_results, f)
#     df = pd.DataFrame({k: pd.Series(v) for k, v in analysis_results.items()})
#     df.to_csv(analysis_res_csv, index=False)

# # Compute deviation from random
# random_composite = np.array(analysis_results['composite_lcc_GTs_random'])

# composite_deviations = {
#     'Betweenness': {
#         'values': np.array(analysis_results['composite_lcc_GTs']) - random_composite,
#         'color': 'orange',
#         'linestyle': '--'
#     },
#     'Demand': {
#         'values': np.array(analysis_results['composite_lcc_GTs_demand']) - random_composite,
#         'color': 'red',
#         'linestyle': '-.'
#     },
#     'Demand LTN': {
#         'values': np.array(analysis_results['composite_lcc_GTs_demand_ltn_priority']) - random_composite,
#         'color': 'green',
#         'linestyle': '-.'
#     },
#     'Betweenness LTN': {
#         'values': np.array(analysis_results['composite_lcc_GTs_betweenness_ltn_priority']) - random_composite,
#         'color': 'purple',
#         'linestyle': '-'
#     }
# }

# # Plot deviation from random
# plt.figure(figsize=(10, 6))
# for label, data in composite_deviations.items():
#     plt.plot(
#         range(1, len(data['values']) + 1),
#         data['values'],
#         linestyle=data['linestyle'],
#         color=data['color'],
#         label=label
#     )

# plt.axhline(0, color='black', linestyle='--', linewidth=1)
# plt.title('Composite LCC Length (With Cycle Network) — Deviation from Random Growth (Baseline)')
# plt.xlabel('Investment Iteration')
# plt.ylabel('Deviation in Total Length vs Random (meters)')
# plt.legend()
# plt.grid(True, axis='y', alpha=0.3)
# plt.tight_layout()

# # Save
# output_path = PATH["plots"] + f"/{placeid}/lengthof_lcc_inc_cyclenet__deviation_from_random.png"
# plt.savefig(output_path, dpi=300)
# plt.show()


In [None]:


# # def total_length(G):
# #     """Computes total edge length in a graph."""
# #     return sum(data.get('length', 1) for _, _, data in G.edges(data=True))

# # def compute_lcc_lengths(graph_list, G_biketrack):
# #     """Computes the total length of the largest connected component for each graph in the list."""
# #     lcc_lengths = []

# #     for G in graph_list:
# #         # Find the common nodes between G and G_biketrack
# #         common_nodes = set(G.nodes) & set(G_biketrack.nodes)

# #         # If there are no common nodes, we can't compose, so we skip and set LCC length to 0
# #         if not common_nodes:
# #             lcc_lengths.append(0.0)
# #             continue
        
# #         # Create a subgraph of G_biketrack with only the common nodes
# #         G_biketrack_subgraph = G_biketrack.subgraph(common_nodes)

# #         # Merge G with the G_biketrack subgraph
# #         merged = nx.compose(G, G_biketrack_subgraph)

# #         # Find weakly connected components (works for both directed/undirected graphs)
# #         components = list(nx.weakly_connected_components(merged))

# #         if not components:
# #             total_length_lcc = 0.0  # Handle empty graph case
# #         else:
# #             # Find the largest connected component by node count
# #             largest_component_nodes = max(components, key=len)
# #             largest_component = merged.subgraph(largest_component_nodes)

# #             # Calculate total edge length in the largest component
# #             total_length_lcc = sum(data.get('length', 1) for u, v, data in largest_component.edges(data=True))

# #         lcc_lengths.append(total_length_lcc)

# #     return lcc_lengths

# # # Compute LCC lengths for both GTs and GTs_random
# # lcc_lengths_GTs = compute_lcc_lengths(GTs, G_biketrack)
# # lcc_lengths_GTs_random = compute_lcc_lengths(GTs_random, G_biketrack)

# # # Create the plot
# # plt.figure(figsize=(10, 6))

# # # Plot LCC lengths for GTs
# # plt.plot(range(1, len(lcc_lengths_GTs) + 1), lcc_lengths_GTs, linestyle='-', color='blue', label="GTs - LCC Length")

# # # Plot LCC lengths for GTs_random
# # plt.plot(range(1, len(lcc_lengths_GTs_random) + 1), lcc_lengths_GTs_random, linestyle='--', color='orange', label="GTs_random - LCC Length")

# # # Labels and title
# # plt.xlabel("Graph Index")
# # plt.ylabel("Largest Connected Component Length (meters)")
# # plt.title("Largest Connected Component Length (GTs vs. GTs_random)")
# # plt.legend()
# # plt.grid(True, alpha=0.3)

# # # Show the plot
# # plt.tight_layout()
# # plt.show()


# if os.path.exists(analysis_res_pickle):
#     with open(analysis_res_pickle, 'rb') as f:
#         analysis_results = pickle.load(f)
# else:
#     analysis_results = {}

# if rerun or 'lcc_lengths_GTs' not in analysis_results:

#     def total_length(G):
#         return sum(data.get('length', 1) for _, _, data in G.edges(data=True))  

#     def compute_lcc_lengths(graph_list, G_biketrack):
#         lcc_lengths = []
#         for G in graph_list:
#             common_nodes = set(G.nodes) & set(G_biketrack.nodes)
#             if not common_nodes:
#                 lcc_lengths.append(0.0)
#                 continue

#             G_biketrack_subgraph = G_biketrack.subgraph(common_nodes)
#             merged = nx.compose(G, G_biketrack_subgraph)
#             components = list(nx.weakly_connected_components(merged))

#             max_length = 0.0
#             for comp in components:
#                 subgraph = merged.subgraph(comp)
#                 total_length = sum(data.get('length', 1) for _, _, data in subgraph.edges(data=True))
#                 if total_length > max_length:
#                     max_length = total_length

#             lcc_lengths.append(max_length)
#         return lcc_lengths

#     lcc_data = {
#         'lcc_lengths_GTs':        compute_lcc_lengths(GTs, G_biketrack),
#         'lcc_lengths_GTs_random': compute_lcc_lengths(GTs_random, G_biketrack),
#         'lcc_lengths_GTs_demand': compute_lcc_lengths(GTs_demand, G_biketrack),
#         'lcc_lengths_GTs_demand_ltn_priority': compute_lcc_lengths(GTs_demand_ltn_priority, G_biketrack),
#         'lcc_lengths_GTs_betweenness_ltn_priority': compute_lcc_lengths(GTs_betweenness_ltn_priority, G_biketrack)
#     }

#     analysis_results.update(lcc_data)

#     with open(analysis_res_pickle, 'wb') as f:
#         pickle.dump(analysis_results, f)
#     df = pd.DataFrame({k: pd.Series(v) for k,v in analysis_results.items()})
#     df.to_csv(analysis_res_csv, index=False)

# plt.figure(figsize=(10, 6))
# plt.plot(
#     range(1, len(analysis_results['lcc_lengths_GTs']) + 1), 
#     analysis_results['lcc_lengths_GTs'], 
#     linestyle='-', color='orange',  
#     label="Betweenness growth"
# )
# plt.plot(
#     range(1, len(analysis_results['lcc_lengths_GTs_random']) + 1), 
#     analysis_results['lcc_lengths_GTs_random'], 
#     linestyle='--', color='blue', 
#     label="Random growth"
# )
# plt.plot(
#     range(1, len(analysis_results['lcc_lengths_GTs_demand']) + 1), 
#     analysis_results['lcc_lengths_GTs_demand'], 
#     linestyle='-.', color='red', 
#     label="Demand growth"
# )
# plt.plot(
#     range(1, len(analysis_results['lcc_lengths_GTs_demand_ltn_priority']) + 1), 
#     analysis_results['lcc_lengths_GTs_demand_ltn_priority'], 
#     linestyle='-.', color='green', 
#     label="Demand LTN growth"
# )
# plt.plot(
#     range(1, len(analysis_results['lcc_lengths_GTs_betweenness_ltn_priority']) + 1), 
#     analysis_results['lcc_lengths_GTs_betweenness_ltn_priority'], 
#     linestyle='-', color='purple', 
#     label="Betweenness LTN growth"
# )

# plt.xlabel("Graph Index")
# plt.ylabel("Largest Connected Component Length (meters)")
# plt.title("Largest Connected Component Length")
# plt.legend()
# plt.grid(True, alpha=0.3)
# plt.tight_layout()
# output_path = PATH["plots"] + "/" + placeid + "/lcc_length.png"
# plt.savefig(output_path, dpi=300)

# plt.show()


### Coverage

Prior to running any coverage analysis, we create buffers of each graph to avoid re-calculating

In [None]:
# if rerun == True or 'GTs_buffers' not in locals():
#     GTs_buffers = []
#     for G in GTs:
#         gdf_edges = ox.graph_to_gdfs(G, nodes=False).to_crs(epsg=3857) # convert graph to geodataframe
#         buffer_gdf = gdf_edges.geometry.buffer(buffer_walk).unary_union # make a buffer
#         buffer_gdf = gpd.GeoDataFrame(geometry=[buffer_gdf], crs=gdf_edges.crs) # set crs and geometry
#         buffer_gdf = buffer_gdf.to_crs(epsg=4326)
#         GTs_buffers.append(buffer_gdf) # add buffer to a list
#     with open(PATH["results"] + placeid + "/" + placeid + "_GTs_buffers.pickle", "wb") as f:
#         pickle.dump(GTs_buffers, f) # save buffers

#     GTs_buffers_random = []
#     for G in GTs_random:
#         gdf_edges = ox.graph_to_gdfs(G, nodes=False).to_crs(epsg=3857)
#         buffer_gdf = gdf_edges.geometry.buffer(buffer_walk).unary_union
#         buffer_gdf = gpd.GeoDataFrame(geometry=[buffer_gdf], crs=gdf_edges.crs)
#         buffer_gdf = buffer_gdf.to_crs(epsg=4326)
#         GTs_buffers_random.append(buffer_gdf)
#     with open(PATH["results"] + placeid + "/" + placeid + "_GTs_buffers_random.pickle", "wb") as f:
#         pickle.dump(GTs_buffers_random, f)

#     GTs_buffers_demand = []
#     for G in GTs_demand:
#         gdf_edges = ox.graph_to_gdfs(G, nodes=False).to_crs(epsg=3857)
#         buffer_gdf = gdf_edges.geometry.buffer(buffer_walk).unary_union
#         buffer_gdf = gpd.GeoDataFrame(geometry=[buffer_gdf], crs=gdf_edges.crs)
#         buffer_gdf = buffer_gdf.to_crs(epsg=4326)
#         GTs_buffers_demand.append(buffer_gdf)
#     with open(PATH["results"] + placeid + "/" + placeid + "_GTs_buffers_demand.pickle", "wb") as f:
#         pickle.dump(GTs_buffers_demand, f)


#     GTs_buffers_demand_ltn_priority = []
#     for G in GTs_demand_ltn_priority:
#         gdf_edges = ox.graph_to_gdfs(G, nodes=False).to_crs(epsg=3857)
#         buffer_gdf = gdf_edges.geometry.buffer(buffer_walk).unary_union
#         buffer_gdf = gpd.GeoDataFrame(geometry=[buffer_gdf], crs=gdf_edges.crs)
#         buffer_gdf = buffer_gdf.to_crs(epsg=4326)
#         GTs_buffers_demand_ltn_priority.append(buffer_gdf)
#     with open(PATH["results"] + placeid + "/" + placeid + "_GTs_buffers_demand_ltn_priority.pickle", "wb") as f:
#         pickle.dump(GTs_buffers_demand_ltn_priority, f)

    
#     GTs_buffers_betweenness_ltn_priority = []
#     for G in GTs_betweenness_ltn_priority:
#         gdf_edges = ox.graph_to_gdfs(G, nodes=False).to_crs(epsg=3857)
#         buffer_gdf = gdf_edges.geometry.buffer(buffer_walk).unary_union
#         buffer_gdf = gpd.GeoDataFrame(geometry=[buffer_gdf], crs=gdf_edges.crs)
#         buffer_gdf = buffer_gdf.to_crs(epsg=4326)
#         GTs_buffers_betweenness_ltn_priority.append(buffer_gdf)
#     with open(PATH["results"] + placeid + "/" + placeid + "_GTs_buffers_betweenness_ltn_priority.pickle", "wb") as f:
#         pickle.dump(GTs_buffers_betweenness_ltn_priority, f)

# else:
#     try:
#         with open(PATH["results"] + placeid + "/" + placeid + "_GTs_buffers.pickle", "rb") as f:
#             GTs_buffers = pickle.load(f)
#         with open(PATH["results"] + placeid + "/" + placeid + "_GTs_buffers_random.pickle", "rb") as f:
#             GTs_buffers_random = pickle.load(f)
#         with open(PATH["results"] + placeid + "/" + placeid + "_GTs_buffers_demand.pickle", "rb") as f:
#             GTs_buffers_demand = pickle.load(f)
#         with open(PATH["results"] + placeid + "/" + placeid + "_GTs_buffers_demand_ltn_priority.pickle", "rb") as f:
#             GTs_buffers_demand_ltn_priority = pickle.load(f)
#         with open(PATH["results"] + placeid + "/" + placeid + "_GTs_buffers_betweenness_ltn_priority.pickle", "rb") as f:
#             GTs_buffers_betweenness_ltn_priority = pickle.load(f)
#     except FileNotFoundError:
#         print("Buffer files not found. Please set rerun to True to regenerate them.")


In [None]:
base_path = os.path.join(PATH["results"], placeid, placeid)
GTs_buffers = process_and_save_buffers_parallel(GTs, "GTs_buffers", rerun, base_path, buffer_walk)
GTs_buffers_demand = process_and_save_buffers_parallel(GTs_demand, "GTs_buffers_demand", rerun, base_path, buffer_walk)
GTs_buffers_demand_ltn_priority = process_and_save_buffers_parallel(GTs_demand_ltn_priority, "GTs_buffers_demand_ltn_priority", rerun, base_path, buffer_walk)
GTs_buffers_betweenness_ltn_priority = process_and_save_buffers_parallel(GTs_betweenness_ltn_priority, "GTs_buffers_betweenness_ltn_priority", rerun, base_path, buffer_walk)
# For multiple random runs
GTs_buffers_random_all = []
for run_id, run_res in enumerate(random_runs, start=1):
    name = f"GTs_buffers_random_run{run_id:02d}"
    buffers = process_and_save_buffers_parallel(run_res["GTs"], name, rerun, base_path, buffer_walk)
    GTs_buffers_random_all.append(buffers)

#### Area coverage

In [None]:
# # area
# target_crs = "EPSG:3857"
# boundary_proj = boundary.to_crs(target_crs)
# total_area = boundary_proj.unary_union.area  # total area in m²

# # Function to compute areas (km²) and percentage coverage for a list of buffers
# def compute_metrics(buffer_list):
#     areas = []
#     percentages = []
#     for gdf in buffer_list:
#         gdf_proj = gdf.to_crs(target_crs)
#         inter = gpd.overlay(gdf_proj, boundary_proj, how='intersection')
#         inter_area = inter.unary_union.area if not inter.empty else 0
#         areas.append(inter_area / 1e6)  # convert m² to km²
#         percentages.append((inter_area / total_area * 100) if total_area else 0)
#     return areas, percentages

# # Compute metrics for both buffer sets
# areas1, perc1 = compute_metrics(GTs_buffers)
# areas2, perc2 = compute_metrics(GTs_buffers_random)

# # Plot 1: Compare areas (in km²) for both buffer sets on one graph
# plt.figure(figsize=(10, 5))
# plt.plot(areas1, 'b-o', label='GTs_buffers Area (km²)')
# plt.plot(areas2, 'g-o', label='GTs_buffers_random Area (km²)')
# plt.xlabel('Buffer Index')
# plt.ylabel('Area (km²)')
# plt.title('Boundary Intersection Area Comparison')
# plt.legend()
# plt.tight_layout()
# plt.show()

# # Plot 2: Compare coverage percentages for both buffer sets on one graph
# plt.figure(figsize=(10, 5))
# plt.plot(perc1, 'r-s', label='GTs_buffers Coverage (%)')
# plt.plot(perc2, 'm-s', label='GTs_buffers_random Coverage (%)')
# plt.xlabel('Buffer Index')
# plt.ylabel('Coverage (%)')
# plt.title('Boundary Coverage Percentage Comparison')
# plt.legend()
# plt.tight_layout()
# plt.show()

# Area analysis cell
if os.path.exists(analysis_res_pickle):
    with open(analysis_res_pickle, 'rb') as f:
        analysis_results = pickle.load(f)
else:
    analysis_results = {}

if rerun or 'buffer_areas' not in analysis_results:
    target_crs = "EPSG:3857"
    boundary_proj = boundary.to_crs(target_crs)
    total_area = boundary_proj.unary_union.area

    def compute_metrics(buffer_list):
        areas = []
        percentages = []
        for gdf in buffer_list:
            gdf_proj = gdf.to_crs(target_crs)
            inter = gpd.overlay(gdf_proj, boundary_proj, how='intersection')
            inter_area = inter.unary_union.area if not inter.empty else 0
            areas.append(inter_area / 1e6)  # Convert m² to km²
            percentages.append((inter_area / total_area * 100) if total_area else 0)
        return areas, percentages

    buffer_metrics = {
        'buffer_areas': compute_metrics(GTs_buffers)[0],
        'buffer_percentages': compute_metrics(GTs_buffers)[1],
        'random_buffer_areas': compute_metrics(GTs_buffers_random)[0],
        'random_buffer_percentages': compute_metrics(GTs_buffers_random)[1],
        'demand_buffer_areas': compute_metrics(GTs_buffers_demand)[0],
        'demand_buffer_percentages': compute_metrics(GTs_buffers_demand)[1],
        'demand_buffer_areas_ltn_priority': compute_metrics(GTs_buffers_demand_ltn_priority)[0],
        'demand_buffer_percentages_ltn_priority': compute_metrics(GTs_buffers_demand_ltn_priority)[1],
        'betweenness_buffer_areas_ltn_priority': compute_metrics(GTs_buffers_betweenness_ltn_priority)[0],
        'betweenness_buffer_percentages_ltn_priority': compute_metrics(GTs_buffers_betweenness_ltn_priority)[1]
    }

    analysis_results.update(buffer_metrics)

    with open(analysis_res_pickle, 'wb') as f:
        pickle.dump(analysis_results, f)
    pd.DataFrame({k: pd.Series(v) for k, v in analysis_results.items()}).to_csv(analysis_res_csv, index=False)

# Plotting - Area (km²)
plt.figure(figsize=(10, 6))
plt.plot(
    analysis_results['buffer_areas'], 
    color='orange', 
    linestyle='-', 
    label='Betweenness Growth'
)
plt.plot(
    analysis_results['random_buffer_areas'], 
    color='blue', 
    linestyle='--', 
    label='Random Growth'
)
plt.plot(
    analysis_results['demand_buffer_areas'], 
    color='red', 
    linestyle='-.', 
    label='Demand Growth'
)
plt.plot(
    analysis_results['demand_buffer_areas_ltn_priority'],
    color='green',
    linestyle=':',
    label='Demand LTN Growth'
)
plt.plot(
    analysis_results['betweenness_buffer_areas_ltn_priority'],
    color='purple',
    linestyle='-',
    label='Betweenness LTN Growth'
)


plt.xlabel('Growth Iteration')
plt.ylabel('Area (km²)')
plt.title('Total Area Coverage')
plt.legend()
plt.tight_layout()
plt.grid(True, alpha=0.3)
output_path = PATH["plots"] + "/" + placeid + "/area_coverage_km2.png"
plt.savefig(output_path, dpi=300)

plt.show()

# Plotting - Percentage Coverage
plt.figure(figsize=(10, 6))
plt.plot(
    analysis_results['buffer_percentages'], 
    color='orange', 
    linestyle='-', 
    label='Betweeness Growth'
)
plt.plot(
    analysis_results['random_buffer_percentages'], 
    color='blue', 
    linestyle='--', 
    label='Random Growth'
)
plt.plot(
    analysis_results['demand_buffer_percentages'], 
    color='red', 
    linestyle='-.', 
    label='Demand Growth'
)
plt.plot(
    analysis_results['demand_buffer_percentages_ltn_priority'],
    color='green',
    linestyle=':',
    label='Demand LTN Growth'
)
plt.plot(
    analysis_results['betweenness_buffer_percentages_ltn_priority'],
    color='purple',
    linestyle='-',
    label='Betweenness LTN Growth'
)

plt.xlabel('Growth Iteration')
plt.ylabel('Coverage (%)')
plt.title('Boundary Coverage')
plt.legend()
plt.tight_layout()
plt.grid(True, alpha=0.3)
output_path = PATH["plots"] + "/" + placeid + "/boundary_cov_percentage.png"
plt.savefig(output_path, dpi=300)

plt.show()


#### Streets coverage

In [None]:
# network_crs = G_biketrackcarall_edges.crs
# total_network_length = G_biketrackcarall_edges["length"].sum()

# def compute_street_coverage(buffer_list):
#     lengths = []
#     percentages = []
#     for gdf in buffer_list:
#         # Reproject buffers to network CRS if needed
#         gdf_proj = gdf.to_crs(network_crs)
#         # Compute intersection between network and buffer
#         inter = gpd.overlay(G_biketrackcarall_edges, gdf_proj, how='intersection')
#         # Sum the existing "length" values from the intersected segments
#         seg_length = inter["length"].sum() if not inter.empty else 0
#         lengths.append(seg_length)
#         percentages.append((seg_length / total_network_length * 100) if total_network_length else 0)
#     return lengths, percentages

# # Compute metrics for both buffer sets
# net_lengths1, net_perc1 = compute_street_coverage(GTs_buffers)
# net_lengths2, net_perc2 = compute_street_coverage(GTs_buffers_random)

# # Plot 1: Compare network lengths (in meters) within each buffer
# plt.figure(figsize=(10, 5))
# plt.plot(net_lengths1, 'b-o', label='GTs_buffers Network (m)')
# plt.plot(net_lengths2, 'g-o', label='GTs_buffers_random Network (m)')
# plt.xlabel('Buffer Index')
# plt.ylabel('Network Length (m)')
# plt.title('Street Network Length within Buffers')
# plt.legend()
# plt.tight_layout()
# plt.show()

# # Plot 2: Compare network coverage percentages
# plt.figure(figsize=(10, 5))
# plt.plot(net_perc1, 'r-s', label='GTs_buffers Coverage (%)')
# plt.plot(net_perc2, 'm-s', label='GTs_buffers_random Coverage (%)')
# plt.xlabel('Buffer Index')
# plt.ylabel('Coverage (%)')
# plt.title('Percentage of Total Network within Buffers')
# plt.legend()
# plt.tight_layout()
# plt.show()
if os.path.exists(analysis_res_pickle):
    with open(analysis_res_pickle, 'rb') as f:
        analysis_results = pickle.load(f)
else:
    analysis_results = {}



if rerun or 'street_lengths' not in analysis_results:
    network_crs = G_biketrackcarall_edges.crs
    total_network_length = G_biketrackcarall_edges["length"].sum()

    # simplfy to reduce computation time
    proj_crs = network_crs if network_crs.is_projected else "EPSG:3857"
    edges_proj = G_biketrackcarall_edges.to_crs(proj_crs)
    edges_simpl = edges_proj.copy()
    edges_simpl.geometry = edges_proj.geometry.simplify(tolerance=10,
                                                         preserve_topology=True)
    edges_simpl = edges_simpl.to_crs(network_crs)

    def compute_street_coverage(buffer_list):
        lengths = []
        percentages = []
        for gdf in buffer_list:
            gdf_proj = gdf.to_crs(network_crs)
            # simplfy to reduce computation time
            gdf_proj = gdf.to_crs(proj_crs).copy()
            gdf_proj.geometry = gdf_proj.geometry.simplify(tolerance=10,
                                                           preserve_topology=True)
            gdf_proj = gdf_proj.to_crs(network_crs)
            
            inter = gpd.overlay(G_biketrackcarall_edges, gdf_proj, how='intersection')
            seg_length = inter["length"].sum() if not inter.empty else 0
            lengths.append(seg_length)
            percentages.append((seg_length / total_network_length * 100) if total_network_length else 0)
        return lengths, percentages

    street_metrics = {
        'street_cov_lengths': compute_street_coverage(GTs_buffers)[0],
        'street_cov_percentages': compute_street_coverage(GTs_buffers)[1],
        'random_street_cov_lengths': compute_street_coverage(GTs_buffers_random)[0],
        'random_street_cov_percentages': compute_street_coverage(GTs_buffers_random)[1],
        'demand_street_cov_lengths': compute_street_coverage(GTs_buffers_demand)[0],
        'demand_street_cov_percentages': compute_street_coverage(GTs_buffers_demand)[1],
        'demand_street_cov_lengths_ltn_priority': compute_street_coverage(GTs_buffers_demand_ltn_priority)[0],
        'demand_street_cov_percentages_ltn_priority': compute_street_coverage(GTs_buffers_demand_ltn_priority)[1],
        'betweenness_street_cov_lengths_ltn_priority': compute_street_coverage(GTs_buffers_betweenness_ltn_priority)[0],
        'betweenness_street_cov_percentages_ltn_priority': compute_street_coverage(GTs_buffers_betweenness_ltn_priority)[1]
    }

    analysis_results.update(street_metrics)

    with open(analysis_res_pickle, 'wb') as f:
        pickle.dump(analysis_results, f)
    df = pd.DataFrame({k: pd.Series(v) for k, v in analysis_results.items()})
    df.to_csv(analysis_res_csv, index=False)

# Plot: Network Length within Buffers
plt.figure(figsize=(10, 6))
plt.plot(analysis_results['street_cov_lengths'], color='orange', linestyle='-', label='Betweenness Growth')
plt.plot(analysis_results['random_street_cov_lengths'], color='blue', linestyle='--', label='Random Growth')
plt.plot(analysis_results['demand_street_cov_lengths'], color='red', linestyle='-.', label='Demand Growth')
plt.plot(analysis_results['demand_street_cov_lengths_ltn_priority'], color='green', linestyle=':', label='Demand LTN Growth')
plt.plot(analysis_results['betweenness_street_cov_lengths_ltn_priority'], color='purple', linestyle='-', label='Betweenness LTN Growth')
plt.xlabel('Growth Iteration')
plt.ylabel('Street Network Length (m)')
plt.title('Street Network Length within Buffers')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
output_path = PATH["plots"] + "/" + placeid + "/streets_within_cyclenet.png"
plt.savefig(output_path, dpi=300)

plt.show()

# Plot: Percentage of Network within Buffers
plt.figure(10, 6)
plt.plot(analysis_results['street_cov_percentages'], color='orange', linestyle='-', label='Betweenness Growth')
plt.plot(analysis_results['random_street_cov_percentages'], color='blue', linestyle='--', label='Random Growth')
plt.plot(analysis_results['demand_street_cov_percentages'], color='red', linestyle='-.', label='Demand Growth')
plt.plot(analysis_results['demand_street_cov_percentages_ltn_priority'], color='green', linestyle=':', label='Demand LTN Growth')
plt.plot(analysis_results['betweenness_street_cov_percentages_ltn_priority'], color='purple', linestyle='-', label='Betweenness LTN Growth')
plt.xlabel('Growth Iteration')
plt.ylabel('Coverage (%)')
plt.title('Percentage of Total Network within Buffers')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
output_path = PATH["plots"] + "/" + placeid + "/percentage_within_cyclenet.png"
plt.savefig(output_path, dpi=300)

plt.show()

#### Population coverage

Get population data from census, asign census data to buildings, find population within cycle route buffer

In [None]:
# # get lsoas and population
# lsoa_bound = gpd.read_file(PATH["data"] + "/" + placeid + "/lsoa_bound.gpkg")
# boundary = ox.geocode_to_gdf(placeinfo["nominatimstring"])
# lsoa_bound = gpd.clip(lsoa_bound, boundary)
# lsoa_bound = add_lsoa_population(lsoa_bound) # using 2011 census data

# # get buildings
# buildings = get_building_populations(lsoa_bound, boundary) ## add more detail??
# buildings = buildings.to_crs(epsg=4326)

In [None]:

# # pop_counts_GT = []
# # pop_counts_random_GT = []


# # # Function to calculate total pop_count within each buffer
# # def calculate_pop_count(buffers_list, buildings):
# #     pop_counts = []
# #     for buffer in buffers_list:
# #         intersecting_buildings = gpd.sjoin(buildings, buffer, predicate="intersects")
# #         total_pop = intersecting_buildings["pop_assigned"].sum()
# #         pop_counts.append(total_pop)
# #     return pop_counts

# # # Calculate for both sets of buffers
# # pop_counts_GT = calculate_pop_count(GTs_buffers, buildings)
# # pop_counts_random_GT = calculate_pop_count(GTs_buffers_random, buildings)

# # plt.figure(figsize=(10, 5))
# # buffer_indices = np.arange(len(GTs_buffers))  # Common x-axis indices for both datasets

# # plt.plot(buffer_indices, pop_counts_GT, label="GTs Buffers", linestyle='-', color='blue')
# # plt.plot(buffer_indices, pop_counts_random_GT, label="Random GTs Buffers", linestyle='--', color='orange')

# # plt.xlabel("Buffer Index")
# # plt.ylabel("Total Population Count")
# # plt.title("Comparison of Population Within Buffers")
# # plt.legend()
# # plt.grid(True)
# # plt.show()
# if os.path.exists(analysis_res_pickle):
#     with open(analysis_res_pickle, 'rb') as f:
#         analysis_results = pickle.load(f)
# else:
#     analysis_results = {}

# if rerun or 'pop_counts_GT' not in analysis_results:
#     def calculate_pop_count(buffers_list, buildings):
#         pop_counts = []
#         for buffer in buffers_list:
#             intersecting_buildings = gpd.sjoin(buildings, buffer, predicate="intersects")
#             pop_counts.append(intersecting_buildings["pop_assigned"].sum())
#         return pop_counts

#     pop_metrics = {
#         'pop_counts_GT': calculate_pop_count(GTs_buffers, buildings),
#         'pop_counts_random_GT': calculate_pop_count(GTs_buffers_random, buildings),
#         'pop_counts_demand_GT': calculate_pop_count(GTs_buffers_demand, buildings)
#     }

#     analysis_results.update(pop_metrics)

#     with open(analysis_res_pickle, 'wb') as f:
#         pickle.dump(analysis_results, f)
#     df = pd.DataFrame({k: pd.Series(v) for k, v in analysis_results.items()})
#     df.to_csv(analysis_res_csv, index=False)

# # Plotting
# plt.figure(figsize=(10, 5))
# buffer_indices = np.arange(len(GTs_buffers))

# plt.plot(
#     buffer_indices,
#     analysis_results['pop_counts_GT'],
#     label="Betweenness Growth",
#     linestyle='-',
#     color='orange'
# )
# plt.plot(
#     buffer_indices,
#     analysis_results['pop_counts_random_GT'],
#     label="Random Growth",
#     linestyle='--',
#     color='blue'
# )
# plt.plot(
#     buffer_indices,
#     analysis_results['pop_counts_demand_GT'],
#     label="Demand-based Growth",
#     linestyle='-.',
#     color='red'
# )

# plt.xlabel("Buffer Index")
# plt.ylabel("Total Population Count")
# plt.title("Population Within Buffers Over Investment Iterations")
# plt.legend()
# plt.grid(True, alpha=0.3)
# plt.tight_layout()
# plt.show()


#### POI coverage

##### Seed points

In [None]:
# counts_buffers = []
# counts_random = []

# # Iterate over each buffer GeoDataFrame in GTs_buffers
# for gdf in GTs_buffers:
#     # Create a union of all polygons in the buffer gdf (if there is more than one)
#     buffer_union = gdf.unary_union
#     # Count the points in combined_points that fall within this union
#     count = combined_points.within(buffer_union).sum()
#     counts_buffers.append(count)

# # Do the same for GTs_buffers_random
# for gdf in GTs_buffers_random:
#     buffer_union = gdf.unary_union
#     count = combined_points.within(buffer_union).sum()
#     counts_random.append(count)

# # Plotting the results on a line graph
# plt.figure(figsize=(10, 6))
# x_vals = range(1, len(counts_buffers) + 1)  # Assuming you want x-axis as buffer index

# plt.plot(x_vals, counts_buffers, marker='o', label='GTs_buffers')
# plt.plot(x_vals, counts_random, marker='o', label='GTs_buffers_random')

# plt.xlabel('Buffer Index')
# plt.ylabel('Number of Points Covered')
# plt.title('Points Covered by Each Buffer')
# plt.legend()
# plt.grid(True)
# plt.show()
# Seed point analysis cell
if os.path.exists(analysis_res_pickle):
    with open(analysis_res_pickle, 'rb') as f:
        analysis_results = pickle.load(f)
else:
    analysis_results = {}  

if rerun or 'points_covered_GT' not in analysis_results:
    point_metrics = {
        'points_covered_GT': [
            combined_points.within(gdf.unary_union).sum()
            for gdf in GTs_buffers
        ],
        'points_covered_random': [
            combined_points.within(gdf.unary_union).sum()
            for gdf in GTs_buffers_random
        ],
        'points_covered_demand': [
            combined_points.within(gdf.unary_union).sum()
            for gdf in GTs_buffers_demand
        ],
        'points_covered_demand_ltn_priority': [
            combined_points.within(gdf.unary_union).sum()
            for gdf in GTs_buffers_demand_ltn_priority
        ],
        'points_covered_betweenness_ltn_priority': [
            combined_points.within(gdf.unary_union).sum()
            for gdf in GTs_buffers_betweenness_ltn_priority
        ]
    }
    analysis_results.update(point_metrics)

    with open(analysis_res_pickle, 'wb') as f:
        pickle.dump(analysis_results, f)
    pd.DataFrame({k: pd.Series(v) for k, v in analysis_results.items()}).to_csv(analysis_res_csv, index=False)

# Plotting
plt.figure(figsize=(10, 6))
x_vals = range(1, len(analysis_results['points_covered_GT']) + 1)

plt.plot(
    x_vals,
    analysis_results['points_covered_GT'],
    color='orange',
    linestyle='-',
    label='Betweenness Growth'
)
plt.plot(
    x_vals,
    analysis_results['points_covered_random'],
    color='blue',
    linestyle='--',
    label='Random Growth'
)
plt.plot(
    x_vals,
    analysis_results['points_covered_demand'],
    color='red',
    linestyle='-.',
    label='Demand-based Growth'
)
plt.plot(
    x_vals,
    analysis_results['points_covered_demand_ltn_priority'],
    color='green',
    linestyle=':',
    label='Demand LTN Growth'
)
plt.plot(
    x_vals,
    analysis_results['points_covered_betweenness_ltn_priority'],
    color='purple',
    linestyle='-',
    label='Betweenness LTN Growth'
)

plt.xlabel('Growth Iteration')
plt.ylabel('Number of Points Covered')
plt.title('Seed Points Covered by Cycle Network')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
output_path = PATH["plots"] + "/" + placeid + "/seed_point_coverage.png"
plt.savefig(output_path, dpi=300)

plt.show()


##### LTN Points

In [None]:
# counts_buffers = []
# counts_random = []

# # Iterate over each buffer GeoDataFrame in GTs_buffers
# for gdf in GTs_buffers:
#     # Create a union of all polygons in the buffer gdf (if there is more than one)
#     buffer_union = gdf.unary_union
#     # Count the points that fall within this union
#     count = ltn_points.within(buffer_union).sum()
#     counts_buffers.append(count)

# # Do the same for GTs_buffers_random
# for gdf in GTs_buffers_random:
#     buffer_union = gdf.unary_union
#     count = ltn_points.within(buffer_union).sum()
#     counts_random.append(count)

# # Plotting the results on a line graph
# plt.figure(figsize=(10, 6))
# x_vals = range(1, len(counts_buffers) + 1)  # Assuming you want x-axis as buffer index

# plt.plot(x_vals, counts_buffers, marker='o', label='GTs_buffers')
# plt.plot(x_vals, counts_random, marker='o', label='GTs_buffers_random')

# plt.xlabel('Buffer Index')
# plt.ylabel('Number of Points Covered')
# plt.title('Points Covered by Each Buffer')
# plt.legend()
# plt.grid(True)
# plt.show()

# LTN point coverage analysis cell
if os.path.exists(analysis_res_pickle):
    with open(analysis_res_pickle, 'rb') as f:
        analysis_results = pickle.load(f)
else:
    analysis_results = {} 

if rerun or 'ltn_points_covered_GT' not in analysis_results:
    def compute_ltn_coverage(buffers_list):
        return [
            ltn_points.within(gdf.unary_union).sum()
            for gdf in buffers_list
        ]
    
    analysis_results.update({
        'ltn_points_covered_GT': compute_ltn_coverage(GTs_buffers),
        'ltn_points_covered_random': compute_ltn_coverage(GTs_buffers_random),
        'ltn_points_covered_demand': compute_ltn_coverage(GTs_buffers_demand),
        'ltn_points_covered_demand_ltn_priority': compute_ltn_coverage(GTs_buffers_demand_ltn_priority),
        'ltn_points_covered_betweenness_ltn_priority': compute_ltn_coverage(GTs_buffers_betweenness_ltn_priority)
    })

    with open(analysis_res_pickle, 'wb') as f:
        pickle.dump(analysis_results, f)
    pd.DataFrame({k: pd.Series(v) for k, v in analysis_results.items()}).to_csv(analysis_res_csv, index=False)

# Plotting
plt.figure(figsize=(10, 6))
x_vals = range(1, len(analysis_results['ltn_points_covered_GT']) + 1)

plt.plot(
    x_vals,
    analysis_results['ltn_points_covered_GT'],
    color='orange',
    linestyle='-',
    label='Betweenness Growth'
)
plt.plot(
    x_vals,
    analysis_results['ltn_points_covered_random'],
    color='blue',
    linestyle='--',
    label='Random Growth'
)
plt.plot(
    x_vals,
    analysis_results['ltn_points_covered_demand'],
    color='red',
    linestyle='-.',
    label='Demand-based Growth'
)
plt.plot(
    x_vals,
    analysis_results['ltn_points_covered_demand_ltn_priority'],
    color='green',
    linestyle=':',
    label='Demand LTN Growth'
)
plt.plot(
    x_vals,
    analysis_results['ltn_points_covered_betweenness_ltn_priority'],
    color='purple',
    linestyle='-',
    label='Betweenness LTN Growth'
)

plt.xlabel('Growth Iteration')
plt.ylabel('Number of LTN Points Covered')
plt.title('LTNs Covered by Cycle Network')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
output_path = PATH["plots"] + "/" + placeid + "/ltns_coverage.png"
plt.savefig(output_path, dpi=300)

plt.show()


##### All neighbourhoods

In [None]:
# think about how if we were to create future LTNs, where could these go based purely on making more cycling safe?

# should these be where the most cycling is on? or which area has the longest bit of cycle network added? 

In [None]:
# counts_buffers = []
# counts_random = []

# # Iterate over each buffer GeoDataFrame in GTs_buffers
# for gdf in GTs_buffers:
#     # Create a union of all polygons in the buffer gdf (if there is more than one)
#     buffer_union = gdf.unary_union
#     # Count the points in combined_points that fall within this union
#     count = all_neighbourhoods_centroids.within(buffer_union).sum()
#     counts_buffers.append(count)

# # Do the same for GTs_buffers_random
# for gdf in GTs_buffers_random:
#     buffer_union = gdf.unary_union
#     count = all_neighbourhoods_centroids.within(buffer_union).sum()
#     counts_random.append(count)

# # Plotting the results on a line graph
# plt.figure(figsize=(10, 6))
# x_vals = range(1, len(counts_buffers) + 1)  # Assuming you want x-axis as buffer index

# plt.plot(x_vals, counts_buffers, marker='o', label='GTs_buffers')
# plt.plot(x_vals, counts_random, marker='o', label='GTs_buffers_random')

# plt.xlabel('Buffer Index')
# plt.ylabel('Number of Points Covered')
# plt.title('Points Covered by Each Buffer')
# plt.legend()
# plt.grid(True)
# plt.show()
# Neighborhood centroids analysis cell
if os.path.exists(analysis_res_pickle):
    with open(analysis_res_pickle, 'rb') as f:
        analysis_results = pickle.load(f)
else:
    analysis_results = {} 

if rerun or 'neighborhood_points_covered_GT' not in analysis_results:
    def count_neighborhood_coverage(buffers_list):
        return [
            all_neighbourhoods_centroids.within(gdf.unary_union).sum()
            for gdf in buffers_list
        ]

    neighborhood_metrics = {
        'neighborhood_points_covered_GT': count_neighborhood_coverage(GTs_buffers),
        'neighborhood_points_covered_random': count_neighborhood_coverage(GTs_buffers_random),
        'neighborhood_points_covered_demand': count_neighborhood_coverage(GTs_buffers_demand),
        'neighborhood_points_covered_demand_ltn_priority': count_neighborhood_coverage(GTs_buffers_demand_ltn_priority),
        'neighborhood_points_covered_betweenness_ltn_priority': count_neighborhood_coverage(GTs_buffers_betweenness_ltn_priority)
    }

    analysis_results.update(neighborhood_metrics)
    with open(analysis_res_pickle, 'wb') as f:
        pickle.dump(analysis_results, f)
    pd.DataFrame({k: pd.Series(v) for k, v in analysis_results.items()}).to_csv(analysis_res_csv, index=False)

# Plotting
plt.figure(figsize=(10, 6))
x_vals = range(1, len(analysis_results['neighborhood_points_covered_GT']) + 1)

plt.plot(
    x_vals,
    analysis_results['neighborhood_points_covered_GT'],
    color='orange',
    linestyle='-',
    label='Betweenness Growth'
)
plt.plot(
    x_vals,
    analysis_results['neighborhood_points_covered_random'],
    color='blue',
    linestyle='--',
    label='Random Growth'
)
plt.plot(
    x_vals,
    analysis_results['neighborhood_points_covered_demand'],
    color='red',
    linestyle='-.',
    label='Demand-based Growth'
)
plt.plot(
    x_vals,
    analysis_results['neighborhood_points_covered_demand_ltn_priority'],
    color='green',
    linestyle=':',
    label='Demand LTN Growth'
)
plt.plot(
    x_vals,
    analysis_results['neighborhood_points_covered_betweenness_ltn_priority'],
    color='purple',
    linestyle='-',
    label='Betweenness LTN Growth'
)

plt.xlabel('Growth Iteration')
plt.ylabel('Neighbourhoods Covered')
plt.title('Neighbourhoods Covered by Cycle Network')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
output_path = PATH["plots"] + "/" + placeid + "/neighbourhoods_coverage.png"
plt.savefig(output_path, dpi=300)

plt.show()


In [None]:
## against random baseline
if os.path.exists(analysis_res_pickle):
    with open(analysis_res_pickle, 'rb') as f:
        analysis_results = pickle.load(f)
else:
    analysis_results = {} 

if rerun or 'neighborhood_points_covered_GT' not in analysis_results:
    def count_neighborhood_coverage(buffers_list):
        return [
            all_neighbourhoods_centroids.within(gdf.unary_union).sum()
            for gdf in buffers_list
        ]

    neighborhood_metrics = {
        'neighborhood_points_covered_GT': count_neighborhood_coverage(GTs_buffers),
        'neighborhood_points_covered_random': count_neighborhood_coverage(GTs_buffers_random),
        'neighborhood_points_covered_demand': count_neighborhood_coverage(GTs_buffers_demand),
        'neighborhood_points_covered_demand_ltn_priority': count_neighborhood_coverage(GTs_buffers_demand_ltn_priority),
        'neighborhood_points_covered_betweenness_ltn_priority': count_neighborhood_coverage(GTs_buffers_betweenness_ltn_priority)
    }

    analysis_results.update(neighborhood_metrics)
    with open(analysis_res_pickle, 'wb') as f:
        pickle.dump(analysis_results, f)

    pd.DataFrame({k: pd.Series(v) for k, v in analysis_results.items()}) \
        .to_csv(analysis_res_csv, index=False)

# Calculate deviation from random
random_coverage = np.array(analysis_results['neighborhood_points_covered_random'])

coverage_deviations = {
    'Betweenness': {
        'values': np.array(analysis_results['neighborhood_points_covered_GT']) - random_coverage,
        'color': 'orange',
        'linestyle': '-'
    },
    'Demand': {
        'values': np.array(analysis_results['neighborhood_points_covered_demand']) - random_coverage,
        'color': 'red',
        'linestyle': '-.'
    },
    'Demand LTN': {
        'values': np.array(analysis_results['neighborhood_points_covered_demand_ltn_priority']) - random_coverage,
        'color': 'green',
        'linestyle': ':'
    },
    'Betweenness LTN': {
        'values': np.array(analysis_results['neighborhood_points_covered_betweenness_ltn_priority']) - random_coverage,
        'color': 'purple',
        'linestyle': '-'
    }
}

# Plot deviation from random
plt.figure(figsize=(10, 6))
x_vals = range(1, len(random_coverage) + 1)

for label, data in coverage_deviations.items():
    plt.plot(
        x_vals,
        data['values'],
        linestyle=data['linestyle'],
        color=data['color'],
        label=label
    )

plt.axhline(0, color='black', linestyle='--', linewidth=1)
plt.xlabel('Growth Iteration')
plt.ylabel('Deviation in Neighbourhoods Covered (vs Random)')
plt.title('Neighbourhood Coverage — Deviation from Random Growth (Baseline)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()

# Save plot
output_path = PATH["plots"] + f"/{placeid}/neighbourhoods_coverage__deviation_from_random.png"
plt.savefig(output_path, dpi=300)

plt.show()


### Overlap

Overlap with existing infrastructure. Finding how much of the existing network we overlap, in terms of edges, distance, and % of total network

In [None]:
# def compare_against_reference(graph_list1, graph_list2, reference_graph):
#     """
#     Compare two lists of graphs against a reference, calculating both:
#     1. How much of the reference is covered by each graph (original metric)
#     2. How much of each graph is covered by the reference (reverse metric)
#     """
#     def calculate_both_ways(graph, reference):
#         # Original: how much of reference is covered by graph
#         orig_size_pct, orig_len_pct, orig_edges, orig_len = calculate_overlap_percentages(reference, graph)
#         # Reverse: how much of graph is covered by reference
#         rev_size_pct, rev_len_pct, rev_edges, rev_len = calculate_overlap_percentages(graph, reference)
#         return (orig_size_pct, orig_len_pct, orig_edges, orig_len,
#                 rev_size_pct, rev_len_pct, rev_edges, rev_len)
    
#     metrics_list1 = [calculate_both_ways(g, reference_graph) for g in graph_list1]
#     metrics_list2 = [calculate_both_ways(g, reference_graph) for g in graph_list2]
    
#     return metrics_list1, metrics_list2

# def plot_comparison(metrics_GTs, metrics_GTs_random):
#     """Plot comparison with separate views for both metrics"""
#     fig, axes = plt.subplots(4, 1, figsize=(12, 16))
    
#     # Original percentage metrics (how much of REFERENCE is covered)
#     axes[0].plot([m[0] for m in metrics_GTs], 'b-', label='GTs Size (Ref Covered)')
#     axes[0].plot([m[0] for m in metrics_GTs_random], 'r--', label='GTs_random Size (Ref Covered)')
#     axes[0].plot([m[1] for m in metrics_GTs], 'g-', label='GTs Length (Ref Covered)')
#     axes[0].plot([m[1] for m in metrics_GTs_random], 'm--', label='GTs_random Length (Ref Covered)')
#     axes[0].set_title('Percentage of Reference Covered')
#     axes[0].set_ylabel('Percentage')
#     axes[0].legend()
#     axes[0].grid(True)
    
#     # Reverse percentage metrics (how much of NETWORK is covered by reference)
#     axes[1].plot([m[4] for m in metrics_GTs], 'b-', label='GTs Size (Network Covered)')
#     axes[1].plot([m[4] for m in metrics_GTs_random], 'r--', label='GTs_random Size (Network Covered)')
#     axes[1].plot([m[5] for m in metrics_GTs], 'g-', label='GTs Length (Network Covered)')
#     axes[1].plot([m[5] for m in metrics_GTs_random], 'm--', label='GTs_random Length (Network Covered)')
#     axes[1].set_title('Percentage of Network Covered by Reference')
#     axes[1].set_ylabel('Percentage')
#     axes[1].legend()
#     axes[1].grid(True)
    
#     # Raw edge counts
#     axes[2].plot([m[2] for m in metrics_GTs], 'b-', label='GTs Edges (Ref Covered)')
#     axes[2].plot([m[2] for m in metrics_GTs_random], 'r--', label='GTs_random Edges (Ref Covered)')
#     axes[2].plot([m[6] for m in metrics_GTs], 'g-', label='GTs Edges (Network Covered)')
#     axes[2].plot([m[6] for m in metrics_GTs_random], 'm--', label='GTs_random Edges (Network Covered)')
#     axes[2].set_title('Raw Edge Counts')
#     axes[2].set_ylabel('Edges')
#     axes[2].legend()
#     axes[2].grid(True)
    
#     # Raw lengths
#     axes[3].plot([m[3] for m in metrics_GTs], 'b-', label='GTs Length (Ref Covered)')
#     axes[3].plot([m[3] for m in metrics_GTs_random], 'r--', label='GTs_random Length (Ref Covered)')
#     axes[3].plot([m[7] for m in metrics_GTs], 'g-', label='GTs Length (Network Covered)')
#     axes[3].plot([m[7] for m in metrics_GTs_random], 'm--', label='GTs_random Length (Network Covered)')
#     axes[3].set_title('Raw Length Overlap')
#     axes[3].set_ylabel('Length')
#     axes[3].legend()
#     axes[3].grid(True)
    
#     plt.tight_layout()
#     plt.show()

# reference = G_biketrack  # Your reference infrastructure
# metrics_GTs, metrics_GTs_random = compare_against_reference(GTs, GTs_random, reference)
# plot_comparison(metrics_GTs, metrics_GTs_random)

In [None]:
# def calculate_overlap_percentages(G_biketrack, G):
#     # Calculate edge overlap and length overlap
#     overlapping_edges = 0
#     overlapping_length = 0
#     total_edges = G_biketrack.number_of_edges()
#     total_length = sum(data['length'] for u, v, data in G_biketrack.edges(data=True))
    
#     for u, v, data in G_biketrack.edges(data=True):
#         if G.has_edge(u, v):
#             overlapping_edges += 1
#             overlapping_length += data['length']
    
#     if total_edges == 0:
#         size_percent = 0.0
#     else:
#         size_percent = (overlapping_edges / total_edges) * 100
    
#     if total_length == 0:
#         length_percent = 0.0
#     else:
#         length_percent = (overlapping_length / total_length) * 100
    
#     return size_percent, length_percent, overlapping_edges, overlapping_length


# def compare_against_existing(graph_list1, graph_list2, reference_graph):
#     """
#     Compare two lists of graphs against a common reference graph.
#     Returns metrics for both lists compared to the reference.
#     """
#     # Calculate metrics for both lists against the reference
#     metrics_list1 = [calculate_overlap_percentages(g, reference_graph) for g in graph_list1]
#     metrics_list2 = [calculate_overlap_percentages(g, reference_graph) for g in graph_list2]
    
#     return metrics_list1, metrics_list2

# def plot_comparison(metrics_GTs, metrics_GTs_random):
#     """Plot comparison between GTs and GTs_random against G_biketrack"""
#     fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 10))
    
#     # Percentage plot
#     ax1.plot([m[0] for m in metrics_GTs], 'b-', label='GTs Size Overlap (%)')
#     ax1.plot([m[0] for m in metrics_GTs_random], 'r--', label='GTs_random Size Overlap (%)')
#     ax1.plot([m[1] for m in metrics_GTs], 'g-', label='GTs Length Overlap (%)')
#     ax1.plot([m[1] for m in metrics_GTs_random], 'm--', label='GTs_random Length Overlap (%)')
#     ax1.set_title('Percentage Overlap with Existing Cycle Infrastructure (Including LTNs)')
#     ax1.set_ylabel('Percentage')
#     ax1.legend()
#     ax1.grid(True)
    
#     # Edge count plot
#     ax2.plot([m[2] for m in metrics_GTs], 'b-', label='GTs Overlapping Edges')
#     ax2.plot([m[2] for m in metrics_GTs_random], 'r--', label='GTs_random Overlapping Edges')
#     ax2.set_title('Edge Overlap Comparison')
#     ax2.set_ylabel('Edge Count')
#     ax2.legend()
#     ax2.grid(True)
    
#     # Length plot
#     ax3.plot([m[3] for m in metrics_GTs], 'g-', label='GTs Overlapping Length')
#     ax3.plot([m[3] for m in metrics_GTs_random], 'm--', label='GTs_random Overlapping Length')
#     ax3.set_title('Length Overlap Comparison')
#     ax3.set_ylabel('Length Units')
#     ax3.legend()
#     ax3.grid(True)
    
#     plt.tight_layout()
#     plt.show()



# metrics_GTs, metrics_GTs_random = compare_against_existing(GTs, GTs_random, G_biketrack)
# plot_comparison(metrics_GTs, metrics_GTs_random)


if os.path.exists(analysis_res_pickle):
    with open(analysis_res_pickle, 'rb') as f:
        analysis_results = pickle.load(f)
else:
    analysis_results = {}

if rerun or 'overlap_size_percent_GTs' not in analysis_results:
    def calculate_overlap_percentages(G_ref, G):
        overlapping_edges = 0
        overlapping_length = 0
        total_edges = G_ref.number_of_edges()
        total_length = sum(data.get('length', 0) for _, _, data in G_ref.edges(data=True))

        for u, v, data in G_ref.edges(data=True):
            if G.has_edge(u, v):
                overlapping_edges += 1
                overlapping_length += data.get('length', 0)

        size_percent = (overlapping_edges / total_edges * 100) if total_edges else 0
        length_percent = (overlapping_length / total_length * 100) if total_length else 0

        return size_percent, length_percent, overlapping_edges, overlapping_length

    def get_metrics(graph_list, ref_graph):
        return [calculate_overlap_percentages(ref_graph, g) for g in graph_list]

    metrics_betweenness = get_metrics(GTs, G_biketrack)
    metrics_random = get_metrics(GTs_random, G_biketrack)
    metrics_demand = get_metrics(GTs_demand, G_biketrack)
    metrics_betweenness_ltn_priority = get_metrics(GTs_betweenness_ltn_priority, G_biketrack)
    metrics_demand_ltn_priority = get_metrics(GTs_demand_ltn_priority, G_biketrack)

    overlap_metrics = {
        # Betweenness
        'overlap_size_percent_GTs': [m[0] for m in metrics_betweenness],
        'overlap_length_percent_GTs': [m[1] for m in metrics_betweenness],
        'overlap_edges_GTs': [m[2] for m in metrics_betweenness],
        'overlap_length_GTs': [m[3] for m in metrics_betweenness],

        # Random
        'overlap_size_percent_random': [m[0] for m in metrics_random],
        'overlap_length_percent_random': [m[1] for m in metrics_random],
        'overlap_edges_random': [m[2] for m in metrics_random],
        'overlap_length_random': [m[3] for m in metrics_random],

        # Demand
        'overlap_size_percent_demand': [m[0] for m in metrics_demand],
        'overlap_length_percent_demand': [m[1] for m in metrics_demand],
        'overlap_edges_demand': [m[2] for m in metrics_demand],
        'overlap_length_demand': [m[3] for m in metrics_demand],

        # Demand LTN Priority
        'overlap_size_percent_demand_ltn_priority': [m[0] for m in metrics_demand_ltn_priority],
        'overlap_length_percent_demand_ltn_priority': [m[1] for m in metrics_demand_ltn_priority],
        'overlap_edges_demand_ltn_priority': [m[2] for m in metrics_demand_ltn_priority],
        'overlap_length_demand_ltn_priority': [m[3] for m in metrics_demand_ltn_priority],

        # Betweenness LTN Priority
        'overlap_size_percent_betweenness_ltn_priority': [m[0] for m in metrics_betweenness_ltn_priority],
        'overlap_length_percent_betweenness_ltn_priority': [m[1] for m in metrics_betweenness_ltn_priority],
        'overlap_edges_betweenness_ltn_priority': [m[2] for m in metrics_betweenness_ltn_priority],
        'overlap_length_betweenness_ltn_priority': [m[3] for m in metrics_betweenness_ltn_priority]
    }

    analysis_results.update(overlap_metrics)
    with open(analysis_res_pickle, 'wb') as f:
        pickle.dump(analysis_results, f)
    pd.DataFrame({k: pd.Series(v) for k, v in analysis_results.items()}).to_csv(analysis_res_csv, index=False)



In [None]:
def plot_metric_comparison(metric_key_prefix, ylabel, title, filename):
    """
    Plots a single metric across all strategies compared to existing infrastructure.
    """
    plt.figure(figsize=(10, 6))

    plt.plot(analysis_results[f'{metric_key_prefix}_GTs'], color='orange', linestyle='-', label='Betweenness')
    plt.plot(analysis_results[f'{metric_key_prefix}_random'], color='blue', linestyle='--', label='Random')
    plt.plot(analysis_results[f'{metric_key_prefix}_demand'], color='red', linestyle='-.', label='Demand')
    plt.plot(analysis_results[f'{metric_key_prefix}_demand_ltn_priority'], color='green', linestyle=':', label='Demand LTN')
    plt.plot(analysis_results[f'{metric_key_prefix}_betweenness_ltn_priority'], color='purple', linestyle='-', label='Betweenness LTN')

    plt.xlabel('Iteration')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()

    output_path = PATH["plots"] + f"/{placeid}/{filename}"
    plt.savefig(output_path, dpi=300)
    plt.show()

# Call for each metric
plot_metric_comparison(
    metric_key_prefix='overlap_size_percent',
    ylabel='Overlap (%)',
    title='Edge Overlap % with Existing Cycle Network',
    filename='percentage_overlap_edges.png'
)

plot_metric_comparison(
    metric_key_prefix='overlap_length_percent',
    ylabel='Overlap (%)',
    title='Length Overlap % with Existing Cycle Network',
    filename='percentage_overlap_length.png'
)

# plot_metric_comparison(
#     metric_key_prefix='overlap_edges',
#     ylabel='Edge Count',
#     title='Overlapping Edge Count with Existing Cycle Network',
#     filename='overlapping_edges_count.png'
# )

# plot_metric_comparison(
#     metric_key_prefix='overlap_length',
#     ylabel='Length (m)',
#     title='Overlapping Length with Existing Cycle Network',
#     filename='overlapping_length_total.png'
# )


In [None]:
## against a random baseline
def plot_deviation_from_random(metric_key_prefix, ylabel, title, filename):
    """
    Plots deviation from random growth for a given overlap metric.
    """
    plt.figure(figsize=(10, 6))

    baseline = np.array(analysis_results[f'{metric_key_prefix}_random'])

    def plot_diff(strategy_key, label, color, linestyle):
        values = np.array(analysis_results[f'{metric_key_prefix}_{strategy_key}'])
        diff = values - baseline
        plt.plot(diff, label=label, color=color, linestyle=linestyle)

    plot_diff('GTs', 'Betweenness – Random', 'orange', '-')
    plot_diff('demand', 'Demand – Random', 'red', '-.')
    plot_diff('demand_ltn_priority', 'Demand LTN – Random', 'green', ':')
    plot_diff('betweenness_ltn_priority', 'Betweenness LTN – Random', 'purple', '-')

    plt.axhline(0, color='grey', linestyle='--', linewidth=1, alpha=0.6)

    plt.xlabel('Iteration')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()

    output_path = PATH["plots"] + f"/{placeid}/{filename}"
    plt.savefig(output_path, dpi=300)
    plt.show()

plot_deviation_from_random(
    metric_key_prefix='overlap_size_percent',
    ylabel='Difference in Overlap (%)',
    title='Edge Overlap: Improvement over Random (Baseline)',
    filename='deviation_from_random_overlap_edges.png'
)

plot_deviation_from_random(
    metric_key_prefix='overlap_length_percent',
    ylabel='Difference in Overlap (%)',
    title='Length Overlap: Improvement over Random (Baseline)',
    filename='deviation_from_random_overlap_length.png'
)


In [None]:
# metrics_GTs, metrics_GTs_random = compare_against_existing(GTs, GTs_random, G_biketrack_no_ltn) # no differance?
# plot_comparison(metrics_GTs, metrics_GTs_random)

In [None]:
# #### to explore it

# # work in meters
# G_biketrack_edges = G_biketrack_edges.to_crs(epsg=3857)
# G_edges = G_edges.to_crs(epsg=3857)
# G_biketrack_edges['geometry'] = G_biketrack_edges.geometry.buffer(1)
# G_edges['geometry'] = G_edges.geometry.buffer(1)
# joined = gpd.sjoin(G_biketrack_edges, G_edges, how="inner", predicate="intersects", lsuffix="_biketrack", rsuffix="_edge")

# joined.explore()

### Directness

Directness (Directness=Total Sum of Network Distances/Total Sum of Euclidean Distances​)

In [None]:
# net_dist = []
# eucl_dist = []
# directness = []

# for G in GT_abstracts:
#     total_net_dist = sum(data.get('eucl_dist', 0) for _, _, data in G.edges(data=True))
#     total_eucl_dist = sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True))
#     net_dist.append(total_net_dist)
#     eucl_dist.append(total_eucl_dist)
#     if total_net_dist != 0:
#         ratio = total_eucl_dist / total_net_dist
#     else:
#         ratio = None
#     directness.append(ratio)


# net_dist_random = []
# eucl_dist_random = []
# directness_random = []

# for G in GT_abstracts_random:
#     total_net_dist = sum(data.get('eucl_dist', 0) for _, _, data in G.edges(data=True))
#     total_eucl_dist = sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True))
#     net_dist_random.append(total_net_dist)
#     eucl_dist_random.append(total_eucl_dist)
#     if total_net_dist != 0:
#         ratio = total_eucl_dist / total_net_dist
#     else:
#         ratio = None
#     directness_random.append(ratio)



# # Plotting 
# plt.figure(figsize=(10, 6))
# plt.plot(directness, linestyle='-', color='blue', label='Betweeness')
# plt.plot(directness_random, linestyle='--', color='orange', label='Random')
# plt.xlabel('Graph Index')
# plt.ylabel('Directness (Euclidean / Network Distance)')
# plt.title('Total Network Directness')
# plt.grid(True)
# plt.tight_layout()
# plt.show()

# Directness analysis 
if os.path.exists(analysis_res_pickle):
    with open(analysis_res_pickle, 'rb') as f:
        analysis_results = pickle.load(f)
else:
    analysis_results = {}

if rerun or 'directness_demand' not in analysis_results:
    directness_metrics = {
        # Betweenness
        'directness_net': [
            sum(data.get('eucl_dist', 0) for _, _, data in G.edges(data=True))
            for G in GT_abstracts
        ],
        'directness_eucl': [
            sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True))
            for G in GT_abstracts
        ],
        'directness': [
            (sum(data.get('eucl_dist', 0) for _, _, data in G.edges(data=True)) / 
            sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True)))
            if sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True)) != 0 else None
            for G in GT_abstracts
        ],

        # Random
        'directness_net_random': [
            sum(data.get('eucl_dist', 0) for _, _, data in G.edges(data=True))
            for G in GT_abstracts_random
        ],
        'directness_eucl_random': [
            sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True))
            for G in GT_abstracts_random
        ],
        'directness_random': [
            (sum(data.get('eucl_dist', 0) for _, _, data in G.edges(data=True)) / 
             sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True)))
            if sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True)) != 0 else None
            for G in GT_abstracts_random
        ],

        # Demand
        'directness_net_demand': [
            sum(data.get('eucl_dist', 0) for _, _, data in G.edges(data=True))
            for G in GT_abstracts_demand
        ],
        'directness_eucl_demand': [
            sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True))
            for G in GT_abstracts_demand
        ],
        'directness_demand': [
            (sum(data.get('eucl_dist', 0) for _, _, data in G.edges(data=True)) / 
             sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True)))
            if sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True)) != 0 else None
            for G in GT_abstracts_demand
        ],

        # Demand LTN Priority
        'directness_net_demand_ltn_priority': [
            sum(data.get('eucl_dist', 0) for _, _, data in G.edges(data=True))
            for G in GT_abstracts_demand_ltn_priority
        ],
        'directness_eucl_demand_ltn_priority': [
            sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True))
            for G in GT_abstracts_demand_ltn_priority
        ],
        'directness_demand_ltn_priority': [
            (sum(data.get('eucl_dist', 0) for _, _, data in G.edges(data=True)) / 
             sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True)))
            if sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True)) != 0 else None
            for G in GT_abstracts_demand_ltn_priority
        ],

        # Betweenness LTN Priority
        'directness_net_betweenness_ltn_priority': [
            sum(data.get('eucl_dist', 0) for _, _, data in G.edges(data=True))
            for G in GT_abstracts_betweenness_ltn_priority
        ],
        'directness_eucl_betweenness_ltn_priority': [
            sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True))
            for G in GT_abstracts_betweenness_ltn_priority
        ],
        'directness_betweenness_ltn_priority': [
            (sum(data.get('eucl_dist', 0) for _, _, data in G.edges(data=True)) / 
             sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True)))
            if sum(data.get('sp_true_distance', 0) for _, _, data in G.edges(data=True)) != 0 else None
            for G in GT_abstracts_betweenness_ltn_priority
        ]
    }

    analysis_results.update(directness_metrics)
    with open(analysis_res_pickle, 'wb') as f:
        pickle.dump(analysis_results, f)
    pd.DataFrame({k: pd.Series(v) for k, v in analysis_results.items()}).to_csv(analysis_res_csv, index=False)

plt.figure(figsize=(10, 6))
plt.plot(
    analysis_results['directness'],
    linestyle='--', 
    color='orange', 
    label='Betweenness'
)
plt.plot(
    analysis_results['directness_random'],
    linestyle='-', 
    color='blue', 
    label='Random'
)
plt.plot(
    analysis_results['directness_demand'],
    linestyle='-.', 
    color='red', 
    label='Demand'
)
plt.plot(
    analysis_results['directness_demand_ltn_priority'],
    linestyle=':', 
    color='green', 
    label='Demand LTN'
)
plt.plot(
    analysis_results['directness_betweenness_ltn_priority'],
    linestyle='-', 
    color='purple', 
    label='Betweenness LTN'
)

plt.xlabel('Iteration')
plt.ylabel('Directness (Euclidean / Network Distance)')
plt.title('Network Directness Comparison')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
output_path = PATH["plots"] + "/" + placeid + "/directness.png"
plt.savefig(output_path, dpi=300)
plt.show()


In [None]:
# calcaulate directness of existing network to compare against...

## Efficiency

### Global

In [None]:
# ## edit plotting
# def calculate_efficiency(G):
#     """Calculate global network efficiency using formula E = 1/(N(N-1)) * Σ 1/d_ij"""
#     # Convert to undirected graph
#     undirected_G = nx.Graph(G)
#     try:
#         return nx.global_efficiency(undirected_G)
#     except nx.NetworkXError:
#         return 0  # Handle disconnected graphs

# def plot_efficiency_comparison(GTs, GTs_random):
#     """Calculate and plot global efficiency for both graph lists"""
#     # Calculate efficiencies
#     eff_GTs = [calculate_efficiency(G) for G in GTs]
#     eff_random = [calculate_efficiency(G) for G in GTs_random]
    
#     # Create plot
#     plt.figure(figsize=(10, 6))
#     plt.plot(eff_GTs, 'b-', linewidth=2, label='GTs Efficiency')
#     plt.plot(eff_random, 'r--', linewidth=2, label='GTs_random Efficiency')
    
#     plt.title('Global Network Efficiency Comparison\n$E = \\frac{1}{N(N-1)}\\sum_{i\\neq j} \\frac{1}{d_{ij}}$')
#     plt.ylabel('Global Efficiency')
#     plt.xlabel('Graph Instance Index')
#     plt.legend()
#     plt.grid(True)
#     plt.ylim(0, 1)
#     plt.tight_layout()
#     plt.show()

# # Usage example:
# plot_efficiency_comparison(GTs, GTs_random)

In [None]:
def calculate_global_efficiency(G, numnodepairs=500, normalized=True, weight='weight', debug=False):
    """Calculates global network efficiency for a graph G."""
    if G is None or len(G) < 2:
        return 0.0
    
    nodes = list(G.nodes)
    N = len(nodes)
    
    if N > numnodepairs:
        sampled_nodes = random.sample(nodes, numnodepairs)
    else:
        sampled_nodes = nodes
    S = len(sampled_nodes)
    if S < 2:
        return 0.0
    
    total_efficiency = 0.0
    considered_pairs = S * (S - 1)  
    
    for u in sampled_nodes:
        try:
            lengths = nx.single_source_dijkstra_path_length(G, u, weight=weight)
            for v in sampled_nodes:
                if u == v: continue
                d = lengths.get(v, float('inf'))
                if 0 < d < float('inf'):
                    total_efficiency += 1 / d
        except nx.NetworkXNoPath:
            continue
    
    if considered_pairs == 0:
        return 0.0
    
    # Always use considered_pairs for unnormalized
    EG = total_efficiency / considered_pairs  # average efficiency
    
    if not normalized:
        return EG  # Directly return average efficiency of sampled pairs
    
    # Normalisation logic 
    for node in sampled_nodes:
        if 'x' not in G.nodes[node] or 'y' not in G.nodes[node]:
            raise KeyError("Nodes need 'x' and 'y' for normalization.")
    
    ideal_total = 0.0
    for u, v in itertools.permutations(sampled_nodes, 2):
        x1, y1 = G.nodes[u]['x'], G.nodes[u]['y']
        x2, y2 = G.nodes[v]['x'], G.nodes[v]['y']
        distance = ((x1-x2)**2 + (y1-y2)**2)**0.5
        if distance > 0:
            ideal_total += 1 / distance
    
    if ideal_total == 0:
        return 0.0
    

    ideal_avg = ideal_total / considered_pairs
    normalized_efficiency = EG / ideal_avg
    
    if debug:
        print(f"Actual Avg: {EG}, Ideal Avg: {ideal_avg}, Normalized: {normalized_efficiency}")
    
    return normalized_efficiency

In [None]:
calculate_global_efficiency(G, numnodepairs=500, normalized=True, weight='length', debug=False)

In [None]:
if os.path.exists(analysis_res_pickle):
    with open(analysis_res_pickle, 'rb') as f:
        analysis_results = pickle.load(f)
else:
    analysis_results = {}

if rerun or 'efficiency_demand' not in analysis_results:
    efficiency_metrics = {
        'efficiency': [
            calculate_global_efficiency(G, numnodepairs=1000, normalized=True, weight='length')
            for G in GT_abstracts
        ],
        'efficiency_random': [
            calculate_global_efficiency(G, numnodepairs=1000, normalized=True, weight='length')
            for G in GT_abstracts_random
        ],
        'efficiency_demand': [
            calculate_global_efficiency(G, numnodepairs=1000, normalized=True, weight='length')
            for G in GT_abstracts_demand
        ],
        'efficiency_demand_ltn_priority': [
            calculate_global_efficiency(G, numnodepairs=1000, normalized=True, weight='length')
            for G in GT_abstracts_demand_ltn_priority
        ],
        'efficiency_betweenness_ltn_priority': [
            calculate_global_efficiency(G, numnodepairs=1000, normalized=True, weight='length')
            for G in GT_abstracts_betweenness_ltn_priority
        ]
    }
    

    analysis_results.update(efficiency_metrics)  
    with open(analysis_res_pickle, 'wb') as f:
        pickle.dump(analysis_results, f)
    pd.DataFrame({k: pd.Series(v) for k, v in analysis_results.items()}).to_csv(analysis_res_csv, index=False)


plt.figure(figsize=(10, 6))

# Plot efficiency from analysis_results
plt.plot(
    analysis_results['efficiency'],
    linestyle='-', 
    color='orange',
    label='Betweenness'
)
plt.plot(
    analysis_results['efficiency_random'],
    linestyle='--', 
    color='blue',
    label='Random'
)
plt.plot(
    analysis_results['efficiency_demand'],
    linestyle='-.', 
    color='red',
    label='Demand'
)
plt.plot(
    analysis_results['efficiency_demand_ltn_priority'],
    linestyle=':', 
    color='green',
    label='Demand LTN'
)
plt.plot(
    analysis_results['efficiency_betweenness_ltn_priority'],
    linestyle='-', 
    color='purple',
    label='Betweenness LTN'
)

plt.xlabel('Iteration', fontsize=12)
plt.ylabel('Global Efficiency', fontsize=12)
plt.title('Global Network Efficiency Comparison', fontsize=14)
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()

output_path = PATH["plots"] + "/" + placeid + "/global_eff.png"
plt.savefig(output_path, dpi=300)
plt.show()

In [None]:
# Create plot
plt.figure(figsize=(10, 6))

# Plot both lines
plt.plot(x, eff_GTs, label='GTs',  linestyle='-', color='blue')
plt.plot(x_random, eff_GTs_random, label='GTs Random', linestyle='--', color='red')

# Add labels and title
plt.xlabel('Graph Index', fontsize=12)
plt.ylabel('Global Efficiency', fontsize=12)
plt.title('Global Network Efficiency Comparison', fontsize=14)
plt.legend()
plt.grid(True, alpha=0.3)

# Customize ticks
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)

# Show plot
plt.tight_layout()
plt.show()

### Local

Both

In [None]:
# def calculate_efficiencies(G):
#     """Calculate both global and local efficiencies"""
#     # Convert to undirected graph
#     undirected_G = nx.Graph(G)
    
#     try:
#         global_eff = nx.global_efficiency(undirected_G)
#     except nx.NetworkXError:
#         global_eff = 0
        
#     try:
#         local_eff = nx.local_efficiency(undirected_G)
#     except nx.NetworkXError:
#         local_eff = 0
        
#     return global_eff, local_eff

# def plot_efficiency_comparison(GTs, GTs_random):
#     """Plot comparison of both efficiency metrics"""
#     # Calculate efficiencies
#     global_GTs, local_GTs = zip(*[calculate_efficiencies(G) for G in GTs])
#     global_random, local_random = zip(*[calculate_efficiencies(G) for G in GTs_random])
    
#     # Create plots
#     fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
    
#     # Global efficiency plot
#     ax1.plot(global_GTs, 'b-', linewidth=2, label='GTs Global Eff')
#     ax1.plot(global_random, 'r--', linewidth=2, label='GTs_random Global Eff')
#     ax1.set_title('Global Network Efficiency Comparison')
#     ax1.set_ylabel('Efficiency')
#     ax1.legend()
#     ax1.grid(True)
#     ax1.set_ylim(0, 1)
    
#     # Local efficiency plot
#     ax2.plot(local_GTs, 'g-', linewidth=2, label='GTs Local Eff')
#     ax2.plot(local_random, 'm--', linewidth=2, label='GTs_random Local Eff')
#     ax2.set_title('Local Network Efficiency Comparison')
#     ax2.set_ylabel('Efficiency')
#     ax2.legend()
#     ax2.grid(True)
#     ax2.set_ylim(0, 1)
    
#     plt.tight_layout()
#     plt.show()

# # Usage example:
# plot_efficiency_comparison(GTs, GTs_random)

# (Optional) Pretty plots of networks

In [None]:
iteration_number = 99

In [None]:
fig, ax = plt.subplots(figsize=(12, 8))  # Adjust the width and height as needed



G_biketrackcarall_edges = ox.graph_to_gdfs(G_biketrackcarall, nodes=False)
G_biketrackcarall_edges = G_biketrackcarall_edges.to_crs(epsg=3857)  # Ensure CRS matches
G_biketrackcarall_edges.plot(ax=ax, color='grey', linewidth=0.6, alpha=0.5, zorder = 0)  # Light grey with thin linewidth

# Add bike track edges
#G_biketrack = {}
#G_biketrack[placeid] = csv_to_ox(PATH["data"] + placeid + "/", placeid, 'biketrack')
#G_biketrack[placeid].graph["crs"] = 'epsg:4326'  # Needed for OSMNX's graph_to_gdfs in utils_graph.py
#G_biketrack = copy.deepcopy(G_biketrack[placeid])
G_biketrack_edges = ox.graph_to_gdfs(G_biketrack, nodes=False)
G_biketrack_edges = G_biketrack_edges.to_crs(epsg=3857)
G_biketrack_edges.plot(ax=ax, color='turquoise', linewidth=1.4, alpha=0.9, zorder = 1)  # Light grey with thin linewidth


# Plot the main graph and layers
GT_nodes, GT_edges = ox.graph_to_gdfs(GTs[iteration_number])
GT_edges = GT_edges.to_crs(epsg=3857)
GT_edges.plot(ax=ax, color='orange')
ltn_points.to_crs(epsg=3857).plot(ax=ax, color='red', markersize=10, zorder=4)
tess_points.to_crs(epsg=3857).plot(ax=ax, color='green', markersize=5, zorder = 3)


ltns = ltns.to_crs(epsg=3857)  # Ensure the CRS matches
ltns.plot(ax=ax, color='blue', alpha=0.5, label=f"Low Traffic Neighbourhoods", zorder=2)


# Remove x and y axis labels and ticks
ax.axis('off')  # This removes the entire axis, including labels and ticks

ax.set_title(f"Iteration: {iteration_number + 1}")
#ax.legend(loc="upper left")

output_path = fr"C:\Users\b8008458\OneDrive - Newcastle University\2022 to 2023\PhD\Conferances etc\GISRUK 2025\Plots\{iteration_number}_network_plot.png"
plt.savefig(output_path, dpi=600, bbox_inches='tight'
            #, transparent=True
            )
# Show the plot
plt.show()


# GIFs

In [None]:
# gif_path = r"C:\Users\b8008458\OneDrive - Newcastle University\2022 to 2023\PhD\networkGrowth\bikenwgrowth_external\videos\newcastle\investment_animation_pct.gif"

# # Set up a figure for animation
# fig, ax = plt.subplots(figsize=(12, 8))

# def update(idx):
#     """Update function for each frame in the animation."""
#     ax.clear()  # Clear previous frame
#     G = GTs[idx]
#     # Skip empty graphs
#     if len(G.edges()) == 0:
#         print(f"Graph {idx + 1} has no edges, skipping plot.")
#         return
    
#     # Add G_weighted edges
#     G_weighted_edges = ox.graph_to_gdfs(G_weighted, nodes=False)
#     G_weighted_edges = G_weighted_edges.to_crs(epsg=3857)
#     G_weighted_edges.plot(ax=ax, color='grey', linewidth=0.5, alpha=0.6, zorder=0)

#     # Add bike track edges
#     G_biketrack_nodes, G_biketrack_edges = ox.graph_to_gdfs(G_biketrack)
#     G_biketrack_edges = G_biketrack_edges.to_crs(epsg=3857)
#     G_biketrack_edges.plot(ax=ax, color='turquoise', linewidth=0.5, alpha=0.8, zorder=1)

#     # Plot main graph
#     GT_nodes, GT_edges = ox.graph_to_gdfs(G)
#     GT_edges = GT_edges.to_crs(epsg=3857)
#     GT_edges.plot(ax=ax, color='orange')

#     # Plot additional layers
#     ltn_gdf.plot(ax=ax, color='red', markersize=10, zorder=4)
#     tess_gdf.plot(ax=ax, color='green', markersize=5, zorder=3)

#     # Plot the neighbourhood
#     placename = "Newcastle Upon Tyne"
#     if placename in neighbourhoods:
#         neighbourhood_gdf = neighbourhoods[placename].to_crs(epsg=3857)
#         neighbourhood_gdf.plot(ax=ax, color='blue', alpha=0.5, zorder=2)

#     # Remove axis and set title
#     ax.axis('off')
#     ax.set_title(f"Meters of investment: {D/10}")

# # Create animation
# ani = animation.FuncAnimation(fig, update, frames=len(GTs), repeat=False)

# # Save the animation as a GIF using PillowWriter
# ani.save(gif_path, writer=animation.PillowWriter(fps=6))

# print(f"GIF saved to: {gif_path}")

In [None]:
# # delete once happy with cell below
# neighbourhoods = load_neighbourhoods(os.path.join(PATH["data"], placeid))
# G_biketrackcarall_edges = (
#     ox.graph_to_gdfs(G_biketrackcarall, nodes=False)
#       .to_crs(epsg=3857)
# )
# G_biketrack_edges = (
#     ox.graph_to_gdfs(G_biketrack, nodes=False)
#       .to_crs(epsg=3857)
# )

# ltn_points_crs = ltn_points.to_crs(epsg=3857)
# tess_points_crs = tess_points.to_crs(epsg=3857)
# neighbourhoods = load_neighbourhoods(PATH["data"] + placeid + "/")
# ltns = neighbourhoods.get("ltns", None)
# ltns_gdf = None
# if neighbourhoods:
#     _, ltns_gdf = next(iter(neighbourhoods.items())) # get the first geodataframe in neighbourhoods. Should fix this to a more elegant solution
#     ltns = ltns_gdf.to_crs(epsg=3857) 


# fig, ax = plt.subplots(figsize=(12, 8))  

# def update(idx):
#     """Update function called for each animation frame."""
#     ax.clear()  # clear the axis for the new frame

#     # Plot the static background layers first.
#     G_biketrackcarall_edges.plot(ax=ax, color='grey', linewidth=0.6, alpha=0.5, zorder=0)
#     G_biketrack_edges.plot(ax=ax, color='turquoise', linewidth=1.4, alpha=0.9, zorder=1)

#     # Get the current main graph from your list of graphs GTs.
#     current_graph = GTs[idx]
#     if len(current_graph.edges()) == 0:
#         print(f"Graph {idx + 1} has no edges, skipping plot.")
#         return

#     # Convert the main graph to GeoDataFrames and reproject
#     GT_nodes, GT_edges = ox.graph_to_gdfs(current_graph)
#     GT_edges.to_crs(epsg=3857).plot(ax=ax, color='orange')
    
#     # Plot additional layers.
#     ltn_points_crs.plot(ax=ax, color='red', markersize=10, zorder=4)
#     tess_points_crs.plot(ax=ax, color='green', markersize=5, zorder=3)
#     ltns.plot(ax=ax, color='blue', alpha=0.5, zorder=2)

#     ax.axis('off')
#     ax.set_title(f"Iterations completed: {idx + 1}%", fontsize=14)


# ani = animation.FuncAnimation(fig, update, frames=len(GT_abstracts), repeat=False)

# # Construct the output file path flexibly
# output_gif = os.path.join(PATH["videos"], placeid + "/" f"investment_animation{prune_measure}.gif")
# output_gif = os.path.join(PATH["videos"], placeid + "/" f"betweenness_greedyTri.gif")
# # Create the directory if it doesn't exist.
# os.makedirs(os.path.dirname(output_gif), exist_ok=True)

# # Save the animation as a GIF with PillowWriter.
# ani.save(output_gif, writer=animation.PillowWriter(fps=6))

# print(f"GIF saved to: {output_gif}")

In [None]:
# new plotting function
def plot_investment_animation(
    graph_list,
    output_path,
    G_biketrackcarall,
    G_biketrack,
    ltn_points,
    tess_points,
    neighbourhoods,
    fps=4,
    title_prefix="Iteration number: ",
    crs_epsg=3857,
    figsize=(12, 8)
):
    """Generate and save an animated GIF showing network growth over time."""

    G_biketrackcarall_edges = (
        ox.graph_to_gdfs(G_biketrackcarall, nodes=False).to_crs(epsg=crs_epsg)
    )
    G_biketrack_edges = (
        ox.graph_to_gdfs(G_biketrack, nodes=False).to_crs(epsg=crs_epsg)
    )
    ltn_points_crs = ltn_points.to_crs(epsg=crs_epsg)
    tess_points_crs = tess_points.to_crs(epsg=crs_epsg)

    # Get a neighbourhood GeoDataFrame from the dictionary
    ltns = None
    if neighbourhoods:
        _, ltns_gdf = next(iter(neighbourhoods.items()))
        ltns = ltns_gdf.to_crs(epsg=crs_epsg)

    # Set up the figure
    fig, ax = plt.subplots(figsize=figsize)

    def update(idx):
        ax.clear()
        G = graph_list[idx]
        if 'crs' not in G.graph:
            G.graph['crs'] = f"epsg:{crs_epsg}"

        # Static background layers
        G_biketrackcarall_edges.plot(ax=ax, color='grey', linewidth=0.6, alpha=0.5, zorder=0)
        G_biketrack_edges.plot(ax=ax, color='turquoise', linewidth=1.4, alpha=0.9, zorder=1)

        # Skip empty graphs
        if len(G.edges()) == 0:
            print(f"Graph {idx + 1} has no edges, skipping.")
            return

        # Main graph
        _, edges = ox.graph_to_gdfs(G)
        edges.to_crs(epsg=crs_epsg).plot(ax=ax, color='orange')

        # Point layers
        ltn_points_crs.plot(ax=ax, color='red', markersize=10, zorder=4)
        tess_points_crs.plot(ax=ax, color='green', markersize=5, zorder=3)

        # LTN areas
        if ltns is not None:
            ltns.plot(ax=ax, color='blue', alpha=0.5, zorder=2)

        ax.axis('off')
        ax.set_title(f"{title_prefix} - iterations completed: {idx + 1}%", fontsize=14)

    # Create animation
    ani = animation.FuncAnimation(fig, update, frames=len(graph_list), repeat=False)
    os.makedirs(os.path.dirname(output_path), exist_ok=True)


    ani.save(output_path, writer=animation.PillowWriter(fps=fps), dpi=400)
    print(f"GIF saved to: {output_path}")

In [None]:
# run plotting function
plot_investment_animation(
    graph_list=GT_abstracts_demand,
    output_path=os.path.join(PATH["videos"], placeid, f"demand_abstract_animation.gif"),
    G_biketrackcarall=G_biketrackcarall,
    G_biketrack=G_biketrack,
    ltn_points=ltn_points,
    tess_points=tess_points,
    neighbourhoods=load_neighbourhoods(os.path.join(PATH["data"], placeid)),
    title_prefix="Demand growth"
)

## Unfinshed

In [None]:
metrics_GTs, metrics_GTs_random = compare_against_existing(GTs, GTs_random, G_biketrack_no_ltn) # no differance?
plot_comparison(metrics_GTs, metrics_GTs_random)