Import files from all simulations

In [35]:
import ast
import glob
from collections import defaultdict

import geopandas as gpd
import networkx as nx
import osmnx as ox
import pandas as pd
from shapely.geometry import LineString

# Folder that holds one result workbook per simulation run
folder_path = "./root/Outputs/sim_results"

# Collect every per-simulation workbook. glob returns matches in arbitrary
# order, so sort them to make the load order (and the row order of the
# combined table) reproducible between runs and machines.
file_pattern = f"{folder_path}/edge_population_simulation_*"
files = sorted(glob.glob(file_pattern))

# Fail early with a readable message; otherwise an empty folder or a wrong
# path only surfaces later as pd.concat's generic "No objects to concatenate".
if not files:
    raise FileNotFoundError(f"No simulation result files match {file_pattern!r}")

# Read each workbook into its own DataFrame
dataframes = [pd.read_excel(file) for file in files]

# Stack all runs into one long table with a fresh 0..n-1 index
pedestrian_intensity = pd.concat(dataframes, ignore_index=True)


Combine values from different simulations

In [38]:
# Direction-independent edge key: (u, v) and (v, u) both become the same sorted tuple
pedestrian_intensity['edge'] = [
    tuple(sorted(node_pair))
    for node_pair in zip(pedestrian_intensity['u'], pedestrian_intensity['v'])
]


# Convert 'buurtcode' entries to dictionaries
def parse_buurtcode(value):
    """Return the code -> count mapping stored in one 'buurtcode' cell.

    Parameters
    ----------
    value : object
        The cell value: a dict, the string representation of a dict literal
        (e.g. "{'BU0363AC03': 31}"), or anything else (NaN, None, ...).

    Returns
    -------
    dict
        The dict itself when one is passed in, the parsed dict for a valid
        string, and an empty dict when the value is missing, cannot be
        parsed, or parses to something that is not a dict.
    """
    if isinstance(value, dict):
        return value
    if not isinstance(value, str):
        return {}
    try:
        # literal_eval accepts Python literals only, so text coming from the
        # result files can never execute code the way eval() would.
        parsed = ast.literal_eval(value)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return {}  # Handle invalid cases
    # A string that parses to a list, number, ... is not a usable mapping
    return parsed if isinstance(parsed, dict) else {}

# Run every 'buurtcode' cell through parse_buurtcode so the column holds dicts
pedestrian_intensity['buurtcode'] = pedestrian_intensity['buurtcode'].map(parse_buurtcode)


#define function to combine data of buurtcode
def aggregate_buurtcodes(buurtcode_series):
    """Average the per-code counts over all rows of one group.

    Parameters
    ----------
    buurtcode_series : iterable
        One entry per row of the group; entries are expected to be dicts
        mapping a code to a count. Entries that are not dicts contribute no
        counts but still count as a row.

    Returns
    -------
    dict
        ``{code: mean}`` where the mean is the sum of ``int(count)`` over the
        rows that contain the code, divided by the total number of rows.
        A row that lacks a code therefore contributes 0 for that code. Keys
        appear in first-seen order, so the result is reproducible. Empty
        input returns an empty dict.

    Raises
    ------
    ValueError, TypeError
        If a count cannot be converted with ``int()``.
    """
    totals = {}
    n_rows = 0

    # Single pass: accumulate the sum per code and count every row
    for row in buurtcode_series:
        n_rows += 1  # the denominator includes rows without the code
        if not isinstance(row, dict):
            continue
        for code, count in row.items():
            totals[code] = totals.get(code, 0) + int(count)  # Ensure values are integers

    # totals is only non-empty when at least one row was seen, so n_rows > 0 here
    return {code: total / n_rows for code, total in totals.items()}


# How each column is reduced when the rows of all simulations are grouped per edge:
# numeric columns are averaged, the 'buurtcode' dicts go through aggregate_buurtcodes
aggregation_spec = {
    'pop': 'mean',
    'buurtcode': aggregate_buurtcodes,
    'young_pop': 'mean',
    'old_pop': 'mean',
    'age-unknown_pop': 'mean',
}
edge_groups = pedestrian_intensity.groupby('edge')
pedestrian_intensity_all_simulations = edge_groups.agg(aggregation_spec).reset_index()

# Split the (u, v) edge tuples back into separate `u` and `v` columns
edge_endpoints = pd.DataFrame(
    pedestrian_intensity_all_simulations['edge'].tolist(),
    index=pedestrian_intensity_all_simulations.index,
)
pedestrian_intensity_all_simulations[['u', 'v']] = edge_endpoints


Store as percentages

In [40]:
# Share of each age group in the mean population of the edge, in percent.
# NOTE: an edge whose mean 'pop' is 0 yields NaN/inf here rather than an error.
mean_pop = pedestrian_intensity_all_simulations['pop']
pedestrian_intensity_all_simulations['young_pop_pct'] = pedestrian_intensity_all_simulations['young_pop'] / mean_pop * 100
pedestrian_intensity_all_simulations['old_pop_pct'] = pedestrian_intensity_all_simulations['old_pop'] / mean_pop * 100
pedestrian_intensity_all_simulations['pop_unknown_pct'] = pedestrian_intensity_all_simulations['age-unknown_pop'] / mean_pop * 100

# Drop the absolute age-group columns and truncate 'pop' to a whole number in
# the derived frame only. Casting 'pop' in place on the upstream frame made
# this cell non-idempotent: a second run divided by the already-truncated
# values and produced different percentages.
pedestrian_intensity_all_final = (
    pedestrian_intensity_all_simulations
    .drop(columns=['young_pop', 'old_pop', 'age-unknown_pop'])
    .assign(pop=lambda frame: frame['pop'].astype(int))
)

Combine with geometry

In [41]:
# Bounding box of the study area (decimal degrees)
north = 52.43
south = 52.28
east = 5.10
west = 4.74

# Way filter passed to OSMnx: requires a "highway" tag and rejects ways whose
# tags match area=yes, the listed highway classes, foot=no, service=private
# or access=private
cf = """
     ["area"!~"yes"]
     ["highway"]
     ["highway"!~"motor|proposed|construction|abandoned|platform|raceway"]
     ["foot"!~"no"]
     ["service"!~"private"]
     ["access"!~"private"]
     """

# Download the unsimplified graph for the bounding box.
# NOTE(review): the bbox is passed as four positional values; newer OSMnx
# releases appear to expect a single `bbox` tuple instead - confirm against
# the installed version before upgrading.
# NOTE(review): presumably `custom_filter` takes precedence over
# `network_type` - verify in the OSMnx documentation.
G = ox.graph_from_bbox(
    north,
    south,
    east,
    west,
    custom_filter=cf,
    network_type='walk',
    simplify=False,
    truncate_by_edge=True,
)



  G = ox.graph_from_bbox(north, south, east, west, custom_filter=cf, network_type='walk', simplify= False, truncate_by_edge=True)


In [42]:
# Build one straight LineString per edge from the coordinates of its two end
# nodes in the graph G; edges whose nodes are missing from G get None.
geometries = []
for u, v in zip(pedestrian_intensity_all_final['u'], pedestrian_intensity_all_final['v']):
    try:
        # Look up both end nodes; a missing node or coordinate raises KeyError
        start, end = G.nodes[u], G.nodes[v]
        geometries.append(LineString([(start['x'], start['y']), (end['x'], end['y'])]))
    except KeyError:
        print(f"Node pair ({u}, {v}) not found in the graph.")
        geometries.append(None)  # Placeholder, removed by dropna below

# Attach the geometries, drop edges without one, make sure u and v are
# integers and remove the helper 'edge' column.
# Every step returns a new frame, which avoids the SettingWithCopyWarning that
# assigning columns after dropna() can raise. errors='ignore' keeps the cell
# re-runnable: on a second run 'edge' is already gone and a plain drop() would
# fail with KeyError.
pedestrian_intensity_all_final = (
    pedestrian_intensity_all_final
    .assign(geometry=geometries)
    .dropna(subset=['geometry'])
    .astype({'u': 'int64', 'v': 'int64'})
    .drop(columns=['edge'], errors='ignore')
)

# Convert to GeoDataFrame
pedestrian_intensity_all_final_gdf = gpd.GeoDataFrame(pedestrian_intensity_all_final, geometry='geometry', crs="EPSG:4326")


In [None]:
# Write the edge-level result to a GeoPackage file
# NOTE(review): 'buurtcode' holds Python dicts - confirm the GPKG writer stores
# that column in the form you expect.
output_path = "./root/Outputs/2_modelled_pedestrian_intensity.gpkg"
pedestrian_intensity_all_final_gdf.to_file(output_path, driver="GPKG")

# Preview the first rows of the exported table
preview = pedestrian_intensity_all_final_gdf.head()
display(preview)

Unnamed: 0,pop,buurtcode,u,v,young_pop_pct,old_pop_pct,geometry
0,31,{'BU0363AC03': 31.0},6316199,46388769,3.225806,9.677419,"LINESTRING (4.88840 52.37017, 4.88873 52.37195)"
1,660,"{'BU0363AG01': 189.0, 'BU0363AC03': 269.0, 'BU...",6316199,451946447,5.30303,9.69697,"LINESTRING (4.88840 52.37017, 4.88827 52.37018)"
2,629,"{'BU0363AG01': 189.0, 'BU0363AC03': 238.0, 'BU...",6316199,1360288038,5.405405,9.697933,"LINESTRING (4.88840 52.37017, 4.88847 52.37017)"
3,694,"{'BU0363AJ04': 308.0, 'BU0363AJ02': 379.0, 'BU...",25596455,46356773,9.365994,13.400576,"LINESTRING (4.92356 52.36484, 4.92312 52.36510)"
4,694,"{'BU0363AJ04': 308.0, 'BU0363AJ02': 379.0, 'BU...",25596455,8383889398,9.365994,13.400576,"LINESTRING (4.92356 52.36484, 4.92358 52.36483)"
