In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import folium
import pulp 

# Load data
# Demand points (one row per cluster; later cells read 'total_est_pop' and 'cluster_id').
demand_points = gpd.read_file('data/derived_data/kmeans_clusters.gpkg')
# Candidate sites (later cells read the 'status' and 'ID' columns).
potential_sites = gpd.read_file('data/derived_data/all_pot_sites.gpkg')
# Recycling collection points of the city of Zurich (not referenced by the cells visible below).
rcps = gpd.read_file('data/raw_data/geodata_stadt_Zuerich/recycling_sammelstellen/data/stzh.poi_sammelstelle_view.shp')

# Long-format walking-duration table (columns used later: 'ID', 'cluster_ID',
# 'Walking_Duration_Minutes'). The file is read from disk once; `durations` is kept
# as an independent copy so both names remain available exactly as before.
matrix = pd.read_csv('data/derived_data/distance_matrix_walking.csv')
durations = matrix.copy()

In [None]:
# Split every site ID of the form '<prefix>_<number>' (e, p, etc.) into its two parts.
id_parts = matrix['ID'].str.split('_')

# Work on a sorted copy of the long table: order by prefix first, then by the
# numeric suffix, so that e.g. 'p_2' comes before 'p_10'.
temp = (
    matrix
    .assign(
        prefix=id_parts.str[0],
        numeric_id=id_parts.str[1].astype(int),
    )
    .sort_values(['prefix', 'numeric_id'])
)

# Wide matrix: one row per site ID, one column per cluster_ID, walking minutes as values.
# pivot() orders its index on its own, so the rows are put back into the custom order.
site_order = temp['ID'].unique()
distance_matrix = temp.pivot(
    index='ID',
    columns='cluster_ID',
    values='Walking_Duration_Minutes',
).reindex(site_order)


In [None]:

# Parameter: number of NEW facilities to open (in addition to already open sites)
p = 10

# Dimensions come from the pivoted matrix: rows are sites, columns are demand points.
num_demand_points = distance_matrix.shape[1]  # number of demand points
num_potential_sites = distance_matrix.shape[0]  # number of potential sites

# Positional index sets used throughout the model (i = demand point, j = site).
I = range(num_demand_points)
J = range(num_potential_sites)

# Population per demand point, keyed by row POSITION so it lines up with the positional
# distance_matrix.iloc[j, i] lookups used when the model is built.
# NOTE(review): this assumes the row order of demand_points matches the column order of
# distance_matrix, and the row order of potential_sites matches its row order — confirm.
pop = dict(enumerate(demand_points['total_est_pop'].tolist()))

# Identify existing and potential sites. The status column is read once instead of
# materialising a full row Series for every site.
site_status = potential_sites['status'].tolist()
existing = [j for j in J if site_status[j] == "open"]
new_sites = [j for j in J if site_status[j] == "potential"]

# Facility open/closed indicator: the constant 1 for existing sites, a binary decision
# variable for every other site. A set makes the membership test O(1).
existing_set = set(existing)
y = {
    j: 1 if j in existing_set else pulp.LpVariable(f"Facility_{j}", cat='Binary')
    for j in J
}

# Create the model
prob = pulp.LpProblem("P-Median_Problem", pulp.LpMinimize)

# Assignment decision variables for all sites (binary: 1 if demand point i is assigned to facility j, else 0)
x = pulp.LpVariable.dicts("Assign", [(i, j) for i in I for j in J], cat='Binary')

# Plain NumPy copy of the walking durations (rows = sites j, columns = demand points i)
# so the O(|I|*|J|) loops below do not pay for a pandas .iloc call per cell.
dist = distance_matrix.to_numpy()

# O(1) membership tests for the site categories.
existing_set = set(existing)
new_site_set = set(new_sites)

# Thresholds of the service-level constraint at the bottom of this cell.
MAX_WALK_MINUTES = 10        # walking duration (minutes) regarded as acceptable
MAX_POP_BEYOND_WALK = 150    # population allowed to be assigned beyond that duration

# Objective: minimise the total population-weighted walking duration
# (population of demand point i times minutes from site j); no extra weighting factor is applied.
prob += pulp.lpSum(pop[i] * dist[j, i] * x[(i, j)] for i in I for j in J)

# Constraint: Each demand point is assigned to exactly one facility.
for i in I:
    prob += pulp.lpSum(x[(i, j)] for j in J) == 1

# Sites whose status is neither "open" nor "potential" are not part of the decision:
# force them closed so they cannot absorb demand without counting towards p.
for j in J:
    if j not in existing_set and j not in new_site_set:
        prob += y[j] == 0

# Constraint: A demand point can only be assigned to an open facility.
# Existing sites are always open and x is binary, so they need no constraint here.
for i in I:
    for j in J:
        if j not in existing_set:
            prob += x[(i, j)] <= y[j]

# Constraint: Exactly p new facilities are opened among potential sites.
prob += pulp.lpSum(y[j] for j in new_sites) == p

# Constraint: limit the population assigned to a facility more than MAX_WALK_MINUTES away
# to MAX_POP_BEYOND_WALK people.
# NOTE(review): the limit enforced here is 150; confirm this is the intended value.
prob += pulp.lpSum(
    pop[i] * x[(i, j)] for i in I for j in J if dist[j, i] > MAX_WALK_MINUTES
) <= MAX_POP_BEYOND_WALK

# Solve the model using Gurobi
prob.solve(pulp.GUROBI(msg=True))

# Results
print(f"Status: {pulp.LpStatus[prob.status]}")
print(f"Total Weighted Distance: {pulp.value(prob.objective)}")

# Stop here unless an optimal solution was found: otherwise the variable values read
# below are missing or meaningless and every downstream cell would silently work on garbage.
if prob.status != pulp.LpStatusOptimal:
    raise RuntimeError(
        f"Model was not solved to optimality (status: {pulp.LpStatus[prob.status]}); "
        "no facility selection is available."
    )

# Get new opened facilities from potential sites. Solver output is floating point
# (a binary may come back as 0.9999999), so compare against 0.5 instead of testing == 1.
opened_new = [j for j in new_sites if (pulp.value(y[j]) or 0) > 0.5]

# Combine existing sites (already open) with new opened facilities for full set of open facilities.
opened_facilities = existing + opened_new

# Create a dataframe of selected sites
selected_sites = potential_sites.iloc[opened_facilities].copy()

# Construct assignments: a dictionary keyed by (demand point i, facility j) for every
# active assignment; the stored value is the indicator 1.0.
assignments = {}
for i in I:
    for j in J:
        assigned = pulp.value(x[(i, j)])  # evaluated once per variable
        if assigned is not None and assigned > 0.5:
            assignments[(i, j)] = 1.0

In [None]:
# Group the assigned demand point indices by the facility index they were assigned to.
facility_to_demands = {}
for demand_idx, site_idx in assignments:
    if site_idx not in facility_to_demands:
        facility_to_demands[site_idx] = []
    facility_to_demands[site_idx].append(demand_idx)

# One record per facility: its positional index, its ID taken from potential_sites,
# and the list of demand points it serves.
facility_records = []
for site_idx, demand_list in facility_to_demands.items():
    facility_records.append({
        'Facility_Index': site_idx,
        'Facility_ID': potential_sites.iloc[site_idx]['ID'],
        'Demand_Points': demand_list,
    })

# Table ordered by facility index; the first rows are shown as the cell output.
facility_demand_df = pd.DataFrame(facility_records).sort_values(by='Facility_Index')
facility_demand_df.head()

In [None]:
# Define a list with 10 distinct colors
colors = ['red', 'blue', 'green', 'orange', 'purple', 'darkred', 'lightblue', 'cadetblue', 'darkgreen', 'pink']

# Only the first 10 rows of selected_sites are drawn (one color each).
facilities = selected_sites.iloc[0:10].copy()
facilities_4326 = facilities.to_crs(epsg=4326)

# Create a new folium map centered on the mean location of these facilities
mean_lat = facilities_4326.geometry.y.mean()
mean_lon = facilities_4326.geometry.x.mean()
m_cluster = folium.Map(location=[mean_lat, mean_lon], zoom_start=12)

# Reproject the demand points once, instead of once per facility inside the loop.
demand_points_4326 = demand_points.to_crs(epsg=4326)

# Group the assigned demand point positions by facility in a single pass over the
# assignments, instead of rescanning the whole dictionary for every facility.
demands_by_facility = {}
for (i, j) in assignments:
    demands_by_facility.setdefault(j, []).append(i)

# For each facility, draw it and the demand points assigned to it in the same color
for idx, (fac_idx, facility) in enumerate(facilities_4326.iterrows()):
    fac_color = colors[idx % len(colors)]
    fac_coords = [facility.geometry.y, facility.geometry.x]
    # Add a facility marker (using a circle marker to better visualize clusters)
    folium.CircleMarker(
        location=fac_coords,
        radius=8,
        color=fac_color,
        fill=True,
        fill_color=fac_color,
        popup=f"Facility {facility['ID']}"
    ).add_to(m_cluster)

    # Demand point positions assigned to this facility (empty list if none).
    # NOTE(review): fac_idx is the index label of the site row; it equals the model's
    # positional index j only if potential_sites has a default 0..n-1 index — confirm.
    assigned_demand_indices = demands_by_facility.get(fac_idx, [])

    # The model indexes demand points by position, so select by position (.iloc) as well.
    for _, demand in demand_points_4326.iloc[assigned_demand_indices].iterrows():
        d_coords = [demand.geometry.y, demand.geometry.x]
        folium.CircleMarker(
            location=d_coords,
            radius=4,
            color=fac_color,
            fill=True,
            fill_color=fac_color,
            # Label both values instead of printing them as a raw Python tuple.
            popup=f"Pop: {demand['total_est_pop']}, Cluster: {demand['cluster_id']}"
        ).add_to(m_cluster)

# Display the map
m_cluster


In [None]:
import folium

# Reproject the selected sites to WGS 84 (EPSG:4326) so the coordinates are lat/lon.
selected_sites_4326 = selected_sites.to_crs(epsg=4326)

# Center the map on the average position of all selected sites.
site_points = selected_sites_4326.geometry
mean_lat = site_points.y.mean()
mean_lon = site_points.x.mean()
m = folium.Map(location=[mean_lat, mean_lon], zoom_start=12)

# One marker per site, popup = site ID:
# - sites with status 'open' (existing) in blue
# - every other site (the newly selected ones) in red
for _, site in selected_sites_4326.iterrows():
    if site['status'] == 'open':
        marker_color = 'blue'
    else:
        marker_color = 'red'
    marker = folium.Marker(
        location=[site.geometry.y, site.geometry.x],
        popup=site['ID'],
        icon=folium.Icon(color=marker_color),
    )
    marker.add_to(m)

m

In [None]:
import openrouteservice
import scripts.util as util
import geopandas as gpd

# Load the flats_population dataset (produced by the population allocation step)
flats_population = gpd.read_file('data/derived_data/flats_population.gpkg').to_crs(epsg=4326)

# Collapse to one row per 'egid': sum the estimated population, keep the first geometry.
flats_population = flats_population.groupby('egid').agg({'est_pop': 'sum', 'geometry': 'first'}).reset_index()

# Initialize ORS client (using our local ORS instance)
client = openrouteservice.Client(base_url='http://localhost:8080/ors')

selected_sites_4326 = selected_sites.to_crs(epsg=4326)
# Build the BallTree using the selected_sites_4326 as rcp locations
selected_sites_4326 = selected_sites_4326.rename(columns={'ID': 'poi_id'})
tree, rcp_coords, rcp_ids = util.initialize_ball_tree(selected_sites_4326, 'poi_id')

# For each flat in flats_population, find the nearest RCP and its walking duration.
# It is assumed that util.find_nearest_rcp_duration returns a tuple (rcp_id, duration_minutes)
results = flats_population.apply(lambda row: util.find_nearest_rcp_duration(row.geometry, tree, rcp_coords, rcp_ids, client), axis=1)

# Extract durations (in minutes); if no valid duration is returned, assign NaN.
flats_population['duration'] = results.apply(lambda res: res[1] if res is not None and res[1] is not None else np.nan)

# Compute the weighted average walking duration using the 'est_pop' column as weights.
# Only rows with a valid duration take part: a NaN duration is skipped by the numerator's
# sum, so its population must be left out of the denominator too, otherwise the average
# is biased downward.
has_duration = flats_population['duration'].notna()
routed_duration = flats_population.loc[has_duration, 'duration']
routed_pop = flats_population.loc[has_duration, 'est_pop']
weighted_avg = (routed_duration * routed_pop).sum() / routed_pop.sum()

print("Weighted Average Walking Duration (min):", weighted_avg)

In [None]:
# write in min:sec format
# Round to whole seconds first and then split: truncating the fractional minutes
# separately loses a second to floating-point error (5.1 min would print as "5 min 5 sec").
total_seconds = int(round(weighted_avg * 60))
minutes, seconds = divmod(total_seconds, 60)
print(f"Weighted Average Walking Duration: {minutes} min {seconds} sec")

# Count population outside 10-minute radius
# Rows with a NaN duration fail the `> 10` comparison, so they are not counted as outside,
# while their population is still part of total_pop.
outside_10min = flats_population[flats_population['duration'] > 10]['est_pop'].sum()
total_pop = flats_population['est_pop'].sum()
percent_outside = (outside_10min / total_pop) * 100

print(f"Population outside 10-minute radius: {int(outside_10min):,} people")
print(f"Percentage of total population: {percent_outside:.1f}%")

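In [None]:
# NOTE: a hardcoded API key was removed from this cell. Treat that key as compromised
# (revoke/rotate it) and load credentials from an environment variable instead of
# storing them in the notebook.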