In [1]:
import geopandas as gpd
import pandas as pd
from sklearn.cluster import DBSCAN
import folium
import branca.colormap as cm  # Add this import
from folium.plugins import HeatMap
from folium.plugins import MarkerCluster
from shapely.ops import unary_union
import os
import openrouteservice
from sklearn.neighbors import BallTree
from shapely.geometry import Point
import logging
from shapely.geometry import shape
import numpy as np
import util
# get ORS key
ors_key = os.getenv('ORS_API_KEY')
client = openrouteservice.Client(base_url='http://localhost:8080/ors')


In [2]:
# import data
flats_duration = gpd.read_file('../data/derived_data/flats_duration.gpkg')
rcps = gpd.read_file('../data/raw_data/geodata_stadt_Zuerich/recycling_sammelstellen/data/stzh.poi_sammelstelle_view.shp')

# Convert to WGS84
flats_duration.to_crs(epsg=4326, inplace=True)
rcps.to_crs(epsg=4326, inplace=True)

# Initialize BallTree
tree, rcp_coords, rcp_ids = util.initialize_ball_tree(rcps)

INFO:util:BallTree initialized with RCP coordinates.


In [3]:
# Identify flats with duration above threshold
iso_threshold = 10

# Filter flats with population > 0 and duration >= threshold
high_pop_unserved = flats_duration[
    (flats_duration['est_pop'] > 0) & 
    (flats_duration['duration'] >= iso_threshold)
].copy()

# Ensure GeoDataFrame consistency
high_pop_unserved = gpd.GeoDataFrame(high_pop_unserved, geometry='geometry', crs="EPSG:4326")

# Export to file
high_pop_unserved.to_file(
    '../data/derived_data/high_pop_unserved_with_durations.gpkg', 
    driver='GPKG'
)

In [4]:

# convert to WGS84
high_pop_unserved = gpd.GeoDataFrame(high_pop_unserved, geometry='geometry', crs="EPSG:4326")

# Ensure the centroid calculation does not raise a warning
high_pop_unserved = high_pop_unserved.set_geometry(high_pop_unserved.geometry.centroid)

definedcoords = high_pop_unserved.geometry
X = pd.DataFrame({
    'x': definedcoords.x,
    'y': definedcoords.y,
    'population': high_pop_unserved['est_pop']
})

# Step 3: Apply DBSCAN clustering
db = DBSCAN(eps=0.005, min_samples=20).fit(X[['x', 'y']])
X['cluster'] = db.labels_

# Remove noise points
clusters = X[X['cluster'] != -1]

# Step 4: Calculate cluster centers weighted by population
cluster_centers = clusters.groupby('cluster').apply(
    lambda df: pd.Series({
        'x': (df['x'] * df['population']).sum() / df['population'].sum(),
        'y': (df['y'] * df['population']).sum() / df['population'].sum()
    })
).reset_index()

# Step 5: Create GeoDataFrame for new collection points
new_points = gpd.GeoDataFrame(
    cluster_centers,
    geometry=gpd.points_from_xy(cluster_centers['x'], cluster_centers['y']),
    crs="EPSG:4326"
)


  high_pop_unserved = high_pop_unserved.set_geometry(high_pop_unserved.geometry.centroid)
  cluster_centers = clusters.groupby('cluster').apply(


In [5]:
# Step 6: Plotting
m = folium.Map(location=[47.3769, 8.5417], zoom_start=13)

# Add existing collection points
""" for _, row in rcps.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup=row['adresse'],
        icon=folium.Icon(color='green', icon='recycle', prefix='fa')
    ).add_to(m) """

#add high population unserved flats
for _, row in high_pop_unserved.iterrows():
    folium.CircleMarker(
        location=[row.geometry.y, row.geometry.x],
        popup=f"Flat ID: {row['egid']}<br>Duration: {row['duration']:.2f} min, Population: {row['est_pop']}",
        color='red',
        radius=5,
        fill=True
    ).add_to(m)


# Add new collection points
for _, row in new_points.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup='New Collection Point',
        icon=folium.Icon(color='blue', icon='plus', prefix='fa')
    ).add_to(m)

m

In [6]:

# Add new_points to the rcps GeoDataFrame
rcps_new = pd.concat([rcps, new_points], ignore_index=True)


In [7]:
# Initialize BallTree with rcps_new
tree_new, rcp_coords_new, rcp_ids_new = util.initialize_ball_tree(rcps_new)

# Calculate durations for each flat
flats_duration_new = flats_duration.copy()
flats_duration_new['nearest_rcp_id'], flats_duration_new['duration'] = zip(
    *flats_duration_new['geometry'].apply(
        lambda geom: util.find_nearest_rcp_duration(geom, tree_new, rcp_coords_new, rcp_ids_new, client)
    )
)

# Calculate impact metrics
flats_duration_new['impact'] = flats_duration_new['est_pop'] * flats_duration_new['duration']
flats_duration_new['impact_log'] = np.log1p(flats_duration_new['impact'])


INFO:util:BallTree initialized with RCP coordinates.
ERROR:util:Unexpected error for origin (8.544431690844714, 47.3759246992302) to destination (8.540993522800246, 47.37462938371909): HTTPConnectionPool(host='localhost', port=8080): Max retries exceeded with url: /ors/v2/directions/foot-walking/geojson (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x786c54525940>: Failed to establish a new connection: [Errno 111] Connection refused'))
ERROR:util:Unexpected error for origin (8.544431690844714, 47.3759246992302) to destination (8.54659529197587, 47.37986835132213): HTTPConnectionPool(host='localhost', port=8080): Max retries exceeded with url: /ors/v2/directions/foot-walking/geojson (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x786c51a25580>: Failed to establish a new connection: [Errno 111] Connection refused'))
ERROR:util:Unexpected error for origin (8.544431690844714, 47.3759246992302) to destination (8.547035612308706, 47

KeyboardInterrupt: 

In [40]:
# Convert weighted average time to minutes and seconds
def convert_to_minutes_seconds(time_in_minutes):
	minutes = int(time_in_minutes)
	seconds = int((time_in_minutes - minutes) * 60)
	return minutes, seconds

# Calculate metrics for new collection points
flats_duration_new['weighted_time'] = flats_duration_new['duration'] * flats_duration_new['est_pop']
weighted_average_time_new = flats_duration_new['weighted_time'].sum() / flats_duration_new['est_pop'].sum()

# Calculate metrics for original data
flats_duration['weighted_time'] = flats_duration['duration'] * flats_duration['est_pop']
weighted_average_time_old = flats_duration['weighted_time'].sum() / flats_duration['est_pop'].sum()

# Calculate unserved population (≥ 10 minutes)
unserved_pop_new = flats_duration_new[flats_duration_new['duration'] >= 10]['est_pop'].sum()
unserved_pop_old = flats_duration[flats_duration['duration'] >= 10]['est_pop'].sum()

# Convert to minutes:seconds format
minutes_new, seconds_new = convert_to_minutes_seconds(weighted_average_time_new)
minutes_old, seconds_old = convert_to_minutes_seconds(weighted_average_time_old)

# Print results
print(f"New RCPs installed: {len(new_points)}")
print(f"\nComparison of metrics:")
print(f"Original weighted average walking time: {minutes_old}:{seconds_old:02d}")
print(f"New weighted average walking time: {minutes_new}:{seconds_new:02d}")
print(f"\nOriginal population with walking time >= 10 minutes: {unserved_pop_old:.0f}")
print(f"New population with walking time >= 10 minutes: {unserved_pop_new:.0f}")
print(f"\nPercentage of total population with walking time >= 10 minutes:")
total_pop = flats_duration['est_pop'].sum()
print(f"Original: {(unserved_pop_old/total_pop)*100:.1f}%")
print(f"New: {(unserved_pop_new/total_pop)*100:.1f}%")


New RCPs installed: 12

Comparison of metrics:
Original weighted average walking time: 4:06
New weighted average walking time: 3:51

Original population with walking time >= 10 minutes: 9183
New population with walking time >= 10 minutes: 1933

Percentage of total population with walking time >= 10 minutes:
Original: 2.1%
New: 0.4%
