In [31]:
import geopandas as gpd
import pandas as pd
from sklearn.cluster import DBSCAN
import folium
import branca.colormap as cm  # Add this import
from folium.plugins import HeatMap
from folium.plugins import MarkerCluster
from shapely.ops import unary_union
import os
import openrouteservice
from sklearn.neighbors import BallTree
from shapely.geometry import Point
import logging
from shapely.geometry import shape
import numpy as np
import util
# get ORS key
# NOTE(review): ors_key is read from the environment but is not passed to the
# client below and is not referenced in any other visible cell.
ors_key = os.getenv('ORS_API_KEY')
# Routing client pointed at an openrouteservice endpoint on localhost:8080.
client = openrouteservice.Client(base_url='http://localhost:8080/ors')


In [32]:
# Read the flats layer and the recycling collection points (RCPs), reprojecting
# each one to WGS84 (EPSG:4326) as it is loaded.
flats_duration = gpd.read_file(
    '../data/derived_data/flats_duration.gpkg'
).to_crs(epsg=4326)
rcps = gpd.read_file(
    '../data/raw_data/geodata_stadt_Zuerich/recycling_sammelstellen/data/stzh.poi_sammelstelle_view.shp'
).to_crs(epsg=4326)

# Build the nearest-neighbour index over the RCPs with the project helper.
tree, rcp_coords, rcp_ids = util.initialize_ball_tree(rcps)

INFO:util:BallTree initialized with RCP coordinates.


In [33]:
# Walking-time threshold (minutes) at or above which a flat counts as unserved
iso_threshold = 10

# Build the two conditions separately, then keep flats that satisfy both:
# somebody lives there, and the walk is at least as long as the threshold.
has_residents = flats_duration['est_pop'] > 0
too_far = flats_duration['duration'] >= iso_threshold
high_pop_unserved = flats_duration.loc[has_residents & too_far].copy()

# Re-wrap the selection so it is a GeoDataFrame tagged as WGS84
high_pop_unserved = gpd.GeoDataFrame(
    high_pop_unserved,
    geometry='geometry',
    crs="EPSG:4326",
)

# Persist the unserved flats as a GeoPackage
high_pop_unserved.to_file(
    '../data/derived_data/high_pop_unserved_with_durations.gpkg',
    driver='GPKG',
)

In [34]:

# Re-wrap as a GeoDataFrame tagged as WGS84. No reprojection happens here: the
# coordinates are already EPSG:4326, this only guarantees the type and CRS.
high_pop_unserved = gpd.GeoDataFrame(high_pop_unserved, geometry='geometry', crs="EPSG:4326")

# Centroids computed directly on lon/lat trigger geopandas' "geographic CRS"
# warning, so take them in a projected (UTM) CRS and convert the result back
# to WGS84 for clustering and plotting.
projected_crs = high_pop_unserved.estimate_utm_crs()
centroids_wgs84 = (
    high_pop_unserved.geometry
    .to_crs(projected_crs)
    .centroid
    .to_crs(epsg=4326)
)
high_pop_unserved = high_pop_unserved.set_geometry(centroids_wgs84)

definedcoords = high_pop_unserved.geometry
X = pd.DataFrame({
    'x': definedcoords.x,
    'y': definedcoords.y,
    'population': high_pop_unserved['est_pop']
})

# Step 3: Apply DBSCAN clustering
# NOTE(review): eps is expressed in degrees because the inputs are lon/lat.
# 0.005 degrees is roughly 550 m north-south but noticeably less east-west at
# this latitude, so neighbourhoods are not circular on the ground.
DBSCAN_EPS_DEG = 0.005
DBSCAN_MIN_SAMPLES = 20
db = DBSCAN(eps=DBSCAN_EPS_DEG, min_samples=DBSCAN_MIN_SAMPLES).fit(X[['x', 'y']])
X['cluster'] = db.labels_

# Remove noise points (DBSCAN labels them -1)
clusters = X[X['cluster'] != -1]

# Step 4: Calculate cluster centers weighted by population.
# Done with plain grouped sums instead of groupby().apply(), which avoids the
# pandas deprecation warning about operating on the grouping column.
weighted_sums = (
    clusters
    .assign(
        wx=clusters['x'] * clusters['population'],
        wy=clusters['y'] * clusters['population'],
    )
    .groupby('cluster')[['wx', 'wy', 'population']]
    .sum()
)
cluster_centers = (
    weighted_sums[['wx', 'wy']]
    .div(weighted_sums['population'], axis=0)
    .rename(columns={'wx': 'x', 'wy': 'y'})
    .reset_index()
)

# Step 5: Create GeoDataFrame for new collection points
# (columns: cluster, x, y, geometry)
new_points = gpd.GeoDataFrame(
    cluster_centers,
    geometry=gpd.points_from_xy(cluster_centers['x'], cluster_centers['y']),
    crs="EPSG:4326"
)


  high_pop_unserved = high_pop_unserved.set_geometry(high_pop_unserved.geometry.centroid)
  cluster_centers = clusters.groupby('cluster').apply(


In [35]:
# Step 6: Plotting
# Base map centred on the coordinates used throughout this notebook.
m = folium.Map(location=[47.3769, 8.5417], zoom_start=13)

# Existing collection points are not drawn on this map; the marker loop that
# used to sit here was disabled by wrapping it in a string literal and has
# been removed. They are plotted on the isochrone maps instead.

# Add high population unserved flats as red circles with a summary popup
for _, row in high_pop_unserved.iterrows():
    folium.CircleMarker(
        location=[row.geometry.y, row.geometry.x],
        popup=f"Flat ID: {row['egid']}<br>Duration: {row['duration']:.2f} min, Population: {row['est_pop']}",
        color='red',
        radius=5,
        fill=True
    ).add_to(m)


# Add new collection points (population-weighted cluster centres)
for _, row in new_points.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup='New Collection Point',
        icon=folium.Icon(color='blue', icon='plus', prefix='fa')
    ).add_to(m)

m

In [36]:

# Add new_points to the rcps GeoDataFrame
# new_points only carries cluster/x/y/geometry, so every column that exists
# only in rcps (e.g. 'adresse') is NaN for the appended rows, and vice versa.
# NOTE(review): if util.initialize_ball_tree reads an id column from rcps, the
# new rows will have no id here — confirm against the helper.
rcps_new = pd.concat([rcps, new_points], ignore_index=True)


In [38]:
# Rebuild the nearest-neighbour index, this time over existing + proposed RCPs
tree_new, rcp_coords_new, rcp_ids_new = util.initialize_ball_tree(rcps_new)

# Work on a copy so the original flats table keeps its baseline values
flats_duration_new = flats_duration.copy()

# One (nearest id, duration) pair per flat, in row order
nearest_pairs = [
    util.find_nearest_rcp_duration(geom, tree_new, rcp_coords_new, rcp_ids_new, client)
    for geom in flats_duration_new['geometry']
]
nearest_ids, nearest_durations = zip(*nearest_pairs)
flats_duration_new['nearest_rcp_id'] = nearest_ids
flats_duration_new['duration'] = nearest_durations

# Impact = residents x duration, plus its log1p transform
flats_duration_new['impact'] = flats_duration_new['est_pop'] * flats_duration_new['duration']
flats_duration_new['impact_log'] = np.log1p(flats_duration_new['impact'])


INFO:util:BallTree initialized with RCP coordinates.


In [40]:
# Convert weighted average time to minutes and seconds
def convert_to_minutes_seconds(time_in_minutes):
    """Split a duration given in decimal minutes into whole minutes and seconds.

    The value is first rounded to the nearest whole second and then split, so
    floating-point noise cannot shave a second off the result (4.1 minutes is
    4:06, not 4:05) and the seconds part always stays in the range 0-59.

    Parameters:
    - time_in_minutes: duration in minutes (int, float or numpy scalar).

    Returns:
    - Tuple (minutes, seconds) of plain Python ints.
    """
    # int() keeps the result a Python int even when a numpy scalar is passed.
    total_seconds = int(round(time_in_minutes * 60))
    minutes, seconds = divmod(total_seconds, 60)
    return minutes, seconds

# Person-minutes per flat, stored on both tables (proposed network first,
# then the baseline)
for frame in (flats_duration_new, flats_duration):
    frame['weighted_time'] = frame['duration'] * frame['est_pop']

# Population-weighted mean walking time for each scenario
weighted_average_time_new = (
    flats_duration_new['weighted_time'].sum() / flats_duration_new['est_pop'].sum()
)
weighted_average_time_old = (
    flats_duration['weighted_time'].sum() / flats_duration['est_pop'].sum()
)

# Residents whose walk is 10 minutes or longer
unserved_pop_new = flats_duration_new.loc[flats_duration_new['duration'] >= 10, 'est_pop'].sum()
unserved_pop_old = flats_duration.loc[flats_duration['duration'] >= 10, 'est_pop'].sum()

# Denominator for the percentage lines: total population of the baseline table
total_pop = flats_duration['est_pop'].sum()

# Express the averages as minutes:seconds
minutes_new, seconds_new = convert_to_minutes_seconds(weighted_average_time_new)
minutes_old, seconds_old = convert_to_minutes_seconds(weighted_average_time_old)

# Report
report_lines = [
    f"New RCPs installed: {len(new_points)}",
    f"\nComparison of metrics:",
    f"Original weighted average walking time: {minutes_old}:{seconds_old:02d}",
    f"New weighted average walking time: {minutes_new}:{seconds_new:02d}",
    f"\nOriginal population with walking time >= 10 minutes: {unserved_pop_old:.0f}",
    f"New population with walking time >= 10 minutes: {unserved_pop_new:.0f}",
    f"\nPercentage of total population with walking time >= 10 minutes:",
    f"Original: {(unserved_pop_old/total_pop)*100:.1f}%",
    f"New: {(unserved_pop_new/total_pop)*100:.1f}%",
]
for line in report_lines:
    print(line)


New RCPs installed: 12

Comparison of metrics:
Original weighted average walking time: 4:06
New weighted average walking time: 3:51

Original population with walking time >= 10 minutes: 9183
New population with walking time >= 10 minutes: 1933

Percentage of total population with walking time >= 10 minutes:
Original: 2.1%
New: 0.4%


In [29]:
def generate_isochrones(client, locations, time_limit):
    """Request one foot-walking isochrone from the routing client.

    Parameters:
    - client: object exposing an ``isochrones(**kwargs)`` method.
    - locations: a single coordinate; it is wrapped in a list before sending.
    - time_limit: the single range value, sent with range_type "time"
      (callers in this notebook pass seconds).

    Returns:
    - Whatever ``client.isochrones`` returns for the request.
    """
    return client.isochrones(
        locations=[locations],
        range=[time_limit],
        range_type="time",
        location_type="start",
        smoothing=0.3,
        profile="foot-walking",
    )

# Collect one record per returned isochrone feature for every
# (time limit, new point) combination
new_isochrones = []
time_limits = list(range(60, 601, 60))  # seconds: 1 to 10 minutes in 1-minute steps

for limit_seconds in time_limits:
    for _, point in new_points.iterrows():
        origin = [point.geometry.x, point.geometry.y]  # lon, lat
        response = generate_isochrones(client, origin, limit_seconds)
        if not response:
            continue
        new_isochrones.extend(
            {
                'geometry': shape(feature['geometry']),
                'time': limit_seconds / 60,
                'rcp_id': f"new_{point['cluster']}",
            }
            for feature in response['features']
        )

# Turn the records into a WGS84 GeoDataFrame ('time' is stored in minutes)
new_isochrones_gdf = gpd.GeoDataFrame(new_isochrones, crs="EPSG:4326")


In [27]:
# Import the previously computed isochrones for the existing collection points
existing_isochrones = gpd.read_file('../data/derived_data/isochrones_all.gpkg')

# Merge existing and new isochrones into one table (index renumbered).
# NOTE(review): new_isochrones_gdf stores 'time' in minutes; this assumes the
# file on disk uses the same unit — confirm.
isochrones_gdf = pd.concat([existing_isochrones, new_isochrones_gdf], ignore_index=True)

def merge_isochrones_preserve_time(isochrones_gdf):
    """
    Merge isochrones preserving lower time values.

    Isochrones are processed in ascending 'time' order. From each one, the area
    already claimed by earlier (lower-time) isochrones is subtracted, so the
    returned polygons do not overlap and every location keeps the lowest time
    that reaches it. Rows whose remaining geometry is empty are dropped.

    Parameters:
    - isochrones_gdf: GeoDataFrame with isochrones and 'time' attribute.
      It is reprojected to EPSG:4326 if it is in another CRS.

    Returns:
    - GeoDataFrame (EPSG:4326, fresh 0..n-1 index) with the non-overlapping
      isochrone pieces and the original attribute columns.
    """
    # Ensure CRS is EPSG:4326
    if isochrones_gdf.crs != "EPSG:4326":
        isochrones_gdf = isochrones_gdf.to_crs(epsg=4326)

    # Sort isochrones by 'time' ascending so lower times claim area first
    isochrones_sorted = isochrones_gdf.sort_values(by='time')

    # Rows are collected in a list and turned into a GeoDataFrame once at the
    # end; concatenating inside the loop is quadratic and triggers pandas'
    # FutureWarning about concatenating with an empty frame.
    kept_rows = []

    # Union of everything kept so far; None until the first row is kept
    accumulated_geom = None

    for _, row in isochrones_sorted.iterrows():
        current_geom = row.geometry

        if accumulated_geom is None:
            remaining_geom = current_geom
        else:
            remaining_geom = current_geom.difference(accumulated_geom)

        if remaining_geom.is_empty:
            # Fully covered by lower-time isochrones: nothing left to keep
            continue

        new_row = row.copy()
        new_row['geometry'] = remaining_geom
        kept_rows.append(new_row)

        # Update accumulated geometry
        if accumulated_geom is None:
            accumulated_geom = remaining_geom
        else:
            accumulated_geom = unary_union([accumulated_geom, remaining_geom])

    if not kept_rows:
        # Same shape of result as before for empty input: no rows, same columns
        return gpd.GeoDataFrame(columns=isochrones_sorted.columns, crs="EPSG:4326")

    return gpd.GeoDataFrame(kept_rows, crs="EPSG:4326").reset_index(drop=True)

# Merge isochrones: once for existing + new collection points, and once for
# the existing collection points alone (the baseline).
merged_isochrones_gdf = merge_isochrones_preserve_time(isochrones_gdf)
# NOTE: the variable name is misspelled ("ischrones") but later cells refer to
# it by this exact name.
old_ischrones_merged = merge_isochrones_preserve_time(existing_isochrones)

  merged_isochrones = pd.concat([merged_isochrones, new_row], ignore_index=True)


Unnamed: 0,egid,est_pop,nearest_rcp_id,duration,impact,impact_log,geometry
0,141117.0,5.910448,sa46864,10.08,59.577313,4.103920,POINT (8.52905 47.35190)
1,141118.0,18.241935,sa46864,10.11,184.425968,5.222656,POINT (8.52944 47.35161)
2,141689.0,4.000000,sa42624,10.28,41.120000,3.740523,POINT (8.53714 47.34147)
3,141692.0,1.944444,sa42624,11.61,22.575000,3.160187,POINT (8.53858 47.33969)
4,141693.0,1.944444,sa42624,10.98,21.350000,3.106826,POINT (8.53841 47.33947)
...,...,...,...,...,...,...,...
881,302065759.0,3.162162,sa42985,10.28,32.507027,3.511755,POINT (8.50486 47.39770)
882,302065782.0,21.012888,sa65124,10.98,230.721514,5.445536,POINT (8.52554 47.34018)
883,302065793.0,10.823970,sa42911,10.32,111.703371,4.724759,POINT (8.47852 47.38209)
884,302066058.0,3.183521,sa42911,10.35,32.949438,3.524872,POINT (8.47910 47.38185)


In [11]:
# Function to prepare heatmap data
# Reproject the merged isochrones to WGS84 in place before the helper below is
# defined. This statement needs merged_isochrones_gdf to exist already, so the
# cell that builds it must have been run first.
merged_isochrones_gdf.to_crs(epsg=4326, inplace=True)
def prepare_heatmap_data(merged_isochrones_gdf, time_divisor=60):
    """
    Prepare heatmap data from merged isochrones.

    Parameters:
    - merged_isochrones_gdf: GeoDataFrame with merged isochrones; needs a
      'geometry' column and a 'time' column.
    - time_divisor: value the 'time' column is divided by to obtain the
      intensity. Defaults to 60 (the original seconds-to-minutes conversion).
      NOTE(review): other cells in this notebook already store 'time' in
      minutes; pass time_divisor=1 in that case.

    Returns:
    - List of [latitude, longitude, intensity] for HeatMap, one entry per row.
      An empty list (after printing a message once) if 'time' is missing.
    """
    # The column check does not depend on the row, so do it once up front
    # instead of printing the same message for every row.
    if 'time' not in merged_isochrones_gdf.columns:
        print("Missing 'time' column in merged_isochrones_gdf")
        return []

    heat_data = []
    for geom, time_value in zip(merged_isochrones_gdf['geometry'], merged_isochrones_gdf['time']):
        centroid = geom.centroid
        # folium expects [lat, lon, weight], i.e. y before x
        heat_data.append([centroid.y, centroid.x, time_value / time_divisor])
    return heat_data


In [8]:
# Make sure the time column is numeric. It is NOT divided by 60 here: 'time' is
# already in minutes (new isochrones are stored as time_limit / 60, and the
# other cells compare it against 10-minute thresholds and a 0-10 colour scale).
# Dividing in place would also shrink the values again on every re-run.
merged_isochrones_gdf['time'] = pd.to_numeric(merged_isochrones_gdf['time'])

# NOTE(review): heat_data is computed but not added to the map below.
heat_data = prepare_heatmap_data(merged_isochrones_gdf)

# Initialize Folium map centered around Zurich
m = folium.Map(location=[47.3769, 8.5417], zoom_start=13)

# Create a continuous color map using Viridis colorscheme.
# scale() takes only the value range; the caption is set as an attribute, and
# the colormap has to be added to the map for its legend to be shown.
colormap = cm.linear.viridis.scale(0, 10)
colormap.caption = 'Walking Time (minutes)'
colormap.add_to(m)

# Add merged isochrones to the map
for _, row in merged_isochrones_gdf.iterrows():
    time_minutes = row['time']
    folium.GeoJson(
        row['geometry'],
        name=f"Isochrone {time_minutes:.1f} min",
        control=False,
        # time is bound as a default argument so each layer keeps its own value;
        # values above 10 are clamped to the top of the colour scale
        style_function=lambda feature, time=time_minutes: {
            'fillColor': colormap(min(time, 10)),
            'color': 'none',
            'weight': 0.5,
            'fillOpacity': 0.5,
        }
    ).add_to(m)

# Add recycling collection points to the map using MarkerCluster
marker_cluster = MarkerCluster(name='Existing Collection Points').add_to(m)
for _, row in rcps.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup=row['adresse'],
        icon=folium.Icon(color='green', icon='recycle', prefix='fa')
    ).add_to(marker_cluster)

# Add new collection points to a separate feature group
new_points_group = folium.FeatureGroup(name='New Collection Points')
for _, row in new_points.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup='New Collection Point',
        icon=folium.Icon(color='blue', icon='plus', prefix='fa')
    ).add_to(new_points_group)

# Add the new points group to the map
new_points_group.add_to(m)

# Add title to the map
title_html = '''
             <h3 align="center" style="font-size:20px"><b>Isochrones and Recycling Collection Points</b></h3>
             '''
m.get_root().html.add_child(folium.Element(title_html))

# LayerControl to toggle between layers
folium.LayerControl().add_to(m)

# Add legend (width needs a unit, otherwise the CSS declaration is ignored)
legend_html = '''
<div style="position: fixed; 
            bottom: 30px; left: 30px; width: 180px; height: 100px; 
            border:2px solid black; z-index:9999; font-size:14px;
            background-color:white; opacity: 0.8;
            padding: 8px;">
    <b>Legend</b><br>
    <i class="fa fa-recycle fa-2x" style="color:green"></i> Existing Collection Points<br>
    <i class="fa fa-plus fa-2x" style="color:blue"></i> New Collection Points
</div>
'''
m.get_root().html.add_child(folium.Element(legend_html))

m.save('../data/plots/isochrones_with_new_points.html')
m


NameError: name 'merged_isochrones_gdf' is not defined

In [19]:
# Tag the existing RCPs (assign returns a new frame, rcps itself is untouched)
old_rcps = rcps.assign(rcp_type='existing')

# Tag the proposed RCPs, keep their cluster label, and give them a placeholder
# address so they share the columns selected below
new_rcps = new_points.assign(
    rcp_type='new',
    cluster_id=new_points['cluster'],
    adresse='New Collection Point',
)

# Stack both sets, restricted to the shared columns
shared_columns = ['geometry', 'adresse', 'rcp_type']
combined_rcps = pd.concat(
    [old_rcps[shared_columns], new_rcps[shared_columns]],
    ignore_index=True,
)

# Sequential ids sa1 .. sa[n] for every row, existing and new alike
combined_rcps['poi_id'] = [f"sa{i}" for i in range(1, len(combined_rcps) + 1)]

# Make sure the result is a GeoDataFrame tagged as WGS84
combined_rcps = gpd.GeoDataFrame(combined_rcps, geometry='geometry', crs="EPSG:4326")

# Write the combined layer to a GeoPackage
combined_rcps.to_file('../data/derived_data/combined_rcps.gpkg', driver='GPKG')

In [15]:
# Heatmap triples for the baseline isochrones (computed, not drawn below)
heat_data_existing = prepare_heatmap_data(old_ischrones_merged)

# Base map for the baseline scenario
m_existing = folium.Map(location=[47.3769, 8.5417], zoom_start=13)

# Viridis colour scale over 0-10, shown on the map with a caption
colormap_existing = cm.linear.viridis.scale(0, 10)
colormap_existing.caption = 'Isochrone Time (minutes)'
colormap_existing.add_to(m_existing)

# One GeoJson layer per merged baseline isochrone, coloured by its time
for _, iso in old_ischrones_merged.iterrows():
    iso_time = iso['time']

    def _ring_style(feature, time=iso_time):
        # time is bound as a default so each layer keeps its own colour
        shade = colormap_existing(time)
        return {
            'fillColor': shade,
            'color': shade,
            'weight': 1,
            'fillOpacity': 0.9,
        }

    ring_layer = folium.GeoJson(
        iso['geometry'],
        name=f"Isochrone {iso_time} min",
        style_function=_ring_style,
    )
    ring_layer.add_to(m_existing)

# Existing collection points, grouped into a marker cluster
marker_cluster_existing = MarkerCluster().add_to(m_existing)
for _, rcp in rcps.iterrows():
    rcp_marker = folium.Marker(
        location=[rcp.geometry.y, rcp.geometry.x],
        popup=rcp['adresse'],
        icon=folium.Icon(color='green', icon='recycle', prefix='fa'),
    )
    rcp_marker.add_to(marker_cluster_existing)

# Map title
title_html = '''
             <h3 align="center" style="font-size:20px"><b>Existing Isochrones and Recycling Collection Points</b></h3>
             '''
m_existing.get_root().html.add_child(folium.Element(title_html))

# Display the map (this cell does not write it to disk)
m_existing


In [1]:
def calculate_weighted_average_time(flats_with_pop, isochrones_gdf, time_col='duration'):
    """
    Calculate the population-weighted average time for given isochrones.

    Each flat is matched to the isochrone polygon it lies within; the result is
    sum(time * est_pop) / sum(est_pop) over the matched flats. Flats that fall
    in no isochrone, or in one without a time value, are left out.

    Parameters:
    - flats_with_pop: GeoDataFrame with flats and an 'est_pop' column.
    - isochrones_gdf: GeoDataFrame with isochrone polygons and a time column.
    - time_col: name of the time column in isochrones_gdf. Defaults to
      'duration'; the merged isochrones built in this notebook use 'time'.

    Returns:
    - Weighted average time, or NaN if no populated flat could be matched.
    """
    joined = gpd.sjoin(
        flats_with_pop,
        isochrones_gdf[['geometry', time_col]],
        how='inner',
        predicate='within',
    )

    # If the flats table already has a column with this name, sjoin renames the
    # isochrone one with its default right-hand suffix.
    value_col = time_col if time_col in joined.columns else f"{time_col}_right"

    # Rows without a time value contribute to neither side of the ratio
    joined = joined.dropna(subset=[value_col])

    total_population = joined['est_pop'].sum()
    if total_population == 0:
        # Nothing matched: the average is undefined rather than a 0/0 division
        return float('nan')

    weighted_time = (joined[value_col] * joined['est_pop']).sum()
    return weighted_time / total_population

# NOTE(review): this cell fails on a fresh kernel with
# "NameError: name 'flats_with_pop' is not defined" — no visible cell creates
# flats_with_pop. It also calls convert_to_minutes_seconds, which is defined in
# other cells, and the merged isochrones carry a 'time' column while
# calculate_weighted_average_time reads 'duration'.

# Calculate weighted average time for new isochrones
weighted_average_time_new = calculate_weighted_average_time(flats_with_pop, merged_isochrones_gdf)

# Calculate weighted average time for old isochrones
weighted_average_time_old = calculate_weighted_average_time(flats_with_pop, old_ischrones_merged)


# Convert and print the results
minutes_new, seconds_new = convert_to_minutes_seconds(weighted_average_time_new)
minutes_old, seconds_old = convert_to_minutes_seconds(weighted_average_time_old)

print(f"Population-weighted average time with new isochrones: {minutes_new} minutes and {seconds_new} seconds")
print(f"Population-weighted average time with old isochrones: {minutes_old} minutes and {seconds_old} seconds")

NameError: name 'flats_with_pop' is not defined

In [30]:

# Convert weighted average time to minutes and seconds
def convert_to_minutes_seconds(time_in_minutes):
    """Split a duration given in decimal minutes into whole minutes and seconds.

    The value is first rounded to the nearest whole second and then split, so
    floating-point noise cannot shave a second off the result (4.1 minutes is
    4:06, not 4:05) and the seconds part always stays in the range 0-59.

    Parameters:
    - time_in_minutes: duration in minutes (int, float or numpy scalar).

    Returns:
    - Tuple (minutes, seconds) of plain Python ints.
    """
    # int() keeps the result a Python int even when a numpy scalar is passed.
    total_seconds = int(round(time_in_minutes * 60))
    minutes, seconds = divmod(total_seconds, 60)
    return minutes, seconds

In [26]:
# Nearest-neighbour index over the combined (existing + new) RCP layer
tree_new, rcp_coords_new, rcp_ids_new = util.initialize_ball_tree(combined_rcps)

# Fresh copy of the flats; nearest RCP and duration are recomputed per flat
flats_duration_new = flats_duration.copy()
nearest_pairs = [
    util.find_nearest_rcp_duration(geom, tree_new, rcp_coords_new, rcp_ids_new, client)
    for geom in flats_duration_new['geometry']
]
nearest_ids, nearest_durations = zip(*nearest_pairs)
flats_duration_new['nearest_rcp_id'] = nearest_ids
flats_duration_new['duration'] = nearest_durations

# Impact (residents x duration) and its log1p transform
flats_duration_new['impact'] = flats_duration_new['est_pop'] * flats_duration_new['duration']
flats_duration_new['impact_log'] = np.log1p(flats_duration_new['impact'])

# Person-minutes per flat on both tables (new network first, then baseline)
for frame in (flats_duration_new, flats_duration):
    frame['weighted_time'] = frame['duration'] * frame['est_pop']

# Population-weighted mean duration for each scenario
weighted_average_time_new = (
    flats_duration_new['weighted_time'].sum() / flats_duration_new['est_pop'].sum()
)
weighted_average_time_old = (
    flats_duration['weighted_time'].sum() / flats_duration['est_pop'].sum()
)

# Report both averages as minutes:seconds
minutes_new, seconds_new = convert_to_minutes_seconds(weighted_average_time_new)
minutes_old, seconds_old = convert_to_minutes_seconds(weighted_average_time_old)
print(f"New population-weighted average time: {minutes_new}:{seconds_new:02d}")
print(f"Previous population-weighted average time: {minutes_old}:{seconds_old:02d}")

# Number of people not within 10 minutes of a collection point
unserved_pop_new = flats_duration_new.loc[flats_duration_new['duration'] >= 10, 'est_pop'].sum()
unserved_pop_old = flats_duration.loc[flats_duration['duration'] >= 10, 'est_pop'].sum()

# Report the unserved population and the number of proposed points
print(f"New unserved population: {unserved_pop_new}")
print(f"Previous unserved population: {unserved_pop_old}")
print(f"Number of new collection points to be installed: {len(new_points)}")

NameError: name 'combined_rcps' is not defined

In [58]:
def _ten_minute_share(flats, isochrones):
    """Join flats to the isochrone they lie within and summarise the 10+ minute group.

    Returns (joined frame, population with time >= 10, total joined population,
    percentage of the joined population with time >= 10).
    """
    matched = gpd.sjoin(flats, isochrones[['geometry', 'time']], how='inner', predicate='within')
    far_population = matched.loc[matched['time'] >= 10, 'est_pop'].sum()
    matched_population = matched['est_pop'].sum()
    far_percentage = (far_population / matched_population) * 100
    return matched, far_population, matched_population, far_percentage


# Scenario 1: existing + new collection points
(
    joined,
    people_10min_or_longer_with_new,
    total_population_with_new,
    percentage_10min_or_longer_with_new,
) = _ten_minute_share(flats_with_pop, merged_isochrones_gdf)

print(f"Number of people who have 10 minutes or longer to the nearest RCP with new points: {people_10min_or_longer_with_new:.2f}")
print(f"Percentage of people who have 10 minutes or longer to the nearest RCP with new points: {percentage_10min_or_longer_with_new:.2f}%")

# Scenario 2: existing collection points only
(
    joined_existing,
    people_10min_or_longer_without_new,
    total_population_without_new,
    percentage_10min_or_longer_without_new,
) = _ten_minute_share(flats_with_pop, old_ischrones_merged)

print(f"Number of people who have 10 minutes or longer to the nearest RCP without new points: {people_10min_or_longer_without_new:.2f}")
print(f"Percentage of people who have 10 minutes or longer to the nearest RCP without new points: {percentage_10min_or_longer_without_new:.2f}%")


Number of people who have 10 minutes or longer to the nearest RCP with new points: 526.30
Percentage of people who have 10 minutes or longer to the nearest RCP with new points: 0.12%
Number of people who have 10 minutes or longer to the nearest RCP without new points: 3802.01
Percentage of people who have 10 minutes or longer to the nearest RCP without new points: 0.86%
