In [23]:
import geopandas as gpd
import pandas as pd
from sklearn.cluster import DBSCAN
import folium
import branca.colormap as cm  # Add this import
from folium.plugins import HeatMap
from folium.plugins import MarkerCluster
from shapely.ops import unary_union
import os
import openrouteservice
from sklearn.neighbors import BallTree
from shapely.geometry import Point
import logging
from shapely.geometry import shape
import numpy as np
import util
# get ORS key
ors_key = os.getenv('ORS_API_KEY')
client = openrouteservice.Client(key=ors_key)


In [2]:
# Import flats with population
flats_with_pop = gpd.read_file('/home/silas/projects/msc_thesis/data/derived_data/flats_population.gpkg')

# Import isochrones
merged_isochrones_gdf= gpd.read_file('/home/silas/projects/msc_thesis/data/derived_data/isochrones_1-10min.gpkg')
# Load or create rcps GeoDataFrame
rcps=gpd.read_file('/home/silas/projects/msc_thesis/data/raw_data/geodata_stadt_Zuerich/recycling_sammelstellen/data/stzh.poi_sammelstelle_view.shp')
flats_with_pop.to_crs(epsg=4326, inplace=True)
rcps.to_crs(epsg=4326, inplace=True)

# Initialize BallTree
tree, rcp_coords, rcp_ids = util.initialize_ball_tree(rcps)



INFO:util:BallTree initialized with RCP coordinates.


In [None]:
# Streamlined Processing of High Population Unserved Flats

# Remove flats with population 0
flats_with_pop = flats_with_pop[flats_with_pop['est_pop'] > 0]

# Verify 'time' column exists
if 'time' not in merged_isochrones_gdf.columns:
    raise KeyError("'time' column is missing in merged_isochrones_gdf")

# Spatial join to retain all flats
joined = gpd.sjoin(
    flats_with_pop, 
    merged_isochrones_gdf[['geometry', 'time']], 
    how='left', 
    predicate='within'
)

# Assign default high time value to unserved flats
iso_threshold = 10
joined['time'] = joined['time'].fillna(iso_threshold + 1)

# Get the shortest time for each flat
joined = joined.groupby('egid', as_index=False).agg({
    'est_pop': 'first',
    'geometry': 'first',
    'time': 'min'
})

# Identify unserved flats
high_pop_unserved = joined[joined['time'] >= iso_threshold].copy()

# Calculate duration to the nearest RCP
high_pop_unserved[['nearest_rcp_id', 'duration_to_rcp_min']] = high_pop_unserved['geometry'].apply(
    lambda geom: util.find_nearest_rcp_duration(geom, tree, rcp_coords, rcp_ids, client)
).apply(pd.Series)

# Update 'time' with the calculated duration
joined.loc[joined['time'] >= iso_threshold, 'time'] = high_pop_unserved['duration_to_rcp_min'].values

# Ensure GeoDataFrame consistency
high_pop_unserved = gpd.GeoDataFrame(high_pop_unserved, geometry='geometry', crs="EPSG:4326")

# Export to file
high_pop_unserved.to_file(
    '/home/silas/projects/msc_thesis/data/derived_data/high_pop_unserved_with_durations.gpkg', 
    driver='GPKG'
)

# Optional: Verify the first few entries
high_pop_unserved[['egid', 'nearest_rcp_id', 'duration_to_rcp_min']].head()

# Optional: Update 'time' column in 'high_pop_unserved'
high_pop_unserved['time']=high_pop_unserved['duration_to_rcp_min']
high_pop_unserved.drop(columns=['nearest_rcp_id', 'duration_to_rcp_min'], inplace=True)

INFO:pyogrio._io:Created 696 records


Unnamed: 0,egid,nearest_rcp_id,duration_to_rcp_min
1231,141692.0,sa42624,11.61
1232,141693.0,sa42624,10.98
1233,141694.0,sa42624,12.15
1234,141696.0,sa42624,11.96
1235,141697.0,sa42624,13.05


In [18]:
high_pop_unserved = gpd.GeoDataFrame(high_pop_unserved, geometry='geometry', crs="EPSG:4326")

# Ensure the centroid calculation does not raise a warning
high_pop_unserved = high_pop_unserved.set_geometry(high_pop_unserved.geometry.centroid)

definedcoords = high_pop_unserved.geometry
X = pd.DataFrame({
    'x': definedcoords.x,
    'y': definedcoords.y,
    'population': high_pop_unserved['est_pop']
})

# Step 3: Apply DBSCAN clustering
db = DBSCAN(eps=0.005, min_samples=10).fit(X[['x', 'y']])
X['cluster'] = db.labels_

# Remove noise points
clusters = X[X['cluster'] != -1]

# Step 4: Calculate cluster centers weighted by population
cluster_centers = clusters.groupby('cluster').apply(
    lambda df: pd.Series({
        'x': (df['x'] * df['population']).sum() / df['population'].sum(),
        'y': (df['y'] * df['population']).sum() / df['population'].sum()
    })
).reset_index()

# Step 5: Create GeoDataFrame for new collection points
new_points = gpd.GeoDataFrame(
    cluster_centers,
    geometry=gpd.points_from_xy(cluster_centers['x'], cluster_centers['y']),
    crs="EPSG:4326"
)


  high_pop_unserved = high_pop_unserved.set_geometry(high_pop_unserved.geometry.centroid)
  cluster_centers = clusters.groupby('cluster').apply(


In [21]:
# Step 6: Plotting
m = folium.Map(location=[47.3769, 8.5417], zoom_start=13)

# Add existing collection points
""" for _, row in rcps.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup=row['adresse'],
        icon=folium.Icon(color='green', icon='recycle', prefix='fa')
    ).add_to(m) """

#add high population unserved flats
for _, row in high_pop_unserved.iterrows():
    folium.CircleMarker(
        location=[row.geometry.y, row.geometry.x],
        popup=f"Flat ID: {row['egid']}<br>Duration: {row['time']:.2f} min, Population: {row['est_pop']}",
        color='red',
        radius=5,
        fill=True

    ).add_to(m)


# Add new collection points
for _, row in new_points.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup='New Collection Point',
        icon=folium.Icon(color='blue', icon='plus', prefix='fa')
    ).add_to(m)
m.save('/home/silas/projects/msc_thesis/data/derived_data/new_collection_points_dbscan.html')
m

In [24]:
# Initialize ORS client
client = openrouteservice.Client(key=ors_key)

def generate_isochrones(client, locations, time_limit):
    params = {
        "locations": [locations],
        "range": [time_limit],
        "range_type": "time",
        "location_type": "start",
        "smoothing": 0.3,
        "profile": "foot-walking",
    }
    isochrones = client.isochrones(**params)
    return isochrones

# Generate isochrones for new points
new_isochrones = []
time_limits = [60, 120, 180, 240, 300, 360, 420, 480, 540, 600]  # 10 minutes

for time_limit in time_limits:
    for _, row in new_points.iterrows():
        lon, lat = row.geometry.x, row.geometry.y
        isochrone = generate_isochrones(client, [lon, lat], time_limit)
        if isochrone:
            for feature in isochrone['features']:
                new_isochrones.append({
                    'geometry': shape(feature['geometry']),
                    'time': time_limit / 60,
                    'rcp_id': f"new_{row['cluster']}"
                })

# Create GeoDataFrame for new isochrones
new_isochrones_gdf = gpd.GeoDataFrame(new_isochrones, crs="EPSG:4326")




In [26]:
#import existing isochrones
existing_isochrones = gpd.read_file('/home/silas/projects/msc_thesis/data/derived_data/isochrones_1-10min.gpkg')

# Merge existing and new isochrones
isochrones_gdf = pd.concat([existing_isochrones, new_isochrones_gdf], ignore_index=True)

def merge_isochrones_preserve_time(isochrones_gdf):
    """
    Merge isochrones preserving lower time values.

    Parameters:
    - isochrones_gdf: GeoDataFrame with isochrones and 'time' attribute.

    Returns:
    - GeoDataFrame with merged isochrones.
    """
    # Ensure CRS is EPSG:4326
    if isochrones_gdf.crs != "EPSG:4326":
        isochrones_gdf = isochrones_gdf.to_crs(epsg=4326)

    # Sort isochrones by 'time' ascending
    isochrones_sorted = isochrones_gdf.sort_values(by='time')

    merged_isochrones = gpd.GeoDataFrame(columns=isochrones_sorted.columns, crs="EPSG:4326")

    # Initialize an empty geometry for subtraction
    accumulated_geom = None

    for _, row in isochrones_sorted.iterrows():
        current_geom = row.geometry
        current_time = row['time']

        if accumulated_geom:
            remaining_geom = current_geom.difference(accumulated_geom)
        else:
            remaining_geom = current_geom

        if not remaining_geom.is_empty:
            new_row = row.copy()
            new_row.geometry = remaining_geom
            # Ensure the new_row GeoDataFrame has the correct CRS
            new_row = gpd.GeoDataFrame([new_row], crs="EPSG:4326")
            merged_isochrones = pd.concat([merged_isochrones, new_row], ignore_index=True)
            # Update accumulated geometry
            if accumulated_geom:
                accumulated_geom = unary_union([accumulated_geom, remaining_geom])
            else:
                accumulated_geom = remaining_geom
    return merged_isochrones

# Merge isochrones
merged_isochrones_gdf = merge_isochrones_preserve_time(isochrones_gdf)
old_ischrones_merged = merge_isochrones_preserve_time(existing_isochrones)

  merged_isochrones = pd.concat([merged_isochrones, new_row], ignore_index=True)
  merged_isochrones = pd.concat([merged_isochrones, new_row], ignore_index=True)


In [29]:
# Function to prepare heatmap data
merged_isochrones_gdf.to_crs(epsg=4326, inplace=True)
def prepare_heatmap_data(merged_isochrones_gdf):
    """
    Prepare heatmap data from merged isochrones.

    Parameters:
    - merged_isochrones_gdf: GeoDataFrame with merged isochrones.

    Returns:
    - List of [latitude, longitude, intensity] for HeatMap.
    """
    heat_data = []
    for _, row in merged_isochrones_gdf.iterrows():
        if 'time' not in row:
            print("Missing 'time' column in merged_isochrones_gdf")
            continue
        centroid = row.geometry.centroid
        lat, lon = centroid.y, centroid.x
        intensity = row['time'] / 60  # Convert time to minutes for intensity
        heat_data.append([lat, lon, intensity])
    return heat_data


In [47]:
heat_data = prepare_heatmap_data(merged_isochrones_gdf)

# Initialize Folium map centered around Zurich
m = folium.Map(location=[47.3769, 8.5417], zoom_start=13)

# Create a continuous color map for the heatmap
colormap = cm.linear.viridis.scale(0, 10)  # Changed color scheme to Viridis
colormap.caption = 'Isochrone Time (minutes)'
colormap.add_to(m)
# Add merged isochrones to the map
for _, row in merged_isochrones_gdf.iterrows():
    folium.GeoJson(
        row['geometry'],
        name=f"Isochrone {row['time']} min",
        control=False,
        style_function=lambda feature, time=row['time']: {
            'fillColor': colormap(time),
            'color': colormap(time),
            'weight': 1,
            'fillOpacity': 0.8,
        }
    ).add_to(m)

# Add recycling collection points to the map using MarkerCluster
marker_cluster = MarkerCluster(name='Existing Collection Points').add_to(m)
for _, row in rcps.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup=row['adresse'],
        icon=folium.Icon(color='green', icon='recycle', prefix='fa')
    ).add_to(marker_cluster)

# Add new collection points to a separate feature group
new_points_group = folium.FeatureGroup(name='New Collection Points')
for _, row in new_points.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup='New Collection Point',
        icon=folium.Icon(color='blue', icon='plus', prefix='fa')
    ).add_to(new_points_group)

# Add the new points group to the map
new_points_group.add_to(m)

# Add title to the map
title_html = '''
             <h3 align="center" style="font-size:20px"><b>Isochrones and Recycling Collection Points</b></h3>
             '''
m.get_root().html.add_child(folium.Element(title_html))

# LayerControl to toggle between layers
folium.LayerControl().add_to(m)

# Add legend
legend_html = '''
<div style="position: fixed; 
            bottom: 30px; left: 30px; width: 180; height: 100px; 
            border:2px solid black; z-index:9999; font-size:14px;
            background-color:white; opacity: 0.8;
            padding: 8px;">
    <b>Legend</b><br>
    <i class="fa fa-recycle fa-2x" style="color:green"></i> Existing Collection Points<br>
    <i class="fa fa-plus fa-2x" style="color:blue"></i> New Collection Points
</div>
'''
m.get_root().html.add_child(folium.Element(legend_html))

heatmap_path = '/home/silas/projects/msc_thesis/data/derived_data/heatmap_test_after_cluster.html'
m.save(heatmap_path)
m


In [31]:
# Prepare heatmap data for existing isochrones only
heat_data_existing = prepare_heatmap_data(old_ischrones_merged)

# Initialize Folium map centered around Zurich
m_existing = folium.Map(location=[47.3769, 8.5417], zoom_start=13)

# Create a continuous color map for the heatmap
colormap_existing = cm.linear.viridis.scale(0, 10)  # Changed color scheme to Viridis
colormap_existing.caption = 'Isochrone Time (minutes)'
colormap_existing.add_to(m_existing)

# Add merged existing isochrones to the map
for _, row in old_ischrones_merged.iterrows():
    folium.GeoJson(
        row['geometry'],
        name=f"Isochrone {row['time']} min",
        style_function=lambda feature, time=row['time']: {
            'fillColor': colormap_existing(time),
            'color': colormap_existing(time),
            'weight': 1,
            'fillOpacity': 0.9,
        }
    ).add_to(m_existing)

# Add recycling collection points to the map using MarkerCluster
marker_cluster_existing = MarkerCluster().add_to(m_existing)
for _, row in rcps.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup=row['adresse'],
        icon=folium.Icon(color='green', icon='recycle', prefix='fa')
    ).add_to(marker_cluster_existing)

# Add title to the map
title_html = '''
             <h3 align="center" style="font-size:20px"><b>Existing Isochrones and Recycling Collection Points</b></h3>
             '''
m_existing.get_root().html.add_child(folium.Element(title_html))

# Save and display the map
heatmap_path_existing = '/home/silas/projects/msc_thesis/data/derived_data/heatmap_existing_isochrones.html'
m_existing.save(heatmap_path_existing)
m_existing

In [51]:
def calculate_weighted_average_time(flats_with_pop, isochrones_gdf):
    """
    Calculate the population-weighted average time for given isochrones.

    Parameters:
    - flats_with_pop: GeoDataFrame with flats and population data.
    - isochrones_gdf: GeoDataFrame with isochrones and 'time' attribute.

    Returns:
    - Weighted average time.
    """
    joined = gpd.sjoin(flats_with_pop, isochrones_gdf, how='inner', predicate='within')
    population_per_isochrone = joined.groupby('time')['est_pop'].sum().reset_index()
    total_population = population_per_isochrone['est_pop'].sum()
    population_per_isochrone['weighted_time'] = population_per_isochrone['time'] * population_per_isochrone['est_pop']
    weighted_average_time = population_per_isochrone['weighted_time'].sum() / total_population
    return weighted_average_time

# Calculate weighted average time for new isochrones
weighted_average_time_new = calculate_weighted_average_time(flats_with_pop, merged_isochrones_gdf)

# Calculate weighted average time for old isochrones
weighted_average_time_old = calculate_weighted_average_time(flats_with_pop, old_ischrones_merged)

# Convert weighted average time to minutes and seconds
def convert_to_minutes_seconds(time_in_minutes):
    minutes = int(time_in_minutes)
    seconds = int((time_in_minutes - minutes) * 60)
    return minutes, seconds

# Convert and print the results
minutes_new, seconds_new = convert_to_minutes_seconds(weighted_average_time_new)
minutes_old, seconds_old = convert_to_minutes_seconds(weighted_average_time_old)

print(f"Population-weighted average time with new isochrones: {minutes_new} minutes and {seconds_new} seconds")
print(f"Population-weighted average time with old isochrones: {minutes_old} minutes and {seconds_old} seconds")

Population-weighted average time with new isochrones: 3 minutes and 50 seconds
Population-weighted average time with old isochrones: 3 minutes and 59 seconds


In [58]:
# With new points
joined = gpd.sjoin(flats_with_pop, merged_isochrones_gdf[['geometry', 'time']], how='inner', predicate='within')
people_10min_or_longer_with_new = joined[joined['time'] >= 10]['est_pop'].sum()
total_population_with_new = joined['est_pop'].sum()
percentage_10min_or_longer_with_new = (people_10min_or_longer_with_new / total_population_with_new) * 100

print(f"Number of people who have 10 minutes or longer to the nearest RCP with new points: {people_10min_or_longer_with_new:.2f}")
print(f"Percentage of people who have 10 minutes or longer to the nearest RCP with new points: {percentage_10min_or_longer_with_new:.2f}%")

# Without new points
joined_existing = gpd.sjoin(flats_with_pop, old_ischrones_merged[['geometry', 'time']], how='inner', predicate='within')
people_10min_or_longer_without_new = joined_existing[joined_existing['time'] >= 10]['est_pop'].sum()
total_population_without_new = joined_existing['est_pop'].sum()
percentage_10min_or_longer_without_new = (people_10min_or_longer_without_new / total_population_without_new) * 100

print(f"Number of people who have 10 minutes or longer to the nearest RCP without new points: {people_10min_or_longer_without_new:.2f}")
print(f"Percentage of people who have 10 minutes or longer to the nearest RCP without new points: {percentage_10min_or_longer_without_new:.2f}%")


Number of people who have 10 minutes or longer to the nearest RCP with new points: 526.30
Percentage of people who have 10 minutes or longer to the nearest RCP with new points: 0.12%
Number of people who have 10 minutes or longer to the nearest RCP without new points: 3802.01
Percentage of people who have 10 minutes or longer to the nearest RCP without new points: 0.86%
