In [1]:
import geopandas as gpd
import pandas as pd
from sklearn.cluster import DBSCAN
import folium
import branca.colormap as cm  # Add this import
from folium.plugins import HeatMap
from folium.plugins import MarkerCluster
from shapely.ops import unary_union
import os
import openrouteservice
from sklearn.neighbors import BallTree
from shapely.geometry import Point
import logging
import numpy as np
import util
# get ORS key
ors_key = os.getenv('ORS_API_KEY')
client = openrouteservice.Client(key=ors_key)


In [2]:
# Import flats with population
flats_with_pop = gpd.read_file('/home/silas/projects/msc_thesis/data/derived_data/flats_population.gpkg')

# Import isochrones
merged_isochrones_gdf= gpd.read_file('/home/silas/projects/msc_thesis/data/derived_data/isochrones_1-10min.gpkg')
# Load or create rcps GeoDataFrame
rcps=gpd.read_file('/home/silas/projects/msc_thesis/data/raw_data/geodata_stadt_Zuerich/recycling_sammelstellen/data/stzh.poi_sammelstelle_view.shp')
flats_with_pop.to_crs(epsg=4326, inplace=True)
rcps.to_crs(epsg=4326, inplace=True)

# Initialize BallTree
tree, rcp_coords, rcp_ids = util.initialize_ball_tree(rcps)



INFO:util:BallTree initialized with RCP coordinates.


In [3]:
# Streamlined Processing of High Population Unserved Flats

# Remove flats with population 0
flats_with_pop = flats_with_pop[flats_with_pop['est_pop'] > 0]

# Verify 'time' column exists
if 'time' not in merged_isochrones_gdf.columns:
    raise KeyError("'time' column is missing in merged_isochrones_gdf")

# Spatial join to retain all flats
joined = gpd.sjoin(
    flats_with_pop, 
    merged_isochrones_gdf[['geometry', 'time']], 
    how='left', 
    predicate='within'
)

# Assign default high time value to unserved flats
iso_threshold = 10
joined['time'] = joined['time'].fillna(iso_threshold + 1)

# Get the shortest time for each flat
joined = joined.groupby('egid', as_index=False).agg({
    'est_pop': 'first',
    'geometry': 'first',
    'time': 'min'
})

# Identify unserved flats
high_pop_unserved = joined[joined['time'] >= iso_threshold].copy()

# Calculate duration to the nearest RCP
high_pop_unserved[['nearest_rcp_id', 'duration_to_rcp_min']] = high_pop_unserved['geometry'].apply(
    lambda geom: util.find_nearest_rcp_duration(geom, tree, rcp_coords, rcp_ids, client)
).apply(pd.Series)

# Update 'time' with the calculated duration
joined.loc[joined['time'] >= iso_threshold, 'time'] = high_pop_unserved['duration_to_rcp_min'].values

# Ensure GeoDataFrame consistency
high_pop_unserved = gpd.GeoDataFrame(high_pop_unserved, geometry='geometry', crs="EPSG:4326")

# Export to file
high_pop_unserved.to_file(
    '/home/silas/projects/msc_thesis/data/derived_data/high_pop_unserved_with_durations.gpkg', 
    driver='GPKG'
)

# Optional: Verify the first few entries
high_pop_unserved[['egid', 'nearest_rcp_id', 'duration_to_rcp_min']].head()

INFO:pyogrio._io:Created 696 records


Unnamed: 0,egid,nearest_rcp_id,duration_to_rcp_min
1231,141692.0,sa42624,11.61
1232,141693.0,sa42624,10.98
1233,141694.0,sa42624,12.15
1234,141696.0,sa42624,11.96
1235,141697.0,sa42624,13.05


In [12]:
high_pop_unserved['duration_to_rcp_min'].describe()
len(high_pop_unserved)

696

In [None]:
high_pop_unserved = gpd.GeoDataFrame(high_pop_unserved, geometry='geometry', crs="EPSG:4326")

# Ensure the centroid calculation does not raise a warning
high_pop_unserved = high_pop_unserved.set_geometry(high_pop_unserved.geometry.centroid)

definedcoords = high_pop_unserved.geometry
X = pd.DataFrame({
    'x': definedcoords.x,
    'y': definedcoords.y,
    'population': high_pop_unserved['est_pop']
})

# Step 3: Apply DBSCAN clustering
db = DBSCAN(eps=0.005, min_samples=10).fit(X[['x', 'y']])
X['cluster'] = db.labels_

# Remove noise points
clusters = X[X['cluster'] != -1]

# Step 4: Calculate cluster centers weighted by population
cluster_centers = clusters.groupby('cluster').apply(
    lambda df: pd.Series({
        'x': (df['x'] * df['population']).sum() / df['population'].sum(),
        'y': (df['y'] * df['population']).sum() / df['population'].sum()
    })
).reset_index()

# Step 5: Create GeoDataFrame for new collection points
new_points = gpd.GeoDataFrame(
    cluster_centers,
    geometry=gpd.points_from_xy(cluster_centers['x'], cluster_centers['y']),
    crs="EPSG:4326"
)


  high_pop_unserved = high_pop_unserved.set_geometry(high_pop_unserved.geometry.centroid)
  cluster_centers = clusters.groupby('cluster').apply(


In [None]:
# Extract RCP coordinates
rcp_coords = rcps.geometry.apply(lambda geom: (geom.x, geom.y)).tolist()
rcp_ids = rcps['poi_id'].tolist()  # Replace 'poi_id' with your actual RCP identifier column

# Convert coordinates to radians for BallTree
flat_coords_rad = np.radians(high_pop_unserved.geometry.apply(lambda geom: [geom.y, geom.x]).tolist())
rcp_coords_rad = np.radians([coord[::-1] for coord in rcp_coords])  # [lat, lon]

# Build BallTree for efficient nearest neighbor search
tree = BallTree(rcp_coords_rad, metric='haversine')

# Apply the function to each flat
high_pop_unserved['nearest_rcp_id'], high_pop_unserved['duration_to_rcp_min'] = zip(*high_pop_unserved.geometry.apply(find_nearest_rcp_duration))

# Save the updated GeoDataFrame
high_pop_unserved.to_file('/home/silas/projects/msc_thesis/data/derived_data/high_pop_unserved_with_durations.gpkg', driver='GPKG')

# Optional: Verify the first few entries
high_pop_unserved[['egid', 'nearest_rcp_id', 'duration_to_rcp_min']].head()

NameError: name 'find_nearest_rcp_duration' is not defined

In [61]:
high_pop_unserved

Unnamed: 0,egid,est_pop,geometry,time,duration_to_rcp_min
1231,141692.0,1.944444,POINT (8.53858 47.33969),11.0,"(sa42624, 11.608333333333333)"
1232,141693.0,1.944444,POINT (8.53841 47.33947),11.0,"(sa42624, 10.979999999999999)"
1233,141694.0,1.111111,POINT (8.53877 47.33931),11.0,"(sa42624, 12.151666666666667)"
1234,141696.0,2.224138,POINT (8.53891 47.33789),11.0,"(sa42624, 11.965)"
1235,141697.0,0.947368,POINT (8.53973 47.33782),11.0,"(sa42624, 13.05)"
...,...,...,...,...,...
36007,302064832.0,2.812500,POINT (8.49704 47.35877),11.0,"(sa42823, 11.486666666666668)"
36153,302065296.0,1.866667,POINT (8.58694 47.38327),11.0,"(sa42938, 15.120000000000001)"
36154,302065297.0,1.400000,POINT (8.58723 47.38334),11.0,"(sa42938, 15.391666666666667)"
36332,302065759.0,2.108108,POINT (8.50486 47.3977),10.0,"(sa42985, 10.283333333333333)"


In [43]:
# Step 6: Plotting
m = folium.Map(location=[47.3769, 8.5417], zoom_start=13)

# Add existing collection points
""" for _, row in rcps.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup=row['adresse'],
        icon=folium.Icon(color='green', icon='recycle', prefix='fa')
    ).add_to(m) """

#add high population unserved flats
for _, row in high_pop_unserved.iterrows():
    folium.CircleMarker(
        location=[row.geometry.y, row.geometry.x],
        popup=f"Flat ID: {row['egid']}<br>Duration: {row['time']:.2f} min, Population: {row['est_pop']}",
        color='red',
        radius=5,
        fill=True

    ).add_to(m)


# Add new collection points
for _, row in new_points.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup='New Collection Point',
        icon=folium.Icon(color='blue', icon='plus', prefix='fa')
    ).add_to(m)
m.save('/home/silas/projects/msc_thesis/data/derived_data/new_collection_points_dbscan.html')
m

In [None]:
# Initialize ORS client
client = openrouteservice.Client(key=ors_key)

def generate_isochrones(client, locations, time_limit):
    params = {
        "locations": [locations],
        "range": [time_limit],
        "range_type": "time",
        "location_type": "start",
        "smoothing": 0.3,
        "profile": "foot-walking",
    }
    isochrones = client.isochrones(**params)
    return isochrones

# Generate isochrones for new points
new_isochrones = []
time_limits = [60, 120, 180, 240, 300, 360, 420, 480, 540, 600]  # 10 minutes

for time_limit in time_limits:
    for _, row in new_points.iterrows():
        lon, lat = row.geometry.x, row.geometry.y
        isochrone = generate_isochrones(client, [lon, lat], time_limit)
        if isochrone:
            for feature in isochrone['features']:
                new_isochrones.append({
                    'geometry': shape(feature['geometry']),
                    'time': time_limit / 60,
                    'rcp_id': f"new_{row['cluster']}"
                })

# Create GeoDataFrame for new isochrones
new_isochrones_gdf = gpd.GeoDataFrame(new_isochrones, crs="EPSG:4326")




In [None]:
#import existing isochrones
existing_isochrones = gpd.read_file('/home/silas/projects/msc_thesis/data/derived_data/isochrones_1-10min.gpkg')

# Merge existing and new isochrones
isochrones_gdf = pd.concat([existing_isochrones, new_isochrones_gdf], ignore_index=True)

def merge_isochrones_preserve_time(isochrones_gdf):
    """
    Merge isochrones preserving lower time values.

    Parameters:
    - isochrones_gdf: GeoDataFrame with isochrones and 'time' attribute.

    Returns:
    - GeoDataFrame with merged isochrones.
    """
    # Ensure CRS is EPSG:4326
    if isochrones_gdf.crs != "EPSG:4326":
        isochrones_gdf = isochrones_gdf.to_crs(epsg=4326)

    # Sort isochrones by 'time' ascending
    isochrones_sorted = isochrones_gdf.sort_values(by='time')

    merged_isochrones = gpd.GeoDataFrame(columns=isochrones_sorted.columns, crs="EPSG:4326")

    # Initialize an empty geometry for subtraction
    accumulated_geom = None

    for _, row in isochrones_sorted.iterrows():
        current_geom = row.geometry
        current_time = row['time']

        if accumulated_geom:
            remaining_geom = current_geom.difference(accumulated_geom)
        else:
            remaining_geom = current_geom

        if not remaining_geom.is_empty:
            new_row = row.copy()
            new_row.geometry = remaining_geom
            # Ensure the new_row GeoDataFrame has the correct CRS
            new_row = gpd.GeoDataFrame([new_row], crs="EPSG:4326")
            merged_isochrones = pd.concat([merged_isochrones, new_row], ignore_index=True)
            # Update accumulated geometry
            if accumulated_geom:
                accumulated_geom = unary_union([accumulated_geom, remaining_geom])
            else:
                accumulated_geom = remaining_geom
    return merged_isochrones

# Merge isochrones
merged_isochrones_gdf = merge_isochrones_preserve_time(isochrones_gdf)
old_ischrones_merged = merge_isochrones_preserve_time(existing_isochrones)

In [19]:
# Function to prepare heatmap data
merged_isochrones_gdf.to_crs(epsg=4326, inplace=True)
def prepare_heatmap_data(merged_isochrones_gdf):
    """
    Prepare heatmap data from merged isochrones.

    Parameters:
    - merged_isochrones_gdf: GeoDataFrame with merged isochrones.

    Returns:
    - List of [latitude, longitude, intensity] for HeatMap.
    """
    heat_data = []
    for _, row in merged_isochrones_gdf.iterrows():
        if 'time' not in row:
            print("Missing 'time' column in merged_isochrones_gdf")
            continue
        centroid = row.geometry.centroid
        lat, lon = centroid.y, centroid.x
        intensity = row['time'] / 60  # Convert time to minutes for intensity
        heat_data.append([lat, lon, intensity])
    return heat_data


In [None]:
heat_data = prepare_heatmap_data(merged_isochrones_gdf)

# Initialize Folium map centered around Zurich
m = folium.Map(location=[47.3769, 8.5417], zoom_start=13)

# Create a continuous color map for the heatmap
colormap = cm.linear.viridis.scale(0, 10)  # Changed color scheme to Viridis
colormap.caption = 'Isochrone Time (minutes)'
colormap.add_to(m)

# Add merged isochrones to the map
for _, row in merged_isochrones_gdf.iterrows():
    folium.GeoJson(
        row['geometry'],
        name=f"Isochrone {row['time']} min",
        style_function=lambda feature, time=row['time']: {
            'fillColor': colormap(time),
            'color': colormap(time),
            'weight': 1,
            'fillOpacity': 0.9,
        }
    ).add_to(m)

# Add recycling collection points to the map using MarkerCluster
marker_cluster = MarkerCluster(name='Existing Collection Points').add_to(m)
for _, row in rcps.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup=row['adresse'],
        icon=folium.Icon(color='green', icon='recycle', prefix='fa')
    ).add_to(marker_cluster)

# Add new collection points to a separate feature group
new_points_group = folium.FeatureGroup(name='New Collection Points')
for _, row in new_points.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup='New Collection Point',
        icon=folium.Icon(color='blue', icon='plus', prefix='fa')
    ).add_to(m)

# Save and display the map
heatmap_path = '/home/silas/projects/msc_thesis/data/derived_data/heatmap_test_after_cluster.html'
m.save(heatmap_path)
m


In [23]:
# Prepare heatmap data for existing isochrones only
heat_data_existing = prepare_heatmap_data(old_ischrones_merged)

# Initialize Folium map centered around Zurich
m_existing = folium.Map(location=[47.3769, 8.5417], zoom_start=13)

# Create a continuous color map for the heatmap
colormap_existing = cm.linear.viridis.scale(0, 10)  # Changed color scheme to Viridis
colormap_existing.caption = 'Isochrone Time (minutes)'
colormap_existing.add_to(m_existing)

# Add merged existing isochrones to the map
for _, row in old_ischrones_merged.iterrows():
    folium.GeoJson(
        row['geometry'],
        name=f"Isochrone {row['time']} min",
        style_function=lambda feature, time=row['time']: {
            'fillColor': colormap_existing(time),
            'color': colormap_existing(time),
            'weight': 1,
            'fillOpacity': 0.9,
        }
    ).add_to(m_existing)

# Add recycling collection points to the map using MarkerCluster
marker_cluster_existing = MarkerCluster().add_to(m_existing)
for _, row in rcps.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup=row['adresse'],
        icon=folium.Icon(color='green', icon='recycle', prefix='fa')
    ).add_to(marker_cluster_existing)

# Save and display the map
heatmap_path_existing = '/home/silas/projects/msc_thesis/data/derived_data/heatmap_existing_isochrones.html'
m_existing.save(heatmap_path_existing)
m_existing

In [35]:
def calculate_weighted_average_time(flats_with_pop, isochrones_gdf):
    """
    Calculate the population-weighted average time for given isochrones.

    Parameters:
    - flats_with_pop: GeoDataFrame with flats and population data.
    - isochrones_gdf: GeoDataFrame with isochrones and 'time' attribute.

    Returns:
    - Weighted average time.
    """
    joined = gpd.sjoin(flats_with_pop, isochrones_gdf, how='inner', predicate='within')
    population_per_isochrone = joined.groupby('time')['est_pop'].sum().reset_index()
    total_population = population_per_isochrone['est_pop'].sum()
    population_per_isochrone['weighted_time'] = population_per_isochrone['time'] * population_per_isochrone['est_pop']
    weighted_average_time = population_per_isochrone['weighted_time'].sum() / total_population
    return weighted_average_time

# Calculate weighted average time for new isochrones
weighted_average_time_new = calculate_weighted_average_time(flats_with_pop, merged_isochrones_gdf)

# Calculate weighted average time for old isochrones
weighted_average_time_old = calculate_weighted_average_time(flats_with_pop, old_ischrones_merged)

# Print the results
print(f"Population-weighted average time with new isochrones: {weighted_average_time_new:.2f} minutes")
print(f"Population-weighted average time with old isochrones: {weighted_average_time_old:.2f} minutes")

Population-weighted average time with new isochrones: 3.89 minutes
Population-weighted average time with old isochrones: 3.99 minutes


In [33]:
# With new points
people_10min_or_longer_with_new = joined[joined['time'] >= 10]['est_pop'].sum()
total_population_with_new = joined['est_pop'].sum()
percentage_10min_or_longer_with_new = (people_10min_or_longer_with_new / total_population_with_new) * 100

print(f"Percentage of people who have 10 minutes or longer to the nearest RCP with new points: {percentage_10min_or_longer_with_new:.2f}%")

# Without new points
# Spatial join: assign each flat to an isochrone for existing isochrones
joined_existing = gpd.sjoin(flats_with_pop, old_ischrones_merged, how='inner', predicate='within')

people_10min_or_longer_without_new = joined_existing[joined_existing['time'] >= 10]['est_pop'].sum()
total_population_without_new = joined_existing['est_pop'].sum()
percentage_10min_or_longer_without_new = (people_10min_or_longer_without_new / total_population_without_new) * 100

print(f"Percentage of people who have 10 minutes or longer to the nearest RCP without new points: {percentage_10min_or_longer_without_new:.2f}%")

Percentage of people who have 10 minutes or longer to the nearest RCP with new points: 0.31%
Percentage of people who have 10 minutes or longer to the nearest RCP without new points: 0.86%
