In [None]:
import geopandas as gpd
import pandas as pd
from sklearn.cluster import DBSCAN
import folium
import branca.colormap as cm  # Add this import
from folium.plugins import HeatMap
from folium.plugins import MarkerCluster
from shapely.ops import unary_union
from sklearn.neighbors import BallTree
from shapely.geometry import Point
from shapely.geometry import shape

# import datasets
existing_isochrones = gpd.read_file('../data/derived_data/iso_merged.gpkg')
flats_pop = gpd.read_file('../data/derived_data/flats_population.gpkg')
rcps= rcps = gpd.read_file('../data/raw_data/geodata_stadt_Zuerich/recycling_sammelstellen/data/stzh.poi_sammelstelle_view.shp')
flats_duration= gpd.read_file('../data/derived_data/flats_duration.gpkg')
rcps.to_crs('EPSG:4326', inplace=True)
potential_sites= gpd.read_file('../data/derived_data/all_pot_sites.gpkg')


In [3]:
def merge_isochrones_preserve_time(isochrones_gdf):
    """
    Merge isochrones preserving lower time values.

    Parameters:
    - isochrones_gdf: GeoDataFrame with isochrones and 'time' attribute.

    Returns:
    - GeoDataFrame with merged isochrones.
    """
    # Ensure CRS is EPSG:4326
    if isochrones_gdf.crs != "EPSG:4326":
        isochrones_gdf = isochrones_gdf.to_crs(epsg=4326)

    # Sort isochrones by 'time' ascending
    isochrones_sorted = isochrones_gdf.sort_values(by='time')

    merged_isochrones = gpd.GeoDataFrame(columns=isochrones_sorted.columns, crs="EPSG:4326")

    # Initialize an empty geometry for subtraction
    accumulated_geom = None

    for _, row in isochrones_sorted.iterrows():
        current_geom = row.geometry
        current_time = row['time']

        if accumulated_geom:
            remaining_geom = current_geom.difference(accumulated_geom)
        else:
            remaining_geom = current_geom

        if not remaining_geom.is_empty:
            new_row = row.copy()
            new_row.geometry = remaining_geom
            # Ensure the new_row GeoDataFrame has the correct CRS
            new_row = gpd.GeoDataFrame([new_row], crs="EPSG:4326")
            merged_isochrones = pd.concat([merged_isochrones, new_row], ignore_index=True)
            # Update accumulated geometry
            if accumulated_geom:
                accumulated_geom = unary_union([accumulated_geom, remaining_geom])
            else:
                accumulated_geom = remaining_geom
    return merged_isochrones


merged_isochrones = merge_isochrones_preserve_time(existing_isochrones)
merged_isochrones.to_file('../data/derived_data/merged_isochrones.gpkg', driver='GPKG')

In [4]:
# Reproject flats_pop to match merged_isochrones CRS
flats_pop_4326 = flats_pop.to_crs(merged_isochrones.crs)

# Merge all isochrones into a single geometry
iso_union = merged_isochrones.unary_union

# Identify flats outside any isochrones
flats_outside = flats_pop_4326[~flats_pop_4326.geometry.within(iso_union)]


In [5]:
# Convert to centroids and set up the data for clustering
X = pd.DataFrame({
    'x': flats_outside.geometry.x,
    'y': flats_outside.geometry.y,
    'population': flats_outside['est_pop']
})

# Apply DBSCAN clustering
db = DBSCAN(eps=0.005, min_samples=20).fit(X[['x', 'y']])
X['cluster'] = db.labels_

# Remove noise points
clusters = X[X['cluster'] != -1]

# Calculate cluster centers weighted by population
cluster_centers = clusters.groupby('cluster').apply(
    lambda df: pd.Series({
        'x': (df['x'] * df['population']).sum() / df['population'].sum(),
        'y': (df['y'] * df['population']).sum() / df['population'].sum()
    })
).reset_index()

# Create GeoDataFrame for new collection points
new_points = gpd.GeoDataFrame(
    cluster_centers,
    geometry=gpd.points_from_xy(cluster_centers['x'], cluster_centers['y']),
    crs="EPSG:4326"
)


  cluster_centers = clusters.groupby('cluster').apply(


In [6]:
centroid = merged_isochrones.geometry.unary_union.centroid

# Initialize the folium map centered on the centroid with specified tiles
m = folium.Map(location=[centroid.y, centroid.x], zoom_start=12, tiles='cartodbpositron')

# Convert 'time' column to numeric and convert seconds to minutes
merged_isochrones['time'] = pd.to_numeric(merged_isochrones['time']) / 60

# Define a viridis colormap based on time (in minutes)
colormap = cm.linear.viridis.scale(
    merged_isochrones['time'].min(),
    merged_isochrones['time'].max()
)
colormap.caption = 'Walking Time (minutes)'
colormap.add_to(m)

folium.GeoJson(
    merged_isochrones,
    name='Merged Isochrones',
    style_function=lambda feature: {
        'fillColor': colormap(float(feature['properties']['time'])),
        'color': colormap(float(feature['properties']['time'])),
        'weight': 1,
        'fillOpacity': 0.5,
    },
    show=False
).add_to(m)

# Add RCP dataset to the map with green markers
rcp_layer = folium.FeatureGroup(name='RCP Locations')
for _, row in rcps.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup=row['adresse'],
        icon=folium.Icon(color='green', icon='recycle', prefix='fa')
    ).add_to(rcp_layer)
rcp_layer.add_to(m)

# Add flats_outside as red CircleMarkers within a FeatureGroup
flats_outside_layer = folium.FeatureGroup(name='Flats Outside', show=False)
for _, row in flats_outside.iterrows():
    folium.CircleMarker(
        location=[row.geometry.y, row.geometry.x],
        radius=3,
        fill=True,
        color='red',
        fill_color='red',
        fill_opacity=0.6,
        popup=f'Population: {row.est_pop:.2f}'
    ).add_to(flats_outside_layer)
flats_outside_layer.add_to(m)

# Add new collection points to the map with blue + sign markers
new_rcp_layer = folium.FeatureGroup(name='New RCP Locations', show=False)
for _, point in new_points.iterrows():
    folium.Marker(
        location=[point.geometry.y, point.geometry.x],
        icon=folium.Icon(color='blue', icon='plus', prefix='fa')
    ).add_to(new_rcp_layer)
new_rcp_layer.add_to(m)

folium.LayerControl().add_to(m)


# Save the map to an HTML file
m.save('../data/plots/map_all_steps_old.html')
m

In [12]:
# Filter potential sites with status "potential"
potential_pot = potential_sites[potential_sites["status"] == "potential"].copy()

# Reproject potential sites to EPSG:4326 if needed
if potential_pot.crs != "EPSG:4326":
    potential_pot = potential_pot.to_crs("EPSG:4326")

# Create a GeoDataFrame of cluster centres from the cluster_centers DataFrame
cluster_centers_gdf = gpd.GeoDataFrame(
    cluster_centers,
    geometry=gpd.points_from_xy(cluster_centers['x'], cluster_centers['y']),
    crs="EPSG:4326"
)

# For each cluster centre, find the closest potential location
closest_locations = []
for idx, centre in cluster_centers_gdf.iterrows():
    # Compute distances from this centre to all potential sites
    potential_pot['dist'] = potential_pot.geometry.distance(centre.geometry)
    # Get the potential site with the minimum distance
    min_idx = potential_pot['dist'].idxmin()
    min_loc = potential_pot.loc[min_idx]
    closest_locations.append({
         'potential_ID': min_loc['ID'],
         'geometry': min_loc.geometry
    })

closest_locations_gdf = gpd.GeoDataFrame(closest_locations, geometry='geometry', crs="EPSG:4326")
print(closest_locations_gdf)

  potential_ID                  geometry
0       p_3031  POINT (8.50534 47.36147)
1       p_7839  POINT (8.53439 47.33533)
2        p_133  POINT (8.58052 47.35058)
3         p_20  POINT (8.52425 47.41386)
4       p_7315  POINT (8.57882 47.37443)
5       p_4763  POINT (8.55827 47.37646)
6         p_24  POINT (8.57731 47.38367)
7         p_46  POINT (8.50619 47.39553)
8       p_6636  POINT (8.49361 47.38850)
9        p_344  POINT (8.48040 47.40093)



  potential_pot['dist'] = potential_pot.geometry.distance(centre.geometry)

  potential_pot['dist'] = potential_pot.geometry.distance(centre.geometry)

  potential_pot['dist'] = potential_pot.geometry.distance(centre.geometry)

  potential_pot['dist'] = potential_pot.geometry.distance(centre.geometry)

  potential_pot['dist'] = potential_pot.geometry.distance(centre.geometry)

  potential_pot['dist'] = potential_pot.geometry.distance(centre.geometry)

  potential_pot['dist'] = potential_pot.geometry.distance(centre.geometry)

  potential_pot['dist'] = potential_pot.geometry.distance(centre.geometry)

  potential_pot['dist'] = potential_pot.geometry.distance(centre.geometry)

  potential_pot['dist'] = potential_pot.geometry.distance(centre.geometry)


In [17]:
# Separate existing and newly created RCPS based on the "adresse" column
existing = combined_rcps[~combined_rcps['adresse'].astype(str).str.startswith("Closest Location")].copy()
potentials = combined_rcps[combined_rcps['adresse'].astype(str).str.startswith("Closest Location")].copy()

# Create identifiers for each group
existing['identifier'] = ['existing_' + str(i + 1) for i in range(len(existing))]
potentials['identifier'] = ['pot_' + str(i + 1) for i in range(len(potentials))]

# Combine the two groups and then select only the required columns
rcp_summary = pd.concat([existing, potentials]).reset_index(drop=True)
rcp_summary = rcp_summary[['identifier', 'geometry']]

# export to file
rcp_summary.to_file('../data/derived_data/rcps_clustering_iso.gpkg', driver='GPKG')