In [2]:
import geopandas as gpd
import pandas as pd
from sklearn.cluster import DBSCAN
import folium

In [None]:
# Import flats with population
flats_with_pop = gpd.read_file('/home/silas/projects/msc_thesis/data/derived_data/flats_population.gpkg')

# Import isochrones
merged_isochrones_gdf= gpd.read_file('/home/silas/projects/msc_thesis/data/derived_data/isochrones_1-10min.gpkg')
# Load or create rcps GeoDataFrame
rcps=gpd.read_file('/home/silas/projects/msc_thesis/data/raw_data/geodata_stadt_Zuerich/recycling_sammelstellen/data/stzh.poi_sammelstelle_view.shp')
flats_with_pop.to_crs(epsg=4326, inplace=True)
rcps.to_crs(epsg=4326, inplace=True)

In [1]:
# Step 1: Verify 'time' column exists in merged_isochrones_gdf
if 'time' not in merged_isochrones_gdf.columns:
    raise KeyError("'time' column is missing in merged_isochrones_gdf")

# Step 2: Spatial join with a left join to retain all flats
joined = gpd.sjoin(flats_with_pop, merged_isochrones_gdf[['geometry', 'time']], how='left', predicate='within')

# Step 3: Assign a default high time value to unserved flats
iso_threshold = 10
joined['time'] = joined['time'].fillna(iso_threshold + 1)  # Assign a value greater than threshold

# Step 4: Filter unserved flats within the isochrone threshold
high_pop_unserved = joined[joined['time'] <= iso_threshold]

# Step 5: Get only the shortest time for each flat
high_pop_unserved = joined.groupby('egid').agg({
    'est_pop': 'first',
    'geometry': 'first',
    'time': 'min'
}).reset_index()

# Then filter for the threshold
high_pop_unserved = high_pop_unserved[high_pop_unserved['time'] <= iso_threshold]


print(high_pop_unserved)

NameError: name 'merged_isochrones_gdf' is not defined

In [None]:

high_pop_unserved=high_pop_unserved.head(50000)

coords = high_pop_unserved.geometry.centroid
X = pd.DataFrame({
    'x': coords.x,
    'y': coords.y,
    'population': high_pop_unserved['est_pop']
})

# Step 3: Apply DBSCAN clustering
db = DBSCAN(eps=0.02, min_samples=5).fit(X[['x', 'y']])
X['cluster'] = db.labels_

# Remove noise points
clusters = X[X['cluster'] != -1]

# Step 4: Calculate cluster centers weighted by population
cluster_centers = clusters.groupby('cluster').apply(
    lambda df: pd.Series({
        'x': (df['x'] * df['population']).sum() / df['population'].sum(),
        'y': (df['y'] * df['population']).sum() / df['population'].sum()
    })
).reset_index()

# Step 5: Create GeoDataFrame for new collection points
new_points = gpd.GeoDataFrame(
    cluster_centers,
    geometry=gpd.points_from_xy(cluster_centers['x'], cluster_centers['y']),
    crs="EPSG:4326"
)


  coords = high_pop_unserved.geometry.centroid


: 

In [17]:


# Load or create rcps GeoDataFrame
# Assuming rcps is a GeoDataFrame containing existing collection points

# Step 6: Plotting
m = folium.Map(location=[47.3769, 8.5417], zoom_start=13)

# Add existing collection points
for _, row in rcps.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup=row['adresse'],
        icon=folium.Icon(color='green', icon='recycle', prefix='fa')
    ).add_to(m)

# Add new collection points
for _, row in new_points.iterrows():
    folium.Marker(
        location=[row.geometry.y, row.geometry.x],
        popup='New Collection Point',
        icon=folium.Icon(color='blue', icon='plus', prefix='fa')
    ).add_to(m)

m.save('/home/silas/projects/msc_thesis/data/derived_data/new_collection_points_dbscan.html')
m