In [6]:
import numpy as np
import pandas as pd
import geopandas as gpd
import folium
from sklearn.cluster import DBSCAN, KMeans
from shapely.geometry import Point
import os


# Project root. Defaults to the original development checkout; set the
# RCP_PROJECT_DIR environment variable to run the notebook from another location.
PROJECT_DIR = os.environ.get('RCP_PROJECT_DIR', '/home/silas/rcp_project/rcp_project')

# Work from the project root so that relative './data/...' paths used when
# saving maps and derived data resolve inside the project.
os.chdir(PROJECT_DIR)

# Load buildings population data
buildings_pop = gpd.read_file(
    os.path.join(PROJECT_DIR, 'data', 'derived_data', 'flats_duration.gpkg')
)

# Load RCP data from the RCP shapefile instead of CSV
rcp_data = gpd.read_file(
    os.path.join(
        PROJECT_DIR, 'data', 'raw_data', 'geodata_stadt_Zuerich',
        'recycling_sammelstellen', 'data', 'stzh.poi_sammelstelle_view.shp',
    )
)

# Reproject the buildings layer to EPSG:4326 (lon/lat), which the folium maps need.
# NOTE(review): rcp_data is NOT reprojected here and keeps the CRS it was read
# with -- confirm that any code combining it with buildings_pop handles this.
buildings_pop = buildings_pop.to_crs("EPSG:4326")

print("Data loaded successfully. Shape:", buildings_pop.shape)

Data loaded successfully. Shape: (36732, 7)


In [8]:

# Extract an (n_buildings, 2) array of (x, y) coordinates.
# Uses the vectorised GeoSeries accessors instead of a row-wise apply; this
# yields the same array and matches the extraction used for KMeans.
coords = np.column_stack([buildings_pop.geometry.x, buildings_pop.geometry.y])

# Define DBSCAN parameters
# NOTE(review): buildings_pop was reprojected to EPSG:4326, so eps is expressed
# in degrees and the euclidean metric treats a degree of longitude and a degree
# of latitude as equal lengths. Consider clustering in a projected (metric) CRS
# if an isotropic neighbourhood radius is intended.
epsilon = 0.0005  # Adjust based on your spatial scale
# NOTE(review): with min_samples = 1 every point qualifies as a core sample, so
# DBSCAN should not emit the noise label (-1); the filter below is then a no-op.
min_samples = 1

# Initialize and fit DBSCAN
db = DBSCAN(eps=epsilon, min_samples=min_samples, metric='euclidean')
buildings_pop['cluster'] = db.fit_predict(coords)

# Filter out noise points (clusters labeled as -1)
clusters = buildings_pop[buildings_pop['cluster'] != -1]

# Aggregate population and impact per cluster, and use the centroid of each
# cluster's merged building points as the cluster geometry.
cluster_totals = clusters.groupby('cluster').agg({
    'est_pop': 'sum',
    'impact': 'sum'
})
cluster_geometry = clusters.groupby('cluster')['geometry'].apply(
    lambda x: x.unary_union.centroid
)
cluster_centroids = gpd.GeoDataFrame(
    cluster_totals,
    geometry=cluster_geometry,
    crs=clusters.crs
).reset_index()

# Rename columns for clarity
cluster_centroids = cluster_centroids.rename(
    columns={'est_pop': 'total_est_pop', 'impact': 'total_impact'}
)

print(cluster_centroids.head())

# Create a map centered around the mean coordinates
m = folium.Map(location=[buildings_pop.geometry.y.mean(), buildings_pop.geometry.x.mean()], zoom_start=12)

# Add clusters to the map (folium expects (lat, lon), i.e. (y, x))
for _, row in cluster_centroids.iterrows():
    folium.CircleMarker(
        location=(row.geometry.y, row.geometry.x),
        radius=5,
        color='blue',
        fill=True,
        fill_color='blue',
        popup=f"Cluster {int(row['cluster'])}: Population {int(row['total_est_pop'])}"
    ).add_to(m)

# Save map and export data
m.save('./data/plots/dbscan_clusters.html')
cluster_centroids.to_file('./data/derived_data/dbscan_clusters.gpkg', driver='GPKG')

# Display the DBSCAN map as the cell output
m

   cluster  total_est_pop   total_impact                  geometry
0        0    3339.006048   12792.781552  POINT (8.54496 47.37186)
1        1      93.061224     270.982700  POINT (8.53322 47.36976)
2        2  123239.606015  502703.211165  POINT (8.54149 47.38694)
3        3     109.226415     584.017562  POINT (8.54507 47.37725)
4        4       3.888889      20.961111  POINT (8.54641 47.37461)


In [10]:
# Number of KMeans clusters to fit
n_clusters = 1200

# Extract coordinates and population weights
coordinates = np.column_stack([buildings_pop.geometry.x, buildings_pop.geometry.y])
weights = buildings_pop['est_pop'].values

# Initialize KMeans with n clusters (fixed random_state for reproducible results)
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)

# Fit KMeans with sample weights so that the estimated population of each
# building weights its contribution to the fit
kmeans.fit(coordinates, sample_weight=weights)

# Assign cluster labels to buildings_pop
buildings_pop['kmeans_cluster'] = kmeans.labels_

# Get cluster centers
cluster_centers = kmeans.cluster_centers_

# Create a GeoDataFrame for cluster centers
cluster_centers_gdf = gpd.GeoDataFrame(
    {'cluster_id': range(len(cluster_centers))},
    geometry=[Point(xy) for xy in cluster_centers],
    crs=buildings_pop.crs
)

# Compute population per cluster and attach it to the matching cluster center
cluster_pop = buildings_pop.groupby('kmeans_cluster')['est_pop'].sum().reset_index()
cluster_centers_gdf = cluster_centers_gdf.merge(cluster_pop, left_on='cluster_id', right_on='kmeans_cluster', how='left')
cluster_centers_gdf = cluster_centers_gdf.rename(columns={'est_pop': 'total_est_pop'})

# Create a Folium map centered around the mean coordinates
m_clusters = folium.Map(
    location=[buildings_pop.geometry.y.mean(), buildings_pop.geometry.x.mean()],
    zoom_start=12,
    control_scale=True,
    tiles='cartodbpositron'
)

# Add cluster centers to the map with population (folium expects [lat, lon])
for _, row in cluster_centers_gdf.iterrows():
    folium.CircleMarker(
        location=[row.geometry.y, row.geometry.x],
        radius=2,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
        popup=f"Cluster {row.cluster_id}<br>Population: {int(row.total_est_pop)}"
    ).add_to(m_clusters)

# Add a legend (optional)
legend_html = '''
<div style="position: fixed; 
            bottom: 50px; left: 50px; width: 150px; height: 60px; 
            border:2px solid grey; z-index:9999; font-size:14px;
            background-color:white;
            ">
    &nbsp;<b>Cluster Centers</b><br>
    &nbsp;<i style="background: blue; width: 10px; height: 10px; display: inline-block;"></i>&nbsp; Cluster Center
</div>
'''
m_clusters.get_root().html.add_child(folium.Element(legend_html))

# Save and display the map
m_clusters.save('./data/plots/kmeans_clusters.html')
cluster_centers_gdf.to_file('./data/derived_data/kmeans_clusters.gpkg', driver='GPKG')

# Display the KMeans map built in this cell. The original ended with `m`,
# which re-displayed the DBSCAN map from the previous cell.
m_clusters