## Setup

In [None]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# are reloaded before each cell executes and edits to local source files
# take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path
import geopandas as gpd

In [None]:
from clustering.kmeans import TunedClustering

## Load data

In [None]:
# All paths are resolved relative to the current working directory.
ROOT_DIR = Path()
DATA_DIR = ROOT_DIR.joinpath("data")

## Clustering

### Set parameters

In [None]:
# Choose which demo data to use: "rooftops" (rooftop points) or
# "weighted_grids" (the grid dataset).
data_type = "rooftops"

In [None]:
# Run settings: these are passed unchanged to the TunedClustering
# constructor further down.
n_jobs = -1  # presumably "use all available cores" (joblib-style) — TODO confirm
initial_max_trials = 100
max_passes = 100
subsequent_max_trials = 20

In [None]:
# Load the demo dataset selected by `data_type` and set the matching column
# names, projected CRS and clustering targets used by the cells below.
if data_type == "rooftops":
    gdf_for_cluster = gpd.read_parquet(DATA_DIR / "rooftops.parquet")
    # admin variables
    id_col = "rooftop_id"
    lat_col = "Lat_centroid"
    lon_col = "Lon_centroid"
    weight_col = "weight"
    projected_epsg = 26191  # morocco
    # every row gets the same unit weight in the weight column
    gdf_for_cluster.loc[:, weight_col] = 1
    # clustering variables
    desired_cluster_radius = 550
    desired_cluster_weight = 30
    max_cluster_weight = 50
    weight_importance_factor = 1
    minibatch_reassignment_ratio = 0.1

elif data_type == "weighted_grids":
    gdf_for_cluster = gpd.read_parquet(DATA_DIR / "grids.parquet")
    # admin variables
    id_col = "grid_id"
    lat_col = "Lat"
    lon_col = "Lon"
    weight_col = "population"
    projected_epsg = 3121  # philippines
    # clustering variables
    desired_cluster_radius = 1000
    desired_cluster_weight = 240
    max_cluster_weight = 300
    weight_importance_factor = 1
    minibatch_reassignment_ratio = 0.1

else:
    # Fail loudly on a typo instead of silently loading the grid dataset
    # with settings that do not match what the user asked for.
    raise ValueError(
        f"Unknown data_type {data_type!r}; "
        "expected 'rooftops' or 'weighted_grids'"
    )

gdf_for_cluster

### Initialise Algorithm

In [None]:
# Configure the clustering algorithm with the parameters chosen above.
tuned_clustering = TunedClustering(
    # cluster targets
    desired_cluster_radius=desired_cluster_radius,
    desired_cluster_weight=desired_cluster_weight,
    max_cluster_weight=max_cluster_weight,
    # tuning knobs
    weight_importance_factor=weight_importance_factor,
    minibatch_reassignment_ratio=minibatch_reassignment_ratio,
    # run settings
    initial_max_trials=initial_max_trials,
    subsequent_max_trials=subsequent_max_trials,
    max_passes=max_passes,
    n_jobs=n_jobs,
    show_progress_bar=True,
)

### Run Algorithm

In [None]:
# Run the clustering on the loaded data, asking for the result in
# "geodataframe" form; later cells read a "cluster_id" column from it.
gdf_w_clusters = tuned_clustering.run(
    return_type="geodataframe",
    projected_epsg=projected_epsg,
    weight_col=weight_col,
    gdf=gdf_for_cluster,
)

### Examine results

#### Simple plot

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Draw every point coloured by its cluster id on an explicitly created
# 10x10 inch figure.
fig, ax = plt.subplots(figsize=(10, 10))
gdf_w_clusters.plot(column="cluster_id", markersize=0.1, ax=ax)
plt.show()

#### Using functions from `gridsample`

This section requires the `gridsample` package. Install it by running `pip install .` from the root folder of the `gridsample` repository, using the same environment that runs this notebook.

In [None]:
from gridsample.reporting import plot_weights_vs_radii
from gridsample.mapping import create_interactive_map

In [None]:
# Report plot from gridsample, fed with the clustered points, their weight
# column and the projected CRS; the dataset name is used as the readable label.
plot_weights_vs_radii(
    y_human_readable=data_type,
    point_projected_epsg=projected_epsg,
    point_weight_col=weight_col,
    point_gdf_w_cluster=gdf_w_clusters,
)
plt.show()

In [None]:
# Build the interactive map of the clustered points; the tooltip shows each
# point's id and the cluster it was assigned to.
cluster_col = "cluster_id"
folium_map = create_interactive_map(
    points_gdf=gdf_w_clusters,
    point_id_col=id_col,
    point_lat_col=lat_col,
    point_lon_col=lon_col,
    cluster_id_col=cluster_col,
    cols_for_tooltip=[id_col, cluster_col],
    zoom_start=12,
)
# last expression in the cell, so the notebook renders the map
folium_map