In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.impute import SimpleImputer
from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import DBSCAN

In [None]:
# Load the cleaned dataset into a DataFrame.
# NOTE: the path below is a placeholder string; point it at the real
# clean_data_sh012 CSV file before running this cell.
df = pd.read_csv(filepath_or_buffer='replace with clean_data_sh012')

### Search for DBSCAN hyperparameters that yield 4 clusters

In [None]:
# Feature matrix used for clustering: one row per record, with the columns
# utm_x, utm_y and value (in that order) as a plain NumPy array.
# NOTE(review): the plotting cell labels these axes as "scaled", but no scaling
# is applied in the visible cells; confirm the CSV already holds scaled values.
feature_columns = ['utm_x', 'utm_y', 'value']
X = df.loc[:, feature_columns].to_numpy()

In [1]:
# Grid search over DBSCAN's two main hyperparameters. For every (eps,
# min_samples) pair, fit on X and report how many clusters were found.
eps_values = [0.1, 0.3, 0.5, 1.0, 2.0]
min_samples_values = [5, 10, 20, 50]
param_grid = [(e, m) for e in eps_values for m in min_samples_values]

for eps, min_samples in param_grid:
    db = DBSCAN(eps=eps, min_samples=min_samples).fit(X)
    labels = db.labels_
    # The label -1 marks noise points, so it is excluded from the cluster count.
    n_clusters = np.unique(labels[labels != -1]).size
    print(f"eps={eps}, min_samples={min_samples} → clusters={n_clusters}")


eps=0.1, min_samples=5 → clusters=30
eps=0.1, min_samples=10 → clusters=9
eps=0.1, min_samples=20 → clusters=4
eps=0.1, min_samples=50 → clusters=0
eps=0.3, min_samples=5 → clusters=54
eps=0.3, min_samples=10 → clusters=12
eps=0.3, min_samples=20 → clusters=4
eps=0.3, min_samples=50 → clusters=0
eps=0.5, min_samples=5 → clusters=65
eps=0.5, min_samples=10 → clusters=16
eps=0.5, min_samples=20 → clusters=5
eps=0.5, min_samples=50 → clusters=0
eps=1.0, min_samples=5 → clusters=116
eps=1.0, min_samples=10 → clusters=22
eps=1.0, min_samples=20 → clusters=8
eps=1.0, min_samples=50 → clusters=1
eps=2.0, min_samples=5 → clusters=260
eps=2.0, min_samples=10 → clusters=42
eps=2.0, min_samples=20 → clusters=12
eps=2.0, min_samples=50 → clusters=1


In [None]:


# Fit DBSCAN with eps=0.1 / min_samples=20 (one of the settings that produced
# 4 clusters in the grid-search output) and get one label per row of X.
db = DBSCAN(eps=0.1, min_samples=20)
labels = db.fit_predict(X)

# Keep the assignment next to the source rows; a label of -1 means noise.
df['cluster'] = labels

# 3D scatter of the three feature columns, coloured by cluster label.
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(projection="3d")

xs, ys, zs = X[:, 0], X[:, 1], X[:, 2]
scatter = ax.scatter(xs, ys, zs, c=labels, cmap='viridis', s=30)

ax.set(
    xlabel="UTM X (scaled)",
    ylabel="UTM Y (scaled)",
    zlabel="Value (scaled)",
    title="DBSCAN Clustering on Scaled Data (3D)",
)

fig.colorbar(scatter, ax=ax, label="Cluster")
plt.show()