In [3]:
import numpy as np
from sklearn.cluster import KMeans, DBSCAN
from minisom import MiniSom
from sklearn.preprocessing import StandardScaler

# ============================================================
# 1) SYNTHETIC CUSTOMER DATA (R,F,M,Visits)
# ============================================================
# Three synthetic customer segments of 50 rows each. Every entry below is
# (per-column means, per-column standard deviations); the four columns
# follow the order given in the header above (R, F, M, Visits).
_segment_params = (
    ([10, 15, 20000, 30], [2, 3, 3000, 5]),
    ([90, 5, 2000, 5], [5, 2, 500, 2]),
    ([40, 10, 8000, 15], [6, 2, 1500, 3]),
)

np.random.seed(42)  # fixed seed -> the same rows on every run
# The segments are drawn strictly in A, B, C order: the global RNG stream is
# consumed sequentially, so the draw order determines the generated values.
A, B, C = (
    np.random.normal(loc=means, scale=stds, size=(50, 4))
    for means, stds in _segment_params
)
# Stack the segments row-wise into one (150, 4) float32 feature matrix.
X = np.concatenate((A, B, C), axis=0).astype(np.float32)

# Scale
# Fit a StandardScaler (default settings) on X and replace X with the
# transformed matrix, so later steps operate on the standardised features.
X = StandardScaler().fit(X).transform(X)

# ============================================================
# 2) KMEANS
# ============================================================
# Partition the samples in X into 3 clusters; km_labels holds one integer
# cluster id per row of X.
# n_init is pinned explicitly: its default differs between scikit-learn
# releases (10 in older versions, "auto" in newer ones, with a FutureWarning
# in between), so leaving it out makes the number of restarts -- and
# potentially the label numbering -- depend on the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
km_labels = kmeans.fit_predict(X)

# ============================================================
# 3) DBSCAN
# ============================================================
# Density-based clustering of the same matrix X. `db` is the fitted
# estimator; `db_labels` is its per-sample label array, in which samples
# that DBSCAN leaves unassigned (noise) carry the label -1.
db = DBSCAN(eps=0.6, min_samples=5).fit(X)
db_labels = db.labels_

# ============================================================
# 4) SOM (map coords -> int labels)
# ============================================================
# A 3x1 self-organising map whose nodes act as clusters.
# random_seed is passed explicitly because MiniSom draws from its own
# RandomState: seeding NumPy's global generator does not affect it, so
# without this the initial weights, the training sample order and therefore
# the resulting labels would change from run to run.
som = MiniSom(
    x=3,
    y=1,
    input_len=X.shape[1],
    sigma=1,
    learning_rate=0.5,
    random_seed=42,
)
som.random_weights_init(X)  # start from weights sampled out of the data
som.train_random(X, 500)    # 500 updates on randomly picked samples

# Grid coordinates of the best-matching node for every sample.
winner_coords = np.array([som.winner(r) for r in X])
# Number only the nodes that actually won a sample, 0..k-1, following the
# sorted row order returned by np.unique.
uniq = np.unique(winner_coords, axis=0)
map_id = {tuple(c): i for i, c in enumerate(uniq)}
som_labels = np.array([map_id[tuple(c)] for c in winner_coords])

# ============================================================
# 5) PROPER FINAL REPRESENTATION
# ============================================================


def _print_cluster_sizes(header, labels, noise_label=None):
    """Print ``header`` followed by one size line per distinct label.

    Args:
        header: Line printed before the per-cluster counts.
        labels: 1-D array of integer cluster labels.
        noise_label: Optional label value reported as "Noise / Outliers"
            instead of as a numbered cluster.
    """
    print(header)
    # A single pass yields both the sorted distinct labels and their counts.
    cluster_ids, sizes = np.unique(labels, return_counts=True)
    for cluster_id, size in zip(cluster_ids, sizes):
        if noise_label is not None and cluster_id == noise_label:
            print(f"Noise / Outliers: {size} samples")
        else:
            print(f"Cluster {cluster_id}: {size} samples")


_print_cluster_sizes("\n====== K-MEANS CLUSTERS ======", km_labels)
# Label -1 is reported separately as noise for the DBSCAN result.
_print_cluster_sizes("\n====== DBSCAN CLUSTERS ======", db_labels, noise_label=-1)
_print_cluster_sizes("\n====== SOM CLUSTERS ======", som_labels)

# ====== OPTIONAL: SHOW FINAL ASSIGNMENT TABLE (first 10 rows) ======
# One row per sample, one column per algorithm (KMeans, DBSCAN, SOM).
final_table = np.column_stack((km_labels, db_labels, som_labels))
print("\n====== FIRST 10 ASSIGNMENTS (KMEANS | DBSCAN | SOM) ======")
print(final_table[:10])



Cluster 0: 50 samples
Cluster 1: 50 samples
Cluster 2: 50 samples

Noise / Outliers: 7 samples
Cluster 0: 44 samples
Cluster 1: 49 samples
Cluster 2: 50 samples

Cluster 0: 50 samples
Cluster 1: 50 samples
Cluster 2: 50 samples

[[ 1  0  2]
 [ 1  0  2]
 [ 1  0  2]
 [ 1 -1  2]
 [ 1  0  2]
 [ 1  0  2]
 [ 1  0  2]
 [ 1  0  2]
 [ 1  0  2]
 [ 1  0  2]]
