In [1]:
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.metrics import jaccard_score

In [2]:
X, y_true = make_blobs(n_samples=300, centers=3, random_state=42, cluster_std=1.0)

In [3]:
np.random.seed(42)
cluster_centers = X[np.random.choice(X.shape[0], 3, replace=False)]  


In [4]:
num_iterations = 10

for iteration in range(num_iterations):
    cluster_assignments = np.argmin(np.linalg.norm(X[:, np.newaxis] - cluster_centers, axis=2), axis=1)
    
    for i in range(3):
        cluster_centers[i] = np.mean(X[cluster_assignments == i], axis=0)


In [5]:
print("Final Cluster Means:")
for i, center in enumerate(cluster_centers):
    print(f"Cluster {i + 1}: {center}")

Final Cluster Means:
Cluster 1: [-2.63323268  9.04356978]
Cluster 2: [-6.88387179 -6.98398415]
Cluster 3: [4.74710337 2.01059427]


In [7]:
jaccard_distances = []
for i in range(3):
    true_cluster_indices = np.where(y_true == i)[0]
    predicted_cluster_indices = np.where(cluster_assignments == i)[0]
    jaccard_distance = 1.0 - jaccard_score(true_cluster_indices, predicted_cluster_indices, average='weighted')
    jaccard_distances.append(jaccard_distance)
    print(f"Jaccard Distance (Cluster {i + 1}): {jaccard_distance:.4f}")

Jaccard Distance (Cluster 1): 0.0000
Jaccard Distance (Cluster 2): 1.0000
Jaccard Distance (Cluster 3): 1.0000
