In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import euclidean_distances, manhattan_distances, cosine_similarity

In [3]:
n_samples = 1500
X, y = make_blobs(n_samples=n_samples, centers=4, cluster_std=1.0, random_state=42)
scaler = StandardScaler()
X = scaler.fit_transform(X) 

In [5]:
def custom_distance(X, Y, metric='euclidean'):
    if metric == 'euclidean':
        return euclidean_distances(X, Y)
    elif metric == 'manhattan':
        return manhattan_distances(X, Y)
    elif metric == 'cosine':
        return 1 - cosine_similarity(X, Y)
    else:
        raise ValueError("Unsupported distance metric")

In [7]:
def kmeans_with_distance_metric(X, n_clusters=4, metric='euclidean', init='k-means++'):
    if metric in ['euclidean', 'manhattan', 'cosine']:
        kmeans = KMeans(n_clusters=n_clusters, init=init, n_init=10, max_iter=300)
        kmeans.fit(X)
        return kmeans
    else:
        raise ValueError("Unsupported distance metric")

In [9]:
init_methods = ['random', 'k-means++']
distance_metrics = ['euclidean', 'manhattan', 'cosine']
results = []

for init_method in init_methods:
    for dist_metric in distance_metrics:
        kmeans_model = kmeans_with_distance_metric(X, init=init_method, metric=dist_metric)
        score = silhouette_score(X, kmeans_model.labels_)
        results.append((init_method, dist_metric, score))

In [11]:
print("Comparison of different K-means configurations:")
for result in results:
    print(f"Initialization: {result[0]}, Distance Metric: {result[1]}, Silhouette Score: {result[2]:.4f}")

# Visualize the clusters

Comparison of different K-means configurations:
Initialization: random, Distance Metric: euclidean, Silhouette Score: 0.7962
Initialization: random, Distance Metric: manhattan, Silhouette Score: 0.7962
Initialization: random, Distance Metric: cosine, Silhouette Score: 0.7962
Initialization: k-means++, Distance Metric: euclidean, Silhouette Score: 0.7962
Initialization: k-means++, Distance Metric: manhattan, Silhouette Score: 0.7962
Initialization: k-means++, Distance Metric: cosine, Silhouette Score: 0.7962
