In [1]:
import numpy as np

# Load the Iris dataset
from sklearn.datasets import load_iris
# Keep the full object returned by load_iris() in `data`; only its `.data`
# attribute (the feature matrix) is read here, so the clustering below never
# sees any label information.
data = load_iris()
X = data.data

# Standardize features
def standardize(X):
    """Z-score every column of ``X``.

    Each column has its mean subtracted and is then divided by its
    (population) standard deviation, both computed along axis 0.

    Args:
        X: Array-like of numeric values, samples along axis 0.

    Returns:
        A float ``ndarray`` with the same shape as ``X``. Columns that are
        constant in the input come back as all zeros.
    """
    X = np.asarray(X, dtype=float)
    column_mean = X.mean(axis=0)
    column_std = X.std(axis=0)
    # A constant column has zero spread; dividing by it would produce NaN/inf
    # and poison every later distance computation. Dividing by 1 instead
    # leaves the (already centred) column at exactly zero.
    safe_std = np.where(column_std == 0, 1.0, column_std)
    return (X - column_mean) / safe_std

# Standardized copy of X (per-column mean removed, then divided by the column
# standard deviation); this is the input to both clustering methods and to
# every evaluation metric below.
X_scaled = standardize(X)

# K-means clustering
def _squared_distances(X, centroids):
    """Return a (n_clusters, n_samples) array of squared Euclidean distances."""
    return ((X - centroids[:, np.newaxis]) ** 2).sum(axis=2)


def kmeans(X, n_clusters, n_init=10, max_iter=300):
    """Cluster the rows of ``X`` with Lloyd's k-means and random restarts.

    Each restart seeds the centroids with ``n_clusters`` distinct rows of
    ``X`` drawn through NumPy's global random state (``np.random.choice``),
    then alternates assignment and centroid updates until the centroids stop
    changing or ``max_iter`` updates have been made. The restart with the
    lowest inertia (sum of squared distances of samples to their closest
    centroid) wins.

    Args:
        X: 2-D array-like, one sample per row.
        n_clusters: Number of clusters to form.
        n_init: Number of random restarts.
        max_iter: Maximum number of update steps per restart.

    Returns:
        1-D integer array holding the cluster index of every sample for the
        best restart, or ``None`` when ``n_init`` is not positive.

    Raises:
        ValueError: Propagated from ``np.random.choice`` when ``n_clusters``
            exceeds the number of samples.
    """
    # Work in float so integer input cannot truncate the centroid means.
    X = np.asarray(X, dtype=float)
    best_inertia = np.inf
    best_labels = None
    for _ in range(n_init):
        # Initialize centroids randomly from data points
        seed_rows = np.random.choice(X.shape[0], n_clusters, replace=False)
        centroids = X[seed_rows, :]
        for _ in range(max_iter):
            # sqrt is monotonic, so squared distances give the same nearest
            # centroid without the extra work.
            labels = np.argmin(_squared_distances(X, centroids), axis=0)
            new_centroids = centroids.copy()
            for k in range(n_clusters):
                members = X[labels == k]
                # An empty cluster keeps its previous centroid; taking the
                # mean of zero rows would yield NaN and corrupt the run.
                if len(members):
                    new_centroids[k] = members.mean(axis=0)
            # Check for convergence
            if np.array_equal(centroids, new_centroids):
                break
            centroids = new_centroids
        # Re-assign against the final centroids so labels and inertia are
        # consistent even when max_iter was exhausted (or is zero).
        sq_dists = _squared_distances(X, centroids)
        labels = np.argmin(sq_dists, axis=0)
        inertia = sq_dists.min(axis=0).sum()
        if inertia < best_inertia:
            best_inertia = inertia
            best_labels = labels
    return best_labels

# Partition the standardized data into 3 clusters with the k-means routine
# defined above, using its default n_init=10 and max_iter=300.
kmeans_labels = kmeans(X_scaled, 3)

# Hierarchical clustering
def hierarchical(X, n_clusters):
    """Cut a Ward-linkage dendrogram of ``X`` into flat clusters.

    Args:
        X: Observations to cluster, passed straight to SciPy's ``linkage``.
        n_clusters: Upper bound on the number of flat clusters
            (``fcluster`` with ``criterion='maxclust'``).

    Returns:
        Array of cluster indices, one per observation, shifted to start at 0.
    """
    from scipy.cluster import hierarchy

    merge_tree = hierarchy.linkage(X, method='ward')
    one_based = hierarchy.fcluster(merge_tree, t=n_clusters, criterion='maxclust')
    # fcluster numbers clusters from 1; shift to 0-based indices.
    return one_based - 1

# Same data and cluster count as the k-means run, so the two labelings can be
# scored side by side.
hierarchical_labels = hierarchical(X_scaled, 3)

# Using sklearn to compute evaluation metrics (simplified for example)
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score

# Score each labeling of X_scaled with each internal clustering metric.
# The result is keyed first by metric name, then by algorithm name.
_labelings = {
    'K-means': kmeans_labels,
    'Hierarchical': hierarchical_labels,
}
_scorers = {
    'Silhouette Score': silhouette_score,
    'Davies-Bouldin Score': davies_bouldin_score,
    'Calinski-Harabasz Index': calinski_harabasz_score,
}
evaluation_metrics = {
    metric_name: {
        algorithm: score_fn(X_scaled, assigned)
        for algorithm, assigned in _labelings.items()
    }
    for metric_name, score_fn in _scorers.items()
}

print(evaluation_metrics)


{'Silhouette Score': {'K-means': 0.45994823920518635, 'Hierarchical': 0.4466890410285909}, 'Davies-Bouldin Score': {'K-means': 0.8335949464754338, 'Hierarchical': 0.8034665302876753}, 'Calinski-Harabasz Index': {'K-means': 241.90440170183157, 'Hierarchical': 222.71916382215363}}
