In [3]:
import numpy as np

class HierarchicalClustering:
    """
    Agglomerative hierarchical clustering (single, complete, or average linkage).

    Every sample starts as its own cluster. The two closest clusters, as
    measured by the chosen linkage over pairwise Euclidean distances, are
    merged repeatedly until ``n_clusters`` clusters remain.

    Parameters
    ----------
    n_clusters : int, default=2
        Number of clusters at which merging stops. Must be at least 1; this is
        checked when ``fit`` is called.
    linkage : {'single', 'complete', 'average'}, default='single'
        How the distance between two clusters is derived from the distances
        between their members: minimum, maximum, or mean respectively.

    Attributes
    ----------
    labels_ : numpy.ndarray of int, shape (n_samples,), or None
        Cluster index of every sample, set by ``fit``. ``None`` before fitting.
    """
    def __init__(self, n_clusters=2, linkage='single'):
        self.n_clusters = n_clusters
        # Kept as an assert so the exception type seen at construction is unchanged.
        assert linkage in ['single', 'complete', 'average'], \
            "linkage must be 'single', 'complete', or 'average'"
        self.linkage = linkage
        self.labels_ = None

    def fit(self, X):
        """
        Cluster the rows of ``X`` and store the result in ``labels_``.

        Parameters
        ----------
        X : array-like
            One sample per entry along the first axis. Nested lists are
            accepted; boolean and integer data are converted to float so that
            differences between unsigned values cannot wrap around.

        Returns
        -------
        self : HierarchicalClustering
            The fitted instance, so calls can be chained.

        Raises
        ------
        ValueError
            If ``n_clusters`` is smaller than 1, or if merging has to continue
            but no pair of clusters has a finite distance (NaN/inf in ``X``).
        """
        if self.n_clusters < 1:
            raise ValueError(
                f"n_clusters must be at least 1, got {self.n_clusters!r}"
            )

        X = np.asarray(X)
        if X.dtype.kind in 'biu':
            # Unsigned subtraction wraps (0 - 1 == 255 for uint8); use floats.
            X = X.astype(float)

        n_samples = X.shape[0]
        # Start with each point as its own cluster
        clusters = [[i] for i in range(n_samples)]
        # The diagonal is never read: clusters are disjoint, so a sample is
        # never compared with itself.
        distances = self._compute_distances(X)

        while len(clusters) > self.n_clusters:
            # Find the two closest clusters; on ties the first pair found wins.
            min_dist = np.inf
            to_merge = None
            for i in range(len(clusters)):
                for j in range(i + 1, len(clusters)):
                    dist = self._cluster_distance(clusters[i], clusters[j], distances)
                    if dist < min_dist:
                        min_dist = dist
                        to_merge = (i, j)
            if to_merge is None:
                # Every candidate distance was NaN or inf, so nothing compared
                # smaller than the initial np.inf.
                raise ValueError(
                    "Cannot merge clusters: no pair of clusters has a finite "
                    "distance (does X contain NaN or infinite values?)"
                )
            i, j = to_merge
            # Merge cluster j into cluster i (i < j, so index i stays valid)
            clusters[i] = clusters[i] + clusters[j]
            del clusters[j]

        # Assign cluster labels: position in the final cluster list is the label
        labels = np.zeros(n_samples, dtype=int)
        for k, members in enumerate(clusters):
            labels[members] = k
        self.labels_ = labels
        return self

    def _compute_distances(self, X):
        """
        Return the symmetric matrix of pairwise Euclidean distances.

        Entry ``[i, j]`` is ``np.linalg.norm(X[i] - X[j])``; the diagonal is
        left at zero.
        """
        n_samples = X.shape[0]
        dists = np.zeros((n_samples, n_samples))
        for i in range(n_samples):
            for j in range(i+1, n_samples):
                dist = np.linalg.norm(X[i] - X[j])
                # Fill both triangles so lookups work in either index order
                dists[i, j] = dists[j, i] = dist
        return dists

    def _cluster_distance(self, cluster1, cluster2, distance_matrix):
        """
        Return the linkage distance between two clusters.

        Parameters
        ----------
        cluster1, cluster2 : list of int
            Sample indices belonging to each cluster.
        distance_matrix : numpy.ndarray
            Pairwise sample distances, indexed ``[i, j]``.

        Returns
        -------
        The minimum ('single'), maximum ('complete') or mean ('average') of the
        distances between every member of ``cluster1`` and every member of
        ``cluster2``.

        Raises
        ------
        ValueError
            If ``self.linkage`` is not one of the three supported names.
        """
        # Sub-matrix of all cross-cluster distances, selected without a Python loop
        cross = distance_matrix[np.ix_(cluster1, cluster2)]
        if self.linkage == 'single':
            return cross.min()
        elif self.linkage == 'complete':
            return cross.max()
        elif self.linkage == 'average':
            return cross.mean()
        else:
            raise ValueError("Unknown linkage type")

> ## Example usage:

In [4]:
# Nine 2-D sample points, listed one per row.
X = np.array([
    [1.0, 2.0],
    [1.5, 1.8],
    [5.0, 8.0],
    [8.0, 8.0],
    [1.0, 0.6],
    [9.0, 11.0],
    [8.0, 2.0],
    [10.0, 2.0],
    [9.0, 3.0],
])

# Cluster the same points into three groups under each supported linkage
# and print the resulting label vector.
for linkage in ('single', 'complete', 'average'):
    hc = HierarchicalClustering(linkage=linkage, n_clusters=3)
    hc.fit(X)
    print(f"{linkage.capitalize()} linkage labels:", hc.labels_)

Single linkage labels: [0 0 1 1 0 1 2 2 2]
Complete linkage labels: [0 0 1 1 0 1 2 2 2]
Average linkage labels: [0 0 1 1 0 1 2 2 2]
