<a href="https://colab.research.google.com/github/Series-Parallel/Machine_in_Learning/blob/main/Agglomerative_Clustering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
from itertools import combinations

import numpy as np

In [22]:
def euclidean_distance(p1, p2):
    """Return the Euclidean distance between two points.

    Args:
        p1: First point, as any array-like of coordinates (list, tuple, array).
        p2: Second point, with the same number of coordinates as ``p1``.

    Returns:
        The norm of the coordinate-wise difference ``p1 - p2``.
    """
    offset = np.asarray(p1) - np.asarray(p2)
    return np.linalg.norm(offset)

In [23]:
# Find the two closest clusters
def find_closest_clusters(clusters, distance_matrix):
    """Return the pair of clusters with the smallest single-linkage distance.

    The single-linkage distance between two clusters is the smallest
    point-to-point distance over all cross-cluster point pairs.

    Args:
        clusters: Iterable of clusters; each cluster is an iterable of points
            that are valid keys of ``distance_matrix``.
        distance_matrix: Nested mapping where ``distance_matrix[p][q]`` is the
            distance between points ``p`` and ``q``. It is treated as
            symmetric: each unordered cluster pair is evaluated only once.

    Returns:
        A tuple ``(c1, c2)`` holding the two closest clusters, or ``None`` if
        no pair has a distance below infinity (for example when fewer than
        two distinct clusters are supplied).
    """
    min_dist = float('inf')  # Smallest single-linkage distance seen so far
    cluster_pair = None  # Pair of clusters achieving min_dist

    # combinations() yields every unordered pair exactly once, so each
    # inter-cluster distance is computed a single time rather than once for
    # (c1, c2) and again for (c2, c1).
    for c1, c2 in combinations(clusters, 2):
        if c1 == c2:
            # Only possible when the caller passes a sequence with repeated
            # clusters; a cluster is never merged with itself.
            continue
        dist = min(distance_matrix[p1][p2] for p1 in c1 for p2 in c2)  # Single linkage
        if dist < min_dist:  # Strict '<' keeps the first pair found on ties
            min_dist = dist
            cluster_pair = (c1, c2)

    return cluster_pair


In [27]:
def agglomerative_clustering(data, num_clusters):
    """Cluster points bottom-up until at most ``num_clusters`` clusters remain.

    Every distinct point starts as its own cluster. The pair of clusters
    chosen by ``find_closest_clusters`` is then merged repeatedly until the
    requested number of clusters is reached.

    Args:
        data: Iterable of points, each an iterable of coordinates (for
            example the rows of a 2-D NumPy array). Duplicate points collapse
            into a single point.
        num_clusters: Number of clusters to stop at; must be at least 1.

    Returns:
        A set of frozensets, each frozenset holding the points (as tuples) of
        one cluster. If there are no more distinct points than
        ``num_clusters``, every point is returned as its own cluster.

    Raises:
        ValueError: If ``num_clusters`` is smaller than 1, or if no pair of
            clusters can be selected for merging.
    """
    if num_clusters < 1:
        # Without this check the loop would shrink to one cluster and then
        # fail while unpacking the missing "closest pair".
        raise ValueError(f"num_clusters must be at least 1, got {num_clusters}")

    # Materialise the points once: supports one-shot iterables and avoids
    # rebuilding the same tuples for every distance-matrix entry.
    points = [tuple(point) for point in data]

    # Step 1: Initialize each point as its own cluster
    clusters = {frozenset([point]) for point in points}

    # Pairwise distances keyed by point tuples (hashable, unlike numpy arrays)
    distance_matrix = {
        p1: {p2: euclidean_distance(p1, p2) for p2 in points}
        for p1 in points
    }

    # Step 2: Merge the closest pair until the target count is reached
    while len(clusters) > num_clusters:
        closest = find_closest_clusters(clusters, distance_matrix)
        if closest is None:
            raise ValueError("no pair of clusters could be selected for merging")
        c1, c2 = closest
        clusters.remove(c1)
        clusters.remove(c2)
        clusters.add(c1 | c2)  # Union of the two point sets

    return clusters

In [28]:
# Example usage: five 2-D points, reduced to two clusters
data = np.array([
    [1, 2],
    [2, 3],
    [3, 4],
    [8, 8],
    [9, 9],
])
clusters = agglomerative_clustering(data, num_clusters=2)

# Print the members of each resulting cluster, numbered from 1
for cluster_no, cluster in enumerate(clusters, start=1):
    print(f"Cluster {cluster_no}: {list(cluster)}")


Cluster 1: [(8, 8), (9, 9)]
Cluster 2: [(2, 3), (1, 2), (3, 4)]
