In [1]:
import numpy as np

class HierarchicalClustering:
    """Agglomerative (bottom-up) hierarchical clustering on Euclidean distance.

    Every sample starts as its own cluster; the two closest clusters are
    merged repeatedly until only ``n_clusters`` remain.

    Parameters
    ----------
    n_clusters : int, default 2
        Number of clusters at which merging stops. Must be at least 1.
    linkage : {'single', 'complete', 'average'}, default 'single'
        How the distance between two clusters is derived from the distances
        between their points: the minimum, the maximum, or the mean over all
        cross-cluster point pairs.
    """

    _LINKAGES = ('single', 'complete', 'average')

    def __init__(self, n_clusters=2, linkage='single'):
        self.n_clusters = n_clusters
        self.linkage = linkage

    def euclidean_distance(self, x, y):
        """Return the Euclidean (L2) distance between points ``x`` and ``y``."""
        return np.sqrt(np.sum((x - y) ** 2))

    def single_linkage(self, cluster1, cluster2):
        """Return the smallest point-to-point distance between two clusters.

        Returns ``inf`` when either cluster is empty. ``fit`` does not call
        this method (it keeps a running distance matrix instead); it is kept
        as a public helper.
        """
        min_distance = float('inf')
        for a in cluster1:
            for b in cluster2:
                dist = self.euclidean_distance(a, b)
                if dist < min_distance:
                    min_distance = dist
        return min_distance

    def fit(self, X):
        """Cluster the samples in ``X`` and return the clusters.

        Parameters
        ----------
        X : iterable of points
            Typically a 2-D array with one sample per row. Each element must
            be accepted by ``euclidean_distance``.

        Returns
        -------
        list of list
            One inner list per cluster, holding the original elements of
            ``X``. When two clusters merge, the result keeps the position of
            the earlier cluster and lists its members first. If ``X`` has no
            more than ``n_clusters`` samples, every sample is returned as its
            own cluster.

        Raises
        ------
        ValueError
            If ``linkage`` is not supported, if ``n_clusters`` is below 1, or
            if further merging is required but no finite distance is left
            (e.g. ``X`` contains NaN).
        """
        if self.linkage not in self._LINKAGES:
            raise ValueError(
                f"Unsupported linkage {self.linkage!r}; expected one of {self._LINKAGES}"
            )
        if self.n_clusters < 1:
            raise ValueError(f"n_clusters must be >= 1, got {self.n_clusters!r}")

        points = list(X)
        n_points = len(points)
        members = [[p] for p in points]
        if n_points <= self.n_clusters:
            return members

        # Point-to-point distances are computed exactly once. The diagonal,
        # retired slots and NaN distances are held at +inf so that argmin
        # never selects them.
        dist = np.full((n_points, n_points), np.inf)
        for i in range(n_points):
            for j in range(i + 1, n_points):
                d = self.euclidean_distance(points[i], points[j])
                if not np.isnan(d):
                    dist[i, j] = dist[j, i] = d

        sizes = np.ones(n_points)
        remaining = n_points
        while remaining > self.n_clusters:
            # argmin returns the first minimum in row-major order; because the
            # matrix is symmetric this is the pair (i, j) with i < j that comes
            # first lexicographically, i.e. the first closest pair in cluster
            # order.
            i, j = divmod(int(np.argmin(dist)), n_points)
            if not np.isfinite(dist[i, j]):
                raise ValueError(
                    "No finite distance left between clusters; "
                    "X may contain NaN or infinite values"
                )

            # Lance-Williams update: the distance from the merged cluster to
            # every other cluster follows from the two rows being merged.
            if self.linkage == 'single':
                merged = np.minimum(dist[i], dist[j])
            elif self.linkage == 'complete':
                merged = np.maximum(dist[i], dist[j])
            else:  # 'average': size-weighted mean of the two rows
                merged = (sizes[i] * dist[i] + sizes[j] * dist[j]) / (sizes[i] + sizes[j])
            merged[i] = merged[j] = np.inf

            # Slot i now represents the merged cluster; slot j is retired.
            dist[i, :] = merged
            dist[:, i] = merged
            dist[j, :] = np.inf
            dist[:, j] = np.inf
            sizes[i] += sizes[j]
            members[i] = members[i] + members[j]
            members[j] = None
            remaining -= 1

        return [cluster for cluster in members if cluster is not None]

# Demo: cluster a small random 2-D sample.
np.random.seed(42)  # fixed seed so the printed clusters are reproducible
data = np.random.rand(10, 2)  # 10 samples, 2 features

# Build the model, then merge the samples down to three clusters.
model = HierarchicalClustering(linkage='single', n_clusters=3)
result = model.fit(data)

print(result)


[[array([0.37454012, 0.95071431]), array([0.05808361, 0.86617615]), array([0.02058449, 0.96990985]), array([0.73199394, 0.59865848]), array([0.60111501, 0.70807258])], [array([0.15601864, 0.15599452]), array([0.18182497, 0.18340451]), array([0.30424224, 0.52475643]), array([0.43194502, 0.29122914])], [array([0.83244264, 0.21233911])]]


In [2]:
# Number of clusters returned by model.fit(data).
len(result)

3

In [5]:
# Inspect the members of the third cluster (index 2).
result[2]

[array([0.83244264, 0.21233911])]

In [6]:
# Synthetic data: three Gaussian blobs with N_PER_BLOB samples each.
N_PER_BLOB = 500
N_FEATURES = 5

# Each blob's centre is drawn per feature as uniform(0, 1) * 10 - 5.
x1 = np.random.normal(loc=np.random.uniform(size=(N_FEATURES,))*10-5, size=(N_PER_BLOB, N_FEATURES))
x2 = np.random.normal(loc=np.random.uniform(size=(N_FEATURES,))*10-5, size=(N_PER_BLOB, N_FEATURES))
x3 = np.random.normal(loc=np.random.uniform(size=(N_FEATURES,))*10-5, size=(N_PER_BLOB, N_FEATURES))
x = np.vstack((x1, x2, x3))

# Shuffle the rows so the three blobs are interleaved.
shuffle = np.random.permutation(x.shape[0])
x = x[shuffle, :]

# shuffle[k] is the pre-shuffle row of x[k]; rows were stacked as x1, x2, x3,
# so integer division by the blob size yields the true label (0, 1 or 2).
membership = shuffle // N_PER_BLOB

# Sanity check: every blob contributes exactly N_PER_BLOB samples.
blob_ids, blob_counts = np.unique(membership, return_counts=True)
assert blob_ids.tolist() == [0, 1, 2]
assert blob_counts.tolist() == [N_PER_BLOB] * 3

membership


array([0, 2, 1, ..., 2, 2, 2])

In [None]:
# Cluster the synthetic dataset `x` into three groups with single linkage.
model = HierarchicalClustering(n_clusters=3, linkage='single')
result = model.fit(x)

# Summarise by cluster size instead of printing every member point, which
# would flood the notebook with one array per sample.
print([len(cluster) for cluster in result])