In [3]:
import numpy as np
import pandas as pd

In [144]:
from sklearn.datasets import make_blobs

# Synthetic demo data: 100 samples with 5 features, generated around 5 blob centres.
X, _ = make_blobs(
    n_samples=100,
    centers=5,
    n_features=5,
    cluster_std=2.5,
    random_state=42,
)
# Wrap the raw array in a DataFrame whose columns are named col_0, col_1, ...
X = pd.DataFrame(X, columns=[f'col_{col}' for col in range(X.shape[1])])

In [188]:
class MyAgglomerative():
    """Agglomerative (bottom-up) clustering that merges the closest centroids.

    Every sample starts as its own cluster. While more than ``n_clusters``
    clusters remain, the two clusters whose centroids are closest under
    ``metric`` are merged and the centroid of the merged cluster is
    recomputed as the mean of its member rows.

    Parameters
    ----------
    n_clusters : int, default 3
        Number of clusters at which merging stops. Must be at least 1.
    metric : str, default 'euclidean'
        One of 'euclidean', 'chebyshev', 'manhattan' or 'cosine'.

    Attributes
    ----------
    clusters : dict
        Cluster id -> list of row positions belonging to that cluster.
    clusters_centroids : dict
        Cluster id -> centroid of that cluster (numpy array).
    X : pandas.DataFrame or None
        Data passed to the last ``fit_predict`` call, re-indexed from 0.
    """

    def __init__(self, n_clusters = 3, metric = 'euclidean'):
        self.n_clusters = n_clusters
        self.metric = metric

        self.clusters = {}
        self.clusters_centroids = {}
        self.X = None

    def __repr__(self):
        return f'MyAgglomerative class: n_clusters={self.n_clusters}'

    def fit_predict(self, X):
        """Cluster the rows of ``X`` and return one label per row.

        Parameters
        ----------
        X : pandas.DataFrame (or anything ``pd.DataFrame`` accepts)
            Samples in rows, features in columns.

        Returns
        -------
        numpy.ndarray
            Float array of length ``n_samples`` with labels ``1..k``, numbered
            in ascending order of the surviving cluster ids.

        Raises
        ------
        ValueError
            If ``n_clusters`` is smaller than 1, if ``metric`` is unknown, or
            if no pair of clusters has a comparable (non-NaN) distance.
        """
        if self.n_clusters < 1:
            raise ValueError(
                f'n_clusters must be at least 1, got {self.n_clusters!r}'
            )

        # A 0..n-1 index lets the row positions stored in ``clusters`` be used
        # directly as ``.loc`` labels.
        self.X = pd.DataFrame(X).reset_index(drop=True)
        n_samples = self.X.shape[0]

        # Start from scratch on every call so a previous fit cannot leak
        # stale clusters into this one. Initially each point is its own cluster.
        self.clusters = {row + 1: [row] for row in range(n_samples)}
        self.clusters_centroids = {
            row + 1: self.X.loc[row, :].values for row in range(n_samples)
        }

        # Clustering begins: merge the closest pair until n_clusters remain.
        while len(self.clusters) > self.n_clusters:
            # Snapshot ids and centroids once per pass instead of rebuilding
            # the lists for every pair.
            ids = list(self.clusters_centroids)
            centroids = [self.clusters_centroids[cid] for cid in ids]

            min_D = float('inf')
            closest = None
            for i in range(len(ids) - 1):
                for j in range(i + 1, len(ids)):
                    D = self.get_distance(centroids[i], centroids[j])
                    # Strict '<' keeps the first pair found on ties.
                    if D < min_D:
                        min_D = D
                        closest = (ids[i], ids[j])

            if closest is None:
                # Happens when every distance is NaN (e.g. cosine distance
                # involving a zero vector).
                raise ValueError(
                    'could not find a closest pair of clusters; '
                    'all pairwise distances are undefined'
                )

            keep, drop = closest
            merged = self.clusters[keep] + self.clusters[drop]
            self.clusters[keep] = merged
            del self.clusters[drop]

            self.clusters_centroids[keep] = self.X.loc[merged, :].mean().values
            del self.clusters_centroids[drop]

        y = np.zeros(n_samples)
        for label, cid in enumerate(sorted(self.clusters_centroids), start=1):
            for row in self.clusters[cid]:
                y[row] = label

        return y

    def get_distance(self, x1, x2):
        """Return the distance between ``x1`` and ``x2`` under ``self.metric``.

        Parameters
        ----------
        x1, x2 : numpy arrays or numbers
            Points to compare ('cosine' is only meaningful for vectors).

        Raises
        ------
        ValueError
            If ``self.metric`` is not one of the supported names.
        """
        if self.metric == 'euclidean':
            return np.sqrt(np.sum(np.square(x2 - x1)))
        if self.metric == 'chebyshev':
            return np.max(np.abs(x2 - x1))
        if self.metric == 'manhattan':
            return np.sum(np.abs(x2 - x1))
        if self.metric == 'cosine':
            # Cosine distance = 1 - cos(angle); the denominator is the
            # PRODUCT of the two norms. Undefined if either vector is zero.
            norm_1 = np.sqrt(np.sum(np.square(x1)))
            norm_2 = np.sqrt(np.sum(np.square(x2)))
            return 1 - np.dot(x1, x2) / (norm_1 * norm_2)
        raise ValueError(
            f'unknown metric {self.metric!r}; expected one of '
            "'euclidean', 'chebyshev', 'manhattan', 'cosine'"
        )
    
    