In [6]:
import numpy as np

class AgglomerativeClustering:
    def __init__(self, n_clusters=2):
        self.n_clusters = n_clusters
        self.labels_ = None
        self.clusters = []

    def fit(self, X):
        self.X = X
        self.labels_ = -1 * np.ones(X.shape[0])
        self.clusters = [{i} for i in range(X.shape[0])]
        distances = self.compute_initial_distances(X)

        while len(self.clusters) > self.n_clusters:
            i, j = self.find_closest_clusters(distances)
            self.merge_clusters(i, j)
            self.update_distances(distances, i, j)

        for idx, cluster in enumerate(self.clusters):
            for point in cluster:
                self.labels_[point] = idx

    def compute_initial_distances(self, X):
        distances = np.zeros((X.shape[0], X.shape[0]))
        for i in range(X.shape[0]):
            for j in range(i + 1, X.shape[0]):
                distances[i, j] = distances[j, i] = self.distance(X[i], X[j])
        return distances

    def find_closest_clusters(self, distances):
        min_dist = np.inf
        closest = (0, 1)
        for i in range(len(self.clusters)):
            for j in range(i + 1, len(self.clusters)):
                if distances[i, j] < min_dist:
                    min_dist = distances[i, j]
                    closest = (i, j)
        return closest

    def merge_clusters(self, i, j):
        self.clusters[i] = self.clusters[i].union(self.clusters[j])
        del self.clusters[j]

    def update_distances(self, distances, i, j):
        for k in range(len(self.clusters)):
            if k != i and k != j:
                distances[i, k] = distances[k, i] = self.linkage_criteria(self.clusters[i], self.clusters[k])
        distances = np.delete(distances, j, axis=0)
        distances = np.delete(distances, j, axis=1)

    def linkage_criteria(self, cluster_i, cluster_j):
        distances = [self.distance(self.X[a], self.X[b]) for a in cluster_i for b in cluster_j]
        return np.mean(distances)

    def distance(self, point1, point2):
        return np.sqrt(np.sum((point1 - point2) ** 2))

class SilhouetteScore:
    @staticmethod
    def compute(X, labels):
        silhouette_scores = []
        for i in range(X.shape[0]):
            own_cluster = np.where(labels == labels[i])[0]
            other_cluster = np.where(labels != labels[i])[0]

            # Compute intra-cluster distance (a)
            if len(own_cluster) > 1:
                a = np.mean([np.sqrt(np.sum((X[i] - X[j]) ** 2)) for j in own_cluster if i != j])
            else:
                # Assign a high value to 'a' if there's only one point in the cluster
                # This will result in a negative silhouette score for this point,
                # indicating that the clustering configuration might not be ideal.
                a = np.inf

            # Compute nearest cluster distance (b)
            b = np.min([
                np.mean([np.sqrt(np.sum((X[i] - X[j]) ** 2)) for j in other_cluster if labels[j] == unique_label])
                for unique_label in set(labels) if unique_label != labels[i]
            ])

            # Compute the silhouette score for the current point
            if a < np.inf:
                silhouette_scores.append((b - a) / max(a, b))
            else:
                silhouette_scores.append(-1)  # Assigning a negative score for isolated points

        # Handle the case where all points are isolated or there are no valid silhouette scores
        if not silhouette_scores or np.all(np.isnan(silhouette_scores)):
            return np.nan  # Indicates an issue with the clustering

        return np.mean([s for s in silhouette_scores if not np.isnan(s)])

# Example usage with dummy data
if __name__ == "__main__":
    # Generate some dummy data
    from sklearn.datasets import make_blobs
    X, _ = make_blobs(n_samples=150, centers=3, n_features=2, random_state=42)

    # Initialize and fit the model
    model = AgglomerativeClustering(n_clusters=3)
    model.fit(X)

    # Calculate Silhouette Score
    score = SilhouetteScore.compute(X, model.labels_)

    print(f"Silhouette Score: {score}")


Silhouette Score: -0.29167452093597734
