[Reference](https://medium.com/@francescofranco_39234/birch-clustering-with-python-and-scikit-learn-e14de0a9e62c)

# Implementing BIRCH in Python and Scikit-learn

In [1]:
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

# Step 1: Define the Clustering Feature (CF) structure
class CF:
    def __init__(self, point):
        """
        A Clustering Feature (CF) is a tuple (n, LS, SS), where:
        - n: Number of points in the subcluster
        - LS: Linear Sum of the points (vector)
        - SS: Squared Sum of the points (scalar)
        """
        self.n = 1  # Number of points
        self.LS = np.array(point)  # Linear Sum
        self.SS = np.sum(np.array(point) ** 2)  # Squared Sum

    def add_point(self, point):
        """
        Add a new point to the CF.
        """
        self.n += 1
        self.LS += np.array(point)
        self.SS += np.sum(np.array(point) ** 2)

    def centroid(self):
        """
        Compute the centroid of the subcluster.
        """
        return self.LS / self.n

    def radius(self):
        """
        Compute the radius of the subcluster.
        """
        if self.n == 0:
            return 0
        return np.sqrt((self.SS / self.n) - np.sum(self.centroid() ** 2))

    def merge(self, other_cf):
        """
        Merge two CFs into one.
        """
        self.n += other_cf.n
        self.LS += other_cf.LS
        self.SS += other_cf.SS

    def distance_to(self, point):
        """
        Compute the Euclidean distance between the centroid of this CF and a given point.
        """
        return np.linalg.norm(self.centroid() - np.array(point))


# Step 2: Define the CF Tree structure
class CFTree:
    def __init__(self, threshold, branching_factor):
        """
        Initialize the CF Tree.
        - threshold: Maximum radius of a subcluster
        - branching_factor: Maximum number of children per node
        """
        self.threshold = threshold
        self.branching_factor = branching_factor
        self.root = CFNode(threshold, branching_factor)

    def insert(self, point):
        """
        Insert a point into the CF Tree.
        """
        self.root.insert(point)

    def get_subclusters(self):
        """
        Retrieve all subclusters from the CF Tree.
        """
        return self.root.get_subclusters()


# Step 3: Define the CF Node structure
class CFNode:
    def __init__(self, threshold, branching_factor):
        """
        A node in the CF Tree.
        """
        self.threshold = threshold
        self.branching_factor = branching_factor
        self.subclusters = []  # List of CFs
        self.children = []  # List of child nodes

    def insert(self, point):
        """
        Insert a point into the CF Tree.
        """
        if not self.children:
            # If this is a leaf node, insert into subclusters
            self._insert_to_leaf(point)
        else:
            # If this is an internal node, find the closest child and insert recursively
            self._insert_to_internal_node(point)

    def _insert_to_leaf(self, point):
        """
        Insert a point into a leaf node.
        """
        if not self.subclusters:
            # If no subclusters exist, create a new one
            self.subclusters.append(CF(point))
        else:
            # Find the closest subcluster
            closest_cf = self._find_closest_subcluster(point)
            if closest_cf.radius() < self.threshold:
                # If the radius is within the threshold, add the point to the subcluster
                closest_cf.add_point(point)
            else:
                # Otherwise, create a new subcluster
                self.subclusters.append(CF(point))
                if len(self.subclusters) > self.branching_factor:
                    # If the number of subclusters exceeds the branching factor, split the node
                    self._split()

    def _insert_to_internal_node(self, point):
        """
        Insert a point into an internal node.
        """
        closest_child = self._find_closest_child(point)
        closest_child.insert(point)

    def _find_closest_subcluster(self, point):
        """
        Find the closest subcluster to a given point.
        """
        distances = [cf.distance_to(point) for cf in self.subclusters]
        return self.subclusters[np.argmin(distances)]

    def _find_closest_child(self, point):
        """
        Find the closest child node to a given point.
        """
        distances = [child.distance_to(point) for child in self.children]
        return self.children[np.argmin(distances)]

    def _split(self):
        """
        Split the node into two nodes.
        """
        pass  # Implement splitting logic here

    def get_subclusters(self):
        """
        Retrieve all subclusters from this node and its children.
        """
        subclusters = self.subclusters.copy()
        for child in self.children:
            subclusters.extend(child.get_subclusters())
        return subclusters


# Step 4: Define the BIRCH algorithm
class BIRCH:
    def __init__(self, threshold, branching_factor):
        """
        Initialize the BIRCH algorithm.
        """
        self.threshold = threshold
        self.branching_factor = branching_factor
        self.tree = CFTree(threshold, branching_factor)

    def fit(self, X):
        """
        Fit the BIRCH algorithm to the data.
        """
        for point in X:
            self.tree.insert(point)

    def predict(self, X):
        """
        Predict cluster labels for the data.
        """
        labels = []
        subclusters = self.tree.get_subclusters()
        for point in X:
            distances = [cf.distance_to(point) for cf in subclusters]
            labels.append(np.argmin(distances))
        return labels


# Step 5: Example usage
if __name__ == "__main__":
    # Generate sample data
    X, _ = make_blobs(n_samples=1000, centers=5, cluster_std=0.60, random_state=0)
    X = StandardScaler().fit_transform(X)

    # Initialize and fit BIRCH
    birch = BIRCH(threshold=0.5, branching_factor=50)
    birch.fit(X)

    # Predict cluster labels
    labels = birch.predict(X)

    # Print the labels
    print("Cluster Labels:", labels)

Cluster Labels: [np.int64(5), np.int64(2), np.int64(13), np.int64(13), np.int64(8), np.int64(7), np.int64(4), np.int64(7), np.int64(7), np.int64(13), np.int64(4), np.int64(6), np.int64(3), np.int64(7), np.int64(8), np.int64(8), np.int64(4), np.int64(13), np.int64(10), np.int64(2), np.int64(4), np.int64(4), np.int64(2), np.int64(3), np.int64(2), np.int64(6), np.int64(2), np.int64(6), np.int64(4), np.int64(4), np.int64(10), np.int64(4), np.int64(4), np.int64(3), np.int64(4), np.int64(4), np.int64(3), np.int64(5), np.int64(4), np.int64(4), np.int64(13), np.int64(2), np.int64(12), np.int64(3), np.int64(7), np.int64(2), np.int64(7), np.int64(2), np.int64(8), np.int64(5), np.int64(3), np.int64(7), np.int64(6), np.int64(7), np.int64(7), np.int64(4), np.int64(8), np.int64(4), np.int64(7), np.int64(3), np.int64(7), np.int64(4), np.int64(4), np.int64(7), np.int64(7), np.int64(12), np.int64(6), np.int64(13), np.int64(2), np.int64(4), np.int64(2), np.int64(7), np.int64(4), np.int64(9), np.int64(13