In [4]:
import numpy as np
from sklearn.datasets import load_iris

In [5]:
# Load the Iris dataset bundled with scikit-learn and keep its feature matrix.
iris = load_iris()
data = iris.data  # samples to cluster; the plotting cell labels columns 0 and 1 as sepal length/width
num_clusters = 3  # We want to cluster the data into three classes

In [6]:
# Hyperparameters handed to BFOCluster below.
# Number of candidate centroid sets created by initialize_population().
population_size = 50
# Maximum absolute per-coordinate perturbation applied by tumble().
tumble_step = 0.02
# Upper bound of the random step length drawn in swim().
swim_length = 0.5
# Per-individual probability of being moved in eliminate_disperse().
elim_disp_prob = 0.25
# Per-individual probability of spawning an offspring in reproduce().
reprod_prob = 0.45
# Per-iteration probability that collapse() drops duplicate individuals.
collapse_prob = 0.25
# Number of eliminate/reproduce/collapse iterations performed by run().
steps = 5

In [7]:
class BFOCluster:
    """Bacterial-foraging-style stochastic search for cluster centroids.

    Each individual in the population is one candidate set of centroids with
    shape ``(num_clusters, n_features)``. Individuals are perturbed at random
    (``tumble`` / ``swim``), occasionally duplicated with perturbation
    (``reproduce``), and the individual with the lowest within-cluster sum of
    squared errors is returned by ``run``.

    Randomness comes from the global ``np.random`` state, so call
    ``np.random.seed`` beforehand for reproducible results.
    """

    def __init__(self, data, num_clusters, population_size, tumble_step, swim_length,
                 elim_disp_prob, reprod_prob, collapse_prob, steps):
        """Store the data set and the search hyperparameters.

        Args:
            data: 2-D array of samples, one row per sample.
            num_clusters: Number of centroids in each candidate solution.
            population_size: Number of candidate solutions created initially.
            tumble_step: Maximum absolute per-coordinate change in ``tumble``.
            swim_length: Upper bound of the step length drawn in ``swim``.
            elim_disp_prob: Per-individual probability of being moved in
                ``eliminate_disperse``.
            reprod_prob: Per-individual probability of producing an offspring
                in ``reproduce``.
            collapse_prob: Per-iteration probability that ``collapse`` removes
                duplicate individuals.
            steps: Number of iterations performed by ``run``.
        """
        self.data = data
        self.num_clusters = num_clusters
        self.population_size = population_size
        self.tumble_step = tumble_step
        self.swim_length = swim_length
        self.elim_disp_prob = elim_disp_prob
        self.reprod_prob = reprod_prob
        self.collapse_prob = collapse_prob
        self.steps = steps
        self.population = []

    def _clip_to_data_range(self, position):
        """Clamp every coordinate to the global [min, max] of the data."""
        return np.clip(position, np.min(self.data), np.max(self.data))

    def initialize_population(self):
        """Draw the initial population uniformly within the data's value range.

        Sets ``self.population`` to an array of shape
        ``(population_size, num_clusters, n_features)``.
        """
        self.population = np.random.uniform(
            low=np.min(self.data),
            high=np.max(self.data),
            size=(self.population_size, self.num_clusters, self.data.shape[1]),
        )

    def fitness_function(self, centroids):
        """Return the within-cluster sum of squared errors for ``centroids``.

        Every sample is assigned to its nearest centroid (Euclidean distance)
        and the squared distances to the assigned centroids are summed.
        Lower is better.

        Args:
            centroids: Array of shape ``(num_clusters, n_features)``.
        """
        distances = np.linalg.norm(self.data[:, np.newaxis] - centroids, axis=-1)
        labels = np.argmin(distances, axis=-1)
        # Subtract from the 2-D data directly: adding a new axis here would
        # broadcast (N, 1, D) against (N, D) into (N, N, D) and sum the error
        # over every sample/centroid pairing instead of one term per sample.
        residuals = self.data - centroids[labels]
        return np.sum(np.square(residuals))

    def tumble(self, position):
        """Return ``position`` jittered by up to ``tumble_step`` per coordinate."""
        displacement = np.random.uniform(-self.tumble_step, self.tumble_step,
                                         size=position.shape)
        return self._clip_to_data_range(position + displacement)

    def swim(self, position):
        """Return ``position`` moved a random distance in a random direction.

        The step length is uniform in ``[0, swim_length)`` and the result is
        clipped to the data's value range.
        """
        # A normal draw gives an unbiased (isotropic) direction; uniform(0, 1)
        # components are all non-negative and would only ever push upward.
        direction = np.random.normal(size=position.shape)
        norm = np.linalg.norm(direction)
        if norm > 0:
            direction /= norm
        step_size = np.random.uniform(0, self.swim_length)
        return self._clip_to_data_range(position + step_size * direction)

    def eliminate_disperse(self):
        """Randomly relocate individuals, each with probability ``elim_disp_prob``."""
        for i, individual in enumerate(self.population):
            if np.random.uniform() < self.elim_disp_prob:
                self.population[i] = self.swim(self.tumble(individual))

    def reproduce(self):
        """Append a perturbed copy of each individual selected with ``reprod_prob``."""
        offspring = [
            self.swim(self.tumble(parent))
            for parent in self.population
            if np.random.uniform() < self.reprod_prob
        ]
        # Nothing to append; concatenating an empty list would raise because
        # its rank differs from the population array's.
        if not offspring:
            return
        self.population = np.concatenate((self.population, np.asarray(offspring)), axis=0)

    def collapse(self):
        """With probability ``collapse_prob``, drop duplicate individuals.

        ``np.unique`` also returns the remaining individuals in sorted order.
        """
        if np.random.uniform() < self.collapse_prob:
            self.population = np.unique(self.population, axis=0)

    def run(self):
        """Run the search and return the best centroids found.

        Returns:
            Array of shape ``(num_clusters, n_features)`` holding the
            individual with the lowest ``fitness_function`` value, or ``None``
            if the population is empty.
        """
        self.initialize_population()

        for _ in range(self.steps):
            self.eliminate_disperse()
            self.reproduce()
            self.collapse()

        best_fitness = float('inf')
        best_centroids = None
        for position in self.population:
            centroids = position.reshape((-1, self.data.shape[1]))
            fitness = self.fitness_function(centroids)
            if fitness < best_fitness:
                best_fitness = fitness
                best_centroids = centroids

        return best_centroids

In [None]:
# Build the optimiser from the hyperparameters defined above and run the search.
bfo = BFOCluster(
    data=data,
    num_clusters=num_clusters,
    population_size=population_size,
    tumble_step=tumble_step,
    swim_length=swim_length,
    elim_disp_prob=elim_disp_prob,
    reprod_prob=reprod_prob,
    collapse_prob=collapse_prob,
    steps=steps,
)
best_centroids = bfo.run()

# Label each sample with the index of its nearest returned centroid.
best_labels = np.linalg.norm(
    data[:, np.newaxis] - best_centroids, axis=-1
).argmin(axis=-1)

In [None]:
import matplotlib.pyplot as plt

# Scatter the samples on their first two features, coloured by assigned cluster.
plt.scatter(data[:, 0], data[:, 1], c=best_labels, cmap='viridis')

# Overlay every returned centroid as a red cross (first two coordinates only).
for cx, cy in best_centroids[:, :2]:
    plt.scatter(cx, cy, marker='x', color='red', s=100)

# Label the axes, add the title, and display the figure.
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.title('BFO Clustering of Iris Dataset')
plt.show()