In [7]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import pairwise_distances_argmin_min


iris = load_iris()
data = iris.data
num_data_points = data.shape[0]
num_features = data.shape[1]


population_size = 50
num_generations = 100
num_clusters = 3
mutation_rate = 0.1



def initialize_population():
    return np.random.randint(0, num_clusters, size=(population_size, num_data_points))



def calculate_fitness(population):
    distances = pairwise_distances_argmin_min(data, data[population], axis=1)[1]
    fitness = np.sum(distances)
    return fitness



def crossover(parent1, parent2):
    crossover_point = np.random.randint(1, num_data_points)
    child1 = np.concatenate([parent1[:crossover_point], parent2[crossover_point:]])
    child2 = np.concatenate([parent2[:crossover_point], parent1[crossover_point:]])
    return child1, child2




def mutate(chromosome):
    mutated_chromosome = chromosome.copy()
    for i in range(num_data_points):
        if np.random.rand() < mutation_rate:
            mutation_operation = np.random.choice(['label_swap', 'label_random'])
            if mutation_operation == 'label_swap':
                mutated_chromosome[i] = np.random.randint(num_clusters)
            elif mutation_operation == 'label_random':
                mutated_chromosome[i] = np.random.choice(np.setdiff1d(np.arange(num_clusters), mutated_chromosome[i]))
    return mutated_chromosome




def genetic_algorithm():
    population = initialize_population()

    for generation in range(num_generations):
        fitness_values = np.array([calculate_fitness(chromosome) for chromosome in population])
        selected_indices = np.argsort(fitness_values)[:population_size // 2]
        selected_population = population[selected_indices]

        new_population = []

        for _ in range(population_size // 2):
            parent1, parent2 = selected_population[np.random.choice(selected_population.shape[0], 2, replace=False)]
            child1, child2 = crossover(parent1, parent2)
            new_population.extend([mutate(child1), mutate(child2)])

        population = np.array(new_population)

    best_chromosome = population[np.argmin([calculate_fitness(chromosome) for chromosome in population])]
    return best_chromosome



best_solution = genetic_algorithm()

print("Final Clustering Result:")
print(best_solution)


Final Clustering Result:
[2 1 2 1 2 1 0 0 2 0 2 2 2 1 2 0 0 1 2 1 1 2 2 0 2 2 1 0 2 0 1 2 2 1 1 1 2
 0 1 2 1 2 1 0 2 0 0 2 0 1 1 0 2 1 0 0 1 0 1 1 0 0 0 0 1 2 0 1 1 2 2 0 2 1
 1 0 2 0 1 0 0 1 1 0 1 0 1 2 1 0 0 0 2 2 0 0 0 0 0 2 2 1 2 2 1 0 2 1 2 0 2
 1 1 0 0 0 0 1 2 1 2 0 1 0 0 0 2 0 1 2 1 0 2 1 1 1 2 1 2 2 1 1 1 2 0 2 0 1
 2 0]
