In [7]:
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_breast_cancer

# ========== (1) Initialize Population ==========
def initialize_population(feature_count, pop_size):
    """Create a random starting population of binary chromosomes.

    Args:
        feature_count: Number of genes (one per candidate feature) in each
            chromosome.
        pop_size: Number of chromosomes to generate.

    Returns:
        An integer array of shape ``(pop_size, feature_count)`` whose entries
        are drawn uniformly from {0, 1} using NumPy's global random state.
    """
    shape = (pop_size, feature_count)
    # Half-open interval [0, 2) -> each gene is either 0 or 1.
    return np.random.randint(0, 2, size=shape)

# ========== (2) Fitness Function ==========
def fitness_function(population, X, y):
    """Score every chromosome by the hold-out accuracy of a random forest.

    For each chromosome the columns of ``X`` whose gene equals 1 are kept,
    the data is split 80/20 (``random_state=42``), a 50-tree
    ``RandomForestClassifier`` (``random_state=42``) is fitted on the
    training part, and its accuracy on the test part becomes the fitness.

    Args:
        population: 2-D array of 0/1 genes, one chromosome per row.
        X: Feature matrix indexed as ``X[:, columns]``.
        y: Target labels aligned with the rows of ``X``.

    Returns:
        A float array with one accuracy score per chromosome. Chromosomes
        that select no features keep a score of 0.0.
    """
    scores = np.zeros(population.shape[0])

    for row, genes in enumerate(population):
        columns = np.nonzero(genes == 1)[0]
        if columns.size > 0:
            # Fixed seeds keep the split and the forest identical across
            # chromosomes, so scores differ only because of the feature subset.
            split = train_test_split(X[:, columns], y, test_size=0.2, random_state=42)
            X_train, X_test, y_train, y_test = split

            forest = RandomForestClassifier(n_estimators=50, random_state=42)
            forest.fit(X_train, y_train)

            scores[row] = accuracy_score(y_test, forest.predict(X_test))
        # A chromosome with no selected features cannot be trained on; its
        # score stays at the initial 0.0.

    return scores

# ========== (3) Selection (Roulette Wheel) ==========
def select_parents(population, fitness_scores):
    """Pick two distinct parents by roulette-wheel (fitness-proportional) selection.

    Args:
        population: 2-D array with one chromosome per row (at least two rows).
        fitness_scores: Non-negative fitness value for each chromosome.

    Returns:
        An array of shape ``(2, n_genes)`` holding the two selected parents.

    Notes:
        * If every fitness is zero, both parents are drawn uniformly.
        * If exactly one chromosome has non-zero fitness, weighted sampling
          without replacement cannot produce two distinct indices
          (``np.random.choice`` raises ``ValueError``), so that chromosome is
          paired with a uniformly chosen mate from the rest of the population.
    """
    fitness_scores = np.asarray(fitness_scores, dtype=float)
    pop_count = len(population)

    total_fitness = fitness_scores.sum()
    if total_fitness == 0:
        return population[np.random.choice(pop_count, 2, replace=False)]

    selection_probs = fitness_scores / total_fitness

    if np.count_nonzero(selection_probs > 0) < 2:
        # Only one viable candidate: keep it and choose any other chromosome
        # as the second parent so crossover can still take place.
        fittest = int(np.argmax(selection_probs))
        others = np.delete(np.arange(pop_count), fittest)
        mate = int(np.random.choice(others))
        return population[[fittest, mate]]

    chosen = np.random.choice(pop_count, 2, replace=False, p=selection_probs)
    return population[chosen]

# ========== (4) Crossover (Single Point) ==========
def crossover(parent1, parent2):
    """Recombine two parents with single-point crossover.

    A cut point is drawn uniformly from ``1 .. len(parent1) - 1`` so that each
    child receives at least one gene from each parent.

    Args:
        parent1: 1-D gene array.
        parent2: 1-D gene array of the same length as ``parent1``.

    Returns:
        A tuple ``(child1, child2)``: ``child1`` takes the head of ``parent1``
        and the tail of ``parent2``; ``child2`` is the mirror image. When the
        chromosomes have fewer than two genes there is no interior cut point,
        so unmodified copies of the parents are returned instead.
    """
    gene_count = len(parent1)
    if gene_count < 2:
        # np.random.randint(1, 1) would raise ValueError (low >= high).
        return np.array(parent1, copy=True), np.array(parent2, copy=True)

    point = np.random.randint(1, gene_count)
    child1 = np.concatenate((parent1[:point], parent2[point:]))
    child2 = np.concatenate((parent2[:point], parent1[point:]))
    return child1, child2

# ========== (5) Mutation (Bit Flip) ==========
def mutate(population, mutation_rate):
    """Apply independent bit-flip mutation to every gene.

    Args:
        population: Array of 0/1 genes (any shape).
        mutation_rate: Probability in [0, 1] that an individual gene is flipped.

    Returns:
        A new integer array of the same shape with the selected genes flipped.
    """
    draws = np.random.rand(*population.shape)
    flip = draws < mutation_rate
    genes_on = population.astype(bool)
    # For booleans, "not equal" is XOR: a gene changes exactly where flip is set.
    return (genes_on != flip).astype(int)

# ========== (6) Genetic Algorithm Execution ==========
def genetic_algorithm(X, y, feature_count, pop_size=10, generations=20, mutation_rate=0.1):
    """Search for a high-scoring feature subset with a simple genetic algorithm.

    Each generation evaluates the population, breeds a replacement population
    through roulette-wheel selection and single-point crossover, and applies
    bit-flip mutation. The best chromosome that was actually evaluated is
    remembered across generations and returned at the end.

    Args:
        X: Feature matrix passed through to ``fitness_function``.
        y: Target labels passed through to ``fitness_function``.
        feature_count: Number of genes per chromosome (columns of ``X``).
        pop_size: Number of chromosomes per generation (at least 2, because
            two distinct parents are selected for every crossover).
        generations: Number of generations to run (at least 1).
        mutation_rate: Per-gene flip probability used by ``mutate``.

    Returns:
        The 0/1 chromosome with the highest fitness observed in any generation.

    Raises:
        ValueError: If ``generations`` is less than 1.
    """
    if generations < 1:
        raise ValueError("generations must be at least 1")

    population = initialize_population(feature_count, pop_size)

    best_solution = None
    best_fitness = -np.inf

    for generation in range(generations):
        fitness_scores = fitness_function(population, X, y)

        # Record the champion now, while fitness_scores still describes
        # `population`; after breeding, the indices no longer line up.
        best_index = np.argmax(fitness_scores)
        if fitness_scores[best_index] > best_fitness:
            best_fitness = fitness_scores[best_index]
            best_solution = population[best_index].copy()

        new_population = []
        # Round up so an odd pop_size still yields enough children, then trim
        # the surplus child below to keep the population size constant.
        for _ in range((pop_size + 1) // 2):
            parent1, parent2 = select_parents(population, fitness_scores)
            child1, child2 = crossover(parent1, parent2)
            new_population.extend([child1, child2])

        population = mutate(np.array(new_population[:pop_size]), mutation_rate)
        print(f"Generation {generation + 1}: Best Fitness = {fitness_scores[best_index]:.4f}")

    print("\nBest Feature Subset:", best_solution)
    return best_solution

# ========== (7) Run the Genetic Algorithm ==========
if __name__ == "__main__":
    # Load scikit-learn's breast cancer dataset and separate the feature
    # matrix from the target labels.
    data = load_breast_cancer()
    X, y = data.data, data.target
    # One gene per column of X.
    feature_count = X.shape[1]
    
    # Run the search with the default pop_size, generations and mutation_rate.
    best_features = genetic_algorithm(X, y, feature_count)


Generation 1: Best Fitness = 0.9825
Generation 2: Best Fitness = 0.9825
Generation 3: Best Fitness = 0.9912
Generation 4: Best Fitness = 0.9737
Generation 5: Best Fitness = 0.9912
Generation 6: Best Fitness = 0.9825
Generation 7: Best Fitness = 0.9825
Generation 8: Best Fitness = 0.9825
Generation 9: Best Fitness = 1.0000
Generation 10: Best Fitness = 0.9912
Generation 11: Best Fitness = 0.9912
Generation 12: Best Fitness = 0.9825
Generation 13: Best Fitness = 0.9825
Generation 14: Best Fitness = 0.9825
Generation 15: Best Fitness = 0.9825
Generation 16: Best Fitness = 0.9737
Generation 17: Best Fitness = 0.9825
Generation 18: Best Fitness = 0.9737
Generation 19: Best Fitness = 0.9825
Generation 20: Best Fitness = 0.9737

Best Feature Subset: [1 0 1 1 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 1 0 1]
