In [23]:
# genetic algorithm code for the feature selection and hyperparameter tuning

import numpy as np
import pandas as pd

class GeneticAlgorithm:
    def __init__(self, model, X_train, y_train, X_test, y_test, population_size=10, generations=10, mutation_rate=0.1):
        self.model = model
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        self.population_size = population_size
        self.generations = generations
        self.mutation_rate = mutation_rate
        self.population = self.generate_population()
        self.fitness = self.calculate_fitness()
        self.best_individual = self.population[np.argmax(self.fitness)]*
        self.best_fitness = np.max(self.fitness)
    
    def generate_population(self):
        population = []
        for i in range(self.population_size):
            individual = np.random.choice([0, 1], size=self.X_train.shape[1])
            population.append(individual)
        return np.array(population)
    
    def calculate_fitness(self):
        fitness = []
        for individual in self.population:
            self.model.fit(self.X_train[:, individual==1], self.y_train)
            fitness.append(self.model.score(self.X_test[:, individual==1], self.y_test))
        return np.array(fitness)
    
    def selection(self):
        idx = np.random.choice(range(self.population_size), size=2, replace=False)
        return self.population[idx[np.argmax(self.fitness[idx])]]
    
    def crossover(self, parent1, parent2):
        crossover_point = np.random.randint(0, len(parent1))
        child1 = np.concatenate((parent1[:crossover_point], parent2[crossover_point:]))
        child2 = np.concatenate((parent2[:crossover_point], parent1[crossover_point:]))
        return child1, child2
    
    def mutation(self, individual):
        for i in range(len(individual)):
            if np.random.rand() < self.mutation_rate:
                individual[i] = 1 - individual[i]
        return individual
    
    def evolve(self):
        new_population = []
        for i in range(self.population_size):
            parent1 = self.selection()
            parent2 = self.selection()
            child1, child2 = self.crossover(parent1, parent2)
            child1 = self.mutation(child1)
            child2 = self.mutation(child2)
            new_population.append(child1)
            new_population.append(child2)
        self.population = np.array(new_population)
        self.fitness = self.calculate_fitness()
        
        if np.max(self.fitness) > self.best_fitness:
            self.best_individual = self.population[np.argmax(self.fitness)]
            self.best_fitness = np.max(self.fitness)
            
    def run(self):
        for i in range(self.generations):
            self.evolve()
            print(f'Generation {i+1} - Best Fitness: {self.best_fitness}')
        return self.best_individual, self.best_fitness

if __name__ == '__main__':
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score
    
    data = load_breast_cancer()
    X = data.data
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    model = RandomForestClassifier(n_estimators=100)
    ga = GeneticAlgorithm(model, X_train, y_train, X_test, y_test, population_size=10, generations=10, mutation_rate=0.1)
    best_individual, best_fitness = ga.run()
    
    model.fit(X_train[:, best_individual==1], y_train)
    y_pred = model.predict(X_test[:, best_individual==1])
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy: {accuracy}')
    
    print(f'Best Individual: {best_individual}')
    print(f'Best Fitness: {best_fitness}')
    print(f'Number of Features: {np.sum(best_individual)}')
    print(f'Selected Features: {data.feature_names[best_individual==1]}')
    print(f'Feature Importance: {model.feature_importances_}')  
    
    

Generation 1 - Best Fitness: 0.9912280701754386
Generation 2 - Best Fitness: 0.9912280701754386
Generation 3 - Best Fitness: 0.9912280701754386
Generation 4 - Best Fitness: 0.9912280701754386
Generation 5 - Best Fitness: 0.9912280701754386
Generation 6 - Best Fitness: 0.9912280701754386
Generation 7 - Best Fitness: 0.9912280701754386
Generation 8 - Best Fitness: 0.9912280701754386
Generation 9 - Best Fitness: 0.9912280701754386
Generation 10 - Best Fitness: 0.9912280701754386
Accuracy: 0.9824561403508771
Best Individual: [0 1 1 0 0 0 1 0 0 1 1 1 1 0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 0 0]
Best Fitness: 0.9912280701754386
Number of Features: 11
Selected Features: ['mean texture' 'mean perimeter' 'mean concavity' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'compactness error'
 'worst radius' 'worst perimeter' 'worst smoothness']
Feature Importance: [0.05076262 0.13936769 0.14036749 0.01619833 0.04714998 0.01523065
 0.04662216 0.02025551 0.21407762 0.24900582 0.060