In [3]:
#Support vector machine
import random
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from deap import base, creator, tools, algorithms

# Step 1: Load Breast Cancer Dataset
data = load_breast_cancer()
X = pd.DataFrame(data['data'], columns=data['feature_names'])
y = data['target']

# Step 2: Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 3: Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Step 4: Define Genetic Algorithm Setup
# Define individual fitness function (accuracy score)
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)


# Create an individual (a list of 0s and 1s representing feature inclusion/exclusion)
def create_individual():
    return creator.Individual([random.randint(0, 1) for _ in range(X_train.shape[1])])


# Evaluate the fitness of the individual (train an SVM model using the selected features)
def evaluate(individual):
    # Extract the features selected by the individual (1 means the feature is selected)
    selected_features = [i for i in range(len(individual)) if individual[i] == 1]

    if len(selected_features) == 0:
        return 0.0,  # Return 0 accuracy if no features are selected

    X_train_selected = X_train[:, selected_features]
    X_test_selected = X_test[:, selected_features]

    # Train an SVM classifier
    svm_model = SVC(kernel='linear', random_state=42)
    svm_model.fit(X_train_selected, y_train)

    # Make predictions and compute accuracy
    y_pred = svm_model.predict(X_test_selected)
    accuracy = accuracy_score(y_test, y_pred)

    return accuracy,


# Step 5: Set up Genetic Algorithm toolbox
toolbox = base.Toolbox()
toolbox.register("individual", create_individual)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", evaluate)

# Step 6: Genetic Algorithm Configuration
population_size = 50
generations = 40
crossover_probability = 0.5
mutation_probability = 0.2

# Step 7: Initialize population
population = toolbox.population(n=population_size)

# Step 8: Run Genetic Algorithm
result_population, log = algorithms.eaSimple(
    population, toolbox,
    cxpb=crossover_probability,
    mutpb=mutation_probability,
    ngen=generations,
    verbose=True
)

# Step 9: Get the best individual (best feature subset)
best_individual = tools.selBest(result_population, k=1)[0]
print(f"Best individual (selected features): {best_individual}")
selected_features = [i for i, bit in enumerate(best_individual) if bit == 1]
print(f"Selected features (indices): {selected_features}")

# Step 10: Train final model on selected features and evaluate performance
X_train_selected = X_train[:, selected_features]
X_test_selected = X_test[:, selected_features]

final_model = SVC(kernel='linear', random_state=42)
final_model.fit(X_train_selected, y_train)
y_pred = final_model.predict(X_test_selected)
final_accuracy = accuracy_score(y_test, y_pred)

print(f"Final accuracy with selected features: {final_accuracy:.2f}")


gen	nevals
0  	50    
1  	35    
2  	39    
3  	32    
4  	32    
5  	26    
6  	31    
7  	26    
8  	25    
9  	41    
10 	36    
11 	25    
12 	30    
13 	26    
14 	32    
15 	28    
16 	24    
17 	27    
18 	29    
19 	31    
20 	20    
21 	36    
22 	29    
23 	37    
24 	28    
25 	22    
26 	32    
27 	28    
28 	33    
29 	28    
30 	29    
31 	24    
32 	35    
33 	27    
34 	22    
35 	32    
36 	33    
37 	27    
38 	40    
39 	25    
40 	27    
Best individual (selected features): [0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1]
Selected features (indices): [1, 3, 4, 6, 8, 9, 10, 12, 13, 15, 16, 20, 22, 23, 24, 27, 29]
Final accuracy with selected features: 0.99
