In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [2]:
dataset = load_breast_cancer()
X = dataset.data
y = dataset.target
feature_names = dataset.feature_names

In [3]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Dataset loaded successfully with {X.shape[1]} features.")
print("-" * 50)

Dataset loaded successfully with 30 features.
--------------------------------------------------


In [4]:
n_particles = 3
max_iters = 10
w = 0.7   # inertia weight
c1 = 2.0  # personal best coefficient
c2 = 2.0  # global best coefficient
r1 = 0.5  # random number for personal best
r2 = 0.3  # random number for global best

n_features = X_train.shape[1]

In [5]:
def evaluate_features(particle_position, X_train, y_train, alpha=0.99):
    selected_features = particle_position > 0.5
    num_selected = np.sum(selected_features)

    if num_selected == 0:
        return 1.0

    accuracy = cross_val_score(DecisionTreeClassifier(random_state=42), X_train[:, selected_features], y_train, cv=5, n_jobs=-1).mean()

    score = (1 - accuracy)
    num_features = X_train.shape[1]

    return alpha * score + (1 - alpha) * (num_selected / num_features)

In [6]:
# Initialize particles' positions (continuous values between 0 and 1)
particles = np.random.uniform(0, 1, (n_particles, n_features))
velocities = np.random.uniform(-0.1, 0.1, (n_particles, n_features))

In [7]:
# Initialize personal bests (pbest)
personal_best_positions = particles.copy()
personal_best_scores = np.array([evaluate_features(p, X_train, y_train) for p in particles])

In [8]:
# Initialize global best (gbest)
global_best_idx = np.argmin(personal_best_scores)
global_best_position = personal_best_positions[global_best_idx].copy()
global_best_score = personal_best_scores[global_best_idx]

In [9]:
print("PSO Initialized...")
print(f"Initial Global Best Score: {global_best_score:.4f}")
print("-" * 50)

PSO Initialized...
Initial Global Best Score: 0.0815
--------------------------------------------------


In [10]:
# --- PSO Main Loop ---
for iteration in range(max_iters):
    for i in range(n_particles):
        # Update velocity
        velocities[i] = (
            w * velocities[i]
            + c1 * r1 * (personal_best_positions[i] - particles[i])
            + c2 * r2 * (global_best_position - particles[i])
        )

        # Update position
        particles[i] += velocities[i]

        # Ensure positions are within [0, 1]
        particles[i] = np.clip(particles[i], 0, 1)

        # Evaluate fitness
        current_fitness = evaluate_features(particles[i], X_train, y_train)

        # Update personal best (we want a SMALLER score)
        if current_fitness < personal_best_scores[i]:
            personal_best_scores[i] = current_fitness
            personal_best_positions[i] = particles[i].copy()

    # Update global best (we want a SMALLER score)
    best_particle_idx = np.argmin(personal_best_scores)
    if personal_best_scores[best_particle_idx] < global_best_score:
        global_best_score = personal_best_scores[best_particle_idx]
        global_best_position = personal_best_positions[best_particle_idx].copy()

    print(f"Iteration {iteration + 1}/{max_iters}: Best Score = {global_best_score:.4f}, Selected Features = {np.sum(global_best_position > 0.5)}")

Iteration 1/10: Best Score = 0.0684, Selected Features = 16
Iteration 2/10: Best Score = 0.0684, Selected Features = 16
Iteration 3/10: Best Score = 0.0684, Selected Features = 16
Iteration 4/10: Best Score = 0.0684, Selected Features = 16
Iteration 5/10: Best Score = 0.0684, Selected Features = 16
Iteration 6/10: Best Score = 0.0684, Selected Features = 16
Iteration 7/10: Best Score = 0.0684, Selected Features = 16
Iteration 8/10: Best Score = 0.0684, Selected Features = 16
Iteration 9/10: Best Score = 0.0684, Selected Features = 16
Iteration 10/10: Best Score = 0.0684, Selected Features = 16


In [11]:
print("-" * 50)
print("PSO Finished.")
print(f"Final Global Best Score: {global_best_score:.4f}")
print("-" * 50)

--------------------------------------------------
PSO Finished.
Final Global Best Score: 0.0684
--------------------------------------------------


In [12]:
# Determine the final set of selected features
final_selected_mask = global_best_position > 0.5
selected_feature_names = feature_names[final_selected_mask]

print(f"Number of selected features: {np.sum(final_selected_mask)}")
print("Selected features:", ", ".join(selected_feature_names))
print("-" * 50)

Number of selected features: 16
Selected features: mean texture, mean perimeter, mean compactness, mean fractal dimension, radius error, texture error, area error, smoothness error, compactness error, concavity error, concave points error, symmetry error, worst texture, worst concavity, worst symmetry, worst fractal dimension
--------------------------------------------------


In [13]:
# Train and evaluate a model using ONLY the selected features
print("Evaluating model with selected features...")
model_selected = DecisionTreeClassifier(random_state=42)
model_selected.fit(X_train[:, final_selected_mask], y_train)
predictions_selected = model_selected.predict(X_test[:, final_selected_mask])
accuracy_selected = accuracy_score(y_test, predictions_selected)

Evaluating model with selected features...


In [14]:
# Train and evaluate a model using ALL features for comparison
print("Evaluating model with all features...")
model_all = DecisionTreeClassifier(random_state=42)
model_all.fit(X_train, y_train)
predictions_all = model_all.predict(X_test)
accuracy_all = accuracy_score(y_test, predictions_all)

Evaluating model with all features...


In [15]:
print("\n--- Final Results ---")
print(f"Accuracy with selected features: {accuracy_selected:.4f}")
print(f"Accuracy with all features:      {accuracy_all:.4f}")


--- Final Results ---
Accuracy with selected features: 0.9561
Accuracy with all features:      0.9474
