In [6]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score


# Generating a dataset with 5 features
X, y = make_classification(n_samples=100, n_features=5, n_informative=3, 
                           n_redundant=2, random_state=42)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# PSO Parameters
num_particles = 10
num_iterations = 5
num_features = 5

w = 0.7   # Inertia weight
c1 = 2    # Cognitive constant
c2 = 2    # Social constant
r1 = 0.5  # random number 1 
r2 = 0.3  # random number 2 

def sigmoid(x):
    """Converts velocity to probability for binary selection."""
    return 1 / (1 + np.exp(-x))

def fitness_function(selected_features_indices):
    """
    Trains a Decision Tree using only selected features and returns accuracy.
    """
   
    if len(selected_features_indices) == 0:
        return 0.0
    
 
    X_train_sub = X_train[:, selected_features_indices]
    X_test_sub = X_test[:, selected_features_indices]
    
    clf = DecisionTreeClassifier(random_state=42)
    clf.fit(X_train_sub, y_train)
    
    preds = clf.predict(X_test_sub)
    return accuracy_score(y_test, preds)


particles_position = np.random.choice([0, 1], size=(num_particles, num_features))

particles_velocity = np.random.uniform(-1, 1, size=(num_particles, num_features))

pbest_position = particles_position.copy()
pbest_fitness = np.zeros(num_particles)

# Global Best (gbest)
gbest_position = np.zeros(num_features)
gbest_fitness = 0.0


for i in range(num_particles):
   
    indices = np.where(particles_position[i] == 1)[0]
    score = fitness_function(indices)
    
    pbest_fitness[i] = score
    if score > gbest_fitness:
        gbest_fitness = score
        gbest_position = particles_position[i].copy()


print(f"{'Iter':<5} | {'Best Accuracy':<15} | {'Selected Features'}")
print("-" * 50)

for t in range(num_iterations):
    for i in range(num_particles):
       
        # Formula: v_new = w*v + c1*r1*(pbest - x) + c2*r2*(gbest - x)
        
        term_inertia = w * particles_velocity[i]
        term_cognitive = c1 * r1 * (pbest_position[i] - particles_position[i])
        term_social = c2 * r2 * (gbest_position - particles_position[i])
        
        new_velocity = term_inertia + term_cognitive + term_social
        particles_velocity[i] = new_velocity
        
        
        # We use sigmoid of velocity to determine probability of position being 1
        probs = sigmoid(new_velocity)
        
        # In standard BPSO, we compare prob to a random number.
        # Here we threshold at 0.5 for a deterministic example, 
        # or use np.random.rand() if you want stochastic behavior.
        # Using random comparison as is standard for BPSO:
        random_thresholds = np.random.rand(num_features) 
        particles_position[i] = (probs > random_thresholds).astype(int)
        
       
        indices = np.where(particles_position[i] == 1)[0]
        current_fitness = fitness_function(indices)
        
        # Update Personal Best
        if current_fitness > pbest_fitness[i]:
            pbest_fitness[i] = current_fitness
            pbest_position[i] = particles_position[i].copy()
            
            # Update Global Best
            if current_fitness > gbest_fitness:
                gbest_fitness = current_fitness
                gbest_position = particles_position[i].copy()
    
    print(f"{t+1:<5} | {gbest_fitness:.4f}          | {gbest_position}")

#Final Result
selected_indices = np.where(gbest_position == 1)[0]
print("\n" + "="*30)
print("FINAL RESULT")
print("="*30)
print(f"Features Selected Indices: {selected_indices}")
print(f"Number of Features: {len(selected_indices)} out of {num_features}")
print(f"Final Accuracy: {gbest_fitness:.4f}")

Iter  | Best Accuracy   | Selected Features
--------------------------------------------------
1     | 0.8333          | [0 1 1 0 1]
2     | 0.8333          | [0 1 1 0 1]
3     | 0.8333          | [0 1 1 0 1]
4     | 0.8333          | [0 1 1 0 1]
5     | 0.8667          | [0 1 1 1 1]

FINAL RESULT
Features Selected Indices: [1 2 3 4]
Number of Features: 4 out of 5
Final Accuracy: 0.8667
