In [1]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# CREATE SAMPLE DATASET
print("Creating sample dataset...")
np.random.seed(42)  # seed NumPy's global RNG so the synthetic data is reproducible
n_samples, n_features = 200, 5

# Synthetic data: the label depends only on features 1, 3 and 4 (1-based),
# i.e. columns 0, 2 and 3, with weights 1, 1.5 and 2, plus Gaussian noise.
X = np.random.randn(n_samples, n_features)
linear_signal = X[:, 0] + X[:, 2] * 1.5 + X[:, 3] * 2
label_noise = np.random.randn(n_samples) * 0.5
y = ((linear_signal + label_noise) > 0).astype(int)  # binary target: 1 where the noisy signal is positive

# Hold out 30% of the samples as the test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

print(f"Dataset shape: {X.shape}")
print(f"Training samples: {len(X_train)}, Test samples: {len(X_test)}")

Creating sample dataset...
Dataset shape: (200, 5)
Training samples: 140, Test samples: 60


In [3]:
# PSO PARAMETERS
n_particles, max_iterations = 3, 10  # swarm size and number of PSO iterations
w = 0.7        # Inertia weight (scales the previous velocity)
c1 = c2 = 2.0  # Cognitive (personal-best) and social (global-best) coefficients


In [4]:
# INITIALIZE PARTICLES (from slides)

# Each row is one particle's binary feature mask (1 = feature is selected).
particles = np.array(
    [
        [1, 0, 1, 0, 1],
        [0, 1, 1, 0, 0],
        [1, 1, 0, 1, 0],
    ]
)

# One real-valued velocity component per particle and per feature.
velocities = np.array(
    [
        [0.2, -0.1, 0.3, -0.2, 0.1],
        [-0.1, 0.2, 0.1, -0.3, -0.2],
        [0.1, 0.1, -0.2, 0.3, -0.1],
    ],
    dtype=float,
)

# Personal bests start as an independent copy of the initial positions;
# their fitness values are placeholders (0) until the first evaluation.
pbest_positions = np.copy(particles)
pbest_fitness = np.zeros(n_particles)

# Global best is unknown until fitness is evaluated; -inf guarantees that the
# first evaluated particle replaces it.
gbest_position, gbest_fitness = None, -np.inf

In [5]:
# HELPER FUNCTIONS (inline)

def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of ``x``.

    Works on scalars and NumPy arrays (element-wise). The input is clipped to
    [-500, 500] before exponentiation so that ``np.exp`` cannot overflow for
    extreme values.
    """
    return 1 / (1 + np.exp(-np.clip(x, -500, 500)))


# CALCULATE INITIAL FITNESS
#
# A particle's fitness is the accuracy of a depth-5 decision tree trained on
# only the feature columns whose mask bit is 1. A mask that selects no
# features scores 0.0.
# NOTE(review): accuracy is measured on (X_test, y_test), so the feature subset
# is chosen on the same split that is later used to report accuracy. That
# makes the reported score optimistic; consider a separate validation split.

print("\n" + "="*70)
print("Initial particles:")
print("="*70)

for i in range(n_particles):
    position = particles[i]
    selected_features = np.where(position == 1)[0]

    if len(selected_features) == 0:
        fitness = 0.0
    else:
        X_train_selected = X_train[:, selected_features]
        X_test_selected = X_test[:, selected_features]

        clf = DecisionTreeClassifier(random_state=42, max_depth=5)
        clf.fit(X_train_selected, y_train)
        y_pred = clf.predict(X_test_selected)
        fitness = accuracy_score(y_test, y_pred)

    # The starting position is, by definition, each particle's personal best.
    pbest_fitness[i] = fitness

    # .tolist() converts to plain Python ints. list(ndarray) keeps NumPy
    # scalars, which are rendered as "np.int64(1)" under NumPy >= 2.
    print(f"Particle {i+1}: {position.tolist()} fitness: {fitness:.2f}")

    # Strict ">" keeps the earliest particle when fitness values tie.
    if fitness > gbest_fitness:
        gbest_fitness = fitness
        gbest_position = position.copy()

# Report selected features with 1-based numbering.
selected = [j+1 for j in range(n_features) if gbest_position[j] == 1]
print(f"\nInitial global best: {gbest_position.tolist()} ({gbest_fitness:.2f})")
print(f"Selected features: {selected}")



Initial particles:
Particle 1: [1, 0, 1, 0, 1] fitness: 0.55
Particle 2: [0, 1, 1, 0, 0] fitness: 0.53
Particle 3: [1, 1, 0, 1, 0] fitness: 0.67

Initial global best: [1, 1, 0, 1, 0] (0.67)
Selected features: [1, 2, 4]


In [6]:
# PSO MAIN LOOP
#
# Binary PSO: velocities are real-valued; each bit of a particle's new position
# is drawn as 1 with probability sigmoid(velocity). Personal and global bests
# are updated immediately after each particle is evaluated, so later particles
# in the same iteration already see an improved global best.
# NOTE(review): fitness is accuracy on (X_test, y_test), i.e. the search is
# tuned on the test split; consider a separate validation split.

for iteration in range(max_iterations):
    # The header shows the global best at the START of the iteration (the word
    # "remains" is printed even when the previous iteration improved it).
    # .tolist() gives plain Python ints; list(ndarray) would print NumPy
    # scalars as "np.int64(1)" under NumPy >= 2.
    print(f"\nIteration {iteration+1}: global best remains: {gbest_position.tolist()} ({gbest_fitness:.2f})")
    print("Particles this iteration:")

    for i in range(n_particles):
        # Generate random coefficients (one scalar each, shared by all features)
        r1 = np.random.rand()
        r2 = np.random.rand()

        # Update velocity: inertia + pull towards personal best + pull towards global best
        inertia = w * velocities[i]
        cognitive = c1 * r1 * (pbest_positions[i] - particles[i])
        social = c2 * r2 * (gbest_position - particles[i])
        velocities[i] = inertia + cognitive + social

        # Update position: sample each bit with probability sigmoid(velocity)
        sigmoid_values = sigmoid(velocities[i])
        particles[i] = (np.random.rand(n_features) < sigmoid_values).astype(int)

        # Calculate fitness of the new feature mask
        position = particles[i]
        selected_features = np.where(position == 1)[0]

        if len(selected_features) == 0:
            # No feature selected: nothing to train on, score the mask as 0.
            fitness = 0.0
        else:
            X_train_selected = X_train[:, selected_features]
            X_test_selected = X_test[:, selected_features]

            clf = DecisionTreeClassifier(random_state=42, max_depth=5)
            clf.fit(X_train_selected, y_train)
            y_pred = clf.predict(X_test_selected)
            fitness = accuracy_score(y_test, y_pred)

        # Update personal best (strict improvement only)
        if fitness > pbest_fitness[i]:
            pbest_fitness[i] = fitness
            pbest_positions[i] = position.copy()

        # Update global best (strict improvement only)
        if fitness > gbest_fitness:
            gbest_fitness = fitness
            gbest_position = position.copy()

        print(f"Particle {i+1}: {position.tolist()} fitness: {fitness:.2f}")



Iteration 1: global best remains: [1, 1, 0, 1, 0] (0.67)
Particles this iteration:
Particle 1: [0, 1, 0, 0, 1] fitness: 0.47
Particle 2: [1, 0, 0, 1, 1] fitness: 0.62
Particle 3: [0, 1, 0, 1, 0] fitness: 0.65

Iteration 2: global best remains: [1, 1, 0, 1, 0] (0.67)
Particles this iteration:
Particle 1: [1, 0, 0, 1, 1] fitness: 0.62
Particle 2: [1, 1, 1, 1, 0] fitness: 0.80
Particle 3: [1, 0, 1, 1, 0] fitness: 0.77

Iteration 3: global best remains: [1, 1, 1, 1, 0] (0.80)
Particles this iteration:
Particle 1: [1, 1, 1, 1, 0] fitness: 0.80
Particle 2: [0, 1, 1, 1, 0] fitness: 0.73
Particle 3: [1, 0, 1, 0, 0] fitness: 0.58

Iteration 4: global best remains: [1, 1, 1, 1, 0] (0.80)
Particles this iteration:
Particle 1: [1, 1, 1, 0, 0] fitness: 0.62
Particle 2: [1, 1, 0, 0, 1] fitness: 0.53
Particle 3: [1, 1, 1, 1, 1] fitness: 0.77

Iteration 5: global best remains: [1, 1, 1, 1, 0] (0.80)
Particles this iteration:
Particle 1: [0, 1, 1, 1, 0] fitness: 0.73
Particle 2: [1, 1, 1, 1, 0] fitness: 0.80

In [7]:
# FINAL RESULTS

# .tolist() converts the mask to plain Python ints; list(ndarray) would keep
# NumPy scalars, which are rendered as "np.int64(1)" under NumPy >= 2.
print(f"\nFinal global best after {max_iterations} iterations: {gbest_position.tolist()} ({gbest_fitness:.2f})")

# Report selected features with 1-based numbering.
selected = [j+1 for j in range(n_features) if gbest_position[j] == 1]
print(f"Selected features: {selected}")
print(f"Number of features selected: {int(np.sum(gbest_position))}/{n_features}")



Final global best after 10 iterations: [1, 1, 1, 1, 0] (0.80)
Selected features: [1, 2, 3, 4]
Number of features selected: 4/5


In [8]:
# COMPARISON WITH ALL FEATURES

banner = "=" * 70
print("\n" + banner)
print("COMPARISON")
print(banner)

# Baseline: the same tree configuration, trained on every feature column.
clf_all = DecisionTreeClassifier(random_state=42, max_depth=5).fit(X_train, y_train)
y_pred_all = clf_all.predict(X_test)
accuracy_all = accuracy_score(y_test, y_pred_all)

# Positive gain means the PSO-selected subset scored higher than the baseline.
accuracy_gain = gbest_fitness - accuracy_all
print(f"Accuracy with ALL features: {accuracy_all:.2f}")
print(f"Accuracy with SELECTED features: {gbest_fitness:.2f}")
print(f"Difference: {accuracy_gain:.2f}")


COMPARISON
Accuracy with ALL features: 0.77
Accuracy with SELECTED features: 0.80
Difference: 0.03
