In [5]:
import numpy as np
import random
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

def fitness_feature_subset(mask, X_train, y_train, X_valid, y_valid):
    """Score a candidate feature subset; lower values are better.

    Args:
        mask: 1D numpy array of 0/1 flags, one per feature column.
        X_train: Training feature matrix, indexed as ``X[:, columns]``.
        y_train: Training labels.
        X_valid: Validation feature matrix with the same columns as X_train.
        y_valid: Validation labels.

    Returns:
        ``1.0`` when no feature is selected; otherwise the validation error
        ``1 - accuracy`` of a logistic regression fitted on the selected
        columns, plus ``0.01 * (selected fraction of features)``.
    """
    n_selected = np.sum(mask)
    # An empty subset cannot be fitted: report the worst error (100%).
    if n_selected == 0:
        return 1.0

    # Boolean column selector shared by the training and validation matrices.
    chosen = mask == 1

    # Simple classifier trained on the selected columns only.
    model = LogisticRegression(max_iter=1000, solver='liblinear')
    model.fit(X_train[:, chosen], y_train)
    accuracy = accuracy_score(y_valid, model.predict(X_valid[:, chosen]))

    # Fitness is minimised, so a small size penalty favours smaller subsets
    # among equally accurate ones.
    size_penalty = 0.01 * (n_selected / mask.shape[0])
    return (1.0 - accuracy) + size_penalty

def _gwo_move_pack(wolves_pos, leaders, a):
    """Return the next pack positions, pulled toward the given leaders.

    Args:
        wolves_pos: Array of shape (n_wolves, n_features) with current positions.
        leaders: Sequence of leader position vectors, each of length n_features.
        a: Scalar step coefficient; ``A`` is drawn uniformly from ``[-a, a]``.

    Returns:
        A new array with the same shape as ``wolves_pos``: the mean of the
        per-leader position estimates, clipped to ``[0, 1]``.
    """
    estimate_sum = np.zeros_like(wolves_pos)
    for leader_pos in leaders:
        # Independent random coefficients for every wolf and every dimension.
        r1 = np.random.rand(*wolves_pos.shape)
        r2 = np.random.rand(*wolves_pos.shape)
        A = 2 * a * r1 - a
        C = 2 * r2
        # leader_pos broadcasts across the wolf axis.
        D = np.abs(C * leader_pos - wolves_pos)
        estimate_sum += leader_pos - A * D
    return np.clip(estimate_sum / len(leaders), 0, 1)


def gwo_feature_selection(X_train, y_train, X_valid, y_valid,
                          n_wolves=20, max_iter=50):
    """Select a feature subset with a Grey Wolf Optimizer style search.

    Each wolf is a continuous position in ``[0, 1]^n_features``; thresholding
    at 0.5 turns it into a binary feature mask that is scored with
    ``fitness_feature_subset`` (lower is better). The three best positions
    seen so far (alpha, beta, delta) are kept across iterations and steer
    the pack. Progress is printed every 10 iterations and on the last one.

    All random numbers come from ``np.random``, so ``np.random.seed`` is
    sufficient to seed the search itself.

    Args:
        X_train: Training feature matrix; ``X_train.shape[1]`` is the number
            of features.
        y_train: Training labels.
        X_valid: Validation feature matrix.
        y_valid: Validation labels.
        n_wolves: Number of wolves in the pack (at least 1).
        max_iter: Number of evaluation rounds (at least 1).

    Returns:
        Tuple ``(best_mask, best_fitness)``: the 0/1 integer mask of the best
        position found and its fitness.

    Raises:
        ValueError: If ``n_wolves`` or ``max_iter`` is smaller than 1.
    """
    if n_wolves < 1:
        raise ValueError("n_wolves must be at least 1")
    if max_iter < 1:
        raise ValueError("max_iter must be at least 1")

    n_features = X_train.shape[1]
    # Initialize positions: continuous in [0,1], later thresholded to binary.
    wolves_pos = np.random.rand(n_wolves, n_features)

    # Best three positions found so far, with their fitness values.
    alpha_pos = beta_pos = delta_pos = None
    alpha_fit = beta_fit = delta_fit = float('inf')

    # Many positions threshold to the same mask; remember their scores so the
    # classifier is not refitted for a mask that was already evaluated.
    # NOTE(review): assumes fitness_feature_subset returns the same value for
    # the same mask on every call — confirm if the fitness function changes.
    fitness_cache = {}

    for it in range(max_iter):
        # Evaluate the pack and update the leaders.
        for i in range(n_wolves):
            # Convert to a binary mask (threshold at 0.5).
            mask = (wolves_pos[i] > 0.5).astype(int)
            key = mask.tobytes()
            fit = fitness_cache.get(key)
            if fit is None:
                fit = fitness_feature_subset(mask, X_train, y_train,
                                             X_valid, y_valid)
                fitness_cache[key] = fit

            # A new leader demotes the ones ranked below it.
            if fit < alpha_fit:
                delta_fit, delta_pos = beta_fit, beta_pos
                beta_fit, beta_pos = alpha_fit, alpha_pos
                alpha_fit, alpha_pos = fit, wolves_pos[i].copy()
            elif fit < beta_fit:
                delta_fit, delta_pos = beta_fit, beta_pos
                beta_fit, beta_pos = fit, wolves_pos[i].copy()
            elif fit < delta_fit:
                delta_fit, delta_pos = fit, wolves_pos[i].copy()

        # Positions produced after the final evaluation would never be scored
        # or returned, so the last round skips the move. This also keeps the
        # schedule below well defined for max_iter == 1 (no division by zero).
        if it < max_iter - 1:
            # Step coefficient on the linear 2 -> 0 schedule over max_iter rounds.
            a = 2 * (1 - it / (max_iter - 1))
            # With fewer than three wolves evaluated so far, lower-ranked
            # leaders may not exist yet; reuse the next better one.
            second = beta_pos if beta_pos is not None else alpha_pos
            third = delta_pos if delta_pos is not None else second
            wolves_pos = _gwo_move_pack(wolves_pos,
                                        (alpha_pos, second, third), a)

        # Print progress.
        if it % 10 == 0 or it == max_iter - 1:
            print(f"Iter {it} | alpha_fit = {alpha_fit:.4f}")

    # After finishing, return the best mask and its fitness.
    best_mask = (alpha_pos > 0.5).astype(int)
    return best_mask, alpha_fit

if __name__ == "__main__":
    # Load the breast-cancer dataset as a feature matrix and a label vector.
    data = load_breast_cancer()
    X = data.data
    y = data.target

    # Hold out 30% of the samples for validation; the split itself is seeded.
    Xtr, Xval, ytr, yval = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # Search for a feature subset with 100 wolves over 100 iterations.
    mask, fit = gwo_feature_selection(
        Xtr, ytr, Xval, yval, n_wolves=100, max_iter=100
    )

    # Report the best fitness and which feature columns were kept.
    selected = np.flatnonzero(mask == 1)
    print("Best fitness:", fit)
    print("Number of selected features:", mask.sum())
    print("Selected feature indices:", selected)


Iter 0 | alpha_fit = 0.0164
Iter 10 | alpha_fit = 0.0075
Iter 20 | alpha_fit = 0.0075
Iter 30 | alpha_fit = 0.0075
Iter 40 | alpha_fit = 0.0075
Iter 50 | alpha_fit = 0.0075
Iter 60 | alpha_fit = 0.0075
Iter 70 | alpha_fit = 0.0075
Iter 80 | alpha_fit = 0.0075
Iter 90 | alpha_fit = 0.0075
Iter 99 | alpha_fit = 0.0075
Best fitness: 0.007514619883040991
Number of selected features: 5
Selected feature indices: [ 0  2  3 11 23]
