In [1]:
pip install numpy scikit-learn matplotlib



In [2]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Generate a synthetic dataset
X, y = make_classification(n_samples=100, n_features=10, n_informative=5, n_redundant=5, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Firefly Algorithm Parameters
n_fireflies = 10
max_gen = 20

alpha = 0.5  # Randomness strength
absorption_coefficient = 1.0

# Initialize fireflies randomly
dim = X_train.shape[1]
population = np.random.rand(n_fireflies, dim) > 0.5

def evaluate_firefly(firefly):
    # Select features based on firefly's position
    features_to_use = firefly.astype(bool)
    if np.sum(features_to_use) == 0:
        return 0  # Can't use zero features
    X_train_subset = X_train[:, features_to_use]
    X_test_subset = X_test[:, features_to_use]

    # Train a simple model
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train_subset, y_train)
    predictions = model.predict(X_test_subset)
    return accuracy_score(y_test, predictions)

def move_firefly(fi, fj, beta0=1.0, gamma=1.0):
    """Move firefly i towards firefly j."""
    # Convert boolean arrays to integers
    fi_int = fi.astype(int)
    fj_int = fj.astype(int)

    r = np.linalg.norm(fi_int - fj_int)
    beta = beta0 * np.exp(-gamma * r ** 2)
    new_solution = fi_int + beta * (fj_int - fi_int) + alpha * (np.random.rand(dim) - 0.5)
    new_solution = np.clip(new_solution, 0, 1)
    return new_solution > 0.5

# Main loop of the Firefly Algorithm
for gen in range(max_gen):
    fitness = np.array([evaluate_firefly(ff) for ff in population])
    print(f"Generation {gen}, best fitness: {np.max(fitness)}")

    new_population = population.copy()
    for i in range(n_fireflies):
        for j in range(n_fireflies):
            if fitness[i] < fitness[j]:  # Move i towards j
                new_solution = move_firefly(population[i], population[j])
                new_fitness = evaluate_firefly(new_solution)
                if new_fitness > fitness[i]:
                    new_population[i] = new_solution
                    fitness[i] = new_fitness
    population = new_population

Generation 0, best fitness: 0.96
Generation 1, best fitness: 0.96
Generation 2, best fitness: 0.96
Generation 3, best fitness: 0.96
Generation 4, best fitness: 0.96
Generation 5, best fitness: 0.96
Generation 6, best fitness: 0.96
Generation 7, best fitness: 0.96
Generation 8, best fitness: 0.96
Generation 9, best fitness: 0.96
Generation 10, best fitness: 0.96
Generation 11, best fitness: 0.96
Generation 12, best fitness: 0.96
Generation 13, best fitness: 0.96
Generation 14, best fitness: 0.96
Generation 15, best fitness: 0.96
Generation 16, best fitness: 0.96
Generation 17, best fitness: 0.96
Generation 18, best fitness: 0.96
Generation 19, best fitness: 0.96


In [3]:
# Final evaluation
best_fitness_index = np.argmax([evaluate_firefly(ff) for ff in population])
best_firefly = population[best_fitness_index]
best_features_to_use = best_firefly.astype(bool)

# Training and evaluating the model with the selected features
X_train_best = X_train[:, best_features_to_use]
X_test_best = X_test[:, best_features_to_use]
best_model = LogisticRegression(max_iter=1000)
best_model.fit(X_train_best, y_train)
predictions_best = best_model.predict(X_test_best)
final_accuracy = accuracy_score(y_test, predictions_best)

print(f"Final Best Accuracy: {final_accuracy:.4f}")
print(f"Features Selected ({np.sum(best_features_to_use)} out of {dim}): {np.where(best_features_to_use)[0]}")

Final Best Accuracy: 0.9600
Features Selected (4 out of 10): [2 4 7 8]
