In [3]:
import numpy as np
from tqdm import tqdm
from joblib import Parallel, delayed
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.datasets import load_digits

classifiers = {
    'KNN': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB(),
    'SVC': SVC()
}
def fitness(selected_features, classifier_name='KNN'):
    selected_features = np.array(selected_features, dtype=bool)
    if np.sum(selected_features) == 0:
        return 1  
    X_train_selected = X_train[:, selected_features]
    X_test_selected = X_test[:, selected_features]
    if X_train_selected.shape[1] == 0:
        return 1 
    classifier = classifiers.get(classifier_name, KNeighborsClassifier())  
    classifier.fit(X_train_selected, Y_train)
    preds = classifier.predict(X_test_selected)
    accuracy = accuracy_score(Y_test, preds)
    f1 = f1_score(Y_test, preds, average='weighted')
    return 1 - accuracy  
def PSO(fitness, classifier_name='KNN', n_particles=30, n_dimension=2, n_iterations=100, bounds=None, w=0.8, c1=0.6, c2=0.3):
    """"
    PSO para otimização de uma função multidimensional.

    Parâmetros:
    - fitness_function: Função a ser minimizada.
    - n_particles: Número de partículas no enxame.
    - n_dimension: Número de dimensões do problema.
    - n_iterations: Número de iterações.
    - bounds: Limites (mínimo, máximo) para cada dimensão como uma lista de tuplas [(min1, max1), ..., (minD, maxD)].
    - w: Fator de inércia.
    - c1, c2: Coeficientes de aceleração.

    Retorna:
    - global_best: Melhor solução encontrada (minimização).
    - global_best_fitness: Valor da função objetivo na melhor solução (minimização).
    """
    # Inicializar partículas e suas velocidades
    if bounds is None:
        bounds = [(-10, 10)] * n_dimension
    
    # particles: cada linha i representa uma partícula de dimensão n_dimension
    particles = np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds], (n_particles, n_dimension))
    velocities = np.zeros_like(particles)
    personal_best = particles.copy()
    personal_best_fitness = np.apply_along_axis(fitness, 1, personal_best, classifier_name)
    global_best = particles[np.argmin(personal_best_fitness)]
    global_best_fitness = np.min(personal_best_fitness)
    
    # Loop principal
    #for t in range(n_iterations):
    for t in tqdm(range(n_iterations), desc="Iterações"):

        r1, r2 = np.random.rand(n_particles, n_dimension), np.random.rand(n_particles, n_dimension)

        # Atualizar a velocidade da partícula
        velocities = w * velocities + c1 * r1 * (personal_best - particles) + c2 * r2 * (global_best - particles)
        
        # Atualizar a posição da partícula
        particles += velocities
        
        # Respeitar os limites de cada dimensão
        particles = np.clip(particles, [b[0] for b in bounds], [b[1] for b in bounds])

        fitness_values = Parallel(n_jobs=-1)(delayed(fitness)(p, classifier_name) for p in particles)

        for i in range(n_particles):
            current_fitness = fitness_values[i]
            if current_fitness < personal_best_fitness[i]:
                personal_best[i] = particles[i]
                personal_best_fitness[i] = current_fitness

            if current_fitness < global_best_fitness:
                global_best = particles[i]
                global_best_fitness = current_fitness

    return global_best, global_best_fitness

digits = load_digits()
X = digits.data
Y = digits.target
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
n_particles = 50
n_dimension = X_train.shape[1]
n_iterations = 100

for classifier_name in classifiers.keys():
    print(f"Executando PSO para classificador: {classifier_name}")
    
    best_solution, best_fitness = PSO(fitness, classifier_name, n_particles, n_dimension, n_iterations)
    num_features_selected = np.sum(best_solution)
    print(f"Melhor solução (seleção de características): {best_solution}")
    print(f"Fitness da melhor solução: {best_fitness}")
    print(f"Número de características selecionadas: {num_features_selected}")
    X_train_selected = X_train[:, best_solution.astype(bool)]
    X_test_selected = X_test[:, best_solution.astype(bool)]
    classifier = classifiers[classifier_name]
    classifier.fit(X_train_selected, Y_train)
    preds = classifier.predict(X_test_selected)
    accuracy = accuracy_score(Y_test, preds)
    f1 = f1_score(Y_test, preds, average='weighted')

    print(f"Acurácia final: {accuracy}")
    print(f"F1-Score final: {f1}")
    print('-' * 40)


Executando PSO para classificador: KNN


Iterações: 100%|██████████| 100/100 [00:25<00:00,  3.94it/s]


Melhor solução (seleção de características): [ 2.24466424e+00  1.03823800e+00  4.49995045e+00 -2.98127050e+00
  6.96008715e+00 -9.36181050e+00  5.10341545e+00 -9.02501351e+00
  6.25876236e+00 -9.86649083e+00 -9.89065772e+00 -4.06121201e+00
 -1.56067945e+00 -5.98586487e+00 -6.89800317e+00  4.29046921e+00
 -4.57199165e+00 -9.65960542e+00 -2.58874573e+00 -9.76480964e+00
 -2.03849451e+00  9.17249139e+00  3.70247448e-01 -1.94547318e+00
 -3.40295923e+00  2.60143831e+00  2.14786766e+00 -7.98494094e+00
  5.21102181e+00 -9.69240030e+00 -2.37930362e+00  3.79950978e+00
 -4.13711222e+00 -9.33840562e+00  9.24468633e+00 -1.09065083e+00
  3.05569086e+00 -9.60698468e+00 -9.72185245e+00  9.44462536e+00
  6.92911913e+00 -9.51589459e+00 -5.47353034e+00  2.55482676e+00
  3.19572652e+00  5.80873781e+00 -8.29482371e+00  9.53287384e+00
  4.09956888e+00  1.87089680e+00 -3.41027034e+00 -4.35999766e-01
 -9.14643541e+00 -9.27049633e+00  8.84044703e+00 -6.62307080e+00
 -3.97507431e+00 -1.66707580e-01 -6.58325812e

Iterações: 100%|██████████| 100/100 [00:16<00:00,  6.06it/s]


Melhor solução (seleção de características): [-0.78377146  6.88674908 -7.5944841  -6.86933971  5.14411129 -2.11092685
  9.72308052  3.00430641 -2.51875442 -0.51480254  1.77490623  8.19414802
 -8.26140202 -3.39954728  5.62105538  2.84840829 -9.77398637  3.02551072
  3.23535874 -8.5582371   7.87404196  2.07135488 -8.47372804  0.14187634
  6.96200596 -5.4197771  -4.89988697  3.0149884  -5.86661314  8.15813498
  6.91931643 -1.66661712  1.95343973  9.41147131 -3.02114694  9.81931803
 -7.25903906  9.54754446 -4.0218263  -1.12713805  1.16684963 -7.28473544
 -9.74509221 -1.12242169  6.27312538 -3.64553942 -5.88186327  1.36768751
 -8.87607626 -0.32679761  4.00434507 -2.06733924  3.97941853  5.77244485
  6.60360827  2.43896708 -8.01002854 -1.11807298  8.28322346 -0.83877044
  2.92329198 -6.76557071  0.38403036 -5.90326315]
Fitness da melhor solução: 0.14814814814814814
Número de características selecionadas: 4.801523771827593
Acurácia final: 0.8518518518518519
F1-Score final: 0.8541108658074065


Iterações: 100%|██████████| 100/100 [00:50<00:00,  1.98it/s]

Melhor solução (seleção de características): [-7.36290932 -3.44236775 -6.74570503 -7.84126361 -2.40842312  7.53083714
  8.90274302  2.99983009  9.50129167 -3.03994871  3.59768519 -1.04817294
 -3.70946714 -2.02050317 -5.29592374 -4.93752176 -2.87369639  5.89298128
  7.19970231  3.12233293 -1.09434401  4.45813197  1.06576429 -5.41376423
  0.41382195 -9.2838745   5.81918009 -6.34290854 -9.49782812  5.07882427
 -3.33017403 -7.188055   -3.48037158  6.1481185  -3.103846   -3.89846696
 -7.88895379  1.97809627  6.3691166  -9.9541451   3.12911251  9.17787986
  2.72764792  5.4679014  -7.28123722  9.79052078  4.64810568  1.21662072
  2.23049961 -1.64142166  6.37104781 -1.20077882 -9.15622743 -1.12484059
 -1.40587825  4.77659163  3.92609399 -3.84575531 -4.596822    5.34487145
 -3.03458986 -9.44505789  6.51522592 -1.85311033]
Fitness da melhor solução: 0.012962962962962954
Número de características selecionadas: -20.387777084379067
Acurácia final: 0.987037037037037
F1-Score final: 0.987030981409714


