In [None]:
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.ensemble import RandomForestClassifier

# Utility Functions

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

# Directory holding the fused feature/label arrays (presumably a mounted Google Drive on Colab).
DATA_DIR = '/content/drive/MyDrive/Datasets/UDIAT/Fused'
# Single seed shared by the split, the classifier and numpy's global RNG.
SEED = 42

# The GWO / WOA / ABC / BOA sections below draw from numpy's global RNG;
# seeding it here makes a Restart & Run All reproducible.
np.random.seed(SEED)

# Load the full data set (it is split into train/test just below)
features = np.load(f'{DATA_DIR}/features.npy')
labels = np.load(f'{DATA_DIR}/labels.npy')

# Fail early with a clear message instead of an opaque error inside train_test_split.
if features.shape[0] != labels.shape[0]:
    raise ValueError(
        "features and labels disagree on the number of samples: "
        f"{features.shape[0]} vs {labels.shape[0]}"
    )

# Hold out 30% of the samples as the test set.
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.3, random_state=SEED)

# Base classifier reused by the GA, PSO and RFE sections.
clf = RandomForestClassifier(n_estimators=200, random_state=SEED)

In [None]:
# Plot Confusion Matrix
from sklearn.metrics import confusion_matrix
def plot_confusion_matrix(y_true, y_pred, method_name):
    """Draw, save and show the benign/malignant confusion matrix.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, expected to be 0 (benign) / 1 (malignant).
    y_pred : array-like
        Predicted labels or scores; values are rounded to the nearest
        integer before the matrix is built.
    method_name : str
        Prefix of the output file; the figure is written to
        ``method_name + "Confusion_Matrix.png"`` in the working directory.
    """
    # np.asarray lets plain Python lists through; rounding turns scores into 0/1 labels.
    y_pred_binary = np.asarray(y_pred).round()

    # Pin the label order so the matrix is always 2x2, even when one class is
    # missing from y_true/y_pred (otherwise the cell loop below would index
    # past a 1x1 matrix).
    cm = confusion_matrix(y_true, y_pred_binary, labels=[0, 1])

    # Start a fresh figure so the heatmap is not drawn over whatever figure is current.
    plt.figure()
    plt.imshow(cm, cmap=plt.cm.Blues)
    plt.ylabel('True label', fontsize=14)
    plt.xlabel('Predicted label', fontsize=14)
    plt.xticks([0, 1], ['Benign', 'Malignant'], fontsize=14)
    plt.yticks([0, 1], ['Benign', 'Malignant'], fontsize=14)
    plt.colorbar()

    # Write the counts inside the cells; use white text on the darker half of
    # the colour scale so the numbers stay readable.
    dark_threshold = cm.max() / 2.0
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            plt.text(j, i, cm[i, j],
                     fontsize=20,
                     horizontalalignment='center',
                     verticalalignment='center',
                     color='white' if cm[i, j] > dark_threshold else 'black')

    # Save before show(): show() may clear the figure depending on the backend.
    name = method_name + "Confusion_Matrix.png"
    plt.savefig(name, dpi=300)
    plt.show()

In [None]:
# Plot ROC Curve
from sklearn.metrics import roc_curve, auc
def plot_roc_curve(y_true, y_pred, method_name):
    """Plot the ROC curve for ``y_pred`` against ``y_true``, save it and show it.

    The area under the curve is printed in the legend and the figure is
    written to ``method_name + "ROC_Curve.png"`` in the working directory.
    """
    # Operating points of the curve; the thresholds themselves are not needed.
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_pred)
    area = auc(false_pos_rate, true_pos_rate)

    fig, ax = plt.subplots()
    ax.plot(false_pos_rate, true_pos_rate, color='darkorange', lw=2,
            label='ROC curve (area = %0.4f)' % area)
    # Diagonal reference line (chance level).
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    ax.set_xlim([-0.01, 1.0])
    ax.set_ylim([0, 1.05])
    ax.set_xlabel('False Positive Rate', fontsize=14)
    ax.set_ylabel('True Positive Rate', fontsize=14)
    ax.set_title('Receiver operating characteristic', fontsize=14)
    ax.legend(loc="lower right", fontsize=14)

    fig.savefig(method_name + "ROC_Curve.png", dpi=300)
    plt.show()

# Feature Selection With GA

In [None]:
pip install sklearn-genetic-opt

## Train GA

In [None]:
from sklearn_genetic import GAFeatureSelectionCV
from sklearn_genetic.plots import plot_fitness_evolution

# Prefix used for the figure file names of this section.
method_name = "GA"

# Settings of the genetic feature-selection search, gathered in one place
# so they are easy to read and tweak.
ga_search_settings = {
    "estimator": clf,
    "cv": 5,
    "scoring": "f1",
    "population_size": 20,
    "generations": 10,
    "n_jobs": -1,
    "verbose": True,
    "keep_top_k": 2,
    "elitism": True,
}
evolved_estimator = GAFeatureSelectionCV(**ga_search_settings)

In [None]:
# Fit GA on Training Features
start = datetime.now()
evolved_estimator.fit(train_features, train_labels)
stop = datetime.now()

In [None]:
# Print the Training Time

GA_training_time = stop - start
print('GA training time is :', GA_training_time)

In [None]:
# Get the best selected indices
selected_feature_indices = evolved_estimator.best_features_

## Evaluate on Test Data

In [None]:
selected_train_features = train_features[:, selected_feature_indices]
selected_test_features = test_features[:, selected_feature_indices]

In [None]:
# Check the shape of the feature arrays
print("Shape of train_features:", selected_train_features.shape)
print("Shape of test_features:", selected_test_features.shape)

In [None]:
clf.fit(selected_train_features, train_labels)

predictions = clf.predict(selected_test_features)

## Plot Results

In [None]:
# Calculate scores
y_pred = predictions
# Probability of the positive class (second column of predict_proba). ROC AUC
# measures how well scores rank the samples, so it needs scores, not the
# already-thresholded 0/1 predictions.
y_score = clf.predict_proba(selected_test_features)[:, 1]

accuracy = accuracy_score(test_labels, y_pred)
# Sensitivity is the recall of the positive class (label 1, the default pos_label).
sensitivity = recall_score(test_labels, y_pred)
# Specificity is the recall of the negative class (label 0).
specificity = recall_score(test_labels, y_pred, pos_label=0)
f1 = f1_score(test_labels, y_pred)
roc = roc_auc_score(test_labels, y_score)

In [None]:
# Print the results
print("scores")
print("==================================================")
print("Accuracy score: %.4f" % (accuracy))
print("Sensitivity score: %.4f" % (sensitivity))
print("Specificity score: %.4f" % (specificity))
print("F1 score: %.4f" % (f1))
print("roc_auc score: %.4f" % (roc))
print("==================================================")

In [None]:
plot_confusion_matrix(test_labels, y_pred, method_name)

In [None]:
# Draw the ROC curve from positive-class probabilities; hard 0/1 predictions
# would collapse the curve to a single operating point.
plot_roc_curve(test_labels, clf.predict_proba(selected_test_features)[:, 1], method_name)

# Feature Selection With PSO

In [None]:
pip install pyswarms

## Train PSO

In [None]:
import pyswarms as ps
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
method_name = "PSO"

In [None]:
# Define a fitness function to be minimized
def _particle_cost(position):
    """Return 1 - F1 for the feature subset encoded by a single particle position."""
    # A feature is selected when its coordinate is above 0.5.
    selected_indices = np.where(np.asarray(position) > 0.5)[0]

    # No feature selected: nothing can be trained, so report the worst cost.
    if selected_indices.size == 0:
        return 1.0

    # Inner hold-out split of the training data; the outer test set is never touched.
    X_train, X_test, y_train, y_test = train_test_split(
        train_features[:, selected_indices], train_labels, test_size=0.2, random_state=42
    )

    classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # PSO minimizes, so a higher F1 must map to a lower cost.
    return 1 - f1_score(y_test, y_pred)


# Define a fitness function to be minimized
def fitness_function(features):
    """Cost function for the PSO feature search (lower is better).

    Parameters
    ----------
    features : array-like
        Either one particle position of shape ``(n_features,)`` or a whole
        swarm of shape ``(n_particles, n_features)``. pyswarms' ``optimize``
        passes the whole swarm and expects one cost per particle.

    Returns
    -------
    float or numpy.ndarray
        A scalar cost for a 1-D position, otherwise an array of shape
        ``(n_particles,)`` with one cost per row. Each cost is ``1 - F1`` of a
        random forest trained on the columns whose coordinate exceeds 0.5.

    Notes
    -----
    One forest is trained per particle per call, so the run time grows with
    the swarm size.
    """
    positions = np.asarray(features)

    # Single particle: keep returning a plain scalar.
    if positions.ndim == 1:
        return _particle_cost(positions)

    # Whole swarm: each row is scored on its own feature subset.
    return np.array([_particle_cost(position) for position in positions])

In [None]:
# Define the number of features in your dataset
num_features = train_features.shape[1]

In [None]:
# Initialize the PSO optimizer
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
optimizer = ps.single.GlobalBestPSO(n_particles=20, dimensions=num_features, options=options)

In [None]:
# Perform feature selection using PSO
start = datetime.now()
cost, best_solution= optimizer.optimize(fitness_function, iters=100)
stop = datetime.now()

In [None]:
# Print the Training Time

PSO_training_time = stop - start
print('PSO training time is :', PSO_training_time)

In [None]:
# Get the selected feature indices from the best solution

# Convert the selected features to binary values (0 or 1)
selected_features = (best_solution> 0.5).astype(int)

# Get the indices of selected features
selected_feature_indices = np.where(selected_features)[0]

In [None]:
print("Selected Feature Indices:", selected_feature_indices)
print("Shape of selected_feature_indices:", selected_feature_indices.shape)

## Evaluate on Test Data

In [None]:
selected_train_features = train_features[:, selected_feature_indices]
selected_test_features = test_features[:, selected_feature_indices]

In [None]:
# Check the shape of the feature arrays
print("Shape of train_features:", train_features.shape)
print("Shape of train labels:", train_labels.shape)

In [None]:
# Check the shape of the feature arrays
print("Shape of train_features:", selected_train_features.shape)
print("Shape of test_features:", selected_test_features.shape)

In [None]:
clf.fit(selected_train_features, train_labels)

predictions = clf.predict(selected_test_features)

## Plot Results

In [None]:
# Calculate scores
y_pred = predictions
# Probability of the positive class (second column of predict_proba). ROC AUC
# measures how well scores rank the samples, so it needs scores, not the
# already-thresholded 0/1 predictions.
y_score = clf.predict_proba(selected_test_features)[:, 1]

accuracy = accuracy_score(test_labels, y_pred)
# Sensitivity is the recall of the positive class (label 1, the default pos_label).
sensitivity = recall_score(test_labels, y_pred)
# Specificity is the recall of the negative class (label 0).
specificity = recall_score(test_labels, y_pred, pos_label=0)
f1 = f1_score(test_labels, y_pred)
roc = roc_auc_score(test_labels, y_score)

In [None]:
# Print the results
print("scores")
print("==================================================")
print("Accuracy score: %.4f" % (accuracy))
print("Sensitivity score: %.4f" % (sensitivity))
print("Specificity score: %.4f" % (specificity))
print("F1 score: %.4f" % (f1))
print("roc_auc score: %.4f" % (roc))
print("==================================================")

In [None]:
plot_confusion_matrix(test_labels, y_pred, method_name)

In [None]:
# Draw the ROC curve from positive-class probabilities; hard 0/1 predictions
# would collapse the curve to a single operating point.
plot_roc_curve(test_labels, clf.predict_proba(selected_test_features)[:, 1], method_name)

# Feature Selection With GWO

## Define the GWO algorithm functions:

In [None]:
def initialize_population(num_wolves, num_features):
    """Return a random 0/1 integer matrix with one row per wolf and one column per feature."""
    pack_shape = (num_wolves, num_features)
    # high=2 is exclusive, so every entry is either 0 or 1.
    return np.random.randint(low=0, high=2, size=pack_shape)

In [None]:
def fitness(wolf, train_features, train_labels, test_features, test_labels, classifier):
    """Score one wolf: accuracy of ``classifier`` restricted to the wolf's features.

    Parameters
    ----------
    wolf : array-like of shape (n_features,)
        Feature mask; every non-zero entry marks a selected column.
    train_features, train_labels
        Data the classifier is fitted on.
    test_features, test_labels
        Data the fitted classifier is scored on.
    classifier
        Estimator exposing ``fit`` and ``score``; it is refitted in place.

    Returns
    -------
    float
        The value of ``classifier.score`` on the selected columns, or ``0.0``
        when the mask selects no feature at all.
    """
    mask = np.asarray(wolf).astype(bool)

    # An empty selection gives the classifier zero columns to fit; treat it
    # as the worst possible wolf instead of raising.
    if not mask.any():
        return 0.0

    fitted = classifier.fit(train_features[:, mask], train_labels)
    return fitted.score(test_features[:, mask], test_labels)

In [None]:
def get_alpha_beta_delta_wolves(accuracy_values):
    """Return the indices of the three best-scoring wolves, best first.

    Raises IndexError when fewer than three scores are supplied.
    """
    # argsort is ascending, so reversing it ranks the wolves from best to worst.
    ranking = np.argsort(accuracy_values)[::-1]
    return ranking[0], ranking[1], ranking[2]

In [None]:
def update_wolves_positions(alpha, beta, delta, wolves_positions, a=2):
    """Move every wolf towards the alpha, beta and delta wolves (one GWO step).

    Parameters
    ----------
    alpha, beta, delta : int
        Row indices of the three leading wolves in ``wolves_positions``.
    wolves_positions : array-like of shape (n_wolves, n_features)
        Current positions; the input is not modified.
    a : float, default 2
        Exploration coefficient. The step coefficient ``A`` is drawn from
        ``[-a, a)``; with ``a = 0`` every wolf lands on the mean of the three
        leaders.

    Returns
    -------
    numpy.ndarray of shape (n_wolves, n_features)
        New continuous (float) positions: the average of the three
        leader-guided candidate positions.
    """
    positions = np.asarray(wolves_positions, dtype=float)

    leader_guided = []
    for leader_index in (alpha, beta, delta):
        leader = positions[leader_index, :]
        # Fresh coefficients for every wolf and every dimension, so wolves do
        # not all take the same step.
        A = 2 * a * np.random.rand(*positions.shape) - a
        C = 2 * np.random.rand(*positions.shape)
        distance = np.abs(C * leader - positions)
        leader_guided.append(leader - A * distance)

    return (leader_guided[0] + leader_guided[1] + leader_guided[2]) / 3

In [None]:
def _gwo_binarize(positions, threshold=0.5):
    """Threshold positions into a 0/1 integer mask with at least one feature per row."""
    positions = np.asarray(positions, dtype=float)
    mask = (positions > threshold).astype(int)
    empty_rows = np.flatnonzero(mask.sum(axis=1) == 0)
    if empty_rows.size:
        # A wolf with no feature cannot be fitted; switch on the feature
        # whose continuous value is largest.
        mask[empty_rows, np.argmax(positions[empty_rows], axis=1)] = 1
    return mask


def grey_wolf_optimization(train_features, train_labels, test_features, test_labels, num_wolves=30, num_iterations=100, classifier=None):
    """Select features with a binary Grey Wolf Optimizer and predict with them.

    Parameters
    ----------
    train_features, train_labels
        Data every candidate subset is fitted on.
    test_features, test_labels
        Data every candidate subset is scored on, and on which the final
        predictions are made. NOTE(review): when the held-out test set is
        passed here, the selection has seen the test labels, so the final
        score is optimistic; consider passing a validation split instead.
    num_wolves : int, default 30
        Pack size (at least 3, because three leaders are needed).
    num_iterations : int, default 100
        Number of evaluate-and-move rounds (at least 1).
    classifier : estimator, optional
        Defaults to ``RandomForestClassifier(n_estimators=100, random_state=42)``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Indices of the selected features and the predictions of the
        classifier refitted on them for ``test_features``.

    Raises
    ------
    ValueError
        If ``num_iterations`` is smaller than 1.
    """
    if num_iterations < 1:
        raise ValueError("num_iterations must be at least 1")

    num_features = train_features.shape[1]
    # Guarantee that no wolf starts with an empty feature set.
    wolves_positions = _gwo_binarize(initialize_population(num_wolves, num_features))

    if classifier is None:
        classifier = RandomForestClassifier(n_estimators=100, random_state=42)

    best_accuracy = -np.inf
    best_position = None

    for iteration in range(num_iterations):
        accuracy_values = np.zeros(num_wolves)
        for i in range(num_wolves):
            accuracy_values[i] = fitness(wolves_positions[i], train_features, train_labels, test_features, test_labels, classifier)

        alpha, beta, delta = get_alpha_beta_delta_wolves(accuracy_values)

        # Remember the best mask ever evaluated. The positions are moved
        # right after this, so the scores no longer describe them.
        if accuracy_values[alpha] > best_accuracy:
            best_accuracy = accuracy_values[alpha]
            best_position = wolves_positions[alpha].copy()

        # Shrink the exploration coefficient linearly from 2 towards 0.
        a = 2 * (1 - iteration / num_iterations)
        moved = update_wolves_positions(alpha, beta, delta, wolves_positions, a=a)

        # The update returns continuous values; turn them back into 0/1 masks
        # so that "selected" keeps meaning "coordinate switched on".
        wolves_positions = _gwo_binarize(moved)

    selected_feature_indices = np.where(best_position.astype(bool))[0]
    selected_train_features = train_features[:, selected_feature_indices]
    selected_test_features = test_features[:, selected_feature_indices]

    clf = classifier.fit(selected_train_features, train_labels)
    predictions = clf.predict(selected_test_features)

    return selected_feature_indices, predictions


## Train the GWO

In [None]:
start = datetime.now()
selected_feature_indices, predictions = grey_wolf_optimization(train_features, train_labels, test_features, test_labels)
stop = datetime.now()

In [None]:
# Print the Training Time
method_name = "GWO"
GWO_training_time = stop - start
print('GWO training time is :', GWO_training_time)

In [None]:
num_total_features = train_features.shape[1]
num_selected_features = len(selected_feature_indices)
print("Total input features:", num_total_features)
print("Number of selected features:", num_selected_features)

## Plot Results

In [None]:
# Calculate scores
y_pred = predictions
accuracy = accuracy_score(test_labels, y_pred)
# Sensitivity is the recall of the positive class (label 1, the default pos_label).
sensitivity = recall_score(test_labels, y_pred)
# Specificity is the recall of the negative class (label 0).
specificity = recall_score(test_labels, y_pred, pos_label=0)
f1 = f1_score(test_labels, y_pred)
# NOTE(review): y_pred holds hard class labels (the optimizer returns only
# clf.predict output), so this AUC describes a single operating point rather
# than a ranking; return predict_proba scores from the optimizer if a
# threshold-free AUC is required.
roc = roc_auc_score(test_labels, y_pred)

In [None]:
# Print the results
print("scores")
print("==================================================")
print("Accuracy score: %.4f" % (accuracy))
print("Sensitivity score: %.4f" % (sensitivity))
print("Specificity score: %.4f" % (specificity))
print("F1 score: %.4f" % (f1))
print("roc_auc score: %.4f" % (roc))
print("==================================================")

In [None]:
plot_confusion_matrix(test_labels, y_pred, method_name)

In [None]:
plot_roc_curve(test_labels, y_pred, method_name)

# Feature Selection With WOA

## Define the WOA algorithm functions:

In [None]:
def initialize_population(num_whales, num_features):
    """Create the starting pod: a random 0/1 integer matrix, one row per whale."""
    pod_shape = (num_whales, num_features)
    # Entries are drawn from {0, 1}; the upper bound 2 is exclusive.
    return np.random.randint(low=0, high=2, size=pod_shape)

In [None]:
def fitness(whale, train_features, train_labels, test_features, test_labels, classifier):
    """Score one whale: accuracy of ``classifier`` restricted to the whale's features.

    Parameters
    ----------
    whale : array-like of shape (n_features,)
        Feature mask; every non-zero entry marks a selected column.
    train_features, train_labels
        Data the classifier is fitted on.
    test_features, test_labels
        Data the fitted classifier is scored on.
    classifier
        Estimator exposing ``fit`` and ``score``; it is refitted in place.

    Returns
    -------
    float
        The value of ``classifier.score`` on the selected columns, or ``0.0``
        when the mask selects no feature at all.
    """
    mask = np.asarray(whale).astype(bool)

    # With zero selected columns the fit would raise; rank such a whale last instead.
    if not mask.any():
        return 0.0

    fitted = classifier.fit(train_features[:, mask], train_labels)
    return fitted.score(test_features[:, mask], test_labels)

In [None]:
def update_whale_positions(whales_positions, a=2, num_iterations=100):
    """Run ``num_iterations`` update sweeps over a pod of binary whale positions.

    The whale in row 0 is treated as the leader. In every sweep each whale
    either takes a leader-guided step (the continuous result is thresholded
    at 0.5 back to a 0/1 value) or copies the position of a randomly chosen
    whale.

    Parameters
    ----------
    whales_positions : array-like of shape (n_whales, n_features)
        Current positions; the input array is not modified.
    a : float, default 2
        Initial exploration coefficient. It shrinks linearly towards 0 over
        the sweeps of this call.
    num_iterations : int, default 100
        Number of sweeps over the whole pod. No fitness is evaluated between
        sweeps.

    Returns
    -------
    numpy.ndarray
        Updated positions with the same shape and dtype as the input.
    """
    original = np.asarray(whales_positions)
    # Work on a float copy: writing fractional values straight into an
    # integer array would silently truncate them.
    positions = original.astype(float)
    num_whales = positions.shape[0]

    for sweep in range(num_iterations):
        # Linearly decreasing coefficient (equals 2 - sweep * 2 / num_iterations when a == 2).
        a_linear = a * (1 - sweep / num_iterations)
        for i in range(num_whales):
            A = 2 * a_linear * np.random.random() - a_linear
            A_linear = 2 * np.random.random()
            leader = positions[0, :]
            distance_to_leader = np.abs(2 * A * positions[i, :] - leader)

            if A_linear < 1:
                # Leader-guided move, then back to a 0/1 mask.
                candidate = (leader - A * distance_to_leader + A_linear * distance_to_leader) / 2
                positions[i, :] = candidate > 0.5
            else:
                # Exploration: adopt the position of a random whale.
                random_whale_index = np.random.randint(0, num_whales)
                positions[i, :] = positions[random_whale_index, :]

    return positions.astype(original.dtype)

In [None]:
def _woa_binarize(positions, threshold=0.5):
    """Threshold positions into a 0/1 integer mask with at least one feature per row."""
    positions = np.asarray(positions, dtype=float)
    mask = (positions > threshold).astype(int)
    empty_rows = np.flatnonzero(mask.sum(axis=1) == 0)
    if empty_rows.size:
        # A whale with no feature cannot be fitted; switch on the feature
        # whose value is largest.
        mask[empty_rows, np.argmax(positions[empty_rows], axis=1)] = 1
    return mask


def whale_optimization(train_features, train_labels, test_features, test_labels, num_whales=30, num_iterations=100, classifier=None):
    """Select features with a binary Whale Optimization search and predict with them.

    Parameters
    ----------
    train_features, train_labels
        Data every candidate subset is fitted on.
    test_features, test_labels
        Data every candidate subset is scored on, and on which the final
        predictions are made. NOTE(review): when the held-out test set is
        passed here, the selection has seen the test labels, so the final
        score is optimistic; consider passing a validation split instead.
    num_whales : int, default 30
        Pod size.
    num_iterations : int, default 100
        Number of evaluate-and-move rounds (at least 1).
    classifier : estimator, optional
        Defaults to ``RandomForestClassifier(n_estimators=100, random_state=42)``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Indices of the selected features and the predictions of the
        classifier refitted on them for ``test_features``.

    Raises
    ------
    ValueError
        If ``num_iterations`` is smaller than 1.
    """
    if num_iterations < 1:
        raise ValueError("num_iterations must be at least 1")

    num_features = train_features.shape[1]
    # Guarantee that no whale starts with an empty feature set.
    whales_positions = _woa_binarize(initialize_population(num_whales, num_features))

    if classifier is None:
        classifier = RandomForestClassifier(n_estimators=100, random_state=42)

    best_accuracy = -np.inf
    best_position = None

    for iteration in range(num_iterations):
        accuracy_values = np.zeros(num_whales)
        for i in range(num_whales):
            accuracy_values[i] = fitness(whales_positions[i], train_features, train_labels, test_features, test_labels, classifier)

        leader_index = np.argmax(accuracy_values)

        # Remember the best mask ever evaluated. The positions are moved
        # right after this, so the scores no longer describe them.
        if accuracy_values[leader_index] > best_accuracy:
            best_accuracy = accuracy_values[leader_index]
            best_position = whales_positions[leader_index].copy()

        # update_whale_positions treats row 0 as the leader, so put the
        # current best whale there.
        whales_positions[[0, leader_index]] = whales_positions[[leader_index, 0]]

        # One sweep per evaluation round, with the exploration coefficient
        # shrinking linearly from 2 towards 0 across rounds.
        a = 2 * (1 - iteration / num_iterations)
        moved = update_whale_positions(whales_positions, a=a, num_iterations=1)

        # Keep the pod a valid set of non-empty 0/1 masks.
        whales_positions = _woa_binarize(moved)

    selected_feature_indices = np.where(best_position.astype(bool))[0]
    selected_train_features = train_features[:, selected_feature_indices]
    selected_test_features = test_features[:, selected_feature_indices]

    clf = classifier.fit(selected_train_features, train_labels)
    predictions = clf.predict(selected_test_features)

    return selected_feature_indices, predictions


## Train the WOA

In [None]:
start = datetime.now()
selected_feature_indices, predictions = whale_optimization(train_features, train_labels, test_features, test_labels)
stop = datetime.now()

In [None]:
# Print the Training Time
method_name = "WOA"
WOA_training_time = stop - start
print('WOA training time is :', WOA_training_time)

In [None]:
num_total_features = train_features.shape[1]
num_selected_features = len(selected_feature_indices)
print("Total input features:", num_total_features)
print("Number of selected features:", num_selected_features)

## Plot Results

In [None]:
# Calculate scores
y_pred = predictions
accuracy = accuracy_score(test_labels, y_pred)
# Sensitivity is the recall of the positive class (label 1, the default pos_label).
sensitivity = recall_score(test_labels, y_pred)
# Specificity is the recall of the negative class (label 0).
specificity = recall_score(test_labels, y_pred, pos_label=0)
f1 = f1_score(test_labels, y_pred)
# NOTE(review): y_pred holds hard class labels (the optimizer returns only
# clf.predict output), so this AUC describes a single operating point rather
# than a ranking; return predict_proba scores from the optimizer if a
# threshold-free AUC is required.
roc = roc_auc_score(test_labels, y_pred)

In [None]:
# Print the results
print("scores")
print("==================================================")
print("Accuracy score: %.4f" % (accuracy))
print("Sensitivity score: %.4f" % (sensitivity))
print("Specificity score: %.4f" % (specificity))
print("F1 score: %.4f" % (f1))
print("roc_auc score: %.4f" % (roc))
print("==================================================")

In [None]:
plot_confusion_matrix(test_labels, y_pred, method_name)

In [None]:
plot_roc_curve(test_labels, y_pred, method_name)

# Feature Selection With Artificial Bee Colony (ABC)

## Define the Artificial Bee Colony (ABC) algorithm functions:

In [None]:
def initialize_population(num_bees, num_features):
    """Build the initial colony: a random 0/1 integer matrix, one row per bee."""
    colony_shape = (num_bees, num_features)
    # randint's upper bound is exclusive, hence values in {0, 1}.
    return np.random.randint(low=0, high=2, size=colony_shape)

In [None]:
def fitness(bee, train_features, train_labels, test_features, test_labels, classifier):
    """Score one bee: accuracy of ``classifier`` restricted to the bee's features.

    Parameters
    ----------
    bee : array-like of shape (n_features,)
        Feature mask; every non-zero entry marks a selected column.
    train_features, train_labels
        Data the classifier is fitted on.
    test_features, test_labels
        Data the fitted classifier is scored on.
    classifier
        Estimator exposing ``fit`` and ``score``; it is refitted in place.

    Returns
    -------
    float
        The value of ``classifier.score`` on the selected columns, or ``0.0``
        when the mask selects no feature at all.
    """
    mask = np.asarray(bee).astype(bool)

    # Nothing selected means nothing to fit; give the bee the lowest score
    # rather than letting the classifier raise.
    if not mask.any():
        return 0.0

    fitted = classifier.fit(train_features[:, mask], train_labels)
    return fitted.score(test_features[:, mask], test_labels)

In [None]:
def scout_bees(num_bees, num_features):
    """Return a freshly drawn random colony (delegates to ``initialize_population``)."""
    fresh_colony = initialize_population(num_bees, num_features)
    return fresh_colony

In [None]:
def employed_bees(population, train_features, train_labels, test_features, test_labels, classifier):
    """Evaluate every bee and return the best one together with its row index.

    Returns
    -------
    (numpy.ndarray, int)
        The best-scoring row of ``population`` (not a copy) and its index.
        Ties go to the lowest index.
    """
    bee_count, _ = population.shape

    # One fitness evaluation per bee, in row order.
    scores = np.fromiter(
        (
            fitness(population[row], train_features, train_labels, test_features, test_labels, classifier)
            for row in range(bee_count)
        ),
        dtype=float,
        count=bee_count,
    )

    top_index = np.argmax(scores)
    return population[top_index], top_index

In [None]:
def onlooker_bees(population, employed_bee, employed_bee_index, train_features, train_labels, test_features, test_labels, classifier):
    """Pick one bee other than the employed bee, with probability proportional to fitness.

    Parameters
    ----------
    population : numpy.ndarray of shape (n_bees, n_features)
        Current colony.
    employed_bee : numpy.ndarray
        Unused; kept for interface compatibility.
    employed_bee_index : int
        Row of the employed bee, which is excluded from the draw.
    train_features, train_labels, test_features, test_labels, classifier
        Forwarded to ``fitness``.

    Returns
    -------
    (numpy.ndarray, int)
        The selected row of ``population`` and its index. When the colony
        holds no other bee, the employed bee itself is returned.
    """
    num_bees = population.shape[0]
    fitness_values = np.zeros(num_bees)

    for i in range(num_bees):
        if i != employed_bee_index:
            fitness_values[i] = fitness(population[i], train_features, train_labels, test_features, test_labels, classifier)

    # Bees that may be drawn: everyone except the employed bee.
    is_candidate = np.arange(num_bees) != employed_bee_index
    if not is_candidate.any():
        return population[employed_bee_index], employed_bee_index

    total_fitness = np.sum(fitness_values)
    if np.all(fitness_values >= 0) and total_fitness > 0:
        # Roulette wheel: normalize fitness values to probabilities.
        probabilities = fitness_values / total_fitness
    else:
        # Negative, NaN or all-zero fitness cannot be normalized (an all-zero
        # sum would produce NaN probabilities); draw uniformly instead.
        probabilities = is_candidate / is_candidate.sum()

    selected_bee_index = np.random.choice(np.arange(num_bees), p=probabilities)
    selected_bee = population[selected_bee_index]

    return selected_bee, selected_bee_index


In [None]:
def update_employed_bees(employed_bee, onlooker_bee, train_features, train_labels, test_features, test_labels, classifier):
    """Try a neighbour of the employed bee and keep it only if it scores better.

    The neighbour is the employed bee with up to two randomly chosen
    positions replaced by the onlooker bee's values at those positions.

    Returns
    -------
    numpy.ndarray
        The neighbour (a new array) when its fitness is strictly higher than
        the employed bee's, otherwise ``employed_bee`` itself.
    """
    num_features = employed_bee.shape[0]
    candidate = np.copy(employed_bee)

    # Never ask for more distinct positions than there are features.
    swap_count = min(2, num_features)
    changed_indices = np.random.choice(np.arange(num_features), size=swap_count, replace=False)
    candidate[changed_indices] = onlooker_bee[changed_indices]

    # An identical neighbour cannot be better, and an empty one cannot be
    # fitted; skip the two classifier fits in both cases.
    if np.array_equal(candidate, employed_bee) or not candidate.astype(bool).any():
        return employed_bee

    employed_fitness = fitness(employed_bee, train_features, train_labels, test_features, test_labels, classifier)
    # Greedy selection must score the neighbour that would actually be
    # returned, not the onlooker bee it borrowed values from.
    candidate_fitness = fitness(candidate, train_features, train_labels, test_features, test_labels, classifier)

    if candidate_fitness > employed_fitness:
        return candidate
    return employed_bee

In [None]:
def scout_bee(population, employed_bee_index, train_features, train_labels, test_features, test_labels, classifier):
    """Flip one randomly chosen feature bit of the employed bee, in place.

    Only ``population`` and ``employed_bee_index`` are used; the remaining
    parameters are accepted but ignored. The same (mutated) ``population``
    array is returned.
    """
    feature_count = population.shape[1]
    flip_at = np.random.randint(0, feature_count)

    # Indexing one row gives a view, so writing to it updates the population.
    target_row = population[employed_bee_index]
    target_row[flip_at] = 1 - target_row[flip_at]
    return population

In [None]:
def abc_optimization(train_features, train_labels, test_features, test_labels, num_bees=30, num_iterations=100, classifier=None):
    """Select features with an Artificial Bee Colony style search and predict with them.

    Each iteration finds the best bee, blends it with a fitness-weighted
    onlooker bee, and then lets the scout step flip one of its bits. The best
    mask ever scored is retained, so a harmful scout flip cannot lose it.

    Parameters
    ----------
    train_features, train_labels
        Data every candidate subset is fitted on.
    test_features, test_labels
        Data every candidate subset is scored on, and on which the final
        predictions are made. NOTE(review): when the held-out test set is
        passed here, the selection has seen the test labels, so the final
        score is optimistic; consider passing a validation split instead.
    num_bees : int, default 30
        Colony size.
    num_iterations : int, default 100
        Number of search rounds.
    classifier : estimator, optional
        Defaults to ``RandomForestClassifier(n_estimators=100, random_state=42)``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Indices of the selected features and the predictions of the
        classifier refitted on them for ``test_features``.
    """
    num_features = train_features.shape[1]
    population = initialize_population(num_bees, num_features)

    if classifier is None:
        classifier = RandomForestClassifier(n_estimators=100, random_state=42)

    best_score = -np.inf
    best_mask = None

    for iteration in range(num_iterations):
        employed_bee, employed_bee_index = employed_bees(population, train_features, train_labels, test_features, test_labels, classifier)
        onlooker_bee, _ = onlooker_bees(population, employed_bee, employed_bee_index, train_features, train_labels, test_features, test_labels, classifier)
        updated_employed_bee = update_employed_bees(employed_bee, onlooker_bee, train_features, train_labels, test_features, test_labels, classifier)

        # Never store a mask without any feature: it could not be fitted.
        if np.asarray(updated_employed_bee).astype(bool).any():
            population[employed_bee_index] = updated_employed_bee

        # Record the best mask before the scout step perturbs it.
        current_score = fitness(population[employed_bee_index], train_features, train_labels, test_features, test_labels, classifier)
        if current_score > best_score:
            best_score = current_score
            best_mask = population[employed_bee_index].copy()

        row_before_scout = population[employed_bee_index].copy()
        population = scout_bee(population, employed_bee_index, train_features, train_labels, test_features, test_labels, classifier)
        # Undo a flip that would leave the bee with no feature at all.
        if not population[employed_bee_index].astype(bool).any():
            population[employed_bee_index] = row_before_scout

    # The final colony may still hold something better than the recorded best.
    final_bee, _ = employed_bees(population, train_features, train_labels, test_features, test_labels, classifier)
    final_score = fitness(final_bee, train_features, train_labels, test_features, test_labels, classifier)
    if best_mask is None or final_score >= best_score:
        best_mask = np.copy(final_bee)

    selected_feature_indices = np.where(best_mask.astype(bool))[0]
    selected_train_features = train_features[:, selected_feature_indices]
    selected_test_features = test_features[:, selected_feature_indices]

    clf = classifier.fit(selected_train_features, train_labels)
    predictions = clf.predict(selected_test_features)

    return selected_feature_indices, predictions

## Train the ABC

In [None]:
start = datetime.now()
selected_feature_indices, predictions = abc_optimization(train_features, train_labels, test_features, test_labels)
stop = datetime.now()

In [None]:
# Print the Training Time
method_name = "ABC"
ABC_training_time = stop - start
print('ABC training time is :', ABC_training_time)

In [None]:
num_total_features = train_features.shape[1]
num_selected_features = len(selected_feature_indices)
print("Total input features:", num_total_features)
print("Number of selected features:", num_selected_features)

## Plot Results

In [None]:
# Calculate scores
y_pred = predictions
accuracy = accuracy_score(test_labels, y_pred)
# Sensitivity is the recall of the positive class (label 1, the default pos_label).
sensitivity = recall_score(test_labels, y_pred)
# Specificity is the recall of the negative class (label 0).
specificity = recall_score(test_labels, y_pred, pos_label=0)
f1 = f1_score(test_labels, y_pred)
# NOTE(review): y_pred holds hard class labels (the optimizer returns only
# clf.predict output), so this AUC describes a single operating point rather
# than a ranking; return predict_proba scores from the optimizer if a
# threshold-free AUC is required.
roc = roc_auc_score(test_labels, y_pred)

In [None]:
# Print the results
print("scores")
print("==================================================")
print("Accuracy score: %.4f" % (accuracy))
print("Sensitivity score: %.4f" % (sensitivity))
print("Specificity score: %.4f" % (specificity))
print("F1 score: %.4f" % (f1))
print("roc_auc score: %.4f" % (roc))
print("==================================================")

In [None]:
plot_confusion_matrix(test_labels, y_pred, method_name)

In [None]:
plot_roc_curve(test_labels, y_pred,method_name)

# Feature Selection With Butterfly Optimization Algorithm (BOA)

## Define the Butterfly Optimization Algorithm (BOA) functions:

In [None]:
def initialize_population(num_butterflies, num_features):
    """Draw the initial swarm: a random 0/1 integer matrix, one row per butterfly."""
    swarm_shape = (num_butterflies, num_features)
    # Integers from the half-open range [0, 2), i.e. 0 or 1.
    return np.random.randint(low=0, high=2, size=swarm_shape)

In [None]:
def fitness(butterfly, train_features, train_labels, test_features, test_labels, classifier):
    """Score one butterfly: accuracy of ``classifier`` restricted to its features.

    Parameters
    ----------
    butterfly : array-like of shape (n_features,)
        Feature mask; every non-zero entry marks a selected column.
    train_features, train_labels
        Data the classifier is fitted on.
    test_features, test_labels
        Data the fitted classifier is scored on.
    classifier
        Estimator exposing ``fit`` and ``score``; it is refitted in place.

    Returns
    -------
    float
        The value of ``classifier.score`` on the selected columns, or ``0.0``
        when the mask selects no feature at all.
    """
    mask = np.asarray(butterfly).astype(bool)

    # A mask with no feature leaves no columns to train on; score it as the
    # worst candidate instead of raising from the fit.
    if not mask.any():
        return 0.0

    fitted = classifier.fit(train_features[:, mask], train_labels)
    return fitted.score(test_features[:, mask], test_labels)

In [None]:
def butterfly_movement(butterfly, best_butterfly, step_size=0.1):
    """Move ``butterfly`` a fraction ``step_size`` of the way towards ``best_butterfly``.

    ``step_size=0`` returns the butterfly's own position and ``step_size=1``
    lands exactly on the best butterfly.
    """
    # The step points from the butterfly to the best one; the opposite sign
    # would push the swarm away from the best solution.
    delta = step_size * (best_butterfly - butterfly)
    return butterfly + delta

In [None]:
def update_population(population, fitness_values, best_butterfly_index, step_size=0.1):
    """Move every butterfly except the best one and keep the population binary.

    The continuous result of ``butterfly_movement`` is clipped to ``[0, 1]``
    and read as the probability that each bit is switched on. Writing the
    fractional values directly into an integer mask would truncate them and
    discard the move.

    Parameters
    ----------
    population : numpy.ndarray of shape (n_butterflies, n_features)
        0/1 feature masks; updated in place and also returned.
    fitness_values : array-like
        Unused; kept for interface compatibility.
    best_butterfly_index : int
        Row that is left untouched and used as the attraction target.
    step_size : float, default 0.1
        Forwarded to ``butterfly_movement``.

    Returns
    -------
    numpy.ndarray
        The same ``population`` array.
    """
    num_butterflies, num_features = population.shape
    # Float copy of the target so the movement is computed without integer truncation.
    best_butterfly = population[best_butterfly_index].astype(float)

    for i in range(num_butterflies):
        if i == best_butterfly_index:
            continue

        moved = butterfly_movement(population[i].astype(float), best_butterfly, step_size)
        on_probability = np.clip(moved, 0.0, 1.0)
        new_mask = np.random.rand(num_features) < on_probability

        # Keep the previous mask rather than store one with no feature at all.
        if new_mask.any():
            population[i] = new_mask

    return population

In [None]:
def boa_optimization(train_features, train_labels, test_features, test_labels, num_butterflies=30, num_iterations=100, step_size=0.1, classifier=None):
    """Select features with the butterfly search and predict with the chosen subset.

    Every iteration scores all butterflies with ``fitness``, remembers the
    index of the best score seen so far, and then calls ``update_population``
    with that index.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Indices of the features selected by the best butterfly and the
        predictions of the classifier refitted on them for ``test_features``.
    """
    feature_count = train_features.shape[1]
    population = initialize_population(num_butterflies, feature_count)

    if classifier is None:
        from sklearn.ensemble import RandomForestClassifier
        classifier = RandomForestClassifier(n_estimators=100, random_state=42)

    # Best score seen so far and the row that achieved it.
    top_score, top_index = 0.0, 0

    for _ in range(num_iterations):
        scores = np.zeros(num_butterflies)
        for row, candidate in enumerate(population):
            scores[row] = fitness(candidate, train_features, train_labels, test_features, test_labels, classifier)
            # Strict comparison: ties keep the earlier butterfly.
            if scores[row] > top_score:
                top_score, top_index = scores[row], row

        population = update_population(population, scores, top_index, step_size)

    chosen_columns = np.where(population[top_index].astype(bool))[0]
    train_subset = train_features[:, chosen_columns]
    test_subset = test_features[:, chosen_columns]

    model = classifier.fit(train_subset, train_labels)
    return chosen_columns, model.predict(test_subset)

## Train the BOA

In [None]:
start = datetime.now()
selected_feature_indices, predictions = boa_optimization(train_features, train_labels, test_features, test_labels)
stop = datetime.now()

In [None]:
# Print the Training Time
method_name = "BOA"
BOA_training_time = stop - start
print('BOA training time is :', BOA_training_time)

In [None]:
num_total_features = train_features.shape[1]
num_selected_features = len(selected_feature_indices)
print("Total input features:", num_total_features)
print("Number of selected features:", num_selected_features)

## Plot Results

In [None]:
# Calculate scores
y_pred = predictions
accuracy = accuracy_score(test_labels, y_pred)
# Sensitivity is the recall of the positive class (label 1, the default pos_label).
sensitivity = recall_score(test_labels, y_pred)
# Specificity is the recall of the negative class (label 0).
specificity = recall_score(test_labels, y_pred, pos_label=0)
f1 = f1_score(test_labels, y_pred)
# NOTE(review): y_pred holds hard class labels (the optimizer returns only
# clf.predict output), so this AUC describes a single operating point rather
# than a ranking; return predict_proba scores from the optimizer if a
# threshold-free AUC is required.
roc = roc_auc_score(test_labels, y_pred)

In [None]:
# Print the results
print("scores")
print("==================================================")
print("Accuracy score: %.4f" % (accuracy))
print("Sensitivity score: %.4f" % (sensitivity))
print("Specificity score: %.4f" % (specificity))
print("F1 score: %.4f" % (f1))
print("roc_auc score: %.4f" % (roc))
print("==================================================")

In [None]:
plot_confusion_matrix(test_labels, y_pred, method_name)

In [None]:
plot_roc_curve(test_labels, y_pred, method_name)

# Feature Selection With RFE

## Train RFE

In [None]:
from sklearn.feature_selection import RFE

In [None]:
# Number of features RFE should keep.
num_features_to_select = 10  # Adjust this based on your preference

# Recursive feature elimination wrapped around the base classifier `clf`.
rfe = RFE(estimator=clf, n_features_to_select=num_features_to_select)

# Fit the selector on the training split only (the test split is not used here).
rfe.fit(train_features, train_labels)

# Training columns kept by RFE; rfe.support_ is used as the column mask.
# NOTE(review): this variable is not read again in the visible cells; the
# following cells rebuild the subset from the selected indices.
selected_features = train_features[:, rfe.support_]


In [None]:
selected_feature_indices = np.where(rfe.support_)[0]

## Evaluate on Test Data

In [None]:
selected_train_features = train_features[:, selected_feature_indices]
selected_test_features = test_features[:, selected_feature_indices]

In [None]:
# Check the shape of the feature arrays
print("Shape of train_features:", train_features.shape)
print("Shape of train labels:", train_labels.shape)

In [None]:
# Check the shape of the feature arrays
print("Shape of train_features:", selected_train_features.shape)
print("Shape of test_features:", selected_test_features.shape)

In [None]:
clf.fit(selected_train_features, train_labels)

predictions = clf.predict(selected_test_features)

## Plot Results

In [None]:
# Calculate scores
y_pred = predictions
# Probability of the positive class (second column of predict_proba). ROC AUC
# measures how well scores rank the samples, so it needs scores, not the
# already-thresholded 0/1 predictions.
y_score = clf.predict_proba(selected_test_features)[:, 1]

accuracy = accuracy_score(test_labels, y_pred)
# Sensitivity is the recall of the positive class (label 1, the default pos_label).
sensitivity = recall_score(test_labels, y_pred)
# Specificity is the recall of the negative class (label 0).
specificity = recall_score(test_labels, y_pred, pos_label=0)
f1 = f1_score(test_labels, y_pred)
roc = roc_auc_score(test_labels, y_score)

In [None]:
# Print the results
print("scores")
print("==================================================")
print("Accuracy score: %.4f" % (accuracy))
print("Sensitivity score: %.4f" % (sensitivity))
print("Specificity score: %.4f" % (specificity))
print("F1 score: %.4f" % (f1))
print("roc_auc score: %.4f" % (roc))
print("==================================================")

In [None]:
plot_confusion_matrix(test_labels, y_pred, "RFE")

In [None]:
# Draw the ROC curve from positive-class probabilities; hard 0/1 predictions
# would collapse the curve to a single operating point.
plot_roc_curve(test_labels, clf.predict_proba(selected_test_features)[:, 1], "RFE")