Reference code (NiaPy feature selection tutorial): https://niapy.org/en/stable/tutorials/feature_selection.html

# Import Libraries

In [2]:
# !pip install niapy
# !pip install scikit-learn
# !pip install pandas
# !pip install numpy

import pandas as pd
import numpy as np

# Load Dataset

In [3]:
# Load the feature table from the CSV file in the current working directory.
df = pd.read_csv("./Dataset.csv")

In [4]:
# Discard the 'Unnamed: 0' column (presumably a row index written out when
# the CSV was saved -- confirm against the file) so it is not used as a feature.
df = df.drop(columns=['Unnamed: 0'])

In [5]:
# Preview the first rows to check the load and the column drop.
df.head()

In [6]:
# Read the target labels from their own CSV file and preview the first rows.
target = pd.read_csv("./PSO_label.csv")
target.head()

In [7]:
# Remove the 'Unnamed: 0' column from the label table, then preview the result.
target = target.drop(columns=['Unnamed: 0'])
target.head()

In [8]:
# Feature matrix, target vector (the 'Class' column of the label table),
# and the feature column labels used later to name the selected features.
X, y = df, target['Class']
feature_names = X.columns

# Applying PSO

In [9]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC

from niapy.problems import Problem
from niapy.task import Task
from niapy.algorithms.basic import ParticleSwarmOptimization

# Hold out 20% of the rows for testing. Splitting is stratified on y and
# seeded so the same partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

class SVMFeatureSelection(Problem):
    """Feature-selection problem scored by a cross-validated SVM.

    A candidate solution is a vector with one entry per feature, each bounded
    to [0, 1]. A feature counts as selected when its entry is strictly greater
    than ``threshold``. The fitness (lower is better) combines the
    cross-validated classification error with the fraction of features kept::

        alpha * (1 - accuracy) + (1 - alpha) * (num_selected / num_features)

    Args:
        X_train: Training features, one column per feature. Either a pandas
            DataFrame or a 2-D NumPy array.
        y_train: Training labels aligned with the rows of ``X_train``.
        alpha: Weight of the classification error in the fitness; the
            remaining ``1 - alpha`` weights the feature-count penalty.
        threshold: Cut-off above which a solution entry selects its feature.
        cv: ``cv`` argument forwarded to ``cross_val_score``.
    """

    def __init__(self, X_train, y_train, alpha=0.99, threshold=0.99, cv=2):
        super().__init__(dimension=X_train.shape[1], lower=0, upper=1)
        self.X_train = X_train
        self.y_train = y_train
        self.alpha = alpha
        self.threshold = threshold
        self.cv = cv

    def _evaluate(self, x):
        """Return the fitness of solution vector ``x`` (lower is better)."""
        # np.where already yields integer indices, so no cast is needed.
        selected_indices = np.where(x > self.threshold)[0]
        num_selected = len(selected_indices)

        # An empty subset cannot be used to train a model; give it the worst
        # value the fitness formula below can produce.
        if num_selected == 0:
            return 1.0

        # DataFrames need positional .iloc indexing; plain arrays are sliced
        # directly.
        if hasattr(self.X_train, 'iloc'):
            X_train_selected = self.X_train.iloc[:, selected_indices]
        else:
            X_train_selected = self.X_train[:, selected_indices]

        # Mean cross-validated accuracy of a default SVC on the subset.
        accuracy = cross_val_score(
            SVC(), X_train_selected, self.y_train, cv=self.cv, n_jobs=-1
        ).mean()
        score = 1 - accuracy
        num_features = self.X_train.shape[1]
        return self.alpha * score + (1 - self.alpha) * (num_selected / num_features)


In [10]:
# Optimisation problem, defined on the training split only
problem = SVMFeatureSelection(X_train, y_train)

# Task: wraps the problem with the stopping condition (100 iterations)
task = Task(problem, max_iters=100)

# Algorithm to solve the task (seeded for reproducibility)
algorithm = ParticleSwarmOptimization(population_size=15, seed=42)

# Best solution vector found and its fitness
best_features, best_fitness = algorithm.run(task)

# Boolean mask of the selected features. The 0.99 cut-off must match the one
# used inside SVMFeatureSelection._evaluate.
selected_features = (best_features > 0.99)
print('Number of selected features:', selected_features.sum())
print('Selected features:', ', '.join(feature_names[selected_features].tolist()))

# Models
model_selected = SVC()
model_all = SVC()

# Training on Model_1 using selected features.
# X_train / X_test are pandas DataFrames, so the columns are picked with
# .iloc and the boolean mask; NumPy-style `X_train[:, mask]` raises on a
# DataFrame.
X_train_selected = X_train.iloc[:, selected_features]
X_test_selected = X_test.iloc[:, selected_features]

model_1 = model_selected.fit(X_train_selected, y_train)
subset_accuracy = model_1.score(X_test_selected, y_test)
print('Subset accuracy:', subset_accuracy)


# Training on Model_2 using all features
model_2 = model_all.fit(X_train, y_train)
all_features_accuracy = model_2.score(X_test, y_test)
print('All Features Accuracy:', all_features_accuracy)


# Confusion Matrix For All Features

In [11]:
from sklearn.metrics import (
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)

# Predictions of the all-features model on the held-out split.
y_predicted_2 = model_2.predict(X_test)

# Micro-averaged precision, recall and F1 score, computed in that order.
precision, recall, f1 = (
    metric(y_test, y_predicted_2, average='micro')
    for metric in (precision_score, recall_score, f1_score)
)
cm = confusion_matrix(y_test, y_predicted_2)
print("Confusion Matrix:\n", cm)

# One-row summary of the three scores.
scores = [{'Precision': precision, 'Recall': recall, 'F1 Score': f1}]

# NOTE: this rebinds `df`, which until now held the feature table.
df = pd.DataFrame(scores, columns=['Precision','Recall','F1 Score'])
df

In [12]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sn
plt.figure(figsize=(10,7))
sn.heatmap(cm, annot=True)
plt.xlabel('Predicted')
plt.ylabel('Truth')