In [18]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import numpy as np
import pandas as pd
import pyswarms as ps
import time

# swarm parameters
#partical>> k>> c1+c2>> w 
'''
C1: from 0.0 to 4.1 in steps of 0.50
C2: 4.1- C1
W: from 0.10 to 1 in steps of 0.2.
Neighborhood Size: from 2 to all in steps of 2.
'''

options = {'c1': 0, 'c2': 4.1, 'w':0.1,'k':99,'p':2}
num_particles_for_pso = 100   #to be change     #10-200 >>20+
num_iters_for_pso = 100


start = time.time()
df = pd.read_csv('/Users/afrah/Desktop/cs789_cs769_project/data4test/vectors_dataset.csv') ## Loading the dataset
x = df.drop('label',axis =1)
y = df['label']
print("Complete dataset loaded! Time taken:", round(time.time() - start,2), "seconds.")

#devide dataset into train & test
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.20,random_state =42)
X = X_train.values
y = y_train.values


num_dims = X.shape[1]
particleScore = list()
particleSize = list()

from sklearn import linear_model

# Create an instance of the classifier

from sklearn.svm import SVC # for Support Vector Classification model
classifier = SVC(kernel='rbf', probability=True)

# Define objective function
def f_per_particle(m, alpha):
    """Computes for the objective function per particle

    Inputs
    ------
    m : numpy.ndarray
        Binary mask that can be obtained from BinaryPSO, will
        be used to mask features.
    alpha: float (default is 0.5)
        Constant weight for trading-off classifier performance
        and number of features

    Returns
    -------
    numpy.ndarray
        Computed objective function
    """
    total_features = num_dims

    # Get the subset of the features from the binary mask
    if np.count_nonzero(m) == 0:
        X_subset = X
    else:
        X_subset = X[:, m == 1]
    scores = cross_val_score(classifier, X_subset, y, cv=3)
    #print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    
    P = scores.mean()
    particleScore.append(P)
    particleSize.append(X_subset.shape[1])
    # Compute for the objective function
    j = (alpha * (1.0 - P)
        + (1.0 - alpha) * (1 - (X_subset.shape[1] / total_features)))
    
    #j = (alpha * (1.0 - P)) + (1 - alpha) * (1 - (total_features - X_subset.shape[1]) / total_features)
    #print("Particle j: ", j)
    return j



def f(x, alpha=0.88):
    """Higher-level method to do classification in the
    whole swarm.

    Inputs
    ------
    x: numpy.ndarray of shape (n_particles, dimensions)
        The swarm that will perform the search

    Returns
    -------
    numpy.ndarray of shape (n_particles, )
        The computed loss for each particle
    """
    n_particles = x.shape[0]
    j = [f_per_particle(x[i], alpha) for i in range(n_particles)]
    return np.array(j)


# Call instance of PSO
dimensions = num_dims # dimensions should be the number of features
# optimizer.reset()
optimizer = ps.discrete.BinaryPSO(n_particles=num_particles_for_pso, dimensions=dimensions, options=options)

# Perform optimization
cost, pos = optimizer.optimize(f, iters=num_iters_for_pso, verbose=2)



# classifier =  svm.SVC(kernel = 'linear') # Linear Kernel
# Get the selected features from the final positions
X_selected_features = X[:,pos==1]  # subset
X_selected_test = X_test.values[:,pos==1]
start = time.time()

classifier = SVC(kernel='rbf', probability=True)
classifier.fit(X_selected_features, y)
subset_performance_train = (classifier.predict(X_selected_features) == y).mean()
print('Subset performance on train with feature selection: %.3f percent' % (100*subset_performance_train))
subset_performance_test = (classifier.predict(X_selected_test) == y_test.values).mean()
print('Subset performance on test with feature selection(accuracy): %.3f percent' % (100*subset_performance_test))

#find selected features
selectd_pos = np.array(pos)
Num_features=np.count_nonzero(y == 1)
print("Number of Features Selected : ", Num_features)
print("Complete  process ! Time taken:", round(time.time() - start,2), "seconds.")

#calculate metrics
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

y_pred = classifier.predict(X_selected_test)
f1 = f1_score(y_test, y_pred, average="macro")
precision = precision_score(y_test, y_pred, average="macro")
recall = recall_score(y_test, y_pred, average="macro")
print('F1:', (100*f1))
print('precision:',(100*precision))
print('recall:',(100*recall))

2021-12-08 05:12:50,397 - pyswarms.discrete.binary - INFO - Optimize for 100 iters with {'c1': 0, 'c2': 4.1, 'w': 0.1, 'k': 99, 'p': 2}
pyswarms.discrete.binary:   0%|          |0/100

Complete dataset loaded! Time taken: 0.14 seconds.


pyswarms.discrete.binary: 100%|██████████|100/100, best_cost=0.382
2021-12-08 14:07:01,374 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: 0.3821125138427464, best pos: [1 1 1 1 1 0 1 1 1 0 1 0 0 1 1 1 0 1 1 0 1 1 0 1 1 1 1 0 1 1 1 0 1 0 1 1 1
 1 0 0 1 0 1 0 0 0 0 0 0 0 1 1 0 1 1 1 1 1 1 1 0 0 1 1 0 1 1 1 0 1 1 0 0 1
 1 0 1 0 1 0 1 1 0 1 1 0 0 0 1 1 1 0 0 1 1 0 1 0 1 0 0 1 0 0 1 1 1 1 1 0 1
 0 0 0 1 0 1 0 1 1 0 1 0 0 1 1 1 0 1 1 1 1 0 0 1 1 1 0 0 1 0 1 1 1 1 1 1 1
 1 1 0 1 1 1 1 0 1 0 1 1 0 0 1 0 1 0 1 1 0 1 1 0 1 1 1 1 0 1 1 1 1 1 1 0 0
 0 1 0 1 0 0 1 1 0 0 1 1 0 1 0 0 1 1 1 1 1 0 1 1 0 1 0 1 0 1 1 1 1 1 1 1 0
 1 1 1 1 0 1 1 1 1 0 0 0 1 0 1 1 1 1 1 0 0 0 1 0 0 0 1 0 1 1 0 1 1 1 1 0 1
 1 1 1 1 0 0 0 0 1 1 1 1 0 0 1 0 1 0 0 0 1 1 0 1 1 0 0 0 1 1 1 1 0 1 1 0 0
 1 1 1 0 0]


Subset performance on train with feature selection: 68.833 percent
Subset performance on test with feature selection(accuracy): 66.333 percent
Number of Features Selected :  395
Complete  process ! Time taken: 1.0 seconds.
F1: 66.31899747967185
precision: 66.29406850459482
recall: 66.4570344266739


In [19]:
from sklearn.metrics import accuracy_score, classification_report
print(classification_report(classifier.predict(X_selected_test),y_test))

              precision    recall  f1-score   support

         0.0       0.71      0.66      0.68       100
         1.0       0.68      0.68      0.68       105
         2.0       0.61      0.65      0.63        95

    accuracy                           0.66       300
   macro avg       0.66      0.66      0.66       300
weighted avg       0.67      0.66      0.66       300



In [20]:
pos

array([1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1,
       1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0,
       1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1,
       0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,
       0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0]

In [29]:
selectd_pos = np.array(pos)
Num_features=selectd_pos[(selectd_pos > 0)]
len(Num_features)

185