In [6]:
import pandas as pd 

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC

from niapy.problems import Problem
from niapy.task import Task
from niapy.algorithms.basic import ParticleSwarmOptimization


In [7]:
# Load the pre-cleaned chronic kidney disease table from the working directory
# and preview its first rows.
df = pd.read_csv(filepath_or_buffer="chronic-kidney_clean.csv")
df.head()

Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,...,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
0,48,80,1.02,1,0,0,0,0,0,121,...,44,7800,5.2,1,1,0,0,0,0,ckd
1,7,50,1.02,4,0,0,0,0,0,121,...,38,6000,4.8,0,0,0,0,0,0,ckd
2,62,80,1.01,2,3,0,0,0,0,423,...,31,7500,4.8,0,1,0,1,0,1,ckd
3,48,70,1.005,4,0,0,1,1,0,117,...,32,6700,3.9,1,0,0,1,1,1,ckd
4,51,80,1.01,2,0,0,0,0,0,106,...,35,7300,4.6,0,0,0,0,0,0,ckd


In [8]:
# Column count of the loaded frame (second entry of its shape).
df.shape[1]

25

In [9]:
# Split the frame into the feature matrix and the target column.
#
# 'Unnamed: 0' is the row counter that was saved into the CSV (0, 1, 2, ... in
# the preview above); it is not a measurement. Leaving it in X would let the
# optimiser pick it as a feature, and if the file's rows are grouped by class
# the row number alone gives the label away. errors='ignore' keeps this cell
# working when the CSV is loaded without that column.
X = df.drop(columns=['classification']).drop(columns=['Unnamed: 0'], errors='ignore')
y = df['classification']
feature_names = X.columns

In [10]:
# Half of the number of candidate feature columns.
X.shape[1] / 2

12.0

In [11]:
class SVMFeatureSelection(Problem):
    """Feature-selection problem whose fitness is the cross-validated error of an SVC.

    A candidate solution is a vector with one value in [0, 1] per column of
    ``X_train``; a column counts as selected when its value is strictly greater
    than ``threshold``.

    Args:
        X_train: 2-D feature matrix that supports ``X_train[:, mask]`` with a
            boolean mask (e.g. a NumPy array); ``X_train.shape[1]`` sets the
            problem dimension.
        y_train: Target labels passed to ``cross_val_score``.
        alpha: Weight of the classification error in the fitness; the
            remaining ``1 - alpha`` weights the fraction of selected features.
        threshold: Cut-off above which a solution component selects its
            feature. Defaults to 0.1, the value previously hard-coded.
        cv: ``cv`` argument forwarded to ``cross_val_score``. Defaults to 2,
            the value previously hard-coded.
    """

    def __init__(self, X_train, y_train, alpha=0.99, threshold=0.1, cv=2):
        super().__init__(dimension=X_train.shape[1], lower=0, upper=1)
        self.X_train = X_train
        self.y_train = y_train
        self.alpha = alpha
        self.threshold = threshold
        self.cv = cv

    def _evaluate(self, x):
        """Return the fitness of solution vector ``x``.

        The value is ``alpha * (1 - accuracy) + (1 - alpha) * selected_fraction``,
        where ``accuracy`` is the mean ``cross_val_score`` of a default ``SVC``
        on the selected columns. An empty selection returns 1.0 without
        fitting anything.
        """
        selected = x > self.threshold
        num_selected = selected.sum()
        if num_selected == 0:
            # Nothing to train on: return a fixed fitness of 1.0.
            # NOTE(review): presumably the worst value under minimisation - confirm.
            return 1.0
        accuracy = cross_val_score(
            SVC(),
            self.X_train[:, selected],
            self.y_train,
            cv=self.cv,
            n_jobs=-1,
        ).mean()
        score = 1 - accuracy
        num_features = self.X_train.shape[1]
        # Weighted sum of the error term and the share of features kept.
        return self.alpha * score + (1 - self.alpha) * (num_selected / num_features)

In [None]:

# Wrap the full feature matrix and labels in the selection problem.
problem = SVMFeatureSelection(X_train=X.values, y_train=y)

# Cap the search at 20 iterations.
task = Task(problem=problem, max_iters=20)

# Fixed seed so the swarm's random draws are repeatable.
algorithm = ParticleSwarmOptimization(seed=1234, population_size=20)
best_features, best_fitness = algorithm.run(task)

# Same cut-off as the one applied inside the problem's fitness function.
selected_features = best_features > 0.1


In [None]:
# Summarise the winning mask: how many columns it keeps and which ones.
n_selected = selected_features.sum()
chosen_columns = X.columns[selected_features].tolist()
print('Number of selected features:', n_selected)
print('Selected features:', ', '.join(chosen_columns))

In [None]:
# Names of the columns kept by the best solution (feature_names is X.columns).
feature_names[selected_features]