# Feature Selection Method Based on Grey Wolf Optimization for Coronary Artery Disease Classification

In [20]:
import random
import time
import math
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import benchmarks

data = pd.read_csv("cleavland.csv")
data.head()

Unnamed: 0,AGE,SEX,CP,TRESTBPS,CHOL,FBD,REST ECG,THALACH,EXANG,OLDPEAK,SLOPE,CA,THAL,RESULT
0,63,1,1,145,233,1,2,150,0,2.3,3,0,6,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,3,2
2,67,1,4,120,229,0,2,129,1,2.6,2,2,7,1
3,37,1,3,130,250,0,0,187,0,3.5,3,0,3,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,3,0


### Data preprocessing, extraction and splitting

In [21]:
X = data.iloc[:,:13].values
Y = data['RESULT'].values
print(X.shape)
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.40, random_state = 14)
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

(303, 13)


### Applying SVM classification without data processing

In [22]:
svcclassifier = SVC(kernel = 'poly', random_state = 50)
svcclassifier.fit(X_train, y_train)
y_pred = svcclassifier.predict(X_test)
P = accuracy_score(y_pred,y_test)
print("Accuracy score for SVM:",P)

Accuracy score for SVM: 0.5983606557377049


### GWO Optimization algo

In [23]:
def GWO(objf,lb,ub,dim,SearchAgents_no,Max_iter):
    
    # initialize alpha, beta, and delta_pos
    Alpha_pos=np.zeros(dim)
    Alpha_score=float("inf")
    
    Beta_pos=np.zeros(dim)
    Beta_score=float("inf")
    
    Delta_pos=np.zeros(dim)
    Delta_score=float("inf")

    if not isinstance(lb, list):
        lb = [lb] * dim
    if not isinstance(ub, list):
        ub = [ub] * dim
    
    #Initialize the positions of search agents
    Positions = np.zeros((SearchAgents_no, dim))
    for i in range(dim):
        Positions[:, i] = np.random.uniform(0,1, SearchAgents_no) * (ub[i] - lb[i]) + lb[i]
    
    Convergence_curve=np.zeros(Max_iter)

     # Loop counter
    print("GWO is optimizing  \""+objf.__name__+"\"")    

    # Main loop
    for l in range(0,Max_iter):
        for i in range(0,SearchAgents_no):
            
            # Return back the search agents that go beyond the boundaries of the search space
            for j in range(dim):
                Positions[i,j]=np.clip(Positions[i,j], lb[j], ub[j])

            # Calculate objective function for each search agent
            fitness=objf(Positions[i,:])
            
            # Update Alpha, Beta, and Delta
            if fitness<Alpha_score :
                Alpha_score=fitness; # Update alpha
                Alpha_pos=Positions[i,:].copy()
            
            
            if (fitness>Alpha_score and fitness<Beta_score ):
                Beta_score=fitness  # Update beta
                Beta_pos=Positions[i,:].copy()
            
            
            if (fitness>Alpha_score and fitness>Beta_score and fitness<Delta_score): 
                Delta_score=fitness # Update delta
                Delta_pos=Positions[i,:].copy()
        
        a=2-l*((2)/Max_iter); # a decreases linearly fron 2 to 0
        
        # Update the Position of search agents including omegas
        for i in range(0,SearchAgents_no):
            for j in range (0,dim):     
                           
                r1=random.random() # r1 is a random number in [0,1]
                r2=random.random() # r2 is a random number in [0,1]
                
                A1=2*a*r1-a; # Equation (3.3)
                C1=2*r2; # Equation (3.4)
                
                D_alpha=abs(C1*Alpha_pos[j]-Positions[i,j]); # Equation (3.5)-part 1
                X1=Alpha_pos[j]-A1*D_alpha; # Equation (3.6)-part 1
                           
                r1=random.random()
                r2=random.random()
                
                A2=2*a*r1-a; # Equation (3.3)
                C2=2*r2; # Equation (3.4)
                
                D_beta=abs(C2*Beta_pos[j]-Positions[i,j]); # Equation (3.5)-part 2
                X2=Beta_pos[j]-A2*D_beta; # Equation (3.6)-part 2       
                
                r1=random.random()
                r2=random.random() 
                
                A3=2*a*r1-a; # Equation (3.3)
                C3=2*r2; # Equation (3.4)
                
                D_delta=abs(C3*Delta_pos[j]-Positions[i,j]); # Equation (3.5)-part 3
                X3=Delta_pos[j]-A3*D_delta; # Equation (3.5)-part 3             
                
                Positions[i,j]=(X1+X2+X3)/3  # Equation (3.7)        
        Convergence_curve[l]=Alpha_score;

        #if (l%1==0):
               #print(['At iteration '+ str(l)+ ' the best fitness is '+ str(Alpha_score)]);
    
    print(Positions.shape)
    print("Alpha position=",Alpha_pos);
    print("Beta position=",Beta_pos);
    print("Delta position=",Delta_pos);
    return Alpha_pos,Beta_pos;


### Setting GWO parameters

In [24]:
iters=100
wolves=5
dimension=13
search_domain=[0,1]
lb=-1.28
ub=1.28
colneeded=[0,1,2,4,5,7,8,10,11]
modified_data=pd.DataFrame()
for i in colneeded:
    modified_data[data.columns[i]]=data[data.columns[i]].astype(float)
func_details=benchmarks.getFunctionDetails(6)

for i in range(0,10):
    alpha,beta=GWO(getattr(benchmarks,'F7'),lb,ub,dimension,wolves,iters)

GWO is optimizing  "F7"
(5, 13)
Alpha position= [-0.11477551 -0.13259127  0.05616011 -0.07175298 -0.08212554 -0.00742754
 -0.19122398  0.03872122  0.0260453  -0.18744158 -0.0614064  -0.08111803
 -0.12644705]
Beta position= [-0.10722581 -0.14229754  0.05679391 -0.07212266 -0.08342723 -0.00747798
 -0.19386887  0.03825517  0.02584449 -0.19511588 -0.05409783 -0.08414043
 -0.13221456]
Delta position= [-0.10227783 -0.14955417  0.05739265 -0.0696549  -0.08459355 -0.00759125
 -0.19291296  0.03762383  0.02638019 -0.2001831  -0.05000957 -0.08524741
 -0.13250809]
GWO is optimizing  "F7"
(5, 13)
Alpha position= [ 0.11865144 -0.07613252 -0.03685896 -0.08868257  0.01645572 -0.00252833
 -0.06275602  0.01182328  0.03177475 -0.16170545  0.06877218 -0.01795701
 -0.03841731]
Beta position= [ 0.12646898 -0.05995521 -0.02515772 -0.11082223  0.01984816 -0.00216254
 -0.05409367  0.00952231  0.04909286 -0.13837717  0.08268632 -0.02725589
 -0.02664472]
Delta position= [ 0.12145188 -0.06171244 -0.03131733 -0.09

In [25]:
##Applying feature selection on the given dataset
##considering alpha as best solution and putting a threshold
threshold=-0.05
index=[]
print("alpha shape=",alpha.shape[0])
modified_daata=pd.DataFrame();
for i in range(0,alpha.shape[0]):
    if(alpha[i]>=threshold):
        modified_daata[data.columns[i]]=data[data.columns[i]].astype(float)
print("The modified data is following")
modified_daata.head()

alpha shape= 13
The modified data is following


Unnamed: 0,AGE,SEX,CP,TRESTBPS,EXANG,SLOPE,THAL
0,63.0,1.0,1.0,145.0,0.0,3.0,6.0
1,67.0,1.0,4.0,160.0,1.0,2.0,3.0
2,67.0,1.0,4.0,120.0,1.0,2.0,7.0
3,37.0,1.0,3.0,130.0,0.0,3.0,3.0
4,41.0,0.0,2.0,130.0,0.0,1.0,3.0


### Applying svm on the modified data

In [26]:
Y = data['RESULT'].values
X_train, X_test, y_train, y_test = train_test_split(modified_daata, Y, test_size = 0.40, random_state = 14)
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
print(X_train[0])
modified_daata.info()

[-0.37086712  0.6520712  -0.1543891  -1.21573705 -0.76662853 -0.99376866
 -0.95367089]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 7 columns):
AGE         303 non-null float64
SEX         303 non-null float64
CP          303 non-null float64
TRESTBPS    303 non-null float64
EXANG       303 non-null float64
SLOPE       303 non-null float64
THAL        303 non-null float64
dtypes: float64(7)
memory usage: 16.7 KB


In [37]:
svcclassifier = SVC(kernel = 'rbf', random_state = 50)
svcclassifier.fit(X_train, y_train)
y_pred = svcclassifier.predict(X_test)
P1=accuracy_score(y_pred,y_test)
print("Modified Accuracy score for SVM:", P1)
print("Earlier Accuracy score was", P)

Modified Accuracy score for SVM: 0.6147540983606558
Earlier Accuracy score was 0.5983606557377049
