In [2]:
import scipy.io
import numpy as np
from sklearn.naive_bayes import GaussianNB

In [3]:
from sklearn.model_selection import KFold, train_test_split

In [4]:
mat = scipy.io.loadmat('USPS.mat')

In [5]:
X = np.array(mat['X'])
Y = np.array(mat['Y']).reshape(-1,)

In [255]:
class GAFS():
    def __init__(self,pop_size=138,iteration=80,e_size=6,t_size =4,c_p=0.93,mp = 0.02 ):
        """
        Constructor for GAFS
        Default paramenters are those given in paper for bitwise operator
        
        :param pop_size: Population Size
        :param iteration: No. of iteration
        :param e_size: no of elities to keep during each iteration
        :param t_size: no of individual to play tournament
        :param c_p: cross_over Probability
        :param m_p: Mutation Probability
        
        :return: Nothing
        """
        
        
        self.pop_size = pop_size
        self.iterations = iteration
        self.elite_size = e_size
        self.tournament_size = t_size
        self.cross_prob = c_p
        self.mut_prob = mp
        
        
        
        
    def init_population(self):
        """
        To generator initial Population of size pop_size
        
        :return: Random Populationo of size (pop_size,no of feature in dataset)
        """
        return np.random.randint(0,2,(self.pop_size,self.X.shape[1]))
        
    def crossover(self,parent1,parent2):
        """
        Crossover to generate two new child .

        :param parent1: First Parent
        :param parent2: Second Parent
        :return: returns two new child
        """
        c1 = np.zeros(len(parent1),dtype= int)
        c2 = np.zeros(len(parent1),dtype= int)
        for i in range(len(parent1)):
            if(np.random.rand()<self.cross_prob):
                c1[i],c2[i] = parent2[i],parent1[i]
            else:
                c1[i],c2[i] = parent1[i],parent2[i]
                
        return c1,c2
    
    def bitWiseMutation(self,individual):
        """
        bitWiseMutation operator to mutate
        a individual with mut probability
        
        :param individual: an individual for mutation
        
        :return: mutated individual"""
        
        for i in range(0,len(individual)):
            if(np.random.rand() < self.mut_prob):
                if individual[i] == 0:
                    individual[i] = 1 
                else:
                    individual[i] = 0
        return individual
    
    def playTournament(self,population):
        """
        
        """
        accuracy = {}
        nFold = 5
        gnb = GaussianNB()
        i = 0
        kf = KFold(n_splits=nFold,shuffle=True)
        for individual in population:
            mask = np.array(individual,dtype = bool)
            _X = self.X[:,mask] #extract data with selected feature
            avg_acc = 0
            for train_index, test_index in kf.split(_X):
                predict = gnb.fit(_X[train_index],self.Y[train_index]).predict(_X[test_index])
                acc = ((self.Y[test_index] == predict).sum()/test_index.shape[0])*100
                avg_acc += acc
            accuracy[i] = avg_acc/nFold
            i += 1
        return accuracy
            
    def selection(self,population):
        sel_idx  = np.random.choice(population.shape[0],self.tournament_size,replace = False)
        sel_parent = np.array(population[sel_idx])
        accuracy  = self.playTournament(sel_parent)
        sorted_accuracy = sorted(accuracy.items() , key = lambda item: item[1], reverse = True)
        nC1,nC2 = self.crossover(sel_parent[sorted_accuracy[0][0]],sel_parent[sorted_accuracy[1][0]])
        return nC1,nC2
    def updateElities(self,new_elities):
        #append elities
        old_elities = list(self.elities)
        for elite in new_elities:
            old_elities.append(elite)
        accuracy = {}
        gnb  = GaussianNB()
        i = 0
        for individual in old_elities:
            mask = np.array(individual, dtype = bool)
            _X = self.X[:,mask]
            X_train, X_test, Y_train, Y_test = train_test_split(_X,self.Y,test_size = 0.2)
            predict = gnb.fit(X_train,Y_train).predict(X_test)
            accuracy[i] = ((Y_test == predict).sum()/Y_test.shape[0])*100 
            i +=1
        sorted_acc = sorted(accuracy.items(),key = lambda item:item[1],reverse = True)
        elite_idx = np.asarray(sorted_acc,dtype = int)[0:self.elite_size,0]
        elities = []
        for elite in elite_idx:
            elities.append(old_elities[elite])
        self.elities = np.asarray(elities)
        return
        
    def findElities(self,population):
        accuracy = {}
        gnb  = GaussianNB()
        i = 0
        for individual in population:
            mask = np.array(individual, dtype = bool)
            _X = self.X[:,mask]
            X_train, X_test, Y_train, Y_test = train_test_split(_X,self.Y,test_size = 0.2)
            predict = gnb.fit(X_train,Y_train).predict(X_test)
            accuracy[i] = ((Y_test == predict).sum()/Y_test.shape[0])*100 
            i +=1
        sorted_acc = sorted(accuracy.items(),key = lambda item:item[1],reverse = True)
        elite_idx = np.asarray(sorted_acc,dtype = int)[0:self.elite_size,0]
        elities = []
        for elite in elite_idx:
            elities.append(population[elite])
        self.updateElities(elities)    
        return 
    
    def run(self,X,Y):
        self.X = X
        self.Y = Y
        self.elities = np.empty(0)
        population = self.init_population()
        for itr in range(self.iterations):
            print("ITERATION:{}".format(itr))
            new_pop = []
            for i in range(int(self.pop_size/2)):
                nC1,nC2 = self.selection(population)
                new_pop.append(nC1)
                new_pop.append(nC2)
                print(i)
            population = np.asarray(new_pop)
            mut_pop = []
            for indivd in population:
                mut_pop.append(self.bitWiseMutation(indivd))
            population = np.asarray(mut_pop)
            self.findElities(population)
        return self.elities
        

In [257]:
ga = GAFS(pop_size=10)
ga.run(X,Y)

ITERATION:0
0
1
2
3
4
ITERATION:1
0
1
2
3
4
ITERATION:2
0
1
2
3
4
ITERATION:3
0
1
2
3
4
ITERATION:4
0
1
2
3
4
ITERATION:5
0
1
2
3
4
ITERATION:6
0
1
2
3
4
ITERATION:7
0
1
2
3
4
ITERATION:8
0
1
2
3
4
ITERATION:9
0
1
2
3
4
ITERATION:10
0
1
2
3
4
ITERATION:11
0
1
2
3
4
ITERATION:12
0
1
2
3
4
ITERATION:13
0
1
2
3
4
ITERATION:14
0
1
2
3
4
ITERATION:15
0
1
2
3
4
ITERATION:16
0
1
2
3
4
ITERATION:17
0
1
2
3
4
ITERATION:18
0
1
2
3
4
ITERATION:19
0
1
2
3
4
ITERATION:20
0
1
2
3
4
ITERATION:21
0
1
2
3
4
ITERATION:22
0
1
2
3
4
ITERATION:23
0
1
2
3
4
ITERATION:24
0
1
2
3
4
ITERATION:25
0
1
2
3
4
ITERATION:26
0
1
2
3
4
ITERATION:27
0
1
2
3
4
ITERATION:28
0
1
2
3
4
ITERATION:29
0
1
2
3
4
ITERATION:30
0
1
2
3
4
ITERATION:31
0
1
2
3
4
ITERATION:32
0
1
2
3
4
ITERATION:33
0
1
2
3
4
ITERATION:34
0
1
2
3
4
ITERATION:35
0
1
2
3
4
ITERATION:36
0
1
2
3
4
ITERATION:37
0
1
2
3
4
ITERATION:38
0
1
2
3
4
ITERATION:39
0
1
2
3
4
ITERATION:40
0
1
2
3
4
ITERATION:41
0
1
2
3
4
ITERATION:42
0
1
2
3
4
ITERATION:43
0
1
2
3


array([[1, 0, 0, ..., 0, 0, 0],
       [1, 1, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0]])

In [258]:
accuracy = {}
gnb  = GaussianNB()
i = 0
for individual in ga.elities:
    mask = np.array(individual, dtype = bool)
    _X = X[:,mask]
    X_train, X_test, Y_train, Y_test = train_test_split(_X,Y,test_size = 0.2)
    predict = gnb.fit(X_train,Y_train).predict(X_test)
    accuracy[i] = ((Y_test == predict).sum()/Y_test.shape[0])*100 
    i +=1
print(accuracy)

{0: 87.41935483870968, 1: 86.0752688172043, 2: 86.18279569892474, 3: 86.66666666666667, 4: 85.80645161290322, 5: 85.64516129032258}


In [276]:
for i in range(ga.elities.shape[0]):
    print("Individual:{0} , accuracy: {1} , No.of feature:{2}".format(i+1,accuracy[i],np.array(np.where(ga.elities[i] == 1)).shape[1]))

Individual:1 , accuracy: 87.41935483870968 , No.of feature:108
Individual:2 , accuracy: 86.0752688172043 , No.of feature:105
Individual:3 , accuracy: 86.18279569892474 , No.of feature:109
Individual:4 , accuracy: 86.66666666666667 , No.of feature:111
Individual:5 , accuracy: 85.80645161290322 , No.of feature:111
Individual:6 , accuracy: 85.64516129032258 , No.of feature:103


In [277]:
np.savetxt('elities.csv',ga.elities,delimiter = ',')