In [1]:
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
clf = DecisionTreeClassifier()

# Particles

In [2]:
import numpy as np, random, math

class Particle:

        def __init__(self, size):
            self.position = np.array([random.choice((0, 1)) for _ in range(size)])
            self.velocity = np.array([random.uniform(0, 1) for _ in range(size)])
            self.best = 0
            self.currBest = 0
            self.currBestPosition = self.position
            self.inertiaWeight = random.uniform(0, 1)

        def update_velocity(self, c1, c2, particleBestPosition):
            self.velocity = np.array([self.calculate_velocity(v, c1, c2, px, pbx, x) for v, px, x, pbx in zip(self.velocity, self.position, self.currBestPosition, particleBestPosition)])

        def update_position(self,particleBestPosition):
            self.position = np.array([(1 if self.sigmoid(v) > random.uniform(0, 1) else 0) for v in self.velocity])

        def calculate_velocity(self, v0, c1, c2, px, pbx, x):
            return self.inertiaWeight * v0 + c1 * random.uniform(0, 1) * (-(px - pbx)) + c2 * random.uniform(0, 1) * (-(px - x))

        def sigmoid(self, v):
            if v < 0:
                return 1 - (1 / (1 + math.exp(-v)))
            return 1 / (1 + math.exp(-v))

#         def calculate_best(self, train, test):
#             pos = self.position.astype(bool)
# #             tfidf = TFIDF(train["Review"])
# #             tfidf.weights = tfidf.remove_zero_tfidf(tfidf.weights, 0.5)
# #             tfidf.termIndex = {key:val for i, (key, val) in enumerate(tfidf.termIndex.items()) if pos[i] == True}
# #             print(f"Selected attributes: {len(tfidf.termIndex)}")
#             clf = C45(tfidf, train)
#             clf.train()
#             self.best = clf.score(tfidf, test)
#             return self.best
        def calculate_best(self, xtrain,ytrain, xtest, ytest):
            pos = self.position.astype(bool)
            x_train=dataFrame(pos,xtrain)
            y_train=ytrain.values
            x_test=dataFrame(pos,xtest)
            y_test=ytest.values
            #clf = DecisionTreeClassifier()
            clf.fit(x_train,y_train)
            self.best = clf.score(x_test,y_test)
            return self.best

        def tent_map(self):
            if self.inertiaWeight < 0.7:
                self.inertiaWeight = self.inertiaWeight / 0.7
            else:
                self.inertiaWeight = (10 / 3) * (self.inertiaWeight * (1 - self.inertiaWeight))
            return self.inertiaWeight
        def __repr__(self):
            return '<%s.%s object at %s>' % (
                self.__class__.__module__,
                self.__class__.__name__,
                hex(id(self))
            )

# PSO

In [3]:
import random

class PSO:
        def __init__(self, particleSize, populationSize, numIteration, c1, c2, target):
            self.particleSize = particleSize
            self.populationSize = populationSize
            self.numIteration = numIteration
            self.c1 = c1
            self.c2 = c2
            self.target = target
            self.particles = [Particle(self.particleSize) for _ in range(self.populationSize)]
            self.iterationBest = []

        def exec(self, xtrain, ytrain, xtest, ytest):
            for _ in range(self.numIteration):
                for i in range(self.populationSize):
                    print(self.particles[i].position)
                    b = self.particles[i].calculate_best(xtrain, ytrain, xtest, ytest)
                    print(f"Iter-{_} Particle-{i} best: {b}")
                    self.particles[i].tent_map()

                self.particles = sorted(self.particles, key=lambda particle: particle.best, reverse=True)
                self.iterationBest.append(self.particles[0])
                #print("party123",self.particles[0].currBestPosition)
                print(f"Target: {self.target}")
                print(f"Iteration {_} best: {self.particles[0].best}")
                if self.particles[0].best > self.target:
                    return self.particles[0]

                for i in range(self.populationSize):
                    self.particles[i].update_velocity(self.c1, self.c2, self.particles[0].position)
                    self.particles[i].update_position(self.particles[0].position)
            self.iterationBest = sorted(self.iterationBest, key=lambda particle: particle.best, reverse=True)
            #scoring(self.iterationBest[0].position)
            #scoring(self.iterationBest[0].currBestPosition)
            return self.iterationBest[0]

In [4]:
def dataFrame(pos,df):
    df=df.loc[:,pos==True]
    return df.values

In [5]:
def optimize_model(xtrain,ytrain, xtest, ytest):
    results = []
  
    particleSize = xtrain.shape[1]
    popSize=xtrain.shape[0]
    numIteration=10
    c1=2
    c2=2
    target=0.98
    pso = PSO(particleSize, popSize, numIteration, c1, c2,target)
    bestParticle = pso.exec(xtrain,ytrain, xtest, ytest)
    
    results.append(bestParticle)

    return bestParticle


In [6]:
xtrain=pd.read_csv('xtrain.csv',delimiter=',')
ytrain=pd.read_csv('ytrain.csv',delimiter=',')
xtest=pd.read_csv('xtest.csv',delimiter=',')
ytest=pd.read_csv('ytest.csv',delimiter=',')
xtrain.shape
#xtest.head()
#ytrain

(38, 7130)

In [7]:
xtrain.shape,xtest.shape,ytrain.shape,ytest.shape

((38, 7130), (34, 7130), (38, 3), (34, 3))

In [8]:
xtrain=xtrain.iloc[:,1:]
xtest=xtest.iloc[:,1:]
xtrain.shape,xtest.shape

((38, 7129), (34, 7129))

In [9]:
ytrain=ytrain['cancer']
ytrain = ytrain.replace({'ALL':0,'AML':1})
ytest=ytest['cancer']
ytest = ytest.replace({'ALL':0,'AML':1})

In [10]:
xtrain.shape,xtest.shape,ytrain.shape,ytest.shape

((38, 7129), (34, 7129), (38,), (34,))

In [11]:
resu=optimize_model(xtrain,ytrain, xtest, ytest)


[1 0 0 ... 1 1 1]
Iter-0 Particle-0 best: 0.6470588235294118
[0 1 1 ... 0 1 1]
Iter-0 Particle-1 best: 0.6470588235294118
[0 1 0 ... 1 1 0]
Iter-0 Particle-2 best: 0.6176470588235294
[0 0 0 ... 0 1 0]
Iter-0 Particle-3 best: 0.5588235294117647
[1 0 1 ... 1 0 1]
Iter-0 Particle-4 best: 0.6176470588235294
[0 1 1 ... 0 1 0]
Iter-0 Particle-5 best: 0.6176470588235294
[1 0 1 ... 1 1 1]
Iter-0 Particle-6 best: 0.6176470588235294
[0 1 0 ... 0 0 0]
Iter-0 Particle-7 best: 0.6176470588235294
[0 0 1 ... 1 1 1]
Iter-0 Particle-8 best: 0.6176470588235294
[0 0 1 ... 1 0 1]
Iter-0 Particle-9 best: 0.6176470588235294
[1 1 1 ... 1 0 1]
Iter-0 Particle-10 best: 0.6176470588235294
[1 1 1 ... 0 1 0]
Iter-0 Particle-11 best: 0.5882352941176471
[0 1 1 ... 1 1 1]
Iter-0 Particle-12 best: 0.6176470588235294
[1 1 1 ... 0 1 1]
Iter-0 Particle-13 best: 0.6176470588235294
[0 1 1 ... 0 0 1]
Iter-0 Particle-14 best: 0.5882352941176471
[1 1 1 ... 0 1 1]
Iter-0 Particle-15 best: 0.6176470588235294
[1 1 0 ... 0 1 0]


Iter-3 Particle-20 best: 0.5588235294117647
[1 1 0 ... 1 1 1]
Iter-3 Particle-21 best: 0.6176470588235294
[0 0 0 ... 1 1 1]
Iter-3 Particle-22 best: 0.6176470588235294
[0 0 0 ... 1 0 0]
Iter-3 Particle-23 best: 0.6470588235294118
[1 0 1 ... 1 1 1]
Iter-3 Particle-24 best: 0.6176470588235294
[1 1 0 ... 1 1 1]
Iter-3 Particle-25 best: 0.6176470588235294
[1 0 0 ... 0 0 1]
Iter-3 Particle-26 best: 0.6176470588235294
[1 1 1 ... 1 0 1]
Iter-3 Particle-27 best: 0.6176470588235294
[1 1 1 ... 1 1 1]
Iter-3 Particle-28 best: 0.6176470588235294
[1 0 0 ... 1 1 1]
Iter-3 Particle-29 best: 0.6176470588235294
[1 0 1 ... 1 1 1]
Iter-3 Particle-30 best: 0.6176470588235294
[0 1 1 ... 0 1 1]
Iter-3 Particle-31 best: 0.6176470588235294
[1 1 1 ... 0 1 1]
Iter-3 Particle-32 best: 0.6176470588235294
[1 1 1 ... 1 1 0]
Iter-3 Particle-33 best: 0.6176470588235294
[1 1 1 ... 1 0 0]
Iter-3 Particle-34 best: 0.6176470588235294
[1 1 1 ... 0 0 1]
Iter-3 Particle-35 best: 0.6176470588235294
[0 0 0 ... 1 1 1]
Iter-3 P

[1 0 0 ... 1 0 0]
Iter-7 Particle-0 best: 0.6176470588235294
[1 0 1 ... 0 1 1]
Iter-7 Particle-1 best: 0.6176470588235294
[1 1 1 ... 1 1 0]
Iter-7 Particle-2 best: 0.6176470588235294
[0 0 1 ... 1 1 1]
Iter-7 Particle-3 best: 0.6176470588235294
[1 0 1 ... 1 0 1]
Iter-7 Particle-4 best: 0.6176470588235294
[0 1 0 ... 0 1 1]
Iter-7 Particle-5 best: 0.6176470588235294
[1 1 1 ... 1 1 1]
Iter-7 Particle-6 best: 0.6176470588235294
[1 0 1 ... 1 1 1]
Iter-7 Particle-7 best: 0.6176470588235294
[1 1 1 ... 1 1 0]
Iter-7 Particle-8 best: 0.6176470588235294
[0 1 0 ... 1 0 1]
Iter-7 Particle-9 best: 0.5882352941176471
[1 1 1 ... 1 0 0]
Iter-7 Particle-10 best: 0.6176470588235294
[1 0 1 ... 1 1 1]
Iter-7 Particle-11 best: 0.6176470588235294
[0 1 1 ... 1 1 0]
Iter-7 Particle-12 best: 0.6176470588235294
[1 1 1 ... 0 1 1]
Iter-7 Particle-13 best: 0.6176470588235294
[1 0 1 ... 1 1 0]
Iter-7 Particle-14 best: 0.6176470588235294
[1 0 1 ... 1 1 1]
Iter-7 Particle-15 best: 0.6176470588235294
[1 1 1 ... 1 1 1]


In [12]:
print("hello",resu.currBestPosition)
print("hello",resu.position)


hello [1 1 1 ... 1 1 1]
hello [0 1 1 ... 1 0 1]


In [13]:
pos = resu.position.astype(bool)
x_train=dataFrame(pos,xtrain)
x_test=dataFrame(pos,xtest)
y_train=ytrain.values
y_test=ytest.values

print(x_train.shape,x_test.shape,y_train.shape,y_test.shape)
#clf = DecisionTreeClassifier()
clf.fit(x_train,y_train)
sc = clf.score(x_test,y_test)
sc

(38, 4429) (34, 4429) (38,) (34,)


0.6176470588235294

In [14]:
#note-currentBestposition is optimal as it's value is ranging from 58% to 67% but in position it 
#is varying in vast range from 47% to 67%
pos = resu.currBestPosition.astype(bool)
x_train=dataFrame(pos,xtrain)
x_test=dataFrame(pos,xtest)
y_train=ytrain.values
y_test=ytest.values
print(x_train.shape,x_test.shape,y_train.shape,y_test.shape)
#clf = DecisionTreeClassifier()
clf.fit(x_train,y_train)
sc = clf.score(x_test,y_test)
sc

(38, 3615) (34, 3615) (38,) (34,)


0.6470588235294118

0.33292443482016887