In [29]:
from sklearn.tree import DecisionTreeClassifier
import pandas as pd

# Particles

In [30]:
import numpy as np, random, math

class Particle:
    """One candidate feature subset in a binary particle swarm.

    Attributes:
        position: 0/1 NumPy array of length ``size``; a 1 selects the column
            at that index.
        velocity: float NumPy array of length ``size``, started in [0, 1].
        best: score returned by the most recent ``calculate_best`` call
            (i.e. the fitness of the *current* position).
        currBest: highest score this particle has obtained so far.
        currBestPosition: the position that produced ``currBest``.
        inertiaWeight: weight applied to the previous velocity; advanced by
            ``tent_map``.
    """

    def __init__(self, size):
        """Create a particle with ``size`` random bits and random velocity."""
        self.position = np.array([random.choice((0, 1)) for _ in range(size)])
        self.velocity = np.array([random.uniform(0, 1) for _ in range(size)])
        self.best = 0
        self.currBest = 0
        self.currBestPosition = self.position
        self.inertiaWeight = random.uniform(0, 1)

    def update_velocity(self, c1, c2, particleBestPosition):
        """Recompute every velocity component.

        Args:
            c1: acceleration coefficient applied to the pull towards
                ``particleBestPosition``.
            c2: acceleration coefficient applied to the pull towards this
                particle's own ``currBestPosition``.
            particleBestPosition: position of the best particle supplied by
                the caller (same length as ``self.position``).
        """
        self.velocity = np.array([
            self.calculate_velocity(v, c1, c2, current, swarm_best, own_best)
            for v, current, own_best, swarm_best in zip(
                self.velocity,
                self.position,
                self.currBestPosition,
                particleBestPosition,
            )
        ])

    def update_position(self):
        """Resample each bit: 1 with probability ``sigmoid(velocity)``, else 0."""
        self.position = np.array(
            [(1 if self.sigmoid(v) > random.uniform(0, 1) else 0) for v in self.velocity]
        )

    def calculate_velocity(self, v0, c1, c2, px, pbx, x):
        """Return the new velocity for a single dimension.

        Args:
            v0: previous velocity of this dimension.
            c1: coefficient for the ``pbx`` term.
            c2: coefficient for the ``x`` term.
            px: current bit of this particle.
            pbx: bit of the best position supplied by the caller.
            x: bit of this particle's own best position.

        The attraction terms are ``(best - current)`` so that the velocity
        grows when the best position holds a 1 and the particle holds a 0
        (and shrinks in the opposite case), moving the particle *towards*
        the best positions.
        """
        return (
            self.inertiaWeight * v0
            + c1 * random.uniform(0, 1) * (pbx - px)
            + c2 * random.uniform(0, 1) * (x - px)
        )

    def sigmoid(self, v):
        """Return the logistic function ``1 / (1 + e**-v)``.

        Negative inputs use the algebraically equal form ``e**v / (1 + e**v)``
        so that ``math.exp`` is never called with a large positive argument
        (which raises ``OverflowError``).
        """
        if v < 0:
            e = math.exp(v)
            return e / (1 + e)
        return 1 / (1 + math.exp(-v))

    def calculate_best(self, xtrain, ytrain, xtest, ytest):
        """Score the current position and update the personal best.

        The columns selected by ``position`` are extracted with the
        notebook-level ``dataFrame`` helper, a ``DecisionTreeClassifier`` is
        fitted on the training part and its ``score`` on the test part
        becomes ``self.best``.

        Args:
            xtrain, xtest: feature tables accepted by ``dataFrame``.
            ytrain, ytest: label containers exposing ``.values``.

        Returns:
            The score of the current position. A position that selects no
            column at all scores 0.0 instead of attempting a fit.
        """
        pos = self.position.astype(bool)
        if not pos.any():
            # Nothing selected: there is no feature matrix to fit on.
            score = 0.0
        else:
            x_train = dataFrame(pos, xtrain)
            y_train = ytrain.values
            x_test = dataFrame(pos, xtest)
            y_test = ytest.values
            clf = DecisionTreeClassifier()
            clf.fit(x_train, y_train)
            score = clf.score(x_test, y_test)

        self.best = score
        # Remember the best position seen so far; update_velocity pulls
        # the particle towards it.
        if score > self.currBest:
            self.currBest = score
            self.currBestPosition = self.position.copy()
        return self.best

    def tent_map(self):
        """Advance ``inertiaWeight`` by one step of the piecewise map.

        Below 0.7 the weight is divided by 0.7; otherwise it becomes
        ``(10 / 3) * w * (1 - w)``. Returns the new weight.
        """
        if self.inertiaWeight < 0.7:
            self.inertiaWeight = self.inertiaWeight / 0.7
        else:
            self.inertiaWeight = (10 / 3) * (self.inertiaWeight * (1 - self.inertiaWeight))
        return self.inertiaWeight

# PSO

In [31]:
import random

class PSO:
    """Particle swarm driver that evolves a population of ``Particle`` objects.

    Attributes:
        particleSize: number of bits in every particle.
        populationSize: number of particles in the swarm.
        numIteration: maximum number of iterations run by ``exec``.
        c1, c2: acceleration coefficients forwarded to
            ``Particle.update_velocity``.
        target: score above which ``exec`` stops early.
        particles: the live swarm, re-sorted best-first every iteration.
        iterationBest: frozen copies of the best particle of each iteration.
    """

    def __init__(self, particleSize, populationSize, numIteration, c1, c2, target):
        """Store the hyper-parameters and create the initial random swarm."""
        self.particleSize = particleSize
        self.populationSize = populationSize
        self.numIteration = numIteration
        self.c1 = c1
        self.c2 = c2
        self.target = target
        self.particles = [Particle(self.particleSize) for _ in range(self.populationSize)]
        self.iterationBest = []

    def exec(self, xtrain, ytrain, xtest, ytest):
        """Run the swarm and return the best particle found.

        Every iteration scores all particles, sorts them best-first and
        records the leader. If the leader's score exceeds ``target`` it is
        returned immediately; otherwise all particles move towards the
        leader's position. After ``numIteration`` iterations the recorded
        leader with the highest score is returned.

        Args:
            xtrain, ytrain, xtest, ytest: forwarded unchanged to
                ``Particle.calculate_best``.

        Returns:
            The live leading particle on early exit, otherwise a frozen copy
            of the best leader seen in any iteration.
        """
        # Local import so this cell does not depend on another cell's imports.
        import copy

        for iteration in range(self.numIteration):
            for i in range(self.populationSize):
                print(self.particles[i].position)
                b = self.particles[i].calculate_best(xtrain, ytrain, xtest, ytest)
                print(f"Iter-{iteration} Particle-{i} best: {b}")
                self.particles[i].tent_map()

            self.particles = sorted(self.particles, key=lambda particle: particle.best, reverse=True)
            leader = self.particles[0]
            # Freeze the leader now: the live particle is moved and rescored
            # in later iterations, which would overwrite its score/position.
            snapshot = copy.deepcopy(leader)
            self.iterationBest.append(snapshot)
            print(f"Target: {self.target}")
            print(f"Iteration {iteration} best: {leader.best}")
            if leader.best > self.target:
                return leader

            # Use the frozen position as the attractor; reading
            # self.particles[0].position inside the loop would pick up the
            # leader's *new* position once it has been updated at i == 0.
            for i in range(self.populationSize):
                self.particles[i].update_velocity(self.c1, self.c2, snapshot.position)
                self.particles[i].update_position()

        self.iterationBest = sorted(self.iterationBest, key=lambda particle: particle.best, reverse=True)
        return self.iterationBest[0]
In [32]:
def dataFrame(pos,df):
    """Return the columns of ``df`` flagged by ``pos`` as a NumPy array.

    Args:
        pos: per-column selector; it is compared element-wise with ``True``
            and the result is used as a column mask.
        df: pandas DataFrame to slice.

    Returns:
        ``.values`` of the sliced frame (all rows, selected columns only).
    """
    column_mask = pos == True  # element-wise comparison, not an identity test
    selected = df.loc[:, column_mask]
    return selected.values

In [56]:
def optimize_model(xtrain, ytrain, xtest, ytest,
                   popSize=None, numIteration=2, c1=2, c2=2, target=0.98):
    """Run one PSO feature-selection search and return its best particle.

    Args:
        xtrain, ytrain, xtest, ytest: data forwarded to ``PSO.exec``;
            ``xtrain.shape`` determines the particle size.
        popSize: number of particles. ``None`` (default) keeps the original
            behaviour of using ``xtrain.shape[0]``.
        numIteration: maximum number of PSO iterations.
        c1, c2: acceleration coefficients passed to ``PSO``.
        target: score above which the search stops early.

    Returns:
        A one-element list holding the particle returned by ``PSO.exec``.
    """
    particleSize = xtrain.shape[1]  # one bit per feature column
    if popSize is None:
        popSize = xtrain.shape[0]

    pso = PSO(particleSize, popSize, numIteration, c1, c2, target)
    bestParticle = pso.exec(xtrain, ytrain, xtest, ytest)
    print(bestParticle)

    return [bestParticle]



In [34]:
# Read the four comma-separated input files from the working directory.
xtrain = pd.read_csv('xtrain.csv', sep=',')
ytrain = pd.read_csv('ytrain.csv', sep=',')
xtest = pd.read_csv('xtest.csv', sep=',')
ytest = pd.read_csv('ytest.csv', sep=',')

# Display (rows, columns) of the training features as the cell output.
xtrain.shape

(38, 7130)

In [35]:
# Shapes of the four tables, in the order xtrain, xtest, ytrain, ytest.
tuple(table.shape for table in (xtrain, xtest, ytrain, ytest))

((38, 7130), (34, 7130), (38, 3), (34, 3))

In [36]:
# Discard the first column of both feature tables (presumably a saved
# index/ID column - confirm against the CSV files).
# NOTE: the names are rebound in place, so re-running this cell removes one
# more column each time.
xtrain, xtest = xtrain.iloc[:, 1:], xtest.iloc[:, 1:]
(xtrain.shape, xtest.shape)

((38, 7129), (34, 7129))

In [37]:
# Keep only the 'cancer' column and encode its labels as integers
# (ALL -> 0, AML -> 1).
# NOTE: the names are rebound in place, so this cell is meant to be run once
# after loading.
ytrain = ytrain['cancer'].replace({'ALL': 0, 'AML': 1})
ytest = ytest['cancer'].replace({'ALL': 0, 'AML': 1})

In [38]:
# Shapes after preprocessing, in the order xtrain, xtest, ytrain, ytest.
tuple(table.shape for table in (xtrain, xtest, ytrain, ytest))

((38, 7129), (34, 7129), (38,), (34,))

In [57]:
# Run the PSO search on the prepared data and print the returned list.
if __name__ == '__main__':
    resu = optimize_model(xtrain, ytrain, xtest, ytest)
    print(resu)

[0 0 0 ... 0 1 0]
Iter-0 Particle-0 best: 0.6176470588235294
[0 0 0 ... 0 0 1]
Iter-0 Particle-1 best: 0.5882352941176471
[0 0 0 ... 1 0 1]
Iter-0 Particle-2 best: 0.6176470588235294
[0 1 1 ... 0 0 0]
Iter-0 Particle-3 best: 0.5882352941176471
[1 0 0 ... 0 0 1]
Iter-0 Particle-4 best: 0.5882352941176471
[1 0 1 ... 1 0 1]
Iter-0 Particle-5 best: 0.6176470588235294
[0 1 0 ... 1 1 1]
Iter-0 Particle-6 best: 0.6176470588235294
[1 1 0 ... 0 1 0]
Iter-0 Particle-7 best: 0.6176470588235294
[1 0 0 ... 1 1 0]
Iter-0 Particle-8 best: 0.6176470588235294
[1 1 0 ... 1 0 1]
Iter-0 Particle-9 best: 0.6176470588235294
[0 0 1 ... 1 0 1]
Iter-0 Particle-10 best: 0.6176470588235294
[1 1 0 ... 1 0 0]
Iter-0 Particle-11 best: 0.5882352941176471
[0 1 0 ... 0 0 0]
Iter-0 Particle-12 best: 0.47058823529411764
[0 0 0 ... 0 0 0]
Iter-0 Particle-13 best: 0.5882352941176471
[1 0 1 ... 0 0 0]
Iter-0 Particle-14 best: 0.6470588235294118
[0 1 0 ... 0 0 0]
Iter-0 Particle-15 best: 0.6176470588235294
[0 1 1 ... 0 1 0]

In [50]:
# Display the list returned by optimize_model (it holds the best Particle).
resu


[<__main__.Particle at 0x1ad9f6cb828>]