In [1]:
# Import modules
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
import pandas as pd

# Import PySwarms
import pyswarms as ps
import pyswarms.backend as P
from pyswarms.backend.swarms import Swarm
from pyswarms.backend.topology import Star
from pyswarms.utils.functions import single_obj as fx


# Some more magic so that the notebook will reload external python modules
%load_ext autoreload
%autoreload 2

In [47]:
# Read the input table with header=None so that no line is consumed as a
# header and the columns receive integer labels (later cells slice by them).
source_csv = "Preprocessed_BRCA_RNA_data_normalized.csv"
export_csv = "pso_feature_reduction.csv"

file = pd.read_csv(source_csv, header=None)

# Write a copy of the table under the output name, without the row index.
file.to_csv(export_csv, index=False)

In [38]:
# FORMAT DATA
# Build the numeric matrix X that is handed to the optimizers.
rows = file.shape[0]
cols = file.shape[1] - 2

# .loc slices by label and includes both end points. The table was read with
# header=None, so labels are 0, 1, 2, ...: this keeps row labels from 1 upward
# (label 0, the first line of the CSV, is skipped) and column labels 2..cols,
# i.e. the first two columns and the last column are left out.
# For a quick trial run, slice the columns as 2:5 instead of 2:cols.
new = file.loc[1:rows, 2:cols]  # REAL DATA

# The `np.float` alias was deprecated in NumPy 1.20 and removed in 1.24, where
# it raises AttributeError. The builtin `float` produces the same float64 array.
X = np.array(new, dtype=float)

print(X.shape)


(12, 16381)


In [39]:
# Swarm size and dimensionality are taken from X: one particle per row,
# one dimension per column.
n, dim = X.shape

# Build the swarm pieces by hand with the backend generators; the initial
# positions are supplied directly from X.
init_positions = P.generate_swarm(n_particles=n, dimensions=dim, init_pos=X)
init_velocities = P.generate_velocity(n_particles=n, dimensions=dim)
my_options = dict(c1=0.5, c2=0.3, w=0.9)

# Initialize the swarm
my_swarm = Swarm(
    position=init_positions,
    velocity=init_velocities,
    options=my_options,
)

# Expose the swarm's attributes as notebook-level names.
pos, vel = my_swarm.position, my_swarm.velocity
n_particles = my_swarm.n_particles
pbest, pbest_cost = my_swarm.pbest_pos, my_swarm.pbest_cost
best, best_cost = my_swarm.best_pos, my_swarm.best_cost


In [40]:
# PSO is stochastic; without a fixed seed every Restart & Run All yields a
# different best cost and a different set of selected features.
# Seeding NumPy's global RNG here assumes pyswarms draws its random numbers
# through `np.random` -- confirm for the installed pyswarms version.
GBEST_SEED = 42
np.random.seed(GBEST_SEED)

# Set-up hyperparameters
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}

# Call instance of PSO: one particle per row of X, starting at that row.
optimizer = ps.single.GlobalBestPSO(
    n_particles=n, dimensions=dim, options=options, init_pos=X
)

# Perform optimization. The result is unpacked as (best cost, best position).
# NOTE: this rebinds `pos`, which the manual-swarm cell also assigns.
cost, pos = optimizer.optimize(fx.sphere, iters=100)

2021-04-15 13:17:25,146 - pyswarms.single.global_best - INFO - Optimize for 100 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
pyswarms.single.global_best: 100%|█████████████████████████████████████████████████████████|100/100, best_cost=8.58e+10
2021-04-15 13:17:29,530 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 85788661770.98926, best pos: [   1.06068836    8.63077806   12.3841225  ...    3.30362361    2.51828207
 -104.4223186 ]


In [41]:
# Number of entries in `pos`.
best_pos_len = len(pos)
print(best_pos_len)

16381


In [46]:
# ---------------------------------------------------------------
# Fitness of the global best
# ---------------------------------------------------------------
# One fitness value per entry x of `pos`:
#     cost*(1 - x) + (1 - cost)*(1 - 1/len(pos))
gfitness = [
    cost*(1 - x) + (1 - cost)*(1 - (1/len(pos)))
    for x in pos
]
n_g_total = len(gfitness)
print("Initial number of features: " , n_g_total)

# ---------------------------------------------------------------
# Number of selected features
# ---------------------------------------------------------------
# An entry counts as selected when its fitness is strictly above `cost`.
n_global_feet = [score for score in gfitness if score > cost]
selected_g_feet = len(n_global_feet)
print("Number of selected features: ", selected_g_feet)

percent_g_reduction = ((n_g_total - selected_g_feet)/n_g_total)*100
print("Percent of feature reduction: ", percent_g_reduction)

# ---------------------------------------------------------------
# Selection mask
# ---------------------------------------------------------------
# Same test as above, kept per position: 1 marks a selected entry and the
# empty string marks a rejected one.
global_feet = [1 if score > cost else '' for score in gfitness]


Initial number of features:  16381
Number of selected features:  1052
Percent of feature reduction:  93.57792564556499


In [43]:
# PSO is stochastic; without a fixed seed every Restart & Run All yields a
# different best cost and a different set of selected features.
# Seeding NumPy's global RNG here assumes pyswarms draws its random numbers
# through `np.random` -- confirm for the installed pyswarms version.
LBEST_SEED = 42
np.random.seed(LBEST_SEED)

# Set-up hyperparameters. Besides c1/c2/w, the local-best variant takes
# 'k' and 'p'; see the pyswarms LocalBestPSO documentation for their meaning.
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9, 'k': 3, 'p': 2}

# Call instance of LBestPSO: one particle per row of X, starting at that row.
Loptimizer = ps.single.LocalBestPSO(n_particles=n, dimensions=dim,
                                    options=options, init_pos=X)

# Perform optimization. `stats` is indexed later as stats[0] (cost) and
# stats[1] (position).
stats = Loptimizer.optimize(fx.sphere, iters=100)

2021-04-15 13:17:31,478 - pyswarms.single.local_best - INFO - Optimize for 100 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9, 'k': 3, 'p': 2}
pyswarms.single.local_best: 100%|██████████████████████████████████████████████████████████|100/100, best_cost=9.86e+10
2021-04-15 13:17:39,075 - pyswarms.single.local_best - INFO - Optimization finished | best cost: 98627885485.00345, best pos: [ 1.41728902 14.99185472 10.72049728 ...  1.77322196 13.91631739
 50.63283441]


In [45]:
# ---------------------------------------------------------------
# Fitness of the local best
# ---------------------------------------------------------------
# `stats` holds the optimizer result: cost at index 0, position at index 1.
Lcost, Lpos = stats[0], stats[1]

# One fitness value per entry x of `Lpos`:
#     Lcost*(1 - x) + (1 - Lcost)*(1 - 1/len(Lpos))
lfitness = [
    Lcost*(1 - x) + (1 - Lcost)*(1 - (1/len(Lpos)))
    for x in Lpos
]
n_l_total = len(lfitness)
print("Initial number of features: " , n_l_total)

# ---------------------------------------------------------------
# Number of selected features
# ---------------------------------------------------------------
# An entry counts as selected when its fitness is strictly above `Lcost`.
n_local_feet = [score for score in lfitness if score > Lcost]
selected_l_feet = len(n_local_feet)
print("Number of selected features: ", selected_l_feet)

percent_l_reduction = ((n_l_total - selected_l_feet)/n_l_total)*100
print("Percent of feature reduction: ", percent_l_reduction)

# ---------------------------------------------------------------
# Selection mask
# ---------------------------------------------------------------
# Same test as above, kept per position: 1 marks a selected entry and the
# empty string marks a rejected one.
local_feet = [1 if score > Lcost else '' for score in lfitness]

# Show the mask length and a peek at its first 20 entries.
print(len(local_feet))
print(local_feet[:20])




Initial number of features:  16381
Number of selected features:  216
Percent of feature reduction:  98.68139918197912
16381
['', '', '', '', '', '', '', 1, '', '', '', '', '', '', '', '', '', '', '', '']
