# K-MCI
### Intro
In this notebook we have the main algorithm of the article. All necessary functions are implemented in function.py file.<br>
This document have blow sections:<br>
1. Importing Libraries & classes
2. Reading datasets
3. Preprocessing datasets
4. Main Algorithm

### Importing Libraries & classes
We import some libraries from third party.

In [2]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import random
import copy
from sklearn import preprocessing

In [3]:
# defnition of Candidate class
class Candidate:    
    def __init__(self,nodes,sampling_interval,fitness=-1):
        self.nodes = nodes
        self.sampling_interval = sampling_interval 
        self.variation = []
        self.random_number
        self.fitness = fitness
        self.centers = []
        self.probability = []
        self.labels = []
        self.each_fitness = []
        
    def random_number(self):
        self.random_number = random.random()
        
    def describe(self):
        print("Candidate with #{} nodes, #{} centers, fitness={}\n clusters:\n{} \n Centers:\n{}\n sampling_interval:\n{}"
              .format(len(self.nodes),len(self.centers),self.fitness,self.nodes[np.random.randint(self.nodes.shape[0], size=5), :],self.centers,self.sampling_interval))

### Reading Datasets and Preprocessing
We show the structure of our data for all of datasets.<br>
We have these datasets from UCI Machine Learning Repository:
1. Breast Cancer Wisconsin
2. Contraceptive Method Choice data
3. Glass
4. Iris
5. Vowel
6. Wine

In [4]:
""" In this cell we have some code which will do preprocessing for us on all datasets """
""" the resourses of these datasets are mentioned in README.md file. """

path = 'datasets/' 
# because we are using local files, you need to download datasets and change the "path" variable
# local folder of your downloaded datasets


def bcw(): 
    # importing dataset
    dataset = pd.read_csv(path+'bcw.csv',
                          names = ['Sample code number','Clump Thickness','Uniformity of Cell Size',
                                   'Uniformity of Cell Shape','Marginal Adhesion','Single Epithelial',
                                   'Bare Nuclei','Bland Chromatin','Normal Nucleoli','Mitoses','Class'])
    dataset = dataset.drop(dataset.columns[0], axis=1)
    dataset= dataset.replace(to_replace='?',value=np.nan)
    dataset = dataset.dropna(axis =0) # resolving missing values
    dataset = dataset.astype('int64') # casting string valued column to int64
    x = dataset.iloc[:,:-1].values # features
    y = dataset.iloc[:,-1].values # target values
    return (x,y,dataset)


def cmc():
    # importing dataset
    dataset = pd.read_csv(path+'cmc.csv', names=["Wife's age","Wife's education","Husband's education",
                                                 "Number of children ever born","Wife's religion",
                                                 "Wife's now working?","Husband's occupation",
                                                 "Standard-of-living index","Media exposure","Contraceptive method used"])
    x = dataset.iloc[:,:-1].values # features
    y = dataset.iloc[:,-1].values # target values
    return (x,y,dataset)


def glass():
    # importing dataset
    dataset = pd.read_csv(path+'glass.csv', names= ['Id','refractive index','Sodium','Magnesium','Aluminum','Silicon','Potassium'
                                                    ,'Calcium','Barium','Iron','glass'])
    x = dataset.iloc[:,1:10].values # features
    y = dataset.iloc[:,-1].values # target values
    return (x,y,dataset)


def iris():
    # importing dataset
    dataset = pd.read_csv(path+'iris.csv', names= ['sepal length','sepal width','petal length','petal width','class'])
    x = dataset.iloc[:,:-1].values # features
    y = dataset.iloc[:,-1].values # target values
    
    # encoding categorial data types to labelEncoder
    from sklearn.preprocessing import LabelEncoder
    labelencoder_y = LabelEncoder()
    labelencoder_y = labelencoder_y.fit(y)
    y = labelencoder_y.transform(y)  # 0 for 'Iris-setosa', 1 for 'Iris-versicolor', 2 for 'Iris-virginica'
    return (x,y,dataset)


def vowel():
    # importing dataset
    dataset = pd.read_csv(path+'vowel.csv', names= ['vowel','type 1 frq','type 2 frq','type 3 frq'])
    x = dataset.iloc[:,1:].values # features
    y = dataset.iloc[:,0].values # target values
    return (x,y,dataset)

def wine():
    # importing dataset
    dataset = pd.read_csv(path+'wine.csv', names= ['class','Alcohol','Malic acid','Ash','Alcalinity of ash','Magnesium','Total phenols','Flavanoids','Nonflavanoid phenols','Proanthocyanins','Color intensity','Hue','OD280/OD315','Proline'])
    x = dataset.iloc[:,1:].values # features
    y = dataset.iloc[:,0].values # target values
    return (x,y,dataset)

In [5]:
# Getting data from our preprocessing class Datasets
x_bcw, y_bcw, df_bcw = bcw()
x_cmc, y_cmc, df_cmc = cmc()
x_glass, y_glass, df_glass = glass()
x_iris, y_iris, df_iris = iris()
x_vowel, y_vowel, df_vowel = vowel()
x_wine, y_wine, df_wine = wine()

In [6]:
# Showing dataset of Breast Cancer Wisconsin
df_bcw.head(3)

Unnamed: 0,Clump Thickness,Uniformity of Cell Size,Uniformity of Cell Shape,Marginal Adhesion,Single Epithelial,Bare Nuclei,Bland Chromatin,Normal Nucleoli,Mitoses,Class
0,5,1,1,1,2,1,3,1,1,2
1,5,4,4,5,7,10,3,2,1,2
2,3,1,1,1,2,2,3,1,1,2


In [7]:
# Showing dataset of Contraceptive Method Choice data
df_cmc.head(3)

Unnamed: 0,Wife's age,Wife's education,Husband's education,Number of children ever born,Wife's religion,Wife's now working?,Husband's occupation,Standard-of-living index,Media exposure,Contraceptive method used
0,24,2,3,3,1,1,2,3,0,1
1,45,1,3,10,1,1,3,4,0,1
2,43,2,3,7,1,1,3,4,0,1


In [8]:
# Showing dataset of Glass
df_glass.head(3)

Unnamed: 0,Id,refractive index,Sodium,Magnesium,Aluminum,Silicon,Potassium,Calcium,Barium,Iron,glass
0,1,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,2,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,3,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1


In [9]:
# Showing dataset of Iris
df_iris.head(3)

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa


In [10]:
# Showing dataset of Vowel
df_vowel.head(3)

Unnamed: 0,vowel,type 1 frq,type 2 frq,type 3 frq
0,1,700,1500,2600
1,1,550,1550,2400
2,1,700,1500,2600


In [11]:
# Showing dataset of Wine
df_wine.head(3)

Unnamed: 0,class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315,Proline
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185


### Standardization
In this cell, we standard our data using this equation: <img src='http://3.bp.blogspot.com/_xqXlcaQiGRk/RpO4yR0oKtI/AAAAAAAAABM/7rUWCMwizus/s200/fig2.png'></img>

In [12]:
x_bcw = preprocessing.scale(x_bcw)
x_wine = preprocessing.scale(x_wine)
x_cmc = preprocessing.scale(x_cmc)
#x_glass = preprocessing.scale(x_glass)
x_iris = preprocessing.scale(x_iris)
x_vowel = preprocessing.scale(x_vowel)



# Main Algorithm 

The implementation of main algorithm have below sections:
1. <Strong>Initializing Parameters and Candidates</Strong>
    1. Sampling Interval
    2. Apply Random Centers to Candidates
    3. Visualizing Candidates
2. <Strong>Running Kmean to Exploit</Strong>
    1. K-means Algorithm
    2. Visualizing Candidates
3. <Strong>Mutation Algorithm</Strong>
    1. Fitness over Mutated Candidate
    2. Probablility Equation
    3. Mutation Logic

**Extra details will be explain in other next sections**


## Initializing Parameters and Candidates
In this step, we initialize our algorithm. There are some parameters which are important to convergance speed and quality of solution. So we initialize them here based on the paper.<br>
Each time you want to run K-MCI algorithm, you should first do this initialization.<br>

In [13]:
# notice: this values related to IRIS dataset
candidates_array = [] # the array of all candidates
number_of_candidates = 5
sampling_interval_reduction_factor = 0.95
convergence_parameter = None # what is this in paper???
mutation_random = 0.7
iterations_count = 3500
variations_count = 15
number_of_clusters = 6 # in this article number of clusters are predefined

## Sampling Interval
this function use for making sampling_interval


In [14]:
def sampling_intervals(input_array):
    Maxes = np.amax(input_array,axis=0)
    Mins  = np.amin(input_array,axis=0)
    return (Maxes,Mins)

In [15]:
# initializing candidates
for i in range(0 ,number_of_candidates):
    candidates_array.append(Candidate(x_glass,sampling_interval = sampling_intervals(x_glass)))

## Visualizing Candidates
some parameters are uninitialized. They will have value after running Kmean and Cohort and mutations.

In [16]:
candidates_array[random.randint(0,number_of_candidates-1)].describe()

Candidate with #214 nodes, #0 centers, fitness=-1
 clusters:
[[ 1.52058 12.85     1.61     2.17    72.18     0.76     9.7      0.24
   0.51   ]
 [ 1.51892 13.46     3.83     1.26    72.55     0.57     8.21     0.
   0.14   ]
 [ 1.51898 13.58     3.35     1.23    72.08     0.59     8.91     0.
   0.     ]
 [ 1.51776 13.53     3.41     1.52    72.04     0.58     8.79     0.
   0.     ]
 [ 1.51892 13.46     3.83     1.26    72.55     0.57     8.21     0.
   0.14   ]] 
 Centers:
[]
 sampling_interval:
(array([ 1.53393, 17.38   ,  4.49   ,  3.5    , 75.41   ,  6.21   ,
       16.19   ,  3.15   ,  0.51   ]), array([ 1.51115, 10.73   ,  0.     ,  0.29   , 69.81   ,  0.     ,
        5.43   ,  0.     ,  0.     ]))


## Apply Random Centers to Candidates
In this step, we just select some random nodes as centers with psuedorandom functions in python native library.

In [17]:
for c in range(0,number_of_candidates):
    candidates_array[c].centers =  np.asarray(random.sample(list(candidates_array[c].nodes),number_of_clusters))
    print(candidates_array[c].centers)

[[ 1.51727 14.7      0.       2.34    73.28     0.       8.95     0.66
   0.     ]
 [ 1.51652 13.56     3.57     1.47    72.45     0.64     7.96     0.
   0.     ]
 [ 1.51832 13.33     3.34     1.54    72.14     0.56     8.99     0.
   0.     ]
 [ 1.51839 12.85     3.67     1.24    72.57     0.62     8.68     0.
   0.35   ]
 [ 1.51796 13.5      3.36     1.63    71.94     0.57     8.81     0.
   0.09   ]
 [ 1.51824 12.87     3.48     1.29    72.95     0.6      8.43     0.
   0.     ]]
[[ 1.51937 13.79     2.41     1.19    72.76     0.       9.77     0.
   0.     ]
 [ 1.51613 13.88     1.78     1.79    73.1      0.       8.67     0.76
   0.     ]
 [ 1.52213 14.21     3.82     0.47    71.77     0.11     9.57     0.
   0.     ]
 [ 1.51727 14.7      0.       2.34    73.28     0.       8.95     0.66
   0.     ]
 [ 1.5164  14.37     0.       2.74    72.85     0.       9.45     0.54
   0.     ]
 [ 1.51618 13.01     3.5      1.48    72.89     0.6      8.12     0.
   0.     ]]
[[ 1.51514 14.85  

## K-means Algorithm 
this function fitting k-means model on dataset

In [18]:
def doKmeans(x,clusters_count,init_centers):
    kmeans = KMeans(n_clusters = clusters_count, init = init_centers,n_init = 50)
    kmeans = kmeans.fit(x)
    labels = kmeans.labels_
    return (labels,kmeans.cluster_centers_,kmeans.inertia_)


## Running Kmean to Exploit
Here by running Kmean on all of our candidates, they centers initialized and nodes assigns to clusters based on Kmean++ algorithm.<br>
The objective function of Kmean is also the object function of K-MCI. <br>
So the fitness of any candidates is calculated using Kmean inertia or mean square error of nodes to their centers distances.

In [19]:

for c in range(0,number_of_candidates):
    y_predict_temp,centers_temp,fitness_temp = doKmeans(candidates_array[c].nodes,clusters_count = number_of_clusters,init_centers = candidates_array[c].centers)
    candidates_array[c].centers = centers_temp
    candidates_array[c].fitness = fitness_temp
    candidates_array[c].labels   = y_predict_temp
    
candidates_array[2].labels

  return_n_iter=True)


array([5, 5, 5, 5, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 1, 5, 3, 3, 1,
       3, 3, 5, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 3, 1, 1, 3, 3, 3, 1,
       3, 5, 3, 1, 1, 5, 1, 5, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 1, 1, 1, 1,
       1, 1, 1, 1, 5, 5, 3, 5, 3, 3, 5, 3, 5, 3, 3, 3, 5, 3, 5, 5, 3, 5,
       3, 3, 5, 3, 3, 3, 3, 5, 5, 3, 3, 3, 3, 3, 3, 1, 1, 4, 4, 4, 4, 0,
       4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 5, 5, 3, 3, 1, 1, 1, 4, 4,
       5, 5, 5, 5, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 5, 5, 5, 3, 5, 1, 5, 5,
       3, 3, 5, 1, 5, 5, 5, 5, 1, 2, 1, 4, 4, 4, 0, 4, 4, 2, 2, 4, 1, 4,
       1, 1, 1, 1, 0, 0, 0, 0, 0, 2, 2, 5, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## Visualizing Candidates
In this step, by Kmeans, we assigned centers to each candidates and calculated the fitness of each candidate using inertia

In [20]:
candidates_array[random.randint(0,number_of_candidates-1)].describe()

Candidate with #214 nodes, #6 centers, fitness=379.1282755933587
 clusters:
[[1.51852e+00 1.40900e+01 2.19000e+00 1.66000e+00 7.26700e+01 0.00000e+00
  9.32000e+00 0.00000e+00 0.00000e+00]
 [1.51735e+00 1.30200e+01 3.54000e+00 1.69000e+00 7.27300e+01 5.40000e-01
  8.44000e+00 0.00000e+00 7.00000e-02]
 [1.51593e+00 1.32500e+01 3.45000e+00 1.43000e+00 7.31700e+01 6.10000e-01
  7.86000e+00 0.00000e+00 0.00000e+00]
 [1.51670e+00 1.32400e+01 3.57000e+00 1.38000e+00 7.27000e+01 5.60000e-01
  8.44000e+00 0.00000e+00 1.00000e-01]
 [1.51811e+00 1.29600e+01 2.96000e+00 1.43000e+00 7.29200e+01 6.00000e-01
  8.79000e+00 1.40000e-01 0.00000e+00]] 
 Centers:
[[1.52352895e+00 1.27352632e+01 2.59473684e-01 1.32210526e+00
  7.25357895e+01 2.56315789e-01 1.25184211e+01 1.65789474e-01
  6.94736842e-02]
 [1.51632464e+00 1.45125000e+01 1.53571429e-01 2.08500000e+00
  7.34007143e+01 1.96785714e-01 8.64964286e+00 9.16428571e-01
  1.39285714e-02]
 [1.52165781e+00 1.38634375e+01 3.11437500e+00 1.07187500e+00
 

In [21]:
sum_of_squares(candidates_array[0].nodes,candidates_array[0].centers,candidates_array[0].labels)

379.1282755933587

## Fitness over Mutated Candidate
Just simple sum of square of distances

In [22]:
def sum_of_squares(data, centroids, labels):
    sqe = 0
    for l in np.unique(labels):
        data_l = data[labels == l]
        resid = data_l - centroids[l]
        sqe += (resid**2).sum()
    return sqe

### Mean Square Distance to a Specific Center
This function is exactly based on "sum_of_squares" function. But here all centers not engaged because we want to find the sum of square distances of nodes of a cluster to their cluster center.<br>
The idea is when mutation process done, at least one dimension may get better center but the other goes bad. So we want to have that center which is better to the previous time.

In [23]:
def center_dist(data, center, labels, center_position, center_cluster):
    sqe = 0
    for l in range(0,len(labels)):
        if center_cluster == labels[l] :
            data_l = data[l][center_position]
            resid = data_l - center
            sqe += (resid**2)
    return sqe

In [24]:
def each_fitness(candidates_array_example):
    fitness_of_each_centers = []
    for i in range(0,len(candidates_array_example.centers)):
        Second_layer = []
        for j in range(0,len(candidates_array_example.centers[i])):
            Second_layer.append(center_dist(candidates_array_example.nodes,candidates_array_example.centers[i][j],candidates_array_example.labels,j,i))
        fitness_of_each_centers.append(Second_layer)
    return np.array(fitness_of_each_centers)
print(each_fitness(candidates_array[0]))

[[3.36024579e-04 2.77644737e+01 6.30829474e+00 4.45571579e+00
  2.41770632e+01 1.06804211e+00 4.53240526e+01 9.40026316e+00
  2.54694737e-01]
 [7.66932964e-05 2.09117250e+01 6.14404286e+00 7.62950000e+00
  1.19303857e+01 9.17281071e+00 1.65754964e+01 1.33504429e+01
  2.46678571e-02]
 [1.22133147e-04 1.22829219e+01 2.16421875e+01 7.40928750e+00
  1.11758219e+01 1.42498750e+00 9.22562187e+00 2.80302187e+00
  4.48571875e-01]
 [1.73468684e-04 5.59073793e+00 2.79664828e+00 3.41838793e+00
  6.72971034e+00 3.53945000e+00 1.85716086e+01 1.84784138e+00
  4.22615517e-01]
 [1.80306750e-05 1.38227500e+00 8.98910000e+00 8.48300000e-01
  9.49400000e-01 2.15538000e+01 1.24687500e+00 3.82967500e+00
  0.00000000e+00]
 [7.47750219e-05 4.24982192e+00 4.53400548e+00 3.38541096e+00
  4.04053699e+00 6.26865753e-01 8.84894795e+00 4.82109589e-02
  7.73156164e-01]]


# test paper own answer
## result:
**paper has wrong answer and numbers**

In [25]:
def just_test(data,center):
    temp1 = np.subtract(center,data)
    temp1 = np.power(temp1, 2)
    sqe = np.sum(temp1)
    return sqe


center =[[1.52434 , 12.03344 , 0.01215 , 1.12869 , 71.98256 , 0.19252 , 14.34306 , 0.23039 , 0.15156],
         [1.51956 , 13.25068 , 0.45229 , 1.53305 , 73.01401 , 0.38472 , 11.15803 , 0.00433 , 0.06599],
         [1.51362 , 13.15690 , 0.65548 , 3.13123 , 70.50411 , 5.33024 , 6.73773  , 0.67322 , 0.01490],
         [1.52132 , 13.74692 , 3.51952 , 1.01524 , 71.89517 , 0.21094 , 9.44764  , 0.03588 , 0.04680],
         [1.51933 , 13.08412 , 3.52765 , 1.36555 , 72.85826 , 0.57913 , 8.36271  , 0.00837 , 0.06182],
         [1.51567 , 14.65825 , 0.06326 , 2.21016 , 73.25324 , 0.02744 , 8.68548  , 1.02698 , 0.00382]]

label = []
for i in x_glass:
    test = []
    for j in center:
        test.append(just_test(i,j))
    label.append(np.argmin(test))
    
print(label)    
print(sum_of_squares(x_glass,center,label))
print(candidates_array[0].fitness)

[3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 3, 3, 4, 4, 4, 3, 4, 3, 4, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 0, 0, 0, 1, 1, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 1, 1, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 3, 3, 3, 3, 5, 5, 1, 1, 5, 4, 4, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]
343.7814274034
379.1282755933587


## Mutation algorithm 
this function do mutation algorithm on candidates interval's centers

In [26]:
'''def mutation(candidate_array,mutation_random):
    New_candidate = copy.deepcopy(candidate_array)
    Mutant_candidate = copy.deepcopy(candidate_array)
    Trial_candidate = copy.deepcopy(candidate_array)
    for x in range(len(candidate_array)):
        a = np.full(len(candidate_array), 1/(len(candidate_array)-1))
        a[x] = 0
        temp = np.random.choice(len(candidate_array), 3, replace = False, p=a)
        for i in range(len(candidate_array[0].centers)):
            for j in range(len(candidate_array[0].centers[i])):
                Mutant_candidate[x].centers[i][j] = candidate_array[temp[0]].centers[i][j]+ random.random()*(candidate_array[temp[1]].centers[i][j] - candidate_array[temp[2]].centers[i][j])
                if random.random() < mutation_random:
                    Trial_candidate[x].centers[i][j] = Mutant_candidate[x].centers[i][j] #Trial & Mutant must be copy of same candidate
                    
                    
        Trial_candidate[x].fitness = sum_of_squares(Trial_candidate[x].nodes,Trial_candidate[x].centers,Trial_candidate[x].labels)
        
        if Trial_candidate[x].fitness < sum_of_squares(candidate_array[x].nodes,candidates_array[2].centers,candidates_array[2].labels):
            New_candidate[x] = Trial_candidate[x]
            print("sakjfnr")
        else:
            New_candidate[x] = candidate_array[x]
        #print(Trial_candidate[x].centers)
        #print(candidate_array[x].centers)
        #print(New_candidate[x].centers)
        #print(Mutant_candidate[x].centers)
        print("###########################################################################################################")
        Trial_candidate[x].each_fitness = each_fitness(Trial_candidate[x])
        print(Trial_candidate[x].each_fitness)
        candidate_array[x].each_fitness = each_fitness(candidate_array[x])
        print(candidate_array[x].each_fitness)
        print("###########################################################################################################")
        
    return New_candidate
#print(candidates_array[1].fitness)
#for i in range(0,3500):
New_candidates_array = mutation(candidates_array,mutation_random)
    #if New_candidates_array[1].fitness != candidates_array[1].fitness:
     #   print(New_candidates_array[1].fitness)#never happen'''

'def mutation(candidate_array,mutation_random):\n    New_candidate = copy.deepcopy(candidate_array)\n    Mutant_candidate = copy.deepcopy(candidate_array)\n    Trial_candidate = copy.deepcopy(candidate_array)\n    for x in range(len(candidate_array)):\n        a = np.full(len(candidate_array), 1/(len(candidate_array)-1))\n        a[x] = 0\n        temp = np.random.choice(len(candidate_array), 3, replace = False, p=a)\n        for i in range(len(candidate_array[0].centers)):\n            for j in range(len(candidate_array[0].centers[i])):\n                Mutant_candidate[x].centers[i][j] = candidate_array[temp[0]].centers[i][j]+ random.random()*(candidate_array[temp[1]].centers[i][j] - candidate_array[temp[2]].centers[i][j])\n                if random.random() < mutation_random:\n                    Trial_candidate[x].centers[i][j] = Mutant_candidate[x].centers[i][j] #Trial & Mutant must be copy of same candidate\n                    \n                    \n        Trial_candidate[x].f

In [33]:
def mutation(candidate_array,mutation_random):
    New_candidate = copy.deepcopy(candidate_array)
    Trial_candidate = copy.deepcopy(candidate_array)
    for x in range(len(candidate_array)):
        a = np.full(len(candidate_array), 1/(len(candidate_array)-1))
        a[x] = 0
        temp = np.random.choice(len(candidate_array), 3, replace = False, p=a)
        Centers = []
        for i in range(len(candidate_array[0].centers)):
            Second_layer = []
            for j in range(len(candidate_array[0].centers[i])):
                temporary_Center = candidate_array[temp[0]].centers[i][j]+ random.random()*(candidate_array[temp[1]].centers[i][j] - candidate_array[temp[2]].centers[i][j])
                if random.random() < mutation_random:
                    Second_layer.append(temporary_Center) #Trial & Mutant must be copy of same candidate
                else:
                    Second_layer.append(candidate_array[x].centers[i][j])
            Centers.append(Second_layer)
            
        
        y_predict_temp,centers_temp,fitness_temp = doKmeans(candidate_array[x].nodes,clusters_count = number_of_clusters, init_centers = np.asarray(Centers))
        if fitness_temp < candidates_array[x].fitness:
            New_candidate[x].labels  =  y_predict_temp
            New_candidate[x].fitness = fitness_temp
            New_candidate[x].centers = centers_temp
        New_candidate[x].each_fitness = each_fitness(New_candidate[c])
        print(New_candidate[x].fitness)
        print(candidate_array[x].fitness)
        print("#############################################################################################")
    return New_candidate

New_candidates_array = mutation(candidates_array,mutation_random)

336.29263337746863
379.1282755933587
#############################################################################################
336.0605389372351
336.0605389372351
#############################################################################################
380.4407585733279
380.4407585733279
#############################################################################################
336.2131430296451
336.2131430296451
#############################################################################################
338.7448885161782
378.89950397756104
#############################################################################################


  return_n_iter=True)


# Probability Equation

In [35]:
def probability(candidates):
    for c in range(0,len(candidates)):
        temp_array = []
        for clusters in range(0,len(candidates[c].centers)):
            temp_array.append((1/np.sum(candidates[c].each_fitness[clusters]))/(np.sum(1/(np.sum(x.each_fitness[clusters])) for x in candidates)))
        print(temp_array)    
        candidates[c].probability = temp_array
    return
probability(New_candidates_array)


[0.1706071263073633, 0.22473042258957604, 0.16808876645701187, 0.2268351006327947, 0.0008156083052242583, 0.1831356843611011]
[0.1706071263073633, 0.22473042258957604, 0.16808876645701187, 0.2268351006327947, 0.0008156083052242583, 0.1831356843611011]
[0.1706071263073633, 0.22473042258957604, 0.16808876645701187, 0.2268351006327947, 0.0008156083052242583, 0.1831356843611011]
[0.1706071263073633, 0.22473042258957604, 0.16808876645701187, 0.2268351006327947, 0.0008156083052242583, 0.1831356843611011]
[0.31757149477054675, 0.10107830964169581, 0.32764493417195245, 0.09265959746882128, 0.9967375667791031, 0.26745726255559554]


  """


### Roulette Wheel Selection
This is a logic for selecting targets with higher probability.<br>
In this article, each candidate want to follow the other candidate with higher fitness. So in the previous section <strong><em>Probability Equation<strong><em>, we calculated all probabilities of candidates based on their fitnesses.

In [30]:
def roulette_wheel_selection(inertia_array):
    maximum = np.sum(inertia_array)
    pick = random.uniform(0, maximum)
    current = 0
    for fitness in inertia_array:
        current += fitness
        if current > pick:
            return fitness

# Shrink sampling interval
**here we shrink sampeling interval using reduction factor ro creat neighborhood of each center**

In [31]:
def shrinked_sampling_interval(candidates_array):
    for i in range(0,len(candidates_array)):
        shrinked_sampling_interval = []
        for j in range(0,len(candidates_array[i].centers)):
            Second_layer = []
            for k in range(0,len(candidates_array[i].centers[j])):
                Third_layer = []
                Temp = abs(candidates_array[i].sampling_interval[0][k] - candidates_array[i].sampling_interval[1][k]) * sampling_interval_reduction_factor
                Third_layer.append(candidates_array[i].centers[j][k] - (Temp/2))
                Third_layer.append(candidates_array[i].centers[j][k] + (Temp/2))
                Second_layer.append(Third_layer)
            shrinked_sampling_interval.append(Second_layer)
        candidates_array[i].sampling_interval = shrinked_sampling_interval
    return
shrinked_sampling_interval(New_candidates_array)
print(New_candidates_array[0].sampling_interval)

[[[1.512708447368421, 1.5343494473684212], [9.576513157894738, 15.894013157894737], [-1.8732763157894734, 2.392223684210527], [-0.20264473684210516, 2.8468552631578943], [69.87578947368421, 75.1957894736842], [-2.693434210526316, 3.206065789473684], [7.4074210526315785, 17.629421052631578], [-1.3304605263157894, 1.6620394736842103], [-0.17277631578947367, 0.3117236842105263]], [[1.5055041428571427, 1.527145142857143], [11.353750000000002, 17.67125], [-1.9791785714285708, 2.2863214285714295], [0.5602500000000001, 3.60975], [70.74071428571429, 76.06071428571428], [-2.7529642857142855, 3.146535714285714], [3.5386428571428556, 13.760642857142857], [-0.5798214285714285, 2.4126785714285712], [-0.22832142857142856, 0.2561785714285714]], [[1.5108373125, 1.5324783124999999], [10.7046875, 17.022187499999998], [0.9816249999999997, 5.2471250000000005], [-0.4528749999999999, 2.5966249999999995], [69.1484375, 74.4684375], [-2.750375, 3.1491249999999997], [4.627437499999999, 14.8494375], [-1.4278125,

# variations
**Choose t diffrent random center of each cluster's center in shrinked sampling interval neghborhood**

In [32]:
def variation(candidates_array):
    for candidate in candidates_array:
        variations = []
        for i in range(0,variations_count):
            Second_layer = []
            for j in candidate.sampling_interval:
                Third_layer = []
                for k in range(0,len(j)):
                    #print(k)
                    Third_layer.append(random.uniform(j[k][0],j[k][1]))
                Second_layer.append(Third_layer)
            variations.append(Second_layer)
        candidate.variation = variations    
variation(New_candidates_array)
print(New_candidates_array[3].variation[1])

[[1.5117988244923966, 15.155189210359461, 0.5698171781619015, 2.3253342215311004, 69.58700919112138, 2.995499213182032, 6.6257593298140955, 0.9783307686893449, -0.18700424046020342], [1.5290841576502885, 12.652935337255629, 5.133049724728425, 0.7514168549114606, 72.3470309592928, -1.1175628848684998, 10.773063158793498, 0.7972090131211647, -0.1273241135649642], [1.5352879223864129, 12.619903352618056, -0.24166913664207512, 2.484168482656054, 69.01348945006391, 0.39405372340485867, 15.123205441399179, 0.2242250479537351, -0.058923861837003016], [1.5227474371298468, 14.119015277272428, 5.488382368828059, 2.238806057385311, 74.75031193088418, -0.94276644752078, 3.9421140125288603, 0.4345371874485444, 0.2583066535946638], [1.5099028881957668, 16.570463870530336, -1.4217700128881214, 1.9340236662503485, 70.96755098550683, -1.5276010506326754, 12.250925975327089, 0.3775365680876084, -0.03838288865992967], [1.516517762055763, 15.390495546150232, 2.1238840548450573, 2.0453287326298666, 74.4229