# K-MCI
### Intro
In this notebook we have the main algorithm of the article. All necessary functions are implemented in function.py file.<br>
This document have blow sections:<br>
1. Importing Libraries & classes
2. Reading datasets
3. Preprocessing datasets
4. Main Algorithm

### Importing Libraries & classes
We import some libraries from third party.

In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import random
import copy
from sklearn import preprocessing

In [2]:
# defnition of Candidate class
class Candidate:    
    def __init__(self,nodes,sampling_interval,fitness=-1):
        self.nodes = nodes
        self.sampling_interval = sampling_interval 
        self.variation = []
        self.fitness = fitness
        self.clusters = None
        self.centers = []
        self.probability = []
        self.labels = []
        self.each_fitness = []
        
    def describe(self):
        print("Candidate with #{} nodes, #{} centers, fitness={}\n clusters:\n{} \n Centers:\n{}\n sampling_interval:\n{}"
              .format(len(self.nodes),len(self.centers),self.fitness,self.nodes[np.random.randint(self.nodes.shape[0], size=5), :],self.centers,self.sampling_interval))

### Reading Datasets and Preprocessing
We show the structure of our data for all of datasets.<br>
We have these datasets from UCI Machine Learning Repository:
1. Breast Cancer Wisconsin
2. Contraceptive Method Choice data
3. Glass
4. Iris
5. Vowel
6. Wine

In [3]:
""" In this cell we have some code which will do preprocessing for us on all datasets """
""" the resourses of these datasets are mentioned in README.md file. """

path = 'datasets/' 
# because we are using local files, you need to download datasets and change the "path" variable
# local folder of your downloaded datasets


def bcw(): 
    # importing dataset
    dataset = pd.read_csv(path+'bcw.csv',
                          names = ['Sample code number','Clump Thickness','Uniformity of Cell Size',
                                   'Uniformity of Cell Shape','Marginal Adhesion','Single Epithelial',
                                   'Bare Nuclei','Bland Chromatin','Normal Nucleoli','Mitoses','Class'])
    dataset = dataset.drop(dataset.columns[0], axis=1)
    dataset= dataset.replace(to_replace='?',value=np.nan)
    dataset = dataset.dropna(axis =0) # resolving missing values
    dataset = dataset.astype('int64') # casting string valued column to int64
    x = dataset.iloc[:,:-1].values # features
    y = dataset.iloc[:,-1].values # target values
    return (x,y,dataset)


def cmc():
    # importing dataset
    dataset = pd.read_csv(path+'cmc.csv', names=["Wife's age","Wife's education","Husband's education",
                                                 "Number of children ever born","Wife's religion",
                                                 "Wife's now working?","Husband's occupation",
                                                 "Standard-of-living index","Media exposure","Contraceptive method used"])
    x = dataset.iloc[:,:-1].values # features
    y = dataset.iloc[:,-1].values # target values
    return (x,y,dataset)


def glass():
    # importing dataset
    dataset = pd.read_csv(path+'glass.csv', names= ['Id','refractive index','Sodium','Magnesium','Aluminum','Silicon','Potassium'
                                                    ,'Calcium','Barium','Iron','glass'])
    x = dataset.iloc[:,1:10].values # features
    y = dataset.iloc[:,-1].values # target values
    return (x,y,dataset)


def iris():
    # importing dataset
    dataset = pd.read_csv(path+'iris.csv', names= ['sepal length','sepal width','petal length','petal width','class'])
    x = dataset.iloc[:,:-1].values # features
    y = dataset.iloc[:,-1].values # target values
    
    # encoding categorial data types to labelEncoder
    from sklearn.preprocessing import LabelEncoder
    labelencoder_y = LabelEncoder()
    labelencoder_y = labelencoder_y.fit(y)
    y = labelencoder_y.transform(y)  # 0 for 'Iris-setosa', 1 for 'Iris-versicolor', 2 for 'Iris-virginica'
    return (x,y,dataset)


def vowel():
    # importing dataset
    dataset = pd.read_csv(path+'vowel.csv', names= ['vowel','type 1 frq','type 2 frq','type 3 frq'])
    x = dataset.iloc[:,1:].values # features
    y = dataset.iloc[:,0].values # target values
    return (x,y,dataset)

def wine():
    # importing dataset
    dataset = pd.read_csv(path+'wine.csv', names= ['class','Alcohol','Malic acid','Ash','Alcalinity of ash','Magnesium','Total phenols','Flavanoids','Nonflavanoid phenols','Proanthocyanins','Color intensity','Hue','OD280/OD315','Proline'])
    x = dataset.iloc[:,1:].values # features
    y = dataset.iloc[:,0].values # target values
    return (x,y,dataset)

In [4]:
# Getting data from our preprocessing class Datasets
x_bcw, y_bcw, df_bcw = bcw()
x_cmc, y_cmc, df_cmc = cmc()
x_glass, y_glass, df_glass = glass()
x_iris, y_iris, df_iris = iris()
x_vowel, y_vowel, df_vowel = vowel()
x_wine, y_wine, df_wine = wine()

In [5]:
# Showing dataset of Breast Cancer Wisconsin
df_bcw.head(3)

Unnamed: 0,Clump Thickness,Uniformity of Cell Size,Uniformity of Cell Shape,Marginal Adhesion,Single Epithelial,Bare Nuclei,Bland Chromatin,Normal Nucleoli,Mitoses,Class
0,5,1,1,1,2,1,3,1,1,2
1,5,4,4,5,7,10,3,2,1,2
2,3,1,1,1,2,2,3,1,1,2


In [6]:
# Showing dataset of Contraceptive Method Choice data
df_cmc.head(3)

Unnamed: 0,Wife's age,Wife's education,Husband's education,Number of children ever born,Wife's religion,Wife's now working?,Husband's occupation,Standard-of-living index,Media exposure,Contraceptive method used
0,24,2,3,3,1,1,2,3,0,1
1,45,1,3,10,1,1,3,4,0,1
2,43,2,3,7,1,1,3,4,0,1


In [7]:
# Showing dataset of Glass
df_glass.head(3)

Unnamed: 0,Id,refractive index,Sodium,Magnesium,Aluminum,Silicon,Potassium,Calcium,Barium,Iron,glass
0,1,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,2,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,3,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1


In [8]:
# Showing dataset of Iris
df_iris.head(3)

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa


In [9]:
# Showing dataset of Vowel
df_vowel.head(3)

Unnamed: 0,vowel,type 1 frq,type 2 frq,type 3 frq
0,1,700,1500,2600
1,1,550,1550,2400
2,1,700,1500,2600


In [10]:
# Showing dataset of Wine
df_wine.head(3)

Unnamed: 0,class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315,Proline
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185


# Main Algorithm 

The implementation of main algorithm have below sections:
1. <Strong>Initializing Parameters and Candidates</Strong>
    1. Sampling Interval
    2. Apply Random Centers to Candidates
    3. Visualizing Candidates
2. <Strong>Running Kmean to Exploit</Strong>
    1. K-means Algorithm
    2. Visualizing Candidates
3. <Strong>Mutation Algorithm</Strong>
    1. Fitness over Mutated Candidate
    2. Probablility Equation
    3. Mutation Logic

**Extra details will be explain in other next sections**


## Initializing Parameters and Candidates
In this step, we initialize our algorithm. There are some parameters which are important to convergance speed and quality of solution. So we initialize them here based on the paper.<br>
Each time you want to run K-MCI algorithm, you should first do this initialization.<br>

In [57]:
# notice: this values related to IRIS dataset
number_of_candidates = 5
sampling_interval_reduction_factor = 0.95
convergence_parameter = None # what is this in paper???
mutation_random = 0.7
iterations_count = 3500
variations_count = 15
number_of_clusters = 3 # in this article number of clusters are predefined
m,n = x_iris.shape

In [12]:
# initializing candidates
candidates_array = [] # the array of all candidates
def initialize_candidates(candidates_array,number_of_candidates,dataset):
    for i in range(0 ,number_of_candidates):
        candidates_array.append(Candidate(dataset,sampling_interval=None))
    return candidates_array

candidates_array = initialize_candidates(candidates_array,number_of_candidates,x_iris)

## Apply Random Centers to Candidates
In this step, we just select some random nodes as centers with psuedorandom functions in python native library.

In [13]:
for c in range(0,number_of_candidates):
    candidates_array[c].centers =  np.asarray(random.sample(list(candidates_array[c].nodes),number_of_clusters))
    print(candidates_array[c].centers)

[[ 4.3  3.   1.1  0.1]
 [ 6.5  3.   5.2  2. ]
 [ 6.1  3.   4.9  1.8]]
[[ 6.1  2.8  4.   1.3]
 [ 6.7  3.   5.   1.7]
 [ 6.4  2.9  4.3  1.3]]
[[ 4.6  3.6  1.   0.2]
 [ 5.1  3.3  1.7  0.5]
 [ 5.1  3.8  1.6  0.2]]
[[ 7.7  3.   6.1  2.3]
 [ 5.1  3.5  1.4  0.2]
 [ 5.1  3.8  1.5  0.3]]
[[ 5.2  3.5  1.5  0.2]
 [ 5.9  3.2  4.8  1.8]
 [ 6.7  3.3  5.7  2.5]]


## K-means Algorithm 
First we define our fitness function, then apply kmeans and measure our error using introduced fitness function.

In [14]:
# fitness function
def sum_of_squares(data, centroids, labels):
    sqe = 0
    for l in np.unique(labels):
        data_l = data[labels == l]
        resid = data_l - centroids[l]
        sqe += np.sum(np.sqrt(np.sum(resid**2,axis=1)))
    return sqe

In [15]:
def kmeans(x,clusters_count,init_centers):
    kmeans = KMeans(n_clusters = clusters_count, init = 'k-means++',n_init = 50)
    kmeans = kmeans.fit(x)
    labels = kmeans.labels_
    return (labels,kmeans.cluster_centers_,sum_of_squares(x,kmeans.cluster_centers_,labels))

## Running Kmeans to Exploit
Here by running Kmeans on all of our candidates, they centers initialized and nodes assigns to clusters based on Kmean++ algorithm.<br>
The objective function of Kmean is also the object function of K-MCI. <br>
So the fitness of any candidates is calculated using mean square error of nodes to their centers distances.

In [16]:
for c in range(0,number_of_candidates):
    candidates_array[c].labels,candidates_array[c].centers,candidates_array[c].fitness = kmeans(candidates_array[c].nodes
                                                                                                ,clusters_count = number_of_clusters,
                                                                                                init_centers = candidates_array[c].centers)

In [25]:
candidates_array[random.randint(0,number_of_candidates-1)].describe()

Candidate with #150 nodes, #3 centers, fitness=97.20457357401651
 clusters:
[[ 5.1  3.8  1.9  0.4]
 [ 4.6  3.4  1.4  0.3]
 [ 4.7  3.2  1.6  0.2]
 [ 6.1  2.9  4.7  1.4]
 [ 6.2  3.4  5.4  2.3]] 
 Centers:
[[ 5.006       3.428       1.462       0.246     ]
 [ 5.9016129   2.7483871   4.39354839  1.43387097]
 [ 6.85        3.07368421  5.74210526  2.07105263]]
 sampling_interval:
([array([ 5.8,  4.4,  1.9,  0.6]), array([ 7. ,  3.4,  5.1,  2.4]), array([ 7.9,  3.8,  6.9,  2.5])], [array([ 4.3,  2.3,  1. ,  0.1]), array([ 4.9,  2. ,  3. ,  1. ]), array([ 6.1,  2.5,  4.9,  1.4])])


## Sampling Interval
In this step, **first** we should assign each node to the **specific center** then we find **min** and **max** within a specific axis correspond to the features.<br>
So we have to first do `kmeans`, then we can calculate `min` and `max` in within each cluster.

In [49]:
def sampling_intervals_generator(candidate,number_of_clusters):
    clusters_list = []
    maxes = []
    mins = []
    for i in range(0,number_of_clusters):
        clusters_list.append(candidate.nodes[candidate.labels == i])
        maxes.append(np.amax(clusters_list[i],axis=0))
        mins.append(np.amin(clusters_list[i],axis=0))
    candidate.clusters = clusters_list # update clusters of candidate
    return (np.array(maxes),np.array(mins))

In [50]:
for c in range(0,number_of_candidates):
    candidates_array[c].sampling_interval = sampling_intervals_generator(candidates_array[c],number_of_clusters)

## Visualizing Candidates
In this step, by Kmeans, we assigned centers to each candidates and calculated the fitness of each candidate using fitness function

In [51]:
candidates_array[random.randint(0,number_of_candidates-1)].describe()

Candidate with #150 nodes, #3 centers, fitness=97.20457357401651
 clusters:
[[ 7.6  3.   6.6  2.1]
 [ 5.2  2.7  3.9  1.4]
 [ 6.2  2.2  4.5  1.5]
 [ 7.2  3.   5.8  1.6]
 [ 5.8  2.7  4.1  1. ]] 
 Centers:
[[ 5.9016129   2.7483871   4.39354839  1.43387097]
 [ 5.006       3.428       1.462       0.246     ]
 [ 6.85        3.07368421  5.74210526  2.07105263]]
 sampling_interval:
(array([[ 7. ,  3.4,  5.1,  2.4],
       [ 5.8,  4.4,  1.9,  0.6],
       [ 7.9,  3.8,  6.9,  2.5]]), array([[ 4.9,  2. ,  3. ,  1. ],
       [ 4.3,  2.3,  1. ,  0.1],
       [ 6.1,  2.5,  4.9,  1.4]]))


Now we will implement a function over `sampling_intervals_generator` function. This function get sampling intervals arrays and assign each `(max,min)` tuples to corresponding element in `centers` of any `candidates_array`.<br>
We should do this for easily accessing intervals when we trying to shrink them.

In [58]:
#def sampling_intervals(max_min_lists):



In [47]:
(candidates_array[random.randint(0,number_of_candidates-1)].sampling_interval[0][0][0], candidates_array[random.randint(0,number_of_candidates-1)].sampling_interval[1][0][0])

(7.0, 4.9000000000000004)

## Fitness over Mutated Candidate

### Mean Square Distance to a Specific Center
This function is exactly based on "sum_of_squares" function. But here all centers not engaged because we want to find the sum of square distances of nodes of a cluster to their cluster center.<br>
The idea is when mutation process done, at least one dimension may get better center but the other goes bad. So we want to have that center which is better to the previous time.

In [26]:
def center_dist(data, center, labels, center_position, center_cluster):
    sqe = 0
    for l in range(0,len(labels)):
        if center_cluster == labels[l] :
            data_l = data[l][center_position]
            resid = data_l - center
            sqe += np.sum(np.sqrt(np.sum(resid**2)))
    return sqe

In [27]:
def each_fitness(candidates_array_example):
    fitness_of_each_centers = []
    for i in range(0,len(candidates_array_example.centers)):
        Second_layer = []
        for j in range(0,len(candidates_array_example.centers[i])):
            Second_layer.append(center_dist(candidates_array_example.nodes,candidates_array_example.centers[i][j],candidates_array_example.labels,j,i))
        fitness_of_each_centers.append(Second_layer)
    return np.array(fitness_of_each_centers)
print(each_fitness(candidates_array[0]))

[[ 13.536       14.368        6.576        4.128     ]
 [ 15.4          8.14736842  14.45263158   8.67368421]
 [ 22.90645161  14.70967742  25.47741935  14.37096774]]


# test paper own answer
## result:
**paper has wrong answer and numbers**

In [25]:
def just_test(data,center):
    temp1 = np.subtract(center,data)
    temp1 = np.power(temp1, 2)
    sqe = np.sum(temp1)
    return sqe


center_glass  =[ [1.52434 , 12.03344 , 0.01215 , 1.12869 , 71.98256 , 0.19252 , 14.34306 , 0.23039 , 0.15156],
                 [1.51956 , 13.25068 , 0.45229 , 1.53305 , 73.01401 , 0.38472 , 11.15803 , 0.00433 , 0.06599],
                 [1.51362 , 13.15690 , 0.65548 , 3.13123 , 70.50411 , 5.33024 , 6.73773  , 0.67322 , 0.01490],
                 [1.52132 , 13.74692 , 3.51952 , 1.01524 , 71.89517 , 0.21094 , 9.44764  , 0.03588 , 0.04680],
                 [1.51933 , 13.08412 , 3.52765 , 1.36555 , 72.85826 , 0.57913 , 8.36271  , 0.00837 , 0.06182],
                 [1.51567 , 14.65825 , 0.06326 , 2.21016 , 73.25324 , 0.02744 , 8.68548  , 1.02698 , 0.00382] ]

center_iris   =[ [5.01213 , 3.40309 , 1.47163 , 0.23540],
                 [5.93432 , 2.79781 , 4.41787 , 1.41727],
                 [6.73334 , 3.06785 , 5.63008 , 2.10679] ]

center_wine   =[ [13.81262 , 1.83004 , 2.42432 , 17.01717 , 105.41208 , 2.93966 , 3.21965 , 0.34183 , 1.87181 , 5.75329 , 1.05368 , 
                  2.89757  , 1136.97230], 
                 [12.74160 , 2.51921 , 2.41113 , 19.57418 , 98.98807  , 1.97496 , 1.26308 , 0.37480 , 1.46902 , 5.73752 , 1.00197 ,
                  2.38197  , 687.01356 ],
                 [12.50086 , 2.48843 , 2.43785 , 21.43603 , 92.55049  , 2.02977 , 1.54943 , 0.32085 , 1.38624 , 4.38814 , 0.94045 ,
                  2.43190  , 463.86513 ] ]

center_cmc    =[ [43.64742 , 2.99091 , 3.44673 , 4.59136 , 0.80254 , 0.76971 , 1.82586 , 3.42522 , 0.10127 , 1.67635],
                 [24.41296 , 3.03823 , 3.51059 , 1.79036 , 0.92502 , 0.78935 , 2.29463 , 2.97378 , 0.03692 , 2.00149],
                 [33.50648 , 3.13272 , 3.55176 , 3.65914 , 0.79533 , 0.69725 , 2.10130 , 3.28562 , 0.06151 , 2.11479] ]

center_vowel  =[ [506.98650 , 1839.66652 , 2556.20000],
                 [623.71854 , 1309.59677 , 2333.45721],
                 [407.89515 , 1018.05210 , 2317.82688], 
                 [439.24323 , 987.68488  , 2665.47618], 
                 [357.26154 , 2291.44000 , 2977.39697],
                 [375.45357 , 2149.40364 , 2678.44208] ]

center_cancer =[ [7.11701 , 6.64106 , 6.62548 ,5.61469, 5.24061 , 8.10094 , 6.07818 , 6.02147 , 2.32582],
                 [2.88942 , 1.12774 , 1.20072 ,1.16404, 1.99334 , 1.12116 , 2.00537 , 1.10133 , 1.03162] ] 


label = []
for i in x_iris:
    test = []
    for j in center_iris:
        test.append(just_test(i,j))
        print(test)
    label.append(np.argmin(test))
print(label)   
print(sum_of_squares(x_iris,center_iris,label))



[0.02349670189999992]
[0.02349670189999992, 11.778446248299998]
[0.02349670189999992, 11.778446248299998, 24.38397808860001]
[0.18143870190000008]
[0.18143870190000008, 11.699984248299998]
[0.18143870190000008, 11.699984248299998, 24.895164088600012]
[0.16938070189999987]
[0.16938070189999987, 12.8881622483]
[0.16938070189999987, 12.8881622483, 26.537376088600006]
[0.26377270190000029]
[0.26377270190000029, 11.867440248299999]
[0.26377270190000029, 11.867440248299999, 25.24558208860001]
[0.045304701899999973]
[0.045304701899999973, 12.105748248299998]
[0.045304701899999973, 12.105748248299998, 24.817076088600011]
[0.47660870190000004]
[0.47660870190000004, 9.9219762482999965]
[0.47660870190000004, 9.9219762482999965, 20.828930088600004]
[0.17916470190000031]
[0.17916470190000031, 12.498874248299998]
[0.17916470190000031, 12.498874248299998, 25.819530088600015]
[0.0022147019000000002]
[0.0022147019000000002, 11.231298248299998]
[0.0022147019000000002, 11.231298248299998, 23.808200088600

## Mutation algorithm 
this function do mutation algorithm on candidates interval's centers

In [26]:
'''def mutation(candidate_array,mutation_random):
    New_candidate = copy.deepcopy(candidate_array)
    Mutant_candidate = copy.deepcopy(candidate_array)
    Trial_candidate = copy.deepcopy(candidate_array)
    for x in range(len(candidate_array)):
        a = np.full(len(candidate_array), 1/(len(candidate_array)-1))
        a[x] = 0
        temp = np.random.choice(len(candidate_array), 3, replace = False, p=a)
        for i in range(len(candidate_array[0].centers)):
            for j in range(len(candidate_array[0].centers[i])):
                Mutant_candidate[x].centers[i][j] = candidate_array[temp[0]].centers[i][j]+ random.random()*(candidate_array[temp[1]].centers[i][j] - candidate_array[temp[2]].centers[i][j])
                if random.random() < mutation_random:
                    Trial_candidate[x].centers[i][j] = Mutant_candidate[x].centers[i][j] #Trial & Mutant must be copy of same candidate
                    
                    
        Trial_candidate[x].fitness = sum_of_squares(Trial_candidate[x].nodes,Trial_candidate[x].centers,Trial_candidate[x].labels)
        
        if Trial_candidate[x].fitness < sum_of_squares(candidate_array[x].nodes,candidates_array[2].centers,candidates_array[2].labels):
            New_candidate[x] = Trial_candidate[x]
            print("sakjfnr")
        else:
            New_candidate[x] = candidate_array[x]
        #print(Trial_candidate[x].centers)
        #print(candidate_array[x].centers)
        #print(New_candidate[x].centers)
        #print(Mutant_candidate[x].centers)
        print("###########################################################################################################")
        Trial_candidate[x].each_fitness = each_fitness(Trial_candidate[x])
        print(Trial_candidate[x].each_fitness)
        candidate_array[x].each_fitness = each_fitness(candidate_array[x])
        print(candidate_array[x].each_fitness)
        print("###########################################################################################################")
        
    return New_candidate
#print(candidates_array[1].fitness)
#for i in range(0,3500):
New_candidates_array = mutation(candidates_array,mutation_random)
    #if New_candidates_array[1].fitness != candidates_array[1].fitness:
     #   print(New_candidates_array[1].fitness)#never happen'''

'def mutation(candidate_array,mutation_random):\n    New_candidate = copy.deepcopy(candidate_array)\n    Mutant_candidate = copy.deepcopy(candidate_array)\n    Trial_candidate = copy.deepcopy(candidate_array)\n    for x in range(len(candidate_array)):\n        a = np.full(len(candidate_array), 1/(len(candidate_array)-1))\n        a[x] = 0\n        temp = np.random.choice(len(candidate_array), 3, replace = False, p=a)\n        for i in range(len(candidate_array[0].centers)):\n            for j in range(len(candidate_array[0].centers[i])):\n                Mutant_candidate[x].centers[i][j] = candidate_array[temp[0]].centers[i][j]+ random.random()*(candidate_array[temp[1]].centers[i][j] - candidate_array[temp[2]].centers[i][j])\n                if random.random() < mutation_random:\n                    Trial_candidate[x].centers[i][j] = Mutant_candidate[x].centers[i][j] #Trial & Mutant must be copy of same candidate\n                    \n                    \n        Trial_candidate[x].f

In [27]:
def mutation(candidate_array,mutation_random):
    New_candidate = copy.deepcopy(candidate_array)
    Trial_candidate = copy.deepcopy(candidate_array)
    for x in range(len(candidate_array)):
        a = np.full(len(candidate_array), 1/(len(candidate_array)-1))
        a[x] = 0
        temp = np.random.choice(len(candidate_array), 3, replace = False, p=a)
        Centers = []
        for i in range(len(candidate_array[0].centers)):
            Second_layer = []
            for j in range(len(candidate_array[0].centers[i])):
                temporary_Center = candidate_array[temp[0]].centers[i][j]+ random.random()*(candidate_array[temp[1]].centers[i][j] - candidate_array[temp[2]].centers[i][j])
                if random.random() < mutation_random:
                    Second_layer.append(temporary_Center) #Trial & Mutant must be copy of same candidate
                else:
                    Second_layer.append(candidate_array[x].centers[i][j])
            Centers.append(Second_layer)
            
        
        y_predict_temp,centers_temp,fitness_temp = doKmeans(candidate_array[x].nodes,clusters_count = number_of_clusters, init_centers = np.asarray(Centers))
        if fitness_temp < candidates_array[x].fitness:
            New_candidate[x].labels  =  y_predict_temp
            New_candidate[x].fitness = fitness_temp
            New_candidate[x].centers = centers_temp
        New_candidate[x].each_fitness = each_fitness(New_candidate[c])
        print(New_candidate[x].fitness)
        print(candidate_array[x].fitness)
        print("#############################################################################################")
    return New_candidate

New_candidates_array = mutation(candidates_array,mutation_random)

78.8514414261
78.855665826
#############################################################################################
78.8514414261
78.8514414261
#############################################################################################
78.8514414261
78.8514414261
#############################################################################################
78.855665826
78.855665826
#############################################################################################
78.8514414261
145.452691765
#############################################################################################


  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)
  return_n_iter=True)


# Probability Equation

In [28]:
def probability(candidates):
    for c in range(0,len(candidates)):
        temp_array = []
        for clusters in range(0,len(candidates[c].centers)):
            temp_array.append((1/np.sum(candidates[c].each_fitness[clusters]))/(np.sum(1/(np.sum(x.each_fitness[clusters])) for x in candidates)))
        print(temp_array)    
        candidates[c].probability = temp_array
    return
probability(New_candidates_array)


[0.24293907036681392, 0.14067049348418179, 0.19061548066685854]
[0.24293907036681392, 0.14067049348418179, 0.19061548066685854]
[0.24293907036681392, 0.14067049348418179, 0.19061548066685854]
[0.24293907036681392, 0.14067049348418179, 0.19061548066685854]
[0.028243718532744308, 0.43731802606327291, 0.23753807733256593]


### Roulette Wheel Selection
This is a logic for selecting targets with higher probability.<br>
In this article, each candidate want to follow the other candidate with higher fitness. So in the previous section <strong><em>Probability Equation<strong><em>, we calculated all probabilities of candidates based on their fitnesses.

In [29]:
def roulette_wheel_selection(inertia_array):
    maximum = np.sum(inertia_array)
    pick = random.uniform(0, maximum)
    current = 0
    for fitness in inertia_array:
        current += fitness
        if current > pick:
            return fitness

# Shrink sampling interval
**here we shrink sampeling interval using reduction factor ro creat neighborhood of each center**

In [30]:
def shrinked_sampling_interval(candidates_array):
    for i in range(0,len(candidates_array)):
        shrinked_sampling_interval = []
        for j in range(0,len(candidates_array[i].centers)):
            Second_layer = []
            for k in range(0,len(candidates_array[i].centers[j])):
                Third_layer = []
                Temp = abs(candidates_array[i].sampling_interval[0][k] - candidates_array[i].sampling_interval[1][k]) * sampling_interval_reduction_factor
                Third_layer.append(candidates_array[i].centers[j][k] - (Temp/2))
                Third_layer.append(candidates_array[i].centers[j][k] + (Temp/2))
                Second_layer.append(Third_layer)
            shrinked_sampling_interval.append(Second_layer)
        candidates_array[i].sampling_interval = shrinked_sampling_interval
    return
shrinked_sampling_interval(New_candidates_array)
print(New_candidates_array[0].sampling_interval)

[[[3.2960000000000003, 6.7160000000000002], [2.2879999999999998, 4.5679999999999996], [-1.3405, 4.2645], [-0.89399999999999935, 1.3860000000000006]], [[5.1399999999999997, 8.5600000000000005], [1.9336842105263155, 4.2136842105263153], [2.9396052631578948, 8.5446052631578944], [0.93105263157894735, 3.2110526315789469]], [[4.1916129032258063, 7.6116129032258062], [1.6083870967741936, 3.8883870967741938], [1.591048387096774, 7.1960483870967744], [0.29387096774193555, 2.5738709677419354]]]


# variations
**Choose t diffrent random center of each cluster's center in shrinked sampling interval neghborhood**

In [31]:
def variation(candidates_array):
    for candidate in candidates_array:
        variations = []
        for i in range(0,variations_count):
            Second_layer = []
            for j in candidate.sampling_interval:
                Third_layer = []
                for k in range(0,len(j)):
                    #print(k)
                    Third_layer.append(random.uniform(j[k][0],j[k][1]))
                Second_layer.append(Third_layer)
            variations.append(Second_layer)
        candidate.variation = variations    
variation(New_candidates_array)
print(New_candidates_array[3].variation[1])

[[8.5598534513859974, 2.4141354096095444, 3.3191499241282232, 2.7656968535132966], [5.2166396707559812, 3.8980452013549627, 3.9083019343540273, 0.93198913160181196], [5.2563342112091611, 2.216302500427449, 2.1646235086498109, 0.50007144637086032]]
