# K-MCI
### Intro
In this notebook we have the main algorithm of the article. All necessary functions are implemented in function.py file.<br>
This document have blow sections:<br>
1. Importing Libraries & classes
2. Reading datasets
3. Preprocessing datasets
4. Main Algorithm

### Importing Libraries & classes
We import some libraries from third party.

In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import random
from copy import deepcopy
from sklearn import preprocessing

In [2]:
# defnition of Candidate class
class Candidate:    
    def __init__(self,nodes,sampling_interval,fitness=-1):
        self.nodes = nodes
        self.sampling_interval = sampling_interval 
        self.variation = []
        self.fitness = fitness
        self.clusters = None
        self.centers = []
        self.probability = []
        self.labels = []
        self.each_fitness = []
        
    def describe(self):
        print("Candidate with #{} nodes, #{} centers, fitness={}\n clusters:\n{} \n Centers:\n{}\n sampling_interval:\n{}"
              .format(len(self.nodes),len(self.centers),self.fitness,self.nodes[np.random.randint(self.nodes.shape[0], size=5), :],self.centers,self.sampling_interval))

### Reading Datasets and Preprocessing
We show the structure of our data for all of datasets.<br>
We have these datasets from UCI Machine Learning Repository:
1. Breast Cancer Wisconsin
2. Contraceptive Method Choice data
3. Glass
4. Iris
5. Vowel
6. Wine

In [3]:
""" In this cell we have some code which will do preprocessing for us on all datasets """
""" the resourses of these datasets are mentioned in README.md file. """

path = 'datasets/' 
# because we are using local files, you need to download datasets and change the "path" variable
# local folder of your downloaded datasets


def bcw(): 
    # importing dataset
    dataset = pd.read_csv(path+'bcw.csv',
                          names = ['Sample code number','Clump Thickness','Uniformity of Cell Size',
                                   'Uniformity of Cell Shape','Marginal Adhesion','Single Epithelial',
                                   'Bare Nuclei','Bland Chromatin','Normal Nucleoli','Mitoses','Class'])
    dataset = dataset.drop(dataset.columns[0], axis=1)
    dataset= dataset.replace(to_replace='?',value=np.nan)
    dataset = dataset.dropna(axis =0) # resolving missing values
    dataset = dataset.astype('int64') # casting string valued column to int64
    x = dataset.iloc[:,:-1].values # features
    y = dataset.iloc[:,-1].values # target values
    return (x,y,dataset)


def cmc():
    # importing dataset
    dataset = pd.read_csv(path+'cmc.csv', names=["Wife's age","Wife's education","Husband's education",
                                                 "Number of children ever born","Wife's religion",
                                                 "Wife's now working?","Husband's occupation",
                                                 "Standard-of-living index","Media exposure","Contraceptive method used"])
    x = dataset.iloc[:,:-1].values # features
    y = dataset.iloc[:,-1].values # target values
    return (x,y,dataset)


def glass():
    # importing dataset
    dataset = pd.read_csv(path+'glass.csv', names= ['Id','refractive index','Sodium','Magnesium','Aluminum','Silicon','Potassium'
                                                    ,'Calcium','Barium','Iron','glass'])
    x = dataset.iloc[:,1:10].values # features
    y = dataset.iloc[:,-1].values # target values
    return (x,y,dataset)


def iris():
    # importing dataset
    dataset = pd.read_csv(path+'iris.csv', names= ['sepal length','sepal width','petal length','petal width','class'])
    x = dataset.iloc[:,:-1].values # features
    y = dataset.iloc[:,-1].values # target values
    
    # encoding categorial data types to labelEncoder
    from sklearn.preprocessing import LabelEncoder
    labelencoder_y = LabelEncoder()
    labelencoder_y = labelencoder_y.fit(y)
    y = labelencoder_y.transform(y)  # 0 for 'Iris-setosa', 1 for 'Iris-versicolor', 2 for 'Iris-virginica'
    return (x,y,dataset)


def vowel():
    # importing dataset
    dataset = pd.read_csv(path+'vowel.csv', names= ['vowel','type 1 frq','type 2 frq','type 3 frq'])
    x = dataset.iloc[:,1:].values # features
    y = dataset.iloc[:,0].values # target values
    return (x,y,dataset)

def wine():
    # importing dataset
    dataset = pd.read_csv(path+'wine.csv', names= ['class','Alcohol','Malic acid','Ash','Alcalinity of ash','Magnesium','Total phenols','Flavanoids','Nonflavanoid phenols','Proanthocyanins','Color intensity','Hue','OD280/OD315','Proline'])
    x = dataset.iloc[:,1:].values # features
    y = dataset.iloc[:,0].values # target values
    return (x,y,dataset)

In [4]:
# Getting data from our preprocessing class Datasets
x_bcw, y_bcw, df_bcw = bcw()
x_cmc, y_cmc, df_cmc = cmc()
x_glass, y_glass, df_glass = glass()
x_iris, y_iris, df_iris = iris()
x_vowel, y_vowel, df_vowel = vowel()
x_wine, y_wine, df_wine = wine()

In [5]:
# Showing dataset of Breast Cancer Wisconsin
df_bcw.head(3)

Unnamed: 0,Clump Thickness,Uniformity of Cell Size,Uniformity of Cell Shape,Marginal Adhesion,Single Epithelial,Bare Nuclei,Bland Chromatin,Normal Nucleoli,Mitoses,Class
0,5,1,1,1,2,1,3,1,1,2
1,5,4,4,5,7,10,3,2,1,2
2,3,1,1,1,2,2,3,1,1,2


In [6]:
# Showing dataset of Contraceptive Method Choice data
df_cmc.head(3)

Unnamed: 0,Wife's age,Wife's education,Husband's education,Number of children ever born,Wife's religion,Wife's now working?,Husband's occupation,Standard-of-living index,Media exposure,Contraceptive method used
0,24,2,3,3,1,1,2,3,0,1
1,45,1,3,10,1,1,3,4,0,1
2,43,2,3,7,1,1,3,4,0,1


In [7]:
# Showing dataset of Glass
df_glass.head(3)

Unnamed: 0,Id,refractive index,Sodium,Magnesium,Aluminum,Silicon,Potassium,Calcium,Barium,Iron,glass
0,1,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,2,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,3,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1


In [8]:
# Showing dataset of Iris
df_iris.head(3)

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa


In [9]:
# Showing dataset of Vowel
df_vowel.head(3)

Unnamed: 0,vowel,type 1 frq,type 2 frq,type 3 frq
0,1,700,1500,2600
1,1,550,1550,2400
2,1,700,1500,2600


In [10]:
# Showing dataset of Wine
df_wine.head(3)

Unnamed: 0,class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315,Proline
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185


# Main Algorithm 

The implementation of main algorithm have below sections:
1. <Strong>Initializing Parameters and Candidates</Strong>
    1. Sampling Interval
    2. Apply Random Centers to Candidates
    3. Visualizing Candidates
2. <Strong>Running Kmean to Exploit</Strong>
    1. K-means Algorithm
    2. Visualizing Candidates
3. <Strong>Mutation Algorithm</Strong>
    1. Fitness over Mutated Candidate
    2. Probablility Equation
    3. Mutation Logic

**Extra details will be explain in other next sections**


## Initializing Parameters and Candidates
In this step, we initialize our algorithm. There are some parameters which are important to convergance speed and quality of solution. So we initialize them here based on the paper.<br>
Each time you want to run K-MCI algorithm, you should first do this initialization.<br>

In [27]:
# notice: this values related to IRIS dataset
number_of_candidates = 5
sampling_interval_reduction_factor = 0.95
convergence_parameter = None # what is this in paper???
mutation_random = 0.7
iterations_count = 3500
variations_count = 15
number_of_clusters = 3 # in this article number of clusters are predefined
m,n = x_wine.shape

In [28]:
# initializing candidates
candidates_array = [] # the array of all candidates
def initialize_candidates(candidates_array,number_of_candidates,dataset):
    for i in range(0 ,number_of_candidates):
        candidates_array.append(Candidate(dataset,sampling_interval=None))
    return candidates_array

candidates_array = initialize_candidates(candidates_array,number_of_candidates,x_wine)

## Apply Random Centers to Candidates
In this step, we just select some random nodes as centers with psuedorandom functions in python native library.

In [29]:
for c in range(0,number_of_candidates):
    candidates_array[c].centers =  np.asarray(random.sample(list(candidates_array[c].nodes),number_of_clusters))
    print(candidates_array[c].centers)

[[  1.22500000e+01   1.73000000e+00   2.12000000e+00   1.90000000e+01
    8.00000000e+01   1.65000000e+00   2.03000000e+00   3.70000000e-01
    1.63000000e+00   3.40000000e+00   1.00000000e+00   3.17000000e+00
    5.10000000e+02]
 [  1.23700000e+01   1.13000000e+00   2.16000000e+00   1.90000000e+01
    8.70000000e+01   3.50000000e+00   3.10000000e+00   1.90000000e-01
    1.87000000e+00   4.45000000e+00   1.22000000e+00   2.87000000e+00
    4.20000000e+02]
 [  1.30500000e+01   1.65000000e+00   2.55000000e+00   1.80000000e+01
    9.80000000e+01   2.45000000e+00   2.43000000e+00   2.90000000e-01
    1.44000000e+00   4.25000000e+00   1.12000000e+00   2.51000000e+00
    1.10500000e+03]]
[[  1.37800000e+01   2.76000000e+00   2.30000000e+00   2.20000000e+01
    9.00000000e+01   1.35000000e+00   6.80000000e-01   4.10000000e-01
    1.03000000e+00   9.58000000e+00   7.00000000e-01   1.68000000e+00
    6.15000000e+02]
 [  1.36200000e+01   4.95000000e+00   2.35000000e+00   2.00000000e+01
    9.200

## K-means Algorithm 
First we define our fitness function, then apply kmeans and measure our error using introduced fitness function.

In [30]:
# fitness function
def sum_of_squares(data, centroids, labels):
    sqe = 0
    for l in np.unique(labels):
        data_l = data[labels == l]
        resid = data_l - centroids[l]
        sqe += np.sum(np.sqrt(np.sum(resid**2,axis=1)))
    return sqe

In [31]:
def kmeans(x,clusters_count,init_centers):
    kmeans = KMeans(n_clusters = clusters_count, init = 'k-means++',n_init = 50)
    kmeans = kmeans.fit(x)
    labels = kmeans.labels_
    return (labels,kmeans.cluster_centers_,sum_of_squares(x,kmeans.cluster_centers_,labels))

## Running Kmeans to Exploit
Here by running Kmeans on all of our candidates, they centers initialized and nodes assigns to clusters based on Kmean++ algorithm.<br>
The objective function of Kmean is also the object function of K-MCI. <br>
So the fitness of any candidates is calculated using mean square error of nodes to their centers distances.

In [32]:
for c in range(0,number_of_candidates):
    candidates_array[c].labels,candidates_array[c].centers,candidates_array[c].fitness = kmeans(candidates_array[c].nodes
                                                                                                ,clusters_count = number_of_clusters,
                                                                                                init_centers = candidates_array[c].centers)

In [33]:
candidates_array[random.randint(0,number_of_candidates-1)].describe()

Candidate with #178 nodes, #3 centers, fitness=16555.679416032723
 clusters:
[[  1.23700000e+01   1.21000000e+00   2.56000000e+00   1.81000000e+01
    9.80000000e+01   2.42000000e+00   2.65000000e+00   3.70000000e-01
    2.08000000e+00   4.60000000e+00   1.19000000e+00   2.30000000e+00
    6.78000000e+02]
 [  1.37800000e+01   2.76000000e+00   2.30000000e+00   2.20000000e+01
    9.00000000e+01   1.35000000e+00   6.80000000e-01   4.10000000e-01
    1.03000000e+00   9.58000000e+00   7.00000000e-01   1.68000000e+00
    6.15000000e+02]
 [  1.23300000e+01   9.90000000e-01   1.95000000e+00   1.48000000e+01
    1.36000000e+02   1.90000000e+00   1.85000000e+00   3.50000000e-01
    2.76000000e+00   3.40000000e+00   1.06000000e+00   2.31000000e+00
    7.50000000e+02]
 [  1.29900000e+01   1.67000000e+00   2.60000000e+00   3.00000000e+01
    1.39000000e+02   3.30000000e+00   2.89000000e+00   2.10000000e-01
    1.96000000e+00   3.35000000e+00   1.31000000e+00   3.50000000e+00
    9.85000000e+02]
 [ 

## Sampling Interval
In this step, **first** we should assign each node to the **specific center** then we find **min** and **max** within a specific axis correspond to the features.<br>
So we have to first do `kmeans`, then we can calculate `min` and `max` in within each cluster.

In [34]:
def sampling_intervals_generator(candidate,number_of_clusters):
    clusters_list = []
    maxes = []
    mins = []
    for i in range(0,number_of_clusters):
        clusters_list.append(candidate.nodes[candidate.labels == i])
        maxes.append(np.amax(clusters_list[i],axis=0))
        mins.append(np.amin(clusters_list[i],axis=0))
    candidate.clusters = clusters_list # update clusters of candidate
    return (np.array(maxes),np.array(mins))

Now we will implement a function over `sampling_intervals_generator` function. This function get sampling intervals arrays and assign each `(max,min)` tuples to corresponding element in `centers` of any `candidates_array`.<br>
We should do this for easily accessing intervals when we trying to shrink them.

In [35]:
def sampling_intervals(max_min_lists):
    maxes, mins = max_min_lists[0],max_min_lists[1]
    num_of_centers, num_of_features = maxes.shape[0], maxes.shape[1]
    
    max_min = np.zeros(shape=(num_of_centers,num_of_features,2)) # array of tuples
    
    for i in range(num_of_centers):
        for j in range(num_of_features):
         max_min[i][j] = (maxes[i][j],mins[i][j])     
    return np.array(max_min)

In [36]:
for c in range(0,number_of_candidates):
    candidates_array[c].sampling_interval = sampling_intervals(sampling_intervals_generator(candidates_array[c],number_of_clusters))

## Visualizing Candidates
In this step, by Kmeans, we assigned centers to each candidates and calculated the fitness of each candidate using fitness function

In [37]:
candidates_array[random.randint(0,number_of_candidates-1)].describe()

Candidate with #178 nodes, #3 centers, fitness=16555.679416032723
 clusters:
[[  1.38200000e+01   1.75000000e+00   2.42000000e+00   1.40000000e+01
    1.11000000e+02   3.88000000e+00   3.74000000e+00   3.20000000e-01
    1.87000000e+00   7.05000000e+00   1.01000000e+00   3.26000000e+00
    1.19000000e+03]
 [  1.27700000e+01   2.39000000e+00   2.28000000e+00   1.95000000e+01
    8.60000000e+01   1.39000000e+00   5.10000000e-01   4.80000000e-01
    6.40000000e-01   9.89999900e+00   5.70000000e-01   1.63000000e+00
    4.70000000e+02]
 [  1.27700000e+01   2.39000000e+00   2.28000000e+00   1.95000000e+01
    8.60000000e+01   1.39000000e+00   5.10000000e-01   4.80000000e-01
    6.40000000e-01   9.89999900e+00   5.70000000e-01   1.63000000e+00
    4.70000000e+02]
 [  1.37100000e+01   5.65000000e+00   2.45000000e+00   2.05000000e+01
    9.50000000e+01   1.68000000e+00   6.10000000e-01   5.20000000e-01
    1.06000000e+00   7.70000000e+00   6.40000000e-01   1.74000000e+00
    7.40000000e+02]
 [ 

In [38]:
print('max and min of first feature of first center (max,min)={}'
      .format(candidates_array[random.randint(0,number_of_candidates-1)].sampling_interval[0,0]))

max and min of first feature of first center (max,min)=[ 14.13  11.03]


## Fitness over Mutated Candidate

### Mean Square Distance to a Specific Center
This function is exactly based on "sum_of_squares" function. But here all centers not engaged because we want to find the sum of square distances of nodes of a cluster to their cluster center.<br>
The idea is when mutation process done, at least one dimension may get better center but the other goes bad. So we want to have that center which is better to the previous time.

In [39]:
def center_dist(data, center, labels, center_position, center_cluster):
    sqe = 0
    for l in range(0,len(labels)):
        if center_cluster == labels[l] :
            data_l = data[l][center_position]
            resid = data_l - center
            sqe += np.sum(np.sqrt(np.sum(resid**2)))
    return sqe

In [40]:
def each_fitness(candidates_array_example):
    fitness_of_each_centers = []
    for i in range(0,len(candidates_array_example.centers)):
        Second_layer = []
        for j in range(0,len(candidates_array_example.centers[i])):
            Second_layer.append(center_dist(candidates_array_example.nodes,candidates_array_example.centers[i][j],candidates_array_example.labels,j,i))
        fitness_of_each_centers.append(Second_layer)
    return np.array(fitness_of_each_centers)
print(each_fitness(candidates_array[0]))

[[  3.65066667e+01   7.37762319e+01   1.52985507e+01   1.68330435e+02
    5.76608696e+02   3.18336232e+01   4.50252174e+01   7.12985507e+00
    2.88194203e+01   1.18098260e+02   1.35904348e+01   4.03078261e+01
    4.63637681e+03]
 [  1.72055319e+01   1.43782979e+01   7.38382979e+00   9.14170213e+01
    4.19574468e+02   1.33161702e+01   1.63212766e+01   2.28468085e+00
    1.64595745e+01   4.79927660e+01   4.43829787e+00   1.43302128e+01
    6.62804255e+03]
 [  3.61509677e+01   6.12861290e+01   1.34922581e+01   1.30096774e+02
    7.61354839e+02   3.08812903e+01   5.30022581e+01   6.92322581e+00
    2.81970968e+01   1.38465806e+02   1.25260000e+01   4.24929032e+01
    4.95238710e+03]]


## Mutation Algorithm 
In this step, we apply mutation algorithm on every single candidates.

### Candidate Sampler
This function, return 3 random candidates. For mutation, we need 3 random candidates who are not same and they are different from the candidate we want to mutate.

In [42]:
def candidate_sample(number_of_candidates, candidates_array, mutation_target_candidate):
    bad_index = candidates_array.index(mutation_target_candidate)
    index_of_other_candidates = list(range(number_of_candidates))
    index_of_other_candidates.pop(bad_index)
    participants_indexes = random.sample(index_of_other_candidates,3)
    participants = []
    for i in range(len(participants_indexes)):
        participants.append(candidates_array[participants_indexes[i]])
    return np.array(participants)

In [43]:
candidate_sample(5,candidates_array, candidates_array[2])

array([<__main__.Candidate object at 0x0000022D3D1CB400>,
       <__main__.Candidate object at 0x0000022D3D1CBF28>,
       <__main__.Candidate object at 0x0000022D3D1CBE10>], dtype=object)

The `mutator` function, apply mutation algorithm on a specific `candidate` with list of `participants` who are different candidates.

In [44]:
def mutator(candidate, participant_candidates, number_of_candidates, gama): 
    mutant_candidate = deepcopy(candidate) # the candidate who will be mutated
    random_number = np.random.rand()
    mutant_candidate.centers = participant_candidates[0].centers.flatten() + random_number*(participant_candidates[1].centers.flatten() - participant_candidates[2].centers.flatten())
    
    print(candidate.centers)
    print('=================')
    print(mutant_candidate.centers)
    
    trial_candidate = deepcopy(candidate)
    trial_candidate.centers =  trial_candidate.centers.flatten()
    
    # select mixed features of original and mutant candidate based on the random probability
    for i in range(len(trial_candidate.centers)):
        if random_number < gama:
            trial_candidate.centers[i] = mutant_candidate.centers[i]
            
    mutant_candidate.centers.shape = candidate.centers.shape # reshape to actual structure (falttened for calculation)
    trial_candidate.centers.shape = candidate.centers.shape # reshape to actual structure (falttened for calculation)
    
    # select mutated candidate or original one based on the better fitness value
    new_candidate = None
    e_old = sum_of_squares(candidate.nodes, candidate.centers, candidate.labels)
    e_trial = sum_of_squares(trial_candidate.nodes, trial_candidate.centers, trial_candidate.labels)
    if e_old <= e_trial:
        new_candidate = candidate
        #print("true")
        #print(e_old,'---',e_trial)
    else:
        new_candidate = trial_candidate
    
        
    return mutant_candidate, trial_candidate, new_candidate

In [45]:
mut, tri , new = mutator(candidates_array[0], candidate_sample(5,candidates_array, candidates_array[2]),5, 0.7)

[[  1.25166667e+01   2.49420290e+00   2.28855072e+00   2.08231884e+01
    9.23478261e+01   2.07072464e+00   1.75840580e+00   3.90144928e-01
    1.45188406e+00   4.08695651e+00   9.41159420e-01   2.49072464e+00
    4.58231884e+02]
 [  1.38044681e+01   1.88340426e+00   2.42617021e+00   1.70234043e+01
    1.05510638e+02   2.86723404e+00   3.01425532e+00   2.85319149e-01
    1.91042553e+00   5.70255319e+00   1.07829787e+00   3.11404255e+00
    1.19514894e+03]
 [  1.29298387e+01   2.50403226e+00   2.40806452e+00   1.98903226e+01
    1.03596774e+02   2.11112903e+00   1.58403226e+00   3.88387097e-01
    1.50338710e+00   5.65032258e+00   8.83967742e-01   2.36548387e+00
    7.28338710e+02]]
[  1.25166667e+01   2.49420290e+00   2.28855072e+00   2.08231884e+01
   9.23478261e+01   2.07072464e+00   1.75840580e+00   3.90144928e-01
   1.45188406e+00   4.08695651e+00   9.41159420e-01   2.49072464e+00
   4.58231884e+02   1.38044681e+01   1.88340426e+00   2.42617021e+00
   1.70234043e+01   1.05510638e+0

In [41]:
for c in candidates_array:
    print(c.fitness)
    print("=============")

16555.679416
16555.679416
16555.679416
16555.679416
16555.679416


In [None]:
'''def mutation(candidate_array,mutation_random):
    New_candidate = copy.deepcopy(candidate_array)
    Mutant_candidate = copy.deepcopy(candidate_array)
    Trial_candidate = copy.deepcopy(candidate_array)
    for x in range(len(candidate_array)):
        a = np.full(len(candidate_array), 1/(len(candidate_array)-1))
        a[x] = 0
        temp = np.random.choice(len(candidate_array), 3, replace = False, p=a)
        for i in range(len(candidate_array[0].centers)):
            for j in range(len(candidate_array[0].centers[i])):
                Mutant_candidate[x].centers[i][j] = candidate_array[temp[0]].centers[i][j]+ random.random()*(candidate_array[temp[1]].centers[i][j] - candidate_array[temp[2]].centers[i][j])
                if random.random() < mutation_random:
                    Trial_candidate[x].centers[i][j] = Mutant_candidate[x].centers[i][j] #Trial & Mutant must be copy of same candidate
                    
                    
        Trial_candidate[x].fitness = sum_of_squares(Trial_candidate[x].nodes,Trial_candidate[x].centers,Trial_candidate[x].labels)
        
        if Trial_candidate[x].fitness < sum_of_squares(candidate_array[x].nodes,candidates_array[2].centers,candidates_array[2].labels):
            New_candidate[x] = Trial_candidate[x]
            print("sakjfnr")
        else:
            New_candidate[x] = candidate_array[x]
        #print(Trial_candidate[x].centers)
        #print(candidate_array[x].centers)
        #print(New_candidate[x].centers)
        #print(Mutant_candidate[x].centers)
        print("###########################################################################################################")
        Trial_candidate[x].each_fitness = each_fitness(Trial_candidate[x])
        print(Trial_candidate[x].each_fitness)
        candidate_array[x].each_fitness = each_fitness(candidate_array[x])
        print(candidate_array[x].each_fitness)
        print("###########################################################################################################")
        
    return New_candidate
#print(candidates_array[1].fitness)
#for i in range(0,3500):
New_candidates_array = mutation(candidates_array,mutation_random)
    #if New_candidates_array[1].fitness != candidates_array[1].fitness:
     #   print(New_candidates_array[1].fitness)#never happen'''

In [None]:
def mutation(candidate_array,mutation_random):
    New_candidate = copy.deepcopy(candidate_array)
    Trial_candidate = copy.deepcopy(candidate_array)
    for x in range(len(candidate_array)):
        a = np.full(len(candidate_array), 1/(len(candidate_array)-1))
        a[x] = 0
        temp = np.random.choice(len(candidate_array), 3, replace = False, p=a)
        Centers = []
        for i in range(len(candidate_array[0].centers)):
            Second_layer = []
            for j in range(len(candidate_array[0].centers[i])):
                temporary_Center = candidate_array[temp[0]].centers[i][j]+ random.random()*(candidate_array[temp[1]].centers[i][j] - candidate_array[temp[2]].centers[i][j])
                if random.random() < mutation_random:
                    Second_layer.append(temporary_Center) #Trial & Mutant must be copy of same candidate
                else:
                    Second_layer.append(candidate_array[x].centers[i][j])
            Centers.append(Second_layer)
            
        
        y_predict_temp,centers_temp,fitness_temp = doKmeans(candidate_array[x].nodes,clusters_count = number_of_clusters, init_centers = np.asarray(Centers))
        if fitness_temp < candidates_array[x].fitness:
            New_candidate[x].labels  =  y_predict_temp
            New_candidate[x].fitness = fitness_temp
            New_candidate[x].centers = centers_temp
        New_candidate[x].each_fitness = each_fitness(New_candidate[c])
        print(New_candidate[x].fitness)
        print(candidate_array[x].fitness)
        print("#############################################################################################")
    return New_candidate

New_candidates_array = mutation(candidates_array,mutation_random)

# Probability Equation

In [None]:
def probability(candidates):
    for c in range(0,len(candidates)):
        temp_array = []
        for clusters in range(0,len(candidates[c].centers)):
            temp_array.append((1/np.sum(candidates[c].each_fitness[clusters]))/(np.sum(1/(np.sum(x.each_fitness[clusters])) for x in candidates)))
        print(temp_array)    
        candidates[c].probability = temp_array
    return
probability(New_candidates_array)


### Roulette Wheel Selection
This is a logic for selecting targets with higher probability.<br>
In this article, each candidate want to follow the other candidate with higher fitness. So in the previous section <strong><em>Probability Equation<strong><em>, we calculated all probabilities of candidates based on their fitnesses.

In [None]:
def roulette_wheel_selection(inertia_array):
    maximum = np.sum(inertia_array)
    pick = random.uniform(0, maximum)
    current = 0
    for fitness in inertia_array:
        current += fitness
        if current > pick:
            return fitness

# Shrink sampling interval
**here we shrink sampeling interval using reduction factor ro creat neighborhood of each center**

In [None]:
def shrinked_sampling_interval(candidates_array):
    for i in range(0,len(candidates_array)):
        shrinked_sampling_interval = []
        for j in range(0,len(candidates_array[i].centers)):
            Second_layer = []
            for k in range(0,len(candidates_array[i].centers[j])):
                Third_layer = []
                Temp = abs(candidates_array[i].sampling_interval[0][k] - candidates_array[i].sampling_interval[1][k]) * sampling_interval_reduction_factor
                Third_layer.append(candidates_array[i].centers[j][k] - (Temp/2))
                Third_layer.append(candidates_array[i].centers[j][k] + (Temp/2))
                Second_layer.append(Third_layer)
            shrinked_sampling_interval.append(Second_layer)
        candidates_array[i].sampling_interval = shrinked_sampling_interval
    return
shrinked_sampling_interval(New_candidates_array)
print(New_candidates_array[0].sampling_interval)

# variations
**Choose t diffrent random center of each cluster's center in shrinked sampling interval neghborhood**

In [None]:
def variation(candidates_array):
    for candidate in candidates_array:
        variations = []
        for i in range(0,variations_count):
            Second_layer = []
            for j in candidate.sampling_interval:
                Third_layer = []
                for k in range(0,len(j)):
                    #print(k)
                    Third_layer.append(random.uniform(j[k][0],j[k][1]))
                Second_layer.append(Third_layer)
            variations.append(Second_layer)
        candidate.variation = variations    
variation(New_candidates_array)
print(New_candidates_array[3].variation[1])