In [1]:
import pandas as pd
import numpy as np
import random as rd
from sklearn import svm
from sklearn import preprocessing
from sklearn.model_selection import KFold

#Objective Value Function -->

In [2]:
def objective_value(x,y,chromosome,kfold=3):
  """Decode a binary chromosome into SVR hyperparameters and cross-validate them.

  The chromosome is split into two halves of ``len(chromosome)//2`` genes.
  The LAST half encodes C (range 10..1000) and the FIRST half encodes gamma
  (range 0.05..0.99). Within each half the right-most gene is the least
  significant bit.

  Parameters:
    x          -- feature matrix, indexed with the row indices produced by KFold
    y          -- targets, indexed the same way (flattened with np.ravel for fit)
    chromosome -- sequence of 0/1 genes
    kfold      -- number of KFold splits (default 3)

  Returns:
    (c_hyperparameter, gamma_hyperparameter, avg_error) where avg_error is the
    mean over the folds of ``1 - model.score(...)``.
  """
  half = len(chromosome)//2   # genes per decision variable

  # Search ranges for the two hyperparameters
  c_low,c_high = 10,1000
  gamma_low,gamma_high = 0.05,0.99

  # Real-valued step represented by one unit of the decoded integer
  c_step = (c_high-c_low)/((2**half)-1)
  gamma_step = (gamma_high-gamma_low)/((2**half)-1)

  # C: walk the last `half` genes from the right end, weights 2^0, 2^1, ...
  c_bits_value = 0
  for power in range(half):
    c_bits_value += chromosome[-(power+1)]*(2**power)

  # gamma: same walk, continuing leftwards into the first `half` genes
  gamma_bits_value = 0
  for power in range(half):
    gamma_bits_value += chromosome[-(power+1+half)]*(2**power)

  c_hyperparameter = (c_bits_value*c_step)+c_low
  gamma_hyperparameter = (gamma_bits_value*gamma_step)+gamma_low

  # One RBF-kernel SVR per fold; KFold is built without a shuffle argument,
  # so fold membership follows the row order of x.
  fold_errors = []
  for train_index,test_index in KFold(n_splits=kfold).split(x):
    model = svm.SVR(kernel="rbf",C=c_hyperparameter,gamma=gamma_hyperparameter)
    model.fit(x[train_index],np.ravel(y[train_index]))
    # NOTE(review): for a regressor, score() is R^2 per scikit-learn docs,
    # so "error" here is 1 - R^2 rather than a classification error.
    fold_errors.append(1-model.score(x[test_index],y[test_index]))

  avg_error = sum(fold_errors)/kfold

  return c_hyperparameter,gamma_hyperparameter,avg_error

#Selecting Parents Function -->

In [3]:
#using tournament selection method
def find_parents_ts(all_solutions,x,y):
  """Pick two parents by tournament selection.

  For each of the two parents, three distinct rows of ``all_solutions`` are
  drawn at random, each is scored with ``objective_value`` (default kfold),
  and the row with the smallest error wins. On ties the earliest drawn
  candidate wins.

  Parameters:
    all_solutions -- 2-D array, one chromosome per row
    x, y          -- data forwarded to objective_value

  Returns:
    (parent_1, parent_2) as rows of a freshly stacked array.
  """
  gene_count = np.size(all_solutions,1)
  winners = []

  for _ in range(2):   # one tournament per parent
    # three distinct contestants
    picks = np.random.choice(len(all_solutions),3,replace=False)
    candidates = [all_solutions[picks[0]],
                  all_solutions[picks[1]],
                  all_solutions[picks[2]]]

    # element [2] of objective_value's result is the average error
    errors = [objective_value(x=x,y=y,chromosome=candidate)[2]
              for candidate in candidates]

    # lowest error wins
    lowest = min(errors)
    if lowest == errors[0]:
      winner = candidates[0]
    elif lowest == errors[1]:
      winner = candidates[1]
    else:
      winner = candidates[2]

    winners.append(winner)

  # stack onto an empty float array so the result has one parent per row
  parents = np.vstack((np.empty((0,gene_count)),winners[0],winners[1]))

  return parents[0,:],parents[1,:]

#CrossOver Function -->

In [4]:
def crossover(parent_1,parent_2,prob_crsvr=1):
  """Two-point crossover of two chromosomes.

  With probability ``prob_crsvr`` two distinct cut indices are drawn and the
  segment between them (both ends inclusive) is swapped between the parents:

    child_1 = parent_1[:lo] + parent_2[lo:hi+1] + parent_1[hi+1:]
    child_2 = parent_2[:lo] + parent_1[lo:hi+1] + parent_2[hi+1:]

  Otherwise the children are copies of the parents.

  Parameters:
    parent_1, parent_2 -- 1-D gene sequences (cut indices are drawn from
                          len(parent_1))
    prob_crsvr         -- probability of performing the crossover (default 1)

  Returns:
    (child_1, child_2) as new numpy arrays. The children never alias the
    parents, so mutating a child in place cannot corrupt a parent.
  """
  gene_count = len(parent_1)

  # Always draw the decision number first, as the original did.
  do_crossover = np.random.rand() < prob_crsvr

  # No crossover requested, or too few genes to pick two distinct cut points
  # (the distinct-index loop below could never terminate with one gene).
  if not do_crossover or gene_count < 2:
    return np.array(parent_1),np.array(parent_2)

  # Two distinct cut indices
  index_1 = np.random.randint(0,gene_count)
  index_2 = np.random.randint(0,gene_count)
  while index_1 == index_2:
    index_2 = np.random.randint(0,gene_count)

  start = min(index_1,index_2)
  stop = max(index_1,index_2)+1   # +1: the upper cut index is part of the swapped segment

  child_1 = np.concatenate((parent_1[:start],parent_2[start:stop],parent_1[stop:]))
  child_2 = np.concatenate((parent_2[:start],parent_1[start:stop],parent_2[stop:]))

  return child_1,child_2

#Mutation Function -->

In [5]:
def _flip_genes(child,prob_mutation):
  """Return a copy of ``child`` with each gene flipped with probability ``prob_mutation``."""
  mutated = np.array(child)   # copy: the caller's chromosome is left untouched
  # one uniform draw per gene; a gene mutates when its draw falls below the probability
  flip_mask = np.random.rand(len(mutated)) < prob_mutation
  # 0 --> 1, anything else --> 0
  mutated[flip_mask] = np.where(mutated[flip_mask] == 0,1,0)
  return mutated


def mutation(child_1,child_2,prob_mutation=0.2):
  """Bit-flip mutation of two chromosomes.

  Every gene of each child is independently selected with probability
  ``prob_mutation``; a selected gene equal to 0 becomes 1 and any other
  value becomes 0.

  Parameters:
    child_1, child_2 -- 1-D gene sequences
    prob_mutation    -- per-gene mutation probability (default 0.2)

  Returns:
    (mutated_child_1, mutated_child_2) as new numpy arrays. The inputs are
    not modified, so calling this function more than once on the same
    children does not compound mutations on them.
  """
  mutated_child_1 = _flip_genes(child_1,prob_mutation)
  mutated_child_2 = _flip_genes(child_2,prob_mutation)

  return mutated_child_1,mutated_child_2

#SVM Optimization -->

In [6]:
# Fixed seed so that Restart Kernel -> Run All reproduces the same search.
SEED = 42
rd.seed(SEED)
np.random.seed(SEED)

data = pd.read_excel("/content/ENB2012_data.xlsv")
# Shuffle the rows (frac=1 keeps 100% of them). The result must be assigned
# back: objective_value builds KFold without shuffling, so the folds follow
# the row order of the data.
data = data.sample(frac=1,random_state=SEED)


#Original data
x_org_data = pd.DataFrame(data, columns=["X1","X2","X3","X4","X5","X6","X7","X8"]) #features
y = pd.DataFrame(data, columns=["Y1"]).values   #output (target)

# One-hot encode X6 and X8, then scale every feature to [0, 1]
x_with_dummies = pd.get_dummies(x_org_data,columns=["X6","X8"])
var_prep = preprocessing.MinMaxScaler()
x = var_prep.fit_transform(x_with_dummies)

data_count = len(x)
print()
print ("Number of observations in the dataset:",data_count)

prob_crsvr = 1       #probability of crossover
prob_mutation = 0.3  #probability of mutation
population = 40      #population number
generations = 20      #generation number
kfold = 3            #folds used when scoring children
# NOTE: find_parents_ts scores tournament candidates with objective_value's
# default kfold; it does not receive the value set here.

# X and Y decision variables. 12 genes of X and 12 genes of Y.
x_y_string = np.array([0,1,0,0,0,1,0,0,1,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0])
n_genes = len(x_y_string)
half_genes = n_genes//2

#empty pool for initial population
pool_of_solutions = np.empty((0,n_genes))

#empty pool for storing the best solution of each generation
#(one extra leading column for the objective value)
best_of_a_generation = np.empty((0,n_genes+1))

#generating the initial population: every chromosome is a random
#permutation of the template string
for _ in range(population):
    rd.shuffle(x_y_string) #shuffle the chromosome in place
    pool_of_solutions = np.vstack((pool_of_solutions, x_y_string))

#Now, pool_of_solutions has `population` chromosomes.

#One generation = select parents --> crossover --> mutation, repeated until
#a full new population exists. Repeat for `generations` generations.
for gen in range(1,generations+1):

    # children of this generation; becomes the pool for the next generation
    new_population = np.empty((0,n_genes))

    # same children with their objective value (error) in column 0
    new_population_with_obj_val = np.empty((0,n_genes+1))

    print("--> Generation: #", gen) # tracking purposes

    # each family yields 2 children, so population//2 families are needed
    # (an odd population therefore produces population-1 children)
    for family in range(1,population//2+1):

        print()
        print("--> Family: #", family) # tracking purposes

        # Call each operator ONCE and unpack both results. Calling twice and
        # keeping half of each result doubles the SVM evaluations done by
        # tournament selection, yields children from two unrelated crossovers,
        # and applies mutation twice.
        parent_1,parent_2 = find_parents_ts(pool_of_solutions,x=x,y=y)

        child_1,child_2 = crossover(parent_1,parent_2,prob_crsvr=prob_crsvr)

        mutated_child_1,mutated_child_2 = mutation(child_1,child_2,prob_mutation=prob_mutation)

        # objective_value(...)[2] is the average error of the chromosome
        obj_val_mutated_child_1 = objective_value(x=x,y=y,chromosome=mutated_child_1,kfold=kfold)[2]
        obj_val_mutated_child_2 = objective_value(x=x,y=y,chromosome=mutated_child_2,kfold=kfold)[2]

        # prepend each child's objective value to its genes
        mutant_1_with_obj_val = np.hstack((obj_val_mutated_child_1,mutated_child_1))
        mutant_2_with_obj_val = np.hstack((obj_val_mutated_child_2,mutated_child_2))

        # accumulate this family's children into the new generation
        new_population = np.vstack((new_population,mutated_child_1,mutated_child_2))
        new_population_with_obj_val = np.vstack((new_population_with_obj_val,
                                                 mutant_1_with_obj_val,
                                                 mutant_2_with_obj_val))

    # the children replace the previous population
    pool_of_solutions = new_population

    # sort by column 0 (error), ascending: the first row is this generation's best
    sorted_best = np.array(sorted(new_population_with_obj_val, key=lambda row:row[0]))

    # remember the best solution of every generation
    best_of_a_generation = np.vstack((best_of_a_generation,sorted_best[0]))


# Best of the very last population (convergence) ...
sorted_last_population = np.array(sorted(new_population_with_obj_val, key=lambda row:row[0]))
# ... and best among the per-generation winners (best seen in any generation)
sorted_best_of_a_generation = np.array(sorted(best_of_a_generation,key=lambda row:row[0]))

# Replace the error in column 0 with 1-error. The rows were sorted by
# ascending error, so row 0 still holds the best solution.
sorted_last_population[:,0] = 1-(sorted_last_population[:,0])
sorted_best_of_a_generation[:,0] = 1-(sorted_best_of_a_generation[:,0])

best_string_convergence = sorted_last_population[0]

best_string_overall = sorted_best_of_a_generation[0]

print()


print()
print()
print("------------------------------")
print()
# Column 0 is the objective value, genes occupy columns 1..n_genes.
# objective_value decodes C from the LAST half of the chromosome and gamma
# from the FIRST half, so the slices below follow that layout.
print("Best solution in last population --> ")
print("Final Solution (Convergence):",best_string_convergence[1:]) # final solution entire chromosome
print("Encoded C (Convergence):",best_string_convergence[1+half_genes:]) # genes decoded into C
print("Encoded Gamma (Convergence):",best_string_convergence[1:1+half_genes]) # genes decoded into gamma
print()
# NOTE(review): this label says "last generation", but best_string_overall is
# the best of the per-generation winners across ALL generations.
print("Best solution in last generation --> ")
print("Final Solution (Best):",best_string_overall[1:]) # final solution entire chromosome
print("Encoded C (Best):",best_string_overall[1+half_genes:]) # genes decoded into C
print("Encoded Gamma (Best):",best_string_overall[1:1+half_genes]) # genes decoded into gamma

# decode the chromosomes to their real hyperparameter values
final_solution_convergence = objective_value(x=x,y=y,chromosome=best_string_convergence[1:],kfold=kfold)

final_solution_overall = objective_value(x=x,y=y,chromosome=best_string_overall[1:],kfold=kfold)

# objective_value returns 3 things -->
# [0] is the decoded C
# [1] is the decoded gamma
# [2] is the obj val for the chromosome (avg. error)
print()
print("Best Obj value in last population --> ")
print("Decoded C (Convergence):",round(final_solution_convergence[0],5)) # real value of C
print("Decoded Gamma (Convergence):",round(final_solution_convergence[1],5)) # real value of gamma
print("Obj Value(Accuracy) - Convergence:",round(1-(final_solution_convergence[2]),5)) # 1 - avg. error
print()
print("Best solution in last generation --> ")
print("Decoded C (Best):",round(final_solution_overall[0],5)) # real value of C
print("Decoded Gamma (Best):",round(final_solution_overall[1],5)) # real value of gamma
print("Obj Value(Accuracy) - Best in Generations:",round(1-(final_solution_overall[2]),5)) # 1 - avg. error
print()
print("------------------------------")






Number of observations in the dataset: 768
--> Generation: # 1

--> Family: # 1

--> Family: # 2

--> Family: # 3

--> Family: # 4

--> Family: # 5

--> Family: # 6

--> Family: # 7

--> Family: # 8

--> Family: # 9

--> Family: # 10

--> Family: # 11

--> Family: # 12

--> Family: # 13

--> Family: # 14

--> Family: # 15

--> Family: # 16

--> Family: # 17

--> Family: # 18

--> Family: # 19

--> Family: # 20
--> Generation: # 2

--> Family: # 1

--> Family: # 2

--> Family: # 3

--> Family: # 4

--> Family: # 5

--> Family: # 6

--> Family: # 7

--> Family: # 8

--> Family: # 9

--> Family: # 10

--> Family: # 11

--> Family: # 12

--> Family: # 13

--> Family: # 14

--> Family: # 15

--> Family: # 16

--> Family: # 17

--> Family: # 18

--> Family: # 19

--> Family: # 20
--> Generation: # 3

--> Family: # 1

--> Family: # 2

--> Family: # 3

--> Family: # 4

--> Family: # 5

--> Family: # 6

--> Family: # 7

--> Family: # 8

--> Family: # 9

--> Family: # 10

--> Family: # 11

--> 