In [1]:
import pandas as pd
import numpy as np
import random as rd
from sklearn import svm
from sklearn import preprocessing
from sklearn.model_selection import KFold

In [2]:
# Load the dataset: columns X1..X8 are used as features, Y1 as the target.
# NOTE(review): the ".xlsv" extension looks like a typo for ".xlsx" -- confirm the file name on disk.
data = pd.read_excel("/content/ENB2012_data.xlsv")
# Shuffle the rows, keeping 100% of them (frac=1). DataFrame.sample returns a NEW
# frame, so the result must be assigned back; the previous `data - data.sample(...)`
# computed a throw-away difference and left the rows unshuffled.
data = data.sample(frac=1)


#Original data
x_org_data = pd.DataFrame(data, columns=["X1","X2","X3","X4",
                                         "X5","X6","X7","X8"]) #features
y = pd.DataFrame(data, columns=["Y1"]).values   #output (target), 2-D array with one column

#preprocessing
#One-hot encoding of the categorical columns X6 and X8
x_with_dummies = pd.get_dummies(x_org_data,columns=["X6","X8"])
#x6 has values from 2 to 5 and x8 has values from 0 to 5

#normalising dataset: rescale every column to the [0, 1] range
var_prep = preprocessing.MinMaxScaler()
x = var_prep.fit_transform(x_with_dummies)

In [3]:
def _decode_bits(genes):
  """Read `genes` as an unsigned binary number whose LAST element is the least-significant bit."""
  return sum(bit*(2**power) for power,bit in enumerate(reversed(genes)))


def objective_value(x,y,chromosome,kfold=3,c_bounds=(10,1000),gamma_bounds=(0.05,0.99)):
  """Decode a binary chromosome into SVR hyperparameters and cross-validate them.

  The chromosome is split into two halves of ``len(chromosome)//2`` genes each.
  The trailing half encodes ``C`` and the half just before it encodes ``gamma``;
  within each half the right-most gene is the least-significant bit. With an
  odd-length chromosome the very first gene is not used.

  Each bit pattern is mapped linearly onto its bounds:
  ``value = lower + bit_sum * (upper - lower) / (2**n_bits - 1)``.

  Parameters
  ----------
  x, y : arrays indexable by integer index arrays (features and targets).
  chromosome : sequence of 0/1 genes, at least 2 long.
  kfold : number of KFold splits (default 3).
  c_bounds : (lower, upper) search range for C (default (10, 1000)).
  gamma_bounds : (lower, upper) search range for gamma (default (0.05, 0.99)).

  Returns
  -------
  (C, gamma, avg_error) where ``avg_error`` is the mean over the folds of
  ``1 - model.score(x_test, y_test)``; for SVR that score is R^2, not a
  classification accuracy.

  Raises
  ------
  ValueError
      If the chromosome has fewer than 2 genes (nothing to decode; this used
      to surface as a ZeroDivisionError from ``2**0 - 1``).
  """
  n_genes = len(chromosome)
  half = n_genes//2   # genes per hyperparameter
  if half < 1:
    raise ValueError("chromosome must contain at least 2 genes (one per hyperparameter)")

  lb_x,ub_x = c_bounds       # bounds for C
  lb_y,ub_y = gamma_bounds   # bounds for gamma

  # step size between two consecutive decodable values
  precision_x = (ub_x-lb_x)/((2**half)-1)
  precision_y = (ub_y-lb_y)/((2**half)-1)

  # trailing half -> C, the half before it -> gamma
  x_bit_sum = _decode_bits(chromosome[n_genes-half:])
  y_bit_sum = _decode_bits(chromosome[n_genes-2*half:n_genes-half])

  c_hyperparameter = (x_bit_sum*precision_x)+lb_x
  gamma_hyperparameter = (y_bit_sum*precision_y)+lb_y

  kf = KFold(n_splits=kfold)

  sum_of_error = 0
  for train_index,test_index in kf.split(x):

    x_train,x_test = x[train_index],x[test_index]
    y_train,y_test = y[train_index],y[test_index]

    model = svm.SVR(kernel="rbf",C =c_hyperparameter,gamma=gamma_hyperparameter)
    model.fit(x_train,np.ravel(y_train))   # SVR expects a 1-D target

    r2_score = model.score(x_test,y_test)  # R^2 on the held-out fold
    sum_of_error += 1-r2_score             # accumulate the error over all k folds

  avg_error = sum_of_error/kfold # the svm's average error

  #the defined function will return 3 values
  return c_hyperparameter,gamma_hyperparameter,avg_error

# Selecting parents -->

In [4]:
# Template individual: 12 binary genes. It is reshuffled in place below, so after
# this cell `chromosome` holds the last permutation that was generated.
chromosome = np.array([0,0,1,1,1,0,1,1,1,0,1,1])

population = 20 # number of individuals in the initial pool

# Every pass permutes the template in place and records a snapshot of it.
shuffled_rows = []
for i in range(population):
  rd.shuffle(chromosome)
  shuffled_rows.append(chromosome.copy())

# Stack the snapshots into one (population, genes) array. The leading empty float
# block makes the stacked result float-typed.
all_solutions = np.vstack([np.empty((0,len(chromosome)))] + shuffled_rows)

# Empty (0, genes) placeholder for selected parents.
parents = np.empty((0,np.size(all_solutions,1)))


#finding 2 parents from the pool of solutions
#using tournament selection method
def find_parents_ts(all_solutions,x,y):
  """Select two parents with two independent 3-way tournaments.

  For each parent, three distinct rows of `all_solutions` are drawn with
  np.random.choice, each is scored with objective_value(...)[2] (the third
  returned value, i.e. the average error) and the row with the lowest error
  wins. The same individual may win both tournaments.

  Returns a tuple (parent_1, parent_2) of 1-D float arrays.
  """
  gene_count = np.size(all_solutions,1)
  # Start from an empty float (0, genes) block so the final stack is float-typed.
  winners = [np.empty((0,gene_count))]

  for _ in range(2):   # one tournament per parent
    # three distinct row indices from the pool
    picks = np.random.choice(len(all_solutions),3,replace=False)
    contenders = [all_solutions[k] for k in picks]

    # fitness (error) of every contender, evaluated in draw order
    errors = [objective_value(x=x,y=y,chromosome=c)[2] for c in contenders]

    lowest = min(errors)
    # earliest contender whose error equals the minimum wins; the third one is the fallback
    slot = next((k for k in (0,1) if errors[k] == lowest), 2)
    winners.append(contenders[slot])

  stacked = np.vstack(winners)
  return stacked[0,:],stacked[1,:]


In [5]:
# Smoke test: run one round of tournament selection and show what came back.

parents = find_parents_ts(all_solutions,x,y)   # tuple (parent_1, parent_2)
print("Find parents:",parents)
for label,parent in zip(("Parent 1:","Parent 2:"),parents):
  print(label,parent)

Find parents: (array([1., 1., 1., 1., 0., 1., 1., 0., 1., 0., 1., 0.]), array([0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 0., 1.]))
Parent 1: [1. 1. 1. 1. 0. 1. 1. 0. 1. 0. 1. 0.]
Parent 2: [0. 1. 0. 1. 1. 1. 1. 1. 0. 1. 0. 1.]
