In [68]:
import pandas as pd
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the wine dataset into a DataFrame.
data = pd.read_csv('datasets/wine.csv')

# Features: every column except the last one.
X = data.iloc[:, :-1]

# Target: whatever is left once the feature columns are removed. This keeps
# Y as a DataFrame (not a Series), exactly like the features' container type.
Y = data.drop(columns=X.columns)

In [69]:
from random import choices , randint, randrange , random

def generate_genome(length):
    """Build one random binary chromosome.

    Args:
        length: Number of genes to generate.

    Returns:
        A list of `length` integers, each picked at random from 0 and 1.
    """
    alleles = [0, 1]
    genome = choices(alleles, k=length)
    return genome


In [70]:
def listToString(s):
    """Concatenate the string form of every element of `s` into one string.

    Args:
        s: Any iterable (e.g. a list of 0/1 genes).

    Returns:
        A single string, e.g. [1, 0, 1] -> "101"; an empty iterable gives "".
    """
    return "".join(str(item) for item in s)

In [71]:
def stringToList(s):
    """Convert each element of `s` to an int and collect the results in a list.

    Args:
        s: An iterable whose elements are accepted by int(), e.g. the
            chromosome string "101".

    Returns:
        A list of ints, e.g. "101" -> [1, 0, 1]; an empty input gives [].
    """
    return [int(char) for char in s]

In [72]:
# Mirror of the most recently generated population, kept so that any cell
# still reading this module-level name continues to work.
list_population = []


def generate_population(size, length):
    """Generate a fresh initial population of random chromosomes.

    Each call builds a new list. Previously every call appended to the shared
    module-level list, so calling the function (or re-running a cell) more
    than once returned an ever-growing population that still contained the
    genomes of earlier runs.

    Args:
        size: Number of chromosomes to generate.
        length: Number of genes in each chromosome.

    Returns:
        A new list of `size` chromosome strings, each `length` characters long.
    """
    population = [listToString(generate_genome(length)) for _ in range(size)]

    # Replace (rather than extend) the contents of the global mirror so it
    # never accumulates genomes across calls.
    list_population[:] = population
    return population


# generate_population(6, 13)

In [73]:
def crossover(genome_a, genome_b):
    """Single-point crossover of two equally long parent genomes.

    Args:
        genome_a: First parent (any sliceable sequence, e.g. a chromosome string).
        genome_b: Second parent, same length as `genome_a`.

    Returns:
        A tuple of two children. Each child takes the part before the cut
        point from one parent and the part from the cut point onward from the
        other. Parents shorter than two genes are returned unchanged.

    Raises:
        ValueError: If the two genomes differ in length.
    """
    size_a, size_b = len(genome_a), len(genome_b)
    if size_a != size_b:
        raise ValueError("Genomes a and b must be of same length")

    # With fewer than two genes there is no interior position to cut at.
    if size_a < 2:
        return genome_a, genome_b

    # Cut point in 1 .. length-1 (both inclusive), so each child always
    # receives at least one gene from each parent.
    cut = randint(1, size_a - 1)
    head_a, tail_a = genome_a[:cut], genome_a[cut:]
    head_b, tail_b = genome_b[:cut], genome_b[cut:]
    return head_a + tail_b, head_b + tail_a



In [74]:
def mutation(genome, num=1, probability=0.2):
    """Randomly flip genes of a binary chromosome.

    For each of `num` attempts a random position is chosen, and the gene at
    that position is flipped (0 <-> 1) with the given probability.

    Args:
        genome: Chromosome to mutate, e.g. the string "0101".
        num: Number of mutation attempts (default 1).
        probability: Chance that a single attempt actually flips the chosen
            gene (default 0.2).

    Returns:
        The mutated chromosome as a string. If `num` is not positive or the
        genome is empty, `genome` is returned unchanged.
    """
    # Nothing to do: no attempts requested, or no gene to pick from.
    # (randrange(0) would raise ValueError on an empty genome.)
    if num <= 0 or len(genome) == 0:
        return genome

    # Convert once, outside the loop, instead of string -> list -> string
    # on every attempt.
    genes = stringToList(genome)

    for _ in range(num):
        index = randrange(len(genes))
        # Flip only when the random draw falls within the mutation probability.
        if random() <= probability:
            genes[index] = abs(genes[index] - 1)

    return listToString(genes)

  

In [75]:
# Baseline: accuracy of a decision tree trained on ALL features, used as a
# reference point for the feature subsets evaluated later.

train_X, test_X, train_Y, test_Y = train_test_split(X, Y, random_state=0)

# fit() returns the estimator itself, so `model` is the trained classifier.
# NOTE(review): the tree is created without a random_state, so this score
# may differ slightly between runs -- confirm whether that is intended.
model = tree.DecisionTreeClassifier(criterion="gini").fit(train_X, train_Y)

predictions = model.predict(test_X)
score = accuracy_score(y_true=test_Y, y_pred=predictions)

score


0.9555555555555556

In [76]:
def calculate_fitness(genome):
    """Score a chromosome by the accuracy of a decision tree trained on the features it selects.

    Gene i of the chromosome corresponds to column i of `X`; a gene equal to
    '1' (or the integer 1) means the column is used.

    Args:
        genome: Chromosome, e.g. the string "1010...", with one gene per
            column of `X`.

    Returns:
        Accuracy on the held-out split (a float between 0 and 1) of a decision
        tree trained on the selected columns, or 0.0 when the chromosome
        selects no column at all.
    """
    feature_names = X.columns.tolist()
    selected = [
        feature_names[i] for i, gene in enumerate(genome) if str(gene) == '1'
    ]

    # A chromosome of all zeros selects nothing; fitting a model on zero
    # columns raises an error, so give it the worst possible fitness instead.
    if not selected:
        return 0.0

    train_data_X, test_data_X, train_data_Y, test_data_Y = train_test_split(
        X[selected], Y, random_state=0
    )

    # Fixed random_state so the same chromosome always gets the same score;
    # without it the tree's tie-breaking between equally good splits is random
    # and the fitness of a genome could change from one evaluation to the next.
    model1 = tree.DecisionTreeClassifier(criterion="gini", random_state=0)
    model1.fit(train_data_X, train_data_Y)

    predictions = model1.predict(test_data_X)
    return accuracy_score(test_data_Y, predictions)



In [77]:
def select_parents(population):
    """Pick two parents from the population, weighted by fitness.

    Every chromosome is scored with calculate_fitness() and the scores are
    used as selection weights, so fitter chromosomes are more likely to be
    drawn. Sampling is with replacement, so the same chromosome can be
    returned twice.

    Args:
        population: List of chromosomes.

    Returns:
        A list with the two selected chromosomes.
    """
    fitness_weights = list(map(calculate_fitness, population))
    return choices(population, weights=fitness_weights, k=2)

In [78]:
def _rank_by_fitness(population):
    """Return (population sorted best-first, {genome: fitness}) keeping duplicate genomes."""
    # dict.fromkeys() yields each distinct genome once, in first-seen order,
    # so every distinct genome is scored exactly one time.
    scores = {
        genome: calculate_fitness(genome) for genome in dict.fromkeys(population)
    }
    ranked = sorted(population, key=scores.get, reverse=True)
    return ranked, scores


def continue_evolution(generation_limit, fitness_limit, size, length):
    """Run the genetic algorithm until a chromosome is fit enough or the generation limit is hit.

    Each generation the population is ranked by fitness. If the best
    chromosome reaches `fitness_limit` the search stops. Otherwise the two
    best distinct chromosomes are carried over unchanged and the rest of the
    next generation is filled with mutated crossover children of
    fitness-weighted parents.

    The population is kept at `size` chromosomes in every generation.
    Previously duplicates were silently dropped while scoring, which made the
    population shrink from one generation to the next. The final generation is
    now also evaluated before returning, so the reported chromosome is the
    best of the last population rather than a leftover from the previous one.

    Args:
        generation_limit: Maximum number of generations to run.
        fitness_limit: Fitness score at which the search stops early.
        size: Number of chromosomes in the population.
        length: Number of genes in each chromosome.

    Returns:
        A list [generations_run, best_chromosome, best_fitness].
    """
    population = generate_population(size, length)
    total_generations = 0

    for _ in range(generation_limit):
        total_generations += 1

        ranked, scores = _rank_by_fitness(population)
        best = ranked[0]
        if scores[best] >= fitness_limit:
            return [total_generations, best, scores[best]]

        # Elitism: keep the two best *distinct* chromosomes unchanged.
        next_generation = list(dict.fromkeys(ranked))[:2]

        # Breed until the population is back at full size; children come in
        # pairs, so trim afterwards in case of one surplus child.
        while len(next_generation) < size:
            parents = select_parents(ranked)
            child1, child2 = crossover(parents[0], parents[1])
            next_generation += [mutation(child1), mutation(child2)]

        population = next_generation[:size]

    # Generation limit reached: score the last population (its children have
    # not been evaluated yet) and report its best chromosome.
    ranked, scores = _rank_by_fitness(population)
    return [total_generations, ranked[0], scores[ranked[0]]]
  


In [79]:
def start_evolution(generation_limit=100, fitness_limit=0.93, population_size=6):
    """Run the feature-selection search on `X` and print the result.

    The chromosome length is set to the number of feature columns in `X`, so
    each gene switches one feature on or off.

    Args:
        generation_limit: Maximum number of generations to run (default 100).
        fitness_limit: Accuracy at which the search stops early (default 0.93).
        population_size: Number of chromosomes per generation (default 6).

    Prints:
        The list returned by continue_evolution():
        [generations_run, best_chromosome, best_fitness].
    """
    features_count = X.shape[1]

    fittest = continue_evolution(
        generation_limit, fitness_limit, population_size, features_count
    )
    print(fittest)


start_evolution()

[1, '1000100100011', 0.9333333333333333]
