# Genetic Program Example - adapted from TinyGP by Moshesipper

Import libraries

In [1]:
from random import random, randint, seed
from statistics import mean
from copy import deepcopy
import numpy as np

## Define parameters

We must now define our parameters, and allow for bloat control.

In [24]:
# Run-wide hyper-parameters, read as module-level globals by the cells below.
# init_population creates int(POP_SIZE / (2 * (MAX_DEPTH - MIN_DEPTH + 1)))
# trees per depth and per method, so POP_SIZE should be a multiple of
# 2 * (MAX_DEPTH - MIN_DEPTH + 1) to obtain exactly POP_SIZE individuals.
POP_SIZE        = 3000   # population size
MIN_DEPTH       = 3    # minimal initial random tree depth (random_tree always picks a function while depth < MIN_DEPTH)
MAX_DEPTH       = 8   # maximal initial random tree depth (upper end of the depth ramp in init_population)
GENERATIONS     = 250  # maximal number of generations to run evolution
TOURNAMENT_SIZE = 5    # size of tournament for tournament selection
XO_RATE         = 0.9  # crossover rate: probability that GPTree.crossover recombines at all
PROB_MUTATION   = 0.01  # per-node mutation probability

## Defining non-terminals and terminals

Define functions for the non-terminal set - this can be done natively in functional programming languages.

In [3]:
def add(x, y):
    """Binary addition primitive: element-wise sum of x and y via NumPy."""
    total = np.add(x, y)
    return total
def sub(x, y):
    """Binary subtraction primitive: element-wise x minus y via NumPy."""
    difference = np.subtract(x, y)
    return difference
def mul(x, y):
    """Binary multiplication primitive: element-wise product of x and y via NumPy."""
    product = np.multiply(x, y)
    return product
def cos(x, y):
    """Binary cosine primitive: element-wise cos(x + y).

    Takes two operands so that it has the same arity as the other
    primitives; the operands are summed before the cosine is applied.
    """
    angle = np.add(x, y)
    return np.cos(angle)
#def div(x,y): return x/y # Consider what issues might arise with this function

# Define terminal and non-terminal sets.
# FUNCTIONS: binary primitives used for internal tree nodes; GPTree recognises
# a function node by membership in this list.
FUNCTIONS = [add, sub, mul, cos]
# TERMINALS: leaf values. 'xx' is the placeholder that GPTree.compute_tree
# replaces with its input; the remaining entries are numeric constants.
TERMINALS = ['xx', 10,1,2,np.pi]

## Managing our dataset

In usual settings you will have a dataset to work from, much as in traditional ML tasks; however, for the assignment and for observation, we will define a target function and create a dataset from it.

In [4]:
def target_func(xx): # evolution's target
    """Return the sum of x**2 + 2*x + 1 over every element x of xx."""
    total = 0
    for x in xx:
        total = total + (x**2 + 2*x + 1)
    return total

In [5]:
def f1(xx):
    """Return the sum of squares of the elements of xx."""
    total = 0
    for x in xx:
        total = total + x**2
    return total

def cosine(xx):
    """Return the sum of 10*cos(2*pi*x) over every element x of xx."""
    total = 0
    for x in xx:
        total = total + 10*np.cos(2*np.pi*x)
    return total


def f2(xx):
    """Return 10*len(xx) plus the sum of x**2 - 10*cos(2*pi*x) over xx."""
    offset = 10*len(xx)
    bumps = 0
    for x in xx:
        bumps = bumps + (x**2 - 10*np.cos(2*np.pi*x))
    return offset + bumps

In [33]:
def generate_dataset(func = None, low = -1, high = 1, points = 25):
    """Sample a two-input target function on a regular square grid.

    Args:
        func: callable taking a list ``[x, y]`` and returning the target
            value. Defaults to ``f2``; ``target_func``, ``f1`` and ``cosine``
            are drop-in alternatives.
        low: lower bound of the grid on both axes.
        high: upper bound of the grid on both axes (inclusive).
        points: number of samples per axis.

    Returns:
        A list of ``points * points`` rows ``[x, y, func([x, y])]`` (625 rows
        with the defaults), ordered with ``x`` as the outer axis.
    """
    if func is None:
        func = f2
    # Both axes share the same sample positions, so compute them once.
    axis = np.linspace(low, high, points)
    return [[x, y, func([x, y])] for x in axis for y in axis]

## Creating the genetic program class

In [25]:
class GPTree:
    """Binary expression tree used as one individual of the genetic program.

    ``data`` holds either a callable from FUNCTIONS (internal node, operands
    in ``left`` and ``right``) or a value from TERMINALS (leaf node).
    """

    def __init__(self, data = None, left = None, right = None):
        self.data  = data
        self.left  = left
        self.right = right

    def node_label(self): # string label
        """Return the function's ``__name__`` for function nodes, else ``str(data)``."""
        if self.data in FUNCTIONS:
            return self.data.__name__
        return str(self.data)

    def print_tree(self, prefix = ""): # textual printout
        """Print the tree in pre-order, indenting each level by three spaces."""
        print("%s%s" % (prefix, self.node_label()))
        if self.left:  self.left.print_tree (prefix + "   ")
        if self.right: self.right.print_tree(prefix + "   ")

    def compute_tree(self, x):
        """Evaluate the tree for input ``x``.

        Function nodes apply their callable to the results of both children,
        the ``'xx'`` terminal evaluates to ``x`` itself, and any other
        terminal evaluates to its stored constant.
        """
        if self.data in FUNCTIONS:
            return self.data(self.left.compute_tree(x), self.right.compute_tree(x))
        if self.data == 'xx':
            return x
        return self.data

    def random_tree(self, grow, max_depth, depth = 0): # create random tree using either grow or full method
        """Overwrite this node with a freshly generated random subtree.

        Args:
            grow: if True use the "grow" method (between MIN_DEPTH and
                max_depth each node is a terminal or a function with equal
                probability); if False use the "full" method (functions all
                the way down to max_depth).
            max_depth: depth at which only terminals are generated. Note that
                ``depth < MIN_DEPTH`` is tested first, so MIN_DEPTH wins when
                it is larger than max_depth.
            depth: depth of this node relative to the generated root.
        """
        if depth < MIN_DEPTH or (depth < max_depth and not grow):
            self.data = FUNCTIONS[randint(0, len(FUNCTIONS)-1)]
        elif depth >= max_depth:
            self.data = TERMINALS[randint(0, len(TERMINALS)-1)]
        else: # intermediate depth, grow
            if random() > 0.5:
                self.data = TERMINALS[randint(0, len(TERMINALS)-1)]
            else:
                self.data = FUNCTIONS[randint(0, len(FUNCTIONS)-1)]
        if self.data in FUNCTIONS:
            self.left = GPTree()
            self.left.random_tree(grow, max_depth, depth = depth + 1)
            self.right = GPTree()
            self.right.random_tree(grow, max_depth, depth = depth + 1)
        else:
            # A node regenerated as a terminal must not keep the children of
            # the function node it replaced, otherwise printing, copying and
            # crossover-point scanning would still walk the dead subtree.
            self.left = None
            self.right = None

    def mutation(self):
        """Mutate the tree in place with per-node probability PROB_MUTATION.

        A mutated node is replaced by a new random subtree and is not
        descended into further; otherwise BOTH children are visited, so every
        node of the tree gets its chance to mutate.
        """
        if random() < PROB_MUTATION: # mutate at this node
            self.random_tree(grow = True, max_depth = 2)
            return
        if self.left:  self.left.mutation()
        if self.right: self.right.mutation()

    def size(self): # tree size in nodes
        """Return the number of nodes in the tree (a terminal counts as 1)."""
        if self.data in TERMINALS: return 1
        l = self.left.size()  if self.left  else 0
        r = self.right.size() if self.right else 0
        return 1 + l + r

    def build_subtree(self):
        """Return a structural copy of the subtree rooted at this node.

        Node objects are new; the ``data`` values themselves are shared.
        """
        t = GPTree()
        t.data = self.data
        if self.left:  t.left  = self.left.build_subtree()
        if self.right: t.right = self.right.build_subtree()
        return t

    def scan_tree(self, count, second): # note: count is list, so it's passed "by reference"
        """Locate the ``count[0]``-th node in pre-order (1 = this node).

        Args:
            count: one-element list holding the 1-based pre-order index of
                the wanted node; it is decremented in place while scanning.
            second: if None, a copy of the located subtree is returned;
                otherwise the located node is overwritten in place with
                ``second``'s data and children and None is returned.
        """
        count[0] -= 1
        if count[0] <= 0: # this is the requested node
            if second is None: # return subtree rooted here
                return self.build_subtree()
            # glue subtree here
            self.data  = second.data
            self.left  = second.left
            self.right = second.right
            return None
        ret = None
        # count[0] > 0 means the target has not been reached yet; once it
        # has, the remaining siblings are skipped.
        if self.left  and count[0] > 0: ret = self.left.scan_tree(count, second)
        if self.right and count[0] > 0: ret = self.right.scan_tree(count, second)
        return ret

    def crossover(self, other): # xo 2 trees at random nodes
        """With probability XO_RATE, replace a random node of this tree in
        place by a copy of a random subtree of ``other``."""
        if random() < XO_RATE:
            second = other.scan_tree([randint(1, other.size())], None) # 2nd random subtree
            self.scan_tree([randint(1, self.size())], second) # 2nd subtree "glued" inside 1st tree

## Fitness and selection

In [26]:
def fitness(individual, dataset): # inverse mean absolute error over dataset normalized to [0,1]
    """Score ``individual`` as 1 / (1 + mean absolute error) over ``dataset``.

    Each row holds the inputs followed by the target in the last position.
    The tree is evaluated on the inputs and its output is reduced with
    ``np.sum`` before being compared with the target.
    """
    abs_errors = []
    for row in dataset:
        inputs = row[0:-1]
        target = row[-1]
        prediction = np.sum(individual.compute_tree(inputs))
        abs_errors.append(abs(prediction - target))
    return 1 / (1 + mean(abs_errors))

In this example we are using tournament selection. What benefits and drawbacks does tournament selection have?

In [27]:
def selection(population, fitnesses): # select one individual using tournament selection
    """Return a deep copy of the winner of one random tournament.

    TOURNAMENT_SIZE indices are drawn uniformly with replacement; the
    contender with the highest fitness wins, the earliest-drawn one on ties.
    """
    last_index = len(population) - 1
    contenders = [randint(0, last_index) for _ in range(TOURNAMENT_SIZE)]
    winner = max(contenders, key=lambda idx: fitnesses[idx])
    return deepcopy(population[winner])

Try to implement a roulette wheel selection for this and compare your results.

In [28]:
def roulette_selection(population, fitnesses):
    """Select one individual by fitness-proportionate (roulette wheel) selection.

    Each individual is chosen with probability ``fitness / sum(fitnesses)``.
    As with ``selection``, a deep copy is returned so the caller may modify
    it in place.

    Args:
        population: sequence of individuals.
        fitnesses: non-negative fitness values, parallel to ``population``.

    Returns:
        A deep copy of the selected individual.

    Raises:
        ValueError: if ``population`` is empty.
    """
    if not population:
        raise ValueError("cannot select from an empty population")
    total = sum(fitnesses)
    if total <= 0:
        # No individual has positive fitness: every slice of the wheel is
        # empty, so fall back to a uniform pick.
        return deepcopy(population[randint(0, len(population) - 1)])
    spin = random() * total
    running = 0.0
    for individual, fit in zip(population, fitnesses):
        running += fit
        # Strict comparison so zero-fitness individuals are never picked.
        if spin < running:
            return deepcopy(individual)
    # Only reachable through floating-point round-off in the running sum.
    return deepcopy(population[-1])

In [29]:
def init_population(pop_size = POP_SIZE, max_depth = MAX_DEPTH, min_depth = MIN_DEPTH): # ramped half-and-half
    """Create the initial population with ramped half-and-half.

    For every depth limit from ``min_depth`` to ``max_depth`` (inclusive) the
    same number of trees is built with the "grow" method and with the "full"
    method.

    Args:
        pop_size: total number of individuals; must be a multiple of
            ``2 * (max_depth - min_depth + 1)``.
        max_depth: largest depth limit of the ramp.
        min_depth: smallest depth limit of the ramp.

    Returns:
        A list of exactly ``pop_size`` GPTree instances.

    Raises:
        ValueError: if the depth range is empty or ``pop_size`` cannot be
            split evenly across depths and methods.
    """
    depth_levels = max_depth - min_depth + 1
    if depth_levels <= 0:
        raise ValueError(
            'not valid param: min_depth %s exceeds max_depth %s' % (min_depth, max_depth))
    groups = 2 * depth_levels  # one grow batch and one full batch per depth
    if pop_size % groups:
        raise ValueError(
            'not valid param: pop_size %s is not a multiple of %s' % (pop_size, groups))
    per_group = pop_size // groups
    print(per_group)

    pop = []
    for md in range(min_depth, max_depth + 1):
        for grow in (True, False):  # grow batch first, then full batch
            for _ in range(per_group):
                t = GPTree()
                t.random_tree(grow = grow, max_depth = md)
                pop.append(t)
    return pop

##  Main Loop

In [35]:
# Build the training data and the initial population, then score every
# individual once before evolution starts.
dataset = generate_dataset()
population = init_population()
best_of_run = None      # best tree seen so far
best_of_run_f = 0       # its fitness
best_of_run_gen = 0     # generation in which it appeared
print(len(population))
#population[0] = deepcopy(f2_function)
# Iterate the population itself so the fitness list always matches it.
fitnesses = [fitness(individual, dataset) for individual in population]

250
3000


In [36]:
# Display the fitness of every individual in the current population.
fitnesses

[0.03284031347211116,
 0.1081492928753457,
 0.04776939638293075,
 0.001270284435612794,
 0.047955591557787815,
 0.04668229324560677,
 0.043953242891314796,
 0.0479952960865088,
 0.002641999783621998,
 0.04944388989020214,
 0.046929004021411695,
 0.047888842503550924,
 0.044554323498383196,
 0.0005615660006291202,
 0.07043701290907235,
 0.030883547544365588,
 0.016903544767822458,
 0.03472823018037384,
 0.06984199632083185,
 0.010121153934418602,
 0.07656122961464162,
 0.00020231617172340076,
 0.007125593195246355,
 0.04829504882013934,
 0.03526608947702001,
 0.0028921807884224053,
 0.04779607010090281,
 0.0055556428048874566,
 0.04961689711215043,
 0.04201433869405613,
 0.044458531986783185,
 0.014854087214149863,
 0.020722867090861533,
 0.07893103687415659,
 0.021489238946135233,
 0.048858924962243866,
 0.05173823391725088,
 0.07445474577173408,
 0.04778758830022874,
 0.006530315911818425,
 0.052079892990905666,
 0.05516695991206311,
 0.04827364288268811,
 0.0074855797845499086,
 0.04

In [None]:
    # go evolution!
    # Uses population, fitnesses, dataset and the best_of_run* trackers set up
    # in the cell above. Each generation is built entirely from offspring:
    # two tournament winners are crossed over, the result is mutated and kept.
    for gen in range(GENERATIONS):
        nextgen_population=[]

        for _ in range(POP_SIZE):
            parent1 = selection(population, fitnesses)
            parent2 = selection(population, fitnesses)
            parent1.crossover(parent2)
            parent1.mutation()
            nextgen_population.append(parent1)
        population=nextgen_population
        fitnesses = [fitness(individual, dataset) for individual in population]
        best_of_gen_f = max(fitnesses)  # computed once per generation
        print("________________________")
        print("gen:", gen, ", best_of_gen_f:", round(best_of_gen_f,3), ", best_of_run_f:",best_of_run_f)
        if best_of_gen_f > best_of_run_f:
            print('new_tree')
            best_of_run_f = best_of_gen_f
            best_of_run_gen = gen
            best_of_run = deepcopy(population[fitnesses.index(best_of_gen_f)])
            best_of_run.print_tree()
        # Floating-point round-off keeps even an exact solution marginally
        # below 1, so stop on "close enough" instead of exact equality.
        if best_of_run_f >= 1 - 1e-9: break


    print("\n\n_________________________________________________\nEND OF RUN\nbest_of_run attained at gen " + str(best_of_run_gen) +\
          " and has f=" + str(round(best_of_run_f,3)))
    # best_of_run stays None when no generation improved on the initial value.
    if best_of_run is not None:
        best_of_run.print_tree()

In [None]:

def run():
    """Run one complete evolution and print its progress.

    Builds a fresh population, then for up to GENERATIONS generations creates
    POP_SIZE offspring (tournament selection, crossover, mutation), tracks
    the best individual seen and stops early once its fitness is numerically
    1. Reads the module-level ``dataset``; returns nothing.
    """
    population = init_population()
    best_of_run = None
    best_of_run_f = 0
    best_of_run_gen = 0
    print(len(population))
    #population[0] = deepcopy(f2_function)
    fitnesses = [fitness(individual, dataset) for individual in population]

    # go evolution!
    for gen in range(GENERATIONS):
        nextgen_population=[]

        for _ in range(POP_SIZE):
            parent1 = selection(population, fitnesses)
            parent2 = selection(population, fitnesses)
            parent1.crossover(parent2)
            parent1.mutation()
            nextgen_population.append(parent1)
        population=nextgen_population
        fitnesses = [fitness(individual, dataset) for individual in population]
        best_of_gen_f = max(fitnesses)  # computed once per generation
        print("________________________")
        print("gen:", gen, ", best_of_gen_f:", round(best_of_gen_f,3), ", best_of_run_f:",best_of_run_f)
        if best_of_gen_f > best_of_run_f:
            print('new_tree')
            best_of_run_f = best_of_gen_f
            best_of_run_gen = gen
            best_of_run = deepcopy(population[fitnesses.index(best_of_gen_f)])
            best_of_run.print_tree()
        # Floating-point round-off keeps even an exact solution marginally
        # below 1, so stop on "close enough" instead of exact equality.
        if best_of_run_f >= 1 - 1e-9: break


    print("\n\n_________________________________________________\nEND OF RUN\nbest_of_run attained at gen " + str(best_of_run_gen) +\
          " and has f=" + str(round(best_of_run_f,3)))
    # best_of_run stays None when no generation improved on the initial value.
    if best_of_run is not None:
        best_of_run.print_tree()

In [None]:
# (Re)build the module-level dataset that fitness evaluation reads.
dataset = generate_dataset()


In [62]:
# Terminals: leaf nodes, each wrapping a single terminal value.
xx = GPTree(data = 'xx')    # input placeholder
lenx = GPTree(data = 2)     # constant 2 (presumably the number of inputs per dataset row)
ten = GPTree(data=10)       # constant 10
pi = GPTree(data = np.pi)   # constant pi
    
def genTree_f1():
    """Build the hand-written tree ``xx * xx``.

    Evaluated element-wise and summed by ``fitness``, this reproduces ``f1``.
    Every call creates fresh leaf nodes, so the two operands are distinct
    objects and in-place mutation or crossover of one cannot alter the other.
    """
    return GPTree(data = mul, left = GPTree(data = 'xx'), right = GPTree(data = 'xx'))

def genTree_cosine():
    """Build the hand-written tree ``10 * cos(pi*xx + pi*xx)``.

    Evaluated element-wise and summed by ``fitness``, this reproduces the
    ``cosine`` target, i.e. the sum of 10*cos(2*pi*x). Every node is created
    fresh so no node object is shared between branches.
    """
    # cos is a binary primitive computing cos(left + right), so passing
    # pi*xx on both sides yields cos(2*pi*xx).
    pi_xx_left = GPTree(data = mul, left = GPTree(data = np.pi), right = GPTree(data = 'xx'))
    pi_xx_right = GPTree(data = mul, left = GPTree(data = np.pi), right = GPTree(data = 'xx'))
    cosine = GPTree(data = cos, left = pi_xx_left, right = pi_xx_right)
    root = GPTree(data = mul, left = GPTree(data = 10), right = cosine)
    return root


    
def genTree_f2():
    """Build the hand-written tree ``10 + (xx*xx - 10*cos(pi*xx + pi*xx))``.

    Evaluated element-wise and summed by ``fitness``, the per-element
    constant 10 adds up to ``10 * len(xx)``, so the tree reproduces ``f2``.
    Every node is created fresh so no node object is shared between branches.
    """
    # cos is a binary primitive computing cos(left + right), so passing
    # pi*xx on both sides yields cos(2*pi*xx).
    pi_xx_left = GPTree(data = mul, left = GPTree(data = np.pi), right = GPTree(data = 'xx'))
    pi_xx_right = GPTree(data = mul, left = GPTree(data = np.pi), right = GPTree(data = 'xx'))
    cosine = GPTree(data = cos, left = pi_xx_left, right = pi_xx_right)
    ten_cos = GPTree(data = mul, left = GPTree(data = 10), right = cosine)
    xx_sq = GPTree(data = mul, left = GPTree(data = 'xx'), right = GPTree(data = 'xx'))
    subs = GPTree(data = sub, left = xx_sq, right = ten_cos)

    root = GPTree(data = add, left = GPTree(data = 10), right = subs)

    return root

    

In [63]:
# Instantiate the hand-built reference trees.
f1_function = genTree_f1()
c = genTree_cosine()
f2_function = genTree_f2()

In [65]:
# Sanity check: score the hand-built f2 tree on a freshly generated dataset.
dataset = generate_dataset()
fitness(f2_function, dataset)


0.9999999999999993

In [47]:
# Display the dataset rows ([x, y, target]).
dataset

[[-5.12, -5.12, 37.84942745157179],
 [-5.12, -5.016565656565657, 34.14476435598601],
 [-5.12, -4.913131313131313, 34.516512787427594],
 [-5.12, -4.80969696969697, 38.39436270407137],
 [-5.12, -4.706262626262626, 43.78726257721653],
 [-5.12, -4.602828282828283, 48.09518687017432],
 [-5.12, -4.49939393939394, 49.16918704374752],
 [-5.12, -4.395959595959596, 46.187534896961225],
 [-5.12, -4.292525252525253, 39.99074766331033],
 [-5.12, -4.189090909090909, 32.73890247713022],
 [-5.12, -4.085656565656565, 27.030955816481416],
 [-5.12, -3.982222222222222, 24.84512836134043],
 [-5.12, -3.878787878787879, 26.73236875336698],
 [-5.12, -3.7753535353535357, 31.591727470768937],
 [-5.12, -3.671919191919192, 37.11922266869112],
 [-5.12, -3.5684848484848484, 40.747191826643956],
 [-5.12, -3.465050505050505, 40.69114809593917],
 [-5.12, -3.361616161616162, 36.677329722387526],
 [-5.12, -3.2581818181818183, 30.054314881882092],
 [-5.12, -3.1547474747474746, 23.243195676326245],
 [-5.12, -3.05131313131

In [26]:
# Print the hand-built f2 tree in indented pre-order.
f2_function.print_tree()

add
   mul
      10
      2
   sub
      mul
         xx
         xx
      mul
         10
         cos
            mul
               3.141592653589793
               xx
            mul
               3.141592653589793
               xx
