# Genetic Program Example - adapted from TinyGP by Moshesipper

Import libraries

In [1]:
from random import random, randint, seed, randrange
from statistics import mean
from copy import deepcopy
import numpy as np
from itertools import combinations
import math

## Define parameters

We must now define our parameters, and allow for bloat control.

In [2]:
POP_SIZE        = 60   # population size
MIN_DEPTH       = 2    # minimal initial random tree depth
MAX_DEPTH       = 5    # maximal initial random tree depth
GENERATIONS     = 250  # maximal number of generations to run evolution
TOURNAMENT_SIZE = 5    # size of tournament for tournament selection
XO_RATE         = 0.8  # crossover rate 
PROB_MUTATION   = 0.2  # per-node mutation probability 

## Defining non-terminals and terminals

Define functions for non terminal set - this can be done natively in functional programming languages.

In [3]:
# functions taking two arguments
def add(x, y): return x + y
def sub(x, y): return x - y
def mul(x, y): return x * y
def div(x,y): 
    if y != 0:
        return x/y
    else: 
        return 2**(31-1)

In [4]:
# functions taking one argument
def sin(x): return math.sin(x)
def cos(x): return math.cos(x)
def aBs(x): return abs(x)
def eXp(x): return math.exp(x)
def power_1(x): return x
def power_2(x): return x**2
def power_3(x): return x**3
def power_4(x): return x**4
def power_reverse_1(x): return x
def power_reverse_2(x): return x**(1/2)
def power_reverse_3(x): return x**(1/3)
def power_reverse_4(x): return x**(1/4)

Define terminal and non-terminal sets

In [5]:
FUNCTIONS_2_ARGS = [add, sub, mul]
FUNCTIONS_1_ARG = [sin, cos, aBs, eXp, power_1, power_2, power_3, power_4, power_reverse_1, power_reverse_2,
                  power_reverse_3, power_reverse_4]

def generate_terminals(dim):
    TERMINALS = [-2, -1, 0, 1, 2]
    for i in range(1,dim+1):
        x = f'x{i}'
        TERMINALS.append(x)
    return TERMINALS

TERMINALS = generate_terminals(2)
print(generate_terminals(3))

[-2, -1, 0, 1, 2, 'x1', 'x2', 'x3']


## Managing our dataset

In usual settings you will have a dataset which you are working from, much in the same way as in traditional ML tasks; however, for the assignment and for observing, we will consider a target function and a create a dataset from that.

In [6]:
# def target_func(x): # evolution's target
#     return x*x + 2*x + 1

In [7]:
# def generate_dataset(): # generate 101 data points from target_func
#     dataset = []
#     for x in range(-100,101,2): 
#         x /= 100
#         dataset.append([x, target_func(x)])
#     return dataset

## Sphere method

In [8]:
def sphere_function(pos):
    big_sum = 0
    for i in range(len(pos)):
        big_sum = big_sum + pos[i]**2
    return big_sum

In [9]:
def generate_dataset(dims): # generate 101 data points from target_func
    dataset = []
    for i in range(100):
        n_tuple = []
        for dim in range(dims):
            x = randrange(-500, 500)/100
            n_tuple.append(x)
        dataset.append((n_tuple,sphere_function(n_tuple)))
    return dataset

def flip_coin():
    if randrange(0,100) > 50:
        return True
    else:
        return False

In [10]:
dataset = generate_dataset(2)
dataset_result = dataset
print(dataset_result)

[([2.75, -3.19], 17.738599999999998), ([-2.53, -0.17], 6.429799999999999), ([-3.69, 0.73], 14.149), ([4.92, 0.51], 24.4665), ([0.3, 2.21], 4.9741), ([4.84, -0.92], 24.272), ([3.74, -1.99], 17.9477), ([-1.38, -0.59], 2.2524999999999995), ([-3.8, 3.79], 28.8041), ([2.51, -1.08], 7.466499999999999), ([4.77, -1.84], 26.138499999999997), ([-0.99, -1.1], 2.1901), ([4.04, 4.33], 35.070499999999996), ([-4.71, -2.45], 28.186600000000002), ([4.39, 0.3], 19.362099999999998), ([2.11, -1.1], 5.6621), ([-4.25, 4.56], 38.8561), ([2.98, 0.09], 8.8885), ([0.58, -3.89], 15.4685), ([3.12, 2.7], 17.0244), ([-1.63, -1.92], 6.343299999999999), ([1.97, -2.97], 12.701800000000002), ([-2.98, -2.21], 13.7645), ([-2.55, -3.32], 17.5249), ([-0.5, -2.35], 5.772500000000001), ([3.14, 0.57], 10.1845), ([4.93, 1.57], 26.769799999999996), ([4.55, -3.56], 33.376099999999994), ([0.3, 1.19], 1.5061), ([-4.43, -4.5], 39.8749), ([1.2, 1.73], 4.4329), ([-1.51, -4.51], 22.6202), ([-2.72, 4.29], 25.802500000000002), ([-1.02, 

## Creating the genetic program class

In [26]:
class GPTree:
    def __init__(self, data = None, left = None, right = None):
        self.data  = data
        self.left  = left
        self.right = right
        
    def node_label(self): # string label
        if (self.data in FUNCTIONS_2_ARGS or self.data in FUNCTIONS_1_ARG):
            return self.data.__name__
        else: 
            return str(self.data)
    
    def print_tree(self, prefix = ""): # textual printout
        print("%s%s" % (prefix, self.node_label()))        
        if self.left:  self.left.print_tree (prefix + "   ")
        if self.right: self.right.print_tree(prefix + "   ")

    def compute_tree(self, x): 
        if (self.data in FUNCTIONS_2_ARGS): 
            return self.data(self.left.compute_tree(x), self.right.compute_tree(x))
        elif (self.data in FUNCTIONS_1_ARG):
            print(x)
            print(self.left.compute_tree(x))
            print(self.data(self.left.compute_tree(x)))
            return self.data(self.left.compute_tree(x))
        elif type(self.data) is str: 
            if self.data[0] == 'x': return x[int(self.data[1:])-1]
        else: return self.data
            
    def random_tree(self, grow, max_depth, depth = 0): # create random tree using either grow or full method
        if depth < MIN_DEPTH or (depth < max_depth and not grow): 
            if flip_coin() == True:
                self.data = FUNCTIONS_2_ARGS[randint(0, len(FUNCTIONS_2_ARGS)-1)]
            else:
                self.data = FUNCTIONS_1_ARG[randint(0, len(FUNCTIONS_1_ARG)-1)]
        elif depth >= max_depth:   
            self.data = TERMINALS[randint(0, len(TERMINALS)-1)]
        else: # intermediate depth, grow
            if random () > 0.5: 
                self.data = TERMINALS[randint(0, len(TERMINALS)-1)]
            else:
                if flip_coin() == True:
                    self.data = FUNCTIONS_2_ARGS[randint(0, len(FUNCTIONS_2_ARGS)-1)]
                else:
                    self.data = FUNCTIONS_1_ARG[randint(0, len(FUNCTIONS_1_ARG)-1)]
        if self.data in FUNCTIONS_2_ARGS:
            self.left = GPTree()          
            self.left.random_tree(grow, max_depth, depth = depth + 1)            
            self.right = GPTree()
            self.right.random_tree(grow, max_depth, depth = depth + 1)
        elif self.data in FUNCTIONS_1_ARG:
            self.left = GPTree()          
            self.left.random_tree(grow, max_depth, depth = depth + 1)

    def mutation(self):
        if random() < PROB_MUTATION: # mutate at this node
            self.random_tree(grow = True, max_depth = 2)
        elif self.left: self.left.mutation()
        elif self.right: self.right.mutation() 

    def size(self): # tree size in nodes
        if self.data in TERMINALS: return 1
        l = self.left.size()  if self.left  else 0
        r = self.right.size() if self.right else 0
        return 1 + l + r

    def build_subtree(self): # count is list in order to pass "by reference"
        t = GPTree()
        t.data = self.data
        if self.left:  t.left  = self.left.build_subtree()
        if self.right: t.right = self.right.build_subtree()
        return t
                        
    def scan_tree(self, count, second): # note: count is list, so it's passed "by reference"
        count[0] -= 1            
        if count[0] <= 1: 
            if not second: # return subtree rooted here
                return self.build_subtree()
            else: # glue subtree here
                self.data  = second.data
                self.left  = second.left
                self.right = second.right
        else:  
            ret = None              
            if self.left  and count[0] > 1: ret = self.left.scan_tree(count, second)  
            if self.right and count[0] > 1: ret = self.right.scan_tree(count, second)  
            return ret

    def crossover(self, other): # xo 2 trees at random nodes
        if random() < XO_RATE:
            second = other.scan_tree([randint(1, other.size())], None) # 2nd random subtree
            self.scan_tree([randint(1, self.size())], second) # 2nd subtree "glued" inside 1st tree

## Fitness and selection

In [12]:
def fitness(individual, dataset): # inverse mean absolute error over dataset normalized to [0,1]
    
    return 1 / (1 + mean([abs(individual.compute_tree(ds[0]) - ds[1]) for ds in dataset]))

In the example we are using tournament based fitness. What benefits and negatives does tournament selection have?

In [13]:
def selection(population, fitnesses): # select one individual using tournament selection
    tournament = [randint(0, len(population)-1) for i in range(TOURNAMENT_SIZE)] # select tournament contenders
    tournament_fitnesses = [fitnesses[tournament[i]] for i in range(TOURNAMENT_SIZE)]
    return deepcopy(population[tournament[tournament_fitnesses.index(max(tournament_fitnesses))]]) 

Try to implement a roulette wheel selection for this and compare your results.

In [14]:
def roulette_selection(population, fitnesses):
    pass

In [15]:
def init_population(): # ramped half-and-half
    pop = []
    for md in range(3, MAX_DEPTH + 1):
        for i in range(int(POP_SIZE/6)):
            t = GPTree()
            t.random_tree(grow = True, max_depth = md) # grow
            pop.append(t) 
        for i in range(int(POP_SIZE/6)):
            t = GPTree()
            t.random_tree(grow = False, max_depth = md) # full
            pop.append(t) 
    return pop

##  Main Loop

In [27]:
    dims = 2
    dataset = generate_dataset(dims)
    population = init_population() 
    best_of_run = None
    best_of_run_f = 0
    best_of_run_gen = 0
    fitnesses = [fitness(population[i], dataset) for i in range(POP_SIZE)]

    # go evolution!
    for gen in range(GENERATIONS):        
        nextgen_population=[]
        for i in range(POP_SIZE):
            parent1 = selection(population, fitnesses)
            parent2 = selection(population, fitnesses)
            parent1.crossover(parent2)
            parent1.mutation()
            nextgen_population.append(parent1)
        population=nextgen_population
        fitnesses = [fitness(population[i], dataset) for i in range(POP_SIZE)]
        if max(fitnesses) > best_of_run_f:
            best_of_run_f = max(fitnesses)
            best_of_run_gen = gen
            best_of_run = deepcopy(population[fitnesses.index(max(fitnesses))])
            print("________________________")
            print("gen:", gen, ", best_of_run_f:", round(max(fitnesses),3), ", best_of_run:") 
            best_of_run.print_tree()
        if best_of_run_f == 1: break   
    
    print("\n\n_________________________________________________\nEND OF RUN\nbest_of_run attained at gen " + str(best_of_run_gen) +\
          " and has f=" + str(round(best_of_run_f,3)))
    best_of_run.print_tree()

[-2.45, 4.45]
[-2.45, 4.45]
4.45
[-2.45, 4.45]
4.45
-15.352500000000003
[-2.45, 4.45]
4.45
[-2.45, 4.45]
4.45
[4.31, -1.96]
[4.31, -1.96]
-1.96
[4.31, -1.96]
-1.96
-1.8815999999999997
[4.31, -1.96]
-1.96
[4.31, -1.96]
-1.96
[-0.62, 2.31]
[-0.62, 2.31]
2.31
[-0.62, 2.31]
2.31
-3.0261
[-0.62, 2.31]
2.31
[-0.62, 2.31]
2.31
[0.92, 2.48]
[0.92, 2.48]
2.48
[0.92, 2.48]
2.48
-3.6704000000000003
[0.92, 2.48]
2.48
[0.92, 2.48]
2.48
[2.89, 0.09]
[2.89, 0.09]
0.09
[2.89, 0.09]
0.09
0.0819
[2.89, 0.09]
0.09
[2.89, 0.09]
0.09
[-1.94, 0.57]
[-1.94, 0.57]
0.57
[-1.94, 0.57]
0.57
0.24509999999999998
[-1.94, 0.57]
0.57
[-1.94, 0.57]
0.57
[-3.62, -2.25]
[-3.62, -2.25]
-2.25
[-3.62, -2.25]
-2.25
-2.8125
[-3.62, -2.25]
-2.25
[-3.62, -2.25]
-2.25
[4.86, -3.62]
[4.86, -3.62]
-3.62
[4.86, -3.62]
-3.62
-9.4844
[4.86, -3.62]
-3.62
[4.86, -3.62]
-3.62
[2.82, 1.7]
[2.82, 1.7]
1.7
[2.82, 1.7]
1.7
-1.1899999999999997
[2.82, 1.7]
1.7
[2.82, 1.7]
1.7
[-3.79, 2.01]
[-3.79, 2.01]
2.01
[-3.79, 2.01]
2.01
-2.03009999999

[-3.59, 0.29]
-2
[-1.55, -0.83]
-2
[-0.08, 1.98]
-2
[-1.78, 0.64]
-2
[2.62, -3.51]
-2
[2.67, 3.25]
-2
[-4.47, -4.13]
-2
[2.12, 1.33]
-2
[-2.82, -0.71]
-2
[-0.24, 4.74]
-2
[2.01, 2.56]
-2
[-4.51, 4.62]
-2
[1.87, -3.64]
-2
[0.4, 0.46]
-2
[3.98, -2.9]
-2
[-4.01, 3.86]
-2
[-2.35, 3.51]
-2
[0.64, 2.93]
-2
[-3.04, 2.58]
-2
[-4.85, 4.94]
-2
[-4.09, -1.53]
-2
[-3.09, 4.05]
-2
[-4.67, 0.82]
-2
[4.93, 0.5]
-2
[4.65, -2.31]
-2
[2.89, -3.14]
-2
[4.9, -3.19]
-2
[0.79, -4.54]
-2
[4.95, 1.38]
-2
[-0.83, -3.3]
-2
[1.01, 4.49]
-2
[1.75, 1.27]
-2
[3.29, -3.87]
-2
[2.32, -3.19]
-2
[-2.65, -0.18]
-2
[-4.45, 1.01]
-2
[2.61, -3.6]
-2
[-1.44, -4.73]
-2
[1.9, 0.33]
-2
[1.89, 1.09]
-2
[0.16, 4.51]
-2
[4.29, -0.75]
-2
[-2.17, -0.52]
-2
[3.33, -2.42]
-2
[2.18, -0.76]
-2
[3.56, -2.27]
-2
[2.31, 2.75]
-2
[-1.7, -1.35]
-2
[-2.29, -1.31]
-2
[-2.45, 4.45]
-1
[4.31, -1.96]
-1
[-0.62, 2.31]
-1
[0.92, 2.48]
-1
[2.89, 0.09]
-1
[-1.94, 0.57]
-1
[-3.62, -2.25]
-1
[4.86, -3.62]
-1
[2.82, 1.7]
-1
[-3.79, 2.01]
-1
[3.88, 0.15

-1
[-1.44, -0.3]
-1
[3.44, 0.77]
[3.44, 0.77]
1
1
[3.44, 0.77]
1
[3.44, 0.77]
[3.44, 0.77]
-1
-1
[3.44, 0.77]
-1
[3.13, 3.01]
[3.13, 3.01]
1
1
[3.13, 3.01]
1
[3.13, 3.01]
[3.13, 3.01]
-1
-1
[3.13, 3.01]
-1
[-1.79, -0.69]
[-1.79, -0.69]
1
1
[-1.79, -0.69]
1
[-1.79, -0.69]
[-1.79, -0.69]
-1
-1
[-1.79, -0.69]
-1
[1.13, 2.3]
[1.13, 2.3]
1
1
[1.13, 2.3]
1
[1.13, 2.3]
[1.13, 2.3]
-1
-1
[1.13, 2.3]
-1
[-2.47, -4.28]
[-2.47, -4.28]
1
1
[-2.47, -4.28]
1
[-2.47, -4.28]
[-2.47, -4.28]
-1
-1
[-2.47, -4.28]
-1
[-3.67, -3.34]
[-3.67, -3.34]
1
1
[-3.67, -3.34]
1
[-3.67, -3.34]
[-3.67, -3.34]
-1
-1
[-3.67, -3.34]
-1
[-1.82, -4.89]
[-1.82, -4.89]
1
1
[-1.82, -4.89]
1
[-1.82, -4.89]
[-1.82, -4.89]
-1
-1
[-1.82, -4.89]
-1
[0.14, -4.9]
[0.14, -4.9]
1
1
[0.14, -4.9]
1
[0.14, -4.9]
[0.14, -4.9]
-1
-1
[0.14, -4.9]
-1
[3.99, 0.73]
[3.99, 0.73]
1
1
[3.99, 0.73]
1
[3.99, 0.73]
[3.99, 0.73]
-1
-1
[3.99, 0.73]
-1
[-0.4, -4.16]
[-0.4, -4.16]
1
1
[-0.4, -4.16]
1
[-0.4, -4.16]
[-0.4, -4.16]
-1
-1
[-0.4, -4.16]
-1
[0

[0.58, -2.31]
0
0.0
[0.58, -2.31]
[0.58, -2.31]
0
0
[0.58, -2.31]
0
[-2.9, -2.39]
[-2.9, -2.39]
[-2.9, -2.39]
0
0
[-2.9, -2.39]
0
0.0
[-2.9, -2.39]
[-2.9, -2.39]
0
0
[-2.9, -2.39]
0
[-1.44, -0.3]
[-1.44, -0.3]
[-1.44, -0.3]
0
0
[-1.44, -0.3]
0
0.0
[-1.44, -0.3]
[-1.44, -0.3]
0
0
[-1.44, -0.3]
0
[3.44, 0.77]
[3.44, 0.77]
[3.44, 0.77]
0
0
[3.44, 0.77]
0
0.0
[3.44, 0.77]
[3.44, 0.77]
0
0
[3.44, 0.77]
0
[3.13, 3.01]
[3.13, 3.01]
[3.13, 3.01]
0
0
[3.13, 3.01]
0
0.0
[3.13, 3.01]
[3.13, 3.01]
0
0
[3.13, 3.01]
0
[-1.79, -0.69]
[-1.79, -0.69]
[-1.79, -0.69]
0
0
[-1.79, -0.69]
0
0.0
[-1.79, -0.69]
[-1.79, -0.69]
0
0
[-1.79, -0.69]
0
[1.13, 2.3]
[1.13, 2.3]
[1.13, 2.3]
0
0
[1.13, 2.3]
0
0.0
[1.13, 2.3]
[1.13, 2.3]
0
0
[1.13, 2.3]
0
[-2.47, -4.28]
[-2.47, -4.28]
[-2.47, -4.28]
0
0
[-2.47, -4.28]
0
0.0
[-2.47, -4.28]
[-2.47, -4.28]
0
0
[-2.47, -4.28]
0
[-3.67, -3.34]
[-3.67, -3.34]
[-3.67, -3.34]
0
0
[-3.67, -3.34]
0
0.0
[-3.67, -3.34]
[-3.67, -3.34]
0
0
[-3.67, -3.34]
0
[-1.82, -4.89]
[-1.82, -4.8

[1.97, -0.18]
-1
[1.97, -0.18]
[1.97, -0.18]
-1
1
[1.97, -0.18]
-1
[1.97, -0.18]
-0.18
[-4.75, -2.75]
-1
[-4.75, -2.75]
[-4.75, -2.75]
-1
1
[-4.75, -2.75]
-1
[-4.75, -2.75]
-2.75
[1.56, -0.48]
-1
[1.56, -0.48]
[1.56, -0.48]
-1
1
[1.56, -0.48]
-1
[1.56, -0.48]
-0.48
[2.19, -3.53]
-1
[2.19, -3.53]
[2.19, -3.53]
-1
1
[2.19, -3.53]
-1
[2.19, -3.53]
-3.53
[4.63, 4.49]
-1
[4.63, 4.49]
[4.63, 4.49]
-1
1
[4.63, 4.49]
-1
[4.63, 4.49]
4.49
[-0.74, -3.15]
-1
[-0.74, -3.15]
[-0.74, -3.15]
-1
1
[-0.74, -3.15]
-1
[-0.74, -3.15]
-3.15
[-0.72, -0.31]
-1
[-0.72, -0.31]
[-0.72, -0.31]
-1
1
[-0.72, -0.31]
-1
[-0.72, -0.31]
-0.31
[3.67, 4.54]
-1
[3.67, 4.54]
[3.67, 4.54]
-1
1
[3.67, 4.54]
-1
[3.67, 4.54]
4.54
[2.64, 1.64]
-1
[2.64, 1.64]
[2.64, 1.64]
-1
1
[2.64, 1.64]
-1
[2.64, 1.64]
1.64
[-3.66, 1.66]
-1
[-3.66, 1.66]
[-3.66, 1.66]
-1
1
[-3.66, 1.66]
-1
[-3.66, 1.66]
1.66
[0.03, 4.72]
-1
[0.03, 4.72]
[0.03, 4.72]
-1
1
[0.03, 4.72]
-1
[0.03, 4.72]
4.72
[-3.59, 0.29]
-1
[-3.59, 0.29]
[-3.59, 0.29]
-1
1
[-3

[-2.45, 4.45]
[-2.45, 4.45]
[-2.45, 4.45]
2.45
2.45
[-2.45, 4.45]
2.45
1.2510985509742438
[-2.45, 4.45]
[-2.45, 4.45]
2.45
2.45
[-2.45, 4.45]
2.45
[4.31, -1.96]
[4.31, -1.96]
[4.31, -1.96]
-3.96
-3.96
[4.31, -1.96]
-3.96
(0.9974905699336812+0.997490569933681j)
[4.31, -1.96]
[4.31, -1.96]
-3.96
-3.96
[4.31, -1.96]
-3.96


TypeError: can't convert complex to float

In [23]:
math.cos(-4.97)

0.25477113377824295