In [1]:
import math
import random as rnd

import numpy as np
from sklearn import datasets
from scipy.spatial.distance import pdist
import matplotlib.pyplot as plt
%matplotlib inline

from rtree.random_tree_factory import RandomTreeFactory
from environment.environment import Environment
from genetics.individual import Individual
from genetics.selection import tournament_selection
from genetics.operators import point_mutation
from genetics.operators import crossover

In [2]:
SYMBOLS = {
    '_sum': lambda x, y: x + y,
    '_diff': lambda x, y: x - y,
    '_prod': lambda x, y: x * y,
    '_div': lambda x, y: x/y if y != 0 else 0,
    '_log': lambda x: math.log(x) if x > 0 else 0,
    '_sin': lambda x: math.sin(x),
    '_vars': lambda: rnd.choice(['x0', 'x1', 'x2', 'x3'])
}

ENV = Environment(SYMBOLS)

FACTORY = RandomTreeFactory(environment=ENV)

In [3]:
DATA = datasets.load_iris()['data']
N_POP = 100
N_INPUTS = 4
N_OUTPUTS = 2
CROSSOVER_P = 0.9
MUTATION_P = 0.1
TOURNAMENT_SIZE = 10

In [4]:
def cost_function(outputs):
    return sum(pdist(np.array(outputs), metric='euclidean'))

In [16]:
population = []
variables = ['x%d'%x for x in range(0, N_INPUTS)]
for _ in range(0, N_POP):
    trees = [FACTORY.create() for _ in range(0, N_OUTPUTS)]
    population.append(Individual(trees, variables))

In [17]:
for individual in population:
    individual.eval_on_data(DATA, cost_function, SYMBOLS)

In [18]:
new_population = []
while len(new_population) < N_POP:
    child_a = tournament_selection(population, TOURNAMENT_SIZE).copy()
    child_b = tournament_selection(population, TOURNAMENT_SIZE).copy()
    
    # crossover
    for i in range(0, N_OUTPUTS):
        if rnd.random() < CROSSOVER_P:
            child_a.trees[i], child_b.trees[i] = crossover(child_a.trees[i], child_b.trees[i])
    
    
    for i in range(0, N_OUTPUTS):
        if rnd.random() < MUTATION_P:
            child_a.trees[i] = point_mutation(child_a.trees[i], ENV.symbols_inv)
            child_b.trees[i] = point_mutation(child_b.trees[i], ENV.symbols_inv)
    
    new_population.append(child_a)
    new_population.append(child_b)
            
        

In [19]:
for individual in new_population:
    for tree in individual.trees:
        print(tree)
    individual.eval_on_data(DATA, cost_function, SYMBOLS)

_diff(_log(x1),x1)
_sin(_prod(x0,x0))
_div(x0,_log(x1))
_log(_sin(_log(x2)))
_diff(_sin(x0),_sin(_prod(x2,x0)))
_log(x0)
_sin(x3)
_div(_diff(x3,_log(_sin(_log(x3)))),x0)
_sin(_log(_log(x0)))
_log(_log(_log(x0)))
_diff(x0,_log(x0))
_sin(_log(x2))
_diff(_log(_log(x1)),_sin(x1))
_sin(_sin(x0))
_prod(x1,x3)
_div(_sin(x0),_sin(_log(x2)))
_sin(_diff(x2,_log(x0)))
_div(_diff(x3,x0),x2)
_diff(x0,_log(x0))
_sin(_log(x0))
_prod(x1,_sin(x1))
_sin(_div(_log(x1),x2))
_prod(_sin(x0),_sin(_sin(x0)))
_sin(_div(_log(x1),x2))
_log(_sin(x1))
_sin(_diff(_sin(x2),x0))
_sin(_log(x2))
_sin(_log(x2))
_sin(_sum(x0,x1))
_sum(_sin(_sin(_log(x1))),x3)
_sin(_sin(_log(_log(x3))))
_log(x1)
_prod(x1,x3)
_div(_sin(x0),_log(x1))
_prod(_sin(x0),_sin(_sin(x1)))
_sin(_div(_sin(x0),x2))
_diff(_log(_log(x1)),x1)
_sin(_sin(_log(x2)))
_diff(_sin(_log(x1)),x1)
_sin(_sin(_log(x2)))
_sin(_diff(x2,x0))
_div(_diff(_sin(x2),x0),x0)
_div(x2,x0)
_sum(_log(x2),x3)
_div(x2,x0)
_sum(_log(x2),_sin(x2))
_log(_log(x0))
_prod(_log(x3),_log(

TypeError: unsupported operand type(s) for -: 'float' and 'function'