In [3]:
import random

import numpy as np
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns

import tengp
from gpbenchmarks import get_data


In [30]:
@tengp.utils.handle_invalid_decorator
def tabu_es(X, y, cost_function, params,
            target_fitness=0,
            population_size=5,
            evaluations=5000,
            random_state=None,
            mutation='point',
            mutation_probability=0.25,
            memory_size=10,
            verbose=False):

    if mutation not in tengp.mutations.MUTATIONS:
        raise UnknownMutationException("Provided type of mutation is not implemented.")

    move = tengp.mutations.MUTATIONS[mutation]
    if mutation == 'probabilistic':
        move = partial(move, probability=mutation_probability)

    if random_state:
        random.seed(random_state)

    # initialize memory
    memory = []
    
    # initialize stats object
    stats = {'moves': 0, 'memory_hits': 0, 'better_after': 0, 'worse_after': 0}

    # initial generation
    ib = tengp.individual.IndividualBuilder(params)

    population = [ib.create() for _ in range(population_size)]

    n_evals = 0
    
    generation = 0

    for individual in population:
        output = individual.transform(X)
        individual.fitness = cost_function(y, output)
        n_evals += 1

    while n_evals < evaluations:
        generation += 1

        parent = min(population, key=lambda x: x.fitness)

        if parent.fitness <= target_fitness:
            return population

        population = []


        for _ in range(population_size -1):
            _move = move(parent)
            stats['moves'] += 1
            individual = parent.apply(_move)

            while _move in memory:
                stats['memory_hits'] += 1
                output = individual.transform(X)
                fitness = cost_function(output, y)
                if fitness > parent.fitness:
                    stats['worse_after'] +=1 
                elif fitness < parent.fitness:
                    stats['better_after'] += 1
                    break
                elif fitness == parent.fitness:
                    break
                _move = move(parent)
                individual = parent.apply(_move)
                n_evals += 1
                stats['moves'] += 1

            if len(memory) > memory_size:
                del memory[0]

            memory.append(_move)

            population.append(individual)


        for individual in population:
            if individual == parent:
                individual.fitness = parent.fitness
                continue
            output = individual.transform(X)
            individual.fitness = cost_function(y, output)
            n_evals += 1

        population += [parent]

        if verbose and generation % 100 == 0:
            print(f'Gen: {generation}, population: {sorted([x.fitness for x in population])}')

    population.sort(key=lambda x: x.fitness)
    return population, stats


In [33]:
def pdivide(x, y):
    return np.divide(x, y, out=np.copy(x), where=x!=0)

def plog(x):
    return np.log(x, out=np.copy(x), where=x>0)

def pow2(x):
    return x**2

def pow3(x):
    return x**3


funset = tengp.FunctionSet()
funset.add(np.add, 2)
funset.add(np.subtract, 2)
funset.add(np.multiply, 2)
funset.add(pdivide, 2)
funset.add(plog, 1)
funset.add(np.sin, 1)
funset.add(np.cos, 1)

#     function set: +, -, *, /, sin, cos, tan, tanh, sqrt, exp, log, **2, **3
ext_funset = tengp.FunctionSet()
ext_funset.add(np.add, 2)
ext_funset.add(np.subtract, 2)
ext_funset.add(np.multiply, 2)
ext_funset.add(pdivide, 2)
ext_funset.add(np.sin, 1)
ext_funset.add(np.cos, 1)
ext_funset.add(np.tan, 1)
ext_funset.add(np.tanh, 1)
ext_funset.add(np.sqrt, 1)
ext_funset.add(np.exp, 1)
ext_funset.add(plog, 1)
ext_funset.add(pow2, 1)
ext_funset.add(pow3, 1)

params1d = tengp.Parameters(2, 1, 1, 100, funset)
params2d = tengp.Parameters(3, 1, 1, 100, funset)
params5d = tengp.Parameters(6, 1, 1, 100, ext_funset)

functions = [('nguyenf4', params1d, [20, -1, 1]), ('nguyenf7', params1d, [20, 0, 2]), ('nguyenf10', params2d, [100, -1, 1]), ('korns12', params5d, [10000, -50, 50])]

TRIALS = 100

In [34]:
%%time

random.seed(0)

results = []

for function in functions[:1]:
    trial_results = []
    for trial in range(TRIALS):
        print(trial, end=',')
        X, y = get_data(function[0], *function[2])
        X = np.c_[np.ones(len(X)), X]
        population, stats = tabu_es(X, y, mean_squared_error, function[1], mutation='single', memory_size=1000)
        trial_results.append((population[0].fitness, stats['moves'], stats['memory_hits'], stats['better_after'], stats['worse_after']))
    results.append(trial_results)
    

0,

  
  current_node.value = current_node.fun(*values)
  return umr_sum(a, axis, dtype, out, keepdims)
  current_node.value = current_node.fun(*values)
  current_node.value = current_node.fun(*values)
  """


1,

  


2,3,

  current_node.value = current_node.fun(*values)


4,5,6,

  current_node.value = current_node.fun(*values)


7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,CPU times: user 6min 58s, sys: 391 ms, total: 6min 59s
Wall time: 7min 1s


In [45]:
scores = [x[0] for y in results for x in y]
moves = [x[1] for y in results for x in y]
hits = [x[2] for y in results for x in y]
worse = [x[4] for y in results for x in y]

In [42]:
np.min(scores)

1.3789658401812608e-32

In [43]:
np.mean(scores)

0.07354103095024606

In [44]:
np.median(scores)

0.02232148956290639

In [47]:
np.mean(moves)

4997.24

In [49]:
np.mean(hits)

618.39

In [50]:
np.mean(worse)

545.08

In [51]:
np.mean(hits)/np.mean(worse)

1.1344940192265354