In [3]:
import pickle
import random
import warnings

import numpy as np
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import pygmo as pg

import tengp
import symreg
from experiment_settings import nguyen7_funset, pagie_funset, keijzer_funset, korns12_funset, vlad_funset

In [4]:
# Number of independent optimisation trials executed for each benchmark.
TRIALS = 50
# Path prefix (note the trailing slash) prepended to every pickled log file name.
OUTPUT_FOLDER  = 'results/abc/'
# When False, run_parallel executes the trials one after another via run_experiment.
PARALLEL = False

In [5]:
def run_parallel(current_data, n_jobs=-1):
    """Run all ``TRIALS`` trials for one benchmark, sequentially or in parallel.

    The module-level ``PARALLEL`` flag selects the mode. In sequential mode the
    work is delegated to ``run_experiment``; in parallel mode each trial is
    dispatched to a joblib worker.

    Parameters
    ----------
    current_data : tuple
        ``(name, (x_train, y_train, x_test, y_test), params)``, i.e. one entry
        of the ``data`` list.
    n_jobs : int, optional
        Worker count handed to ``joblib.Parallel`` in parallel mode. The
        default ``-1`` lets joblib use every available core. Ignored when
        ``PARALLEL`` is false.

    Returns
    -------
    list
        One log per trial, each as returned by ``run_experiment_instance``.
    """
    if not PARALLEL:
        return run_experiment(current_data, cost_function)

    # Imported lazily so the sequential path carries no joblib dependency.
    from joblib import Parallel, delayed

    name, (x_train, y_train, x_test, y_test), params = current_data
    print(name)

    # Copy the bounds of a freshly built individual; they are shared by all trials.
    bounds = tengp.individual.IndividualBuilder(params).create().bounds[:]

    # NOTE(review): worker processes presumably do not inherit the seed set with
    # pg.set_global_rng_seed in the notebook process -- confirm before relying on
    # parallel runs being reproducible.
    return Parallel(n_jobs=n_jobs)(
        delayed(run_experiment_instance)(trial, cost_function, x_train, y_train, params, bounds)
        for trial in range(TRIALS)
    )

def run_experiment(data_item, cost_function):
    """Execute ``TRIALS`` optimisation trials for one benchmark, one after another.

    Parameters
    ----------
    data_item : tuple
        ``(name, (x_train, y_train, x_test, y_test), params)``. Only the
        training split is used here; the benchmark name is printed.
    cost_function : callable
        Problem factory forwarded unchanged to ``run_experiment_instance``.

    Returns
    -------
    list
        The per-trial logs, in trial order.
    """
    name, (x_train, y_train, _x_test, _y_test), params = data_item
    print(name)

    # Bounds are taken (as a copy) from a freshly built individual and reused for every trial.
    builder = tengp.individual.IndividualBuilder(params)
    bounds = builder.create().bounds[:]

    return [
        run_experiment_instance(trial, cost_function, x_train, y_train, params, bounds)
        for trial in range(TRIALS)
    ]


def run_experiment_instance(i, cost_function, x_train, y_train, params, bounds):
    """Run a single artificial-bee-colony optimisation and return its logged values.

    Parameters
    ----------
    i : int
        Trial index; only printed as a progress marker.
    cost_function : callable
        Called as ``cost_function(X, y_train, params, bounds)`` to build the
        object wrapped in ``pg.problem``.
    x_train, y_train
        Training inputs and targets. A column of ones is prepended to
        ``x_train`` before it is handed to ``cost_function``.
    params, bounds
        Forwarded unchanged to ``cost_function``.

    Returns
    -------
    list
        Element ``[2]`` of every entry in the algorithm's log.
        NOTE(review): presumably a best-fitness column -- confirm against the
        pygmo ``bee_colony.get_log`` documentation.
    """
    print(i, end=',')

    # Prepend a constant column of ones to the inputs.
    inputs_with_ones = np.c_[np.ones(len(x_train)), x_train]
    problem = pg.problem(cost_function(inputs_with_ones, y_train, params, bounds))

    optimiser = pg.algorithm(pg.bee_colony(gen=2000))
    # The log read below is only filled when verbosity is non-zero.
    optimiser.set_verbosity(1)

    population = pg.population(problem, 25)
    optimiser.evolve(population)

    logged_values = []
    for entry in optimiser.extract(pg.bee_colony).get_log():
        logged_values.append(entry[2])
    return logged_values

class cost_function:
    """Problem object exposing ``fitness`` and ``get_bounds`` for ``pg.problem``.

    A decision vector is turned into a ``tengp`` ``NPIndividual`` and scored by
    the mean squared error between its output on ``X`` and the targets ``Y``.
    """

    def __init__(self, X, Y, params, bounds):
        """Store the data set, the tengp parameters and the search bounds."""
        self.params, self.bounds = params, bounds
        self.X, self.Y = X, Y

    def get_bounds(self):
        """Return the bounds object supplied at construction time."""
        return self.bounds

    def fitness(self, x):
        """Score decision vector ``x``; returns a one-element list.

        If ``mean_squared_error`` raises ``ValueError`` the fixed penalty
        ``10000000000`` is returned instead of an error value.
        """
        genes = list(x)
        candidate = tengp.individual.NPIndividual(genes, self.bounds, self.params)
        prediction = candidate.transform(self.X)

        try:
            error = mean_squared_error(prediction, self.Y)
        except ValueError:
            # Unscorable prediction: hand the optimiser a large constant cost.
            return [10000000000]
        return [error]

In [7]:
# Keyword options shared by every tengp.Parameters object created below.
kw_params = {'real_valued': True, 'max_back': 20}

# One Parameters object per benchmark; only the first positional argument and the
# function set differ between them.
# NOTE(review): the first argument presumably counts the inputs including the
# constant column that run_experiment_instance prepends to x_train -- confirm
# against the tengp.Parameters signature.
# NOTE(review): params_nguyen4 and params_nguyen10 reuse nguyen7_funset -- confirm
# that this is intended.
params_nguyen4 = tengp.Parameters(2, 1, 1, 50, nguyen7_funset, **kw_params)
params_nguyen7 = tengp.Parameters(2, 1, 1, 50, nguyen7_funset, **kw_params)
params_nguyen10 = tengp.Parameters(3, 1, 1, 50, nguyen7_funset, **kw_params)
params_pagie1  = tengp.Parameters(3, 1, 1, 50, pagie_funset, **kw_params)
params_keijzer6 = tengp.Parameters(2, 1, 1, 50, keijzer_funset, **kw_params)
params_korns = tengp.Parameters(6, 1, 1, 50, korns12_funset, **kw_params)
params_vlad = tengp.Parameters(6, 1, 1, 50, vlad_funset, **kw_params)

# NOTE(review): holds only five of the seven Parameters objects above
# (params_nguyen4 and params_nguyen10 are absent).
all_params = [params_nguyen7, params_pagie1, params_keijzer6, params_korns, params_vlad]

# Seed the stdlib RNG before the module is handed to the symreg benchmark
# generators below; the order of the generator calls therefore matters.
random.seed(42)

# Each entry is (name, dataset, params). run_experiment unpacks the dataset as
# (x_train, y_train, x_test, y_test). The run cells address entries by index.
data = [
    ('nguyen4', symreg.get_benchmark_poly(random, 6), params_nguyen4),
    ('nguyen7', symreg.get_benchmark_nguyen7(random, None), params_nguyen7),
    ('nguyen10', symreg.get_benchmark_nguyen10(random, None), params_nguyen10),
    ('pagie1', symreg.get_benchmark_pagie1(random, None), params_pagie1),
    ('keijzer6', symreg.get_benchmark_keijzer(random, 6), params_keijzer6),
    ('korns12', symreg.get_benchmark_korns(random, 12), params_korns),
    ('vladislasleva4', symreg.get_benchmark_vladislasleva4(random, None), params_vlad)
]

# Nguyen 4

In [None]:
%%time

# Reset pygmo's global RNG so this benchmark starts from a fixed seed.
pg.set_global_rng_seed(seed = 42)
logs = run_parallel(data[0])
# Write through a context manager so the file is flushed and closed right away.
with open(f'{OUTPUT_FOLDER}ng4_log', 'wb') as log_file:
    pickle.dump(logs, log_file)

# Nguyen 7


In [None]:
%%time

# Reset pygmo's global RNG so this benchmark starts from a fixed seed.
pg.set_global_rng_seed(seed = 42)
logs = run_parallel(data[1])
# Write through a context manager so the file is flushed and closed right away.
with open(f'{OUTPUT_FOLDER}ng7_log', 'wb') as log_file:
    pickle.dump(logs, log_file)

In [9]:
# Silence all warnings for the remaining runs. This calls the stdlib module directly:
# `np.warnings` was only an alias of it and is no longer available in NumPy 1.24+.
warnings.filterwarnings('ignore')

# Pagie

In [10]:
%%time

# Reset pygmo's global RNG so this benchmark starts from a fixed seed.
pg.set_global_rng_seed(seed = 42)
logs = run_parallel(data[3])
# Write through a context manager so the file is flushed and closed right away.
with open(f'{OUTPUT_FOLDER}pag1_log', 'wb') as log_file:
    pickle.dump(logs, log_file)

pagie1
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,CPU times: user 3h 18min 26s, sys: 1.77 s, total: 3h 18min 28s
Wall time: 4h 24min 32s


# Keijzer 6

In [11]:
%%time

# Reset pygmo's global RNG so this benchmark starts from a fixed seed.
pg.set_global_rng_seed(seed = 42)
logs = run_parallel(data[4])
# Write through a context manager so the file is flushed and closed right away.
with open(f'{OUTPUT_FOLDER}kei6_log', 'wb') as log_file:
    pickle.dump(logs, log_file)

keijzer6
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,CPU times: user 2h 12min 54s, sys: 1.81 s, total: 2h 12min 56s
Wall time: 7h 16min 54s


# Korns 12

In [8]:
%%time

# Reset pygmo's global RNG so this benchmark starts from a fixed seed.
pg.set_global_rng_seed(seed = 42)
logs = run_parallel(data[5])
# Write through a context manager so the file is flushed and closed right away.
with open(f'{OUTPUT_FOLDER}korns12_log', 'wb') as log_file:
    pickle.dump(logs, log_file)

korns12


  return  np.sqrt(x)
  return  np.sqrt(x)
  return np.exp(x)
  return np.exp(x)
  return np.log(x, out=np.copy(x), where=x>0)
  return  np.sqrt(x)
  return x**2
  return np.log(x, out=np.copy(x), where=x>0)
  return x**3
  return np.exp(x)
  return l + u
  return x**2
  u = a*f_u(L, U)
  return np.log(x, out=np.copy(x), where=x>0)
  return x**3
  value = (1-coeff)*self.nodes[lower].value + coeff*self.nodes[upper].value
  return x**2
  return l + u
  return x**3
  u = a*f_u(L, U)
  value = (1-coeff)*self.nodes[lower].value + coeff*self.nodes[upper].value
  return np.sin(x)
  return l + u
  l = (1-a)*f_l(L, U)
  u = a*f_u(L, U)
  return np.sin(x)
  return np.divide(x, y, out=np.copy(x), where=y!=0)
  value = (1-coeff)*self.nodes[lower].value + coeff*self.nodes[upper].value
  l = (1-a)*f_l(L, U)
  return np.divide(x, y, out=np.copy(x), where=y!=0)
  return np.cos(x)
  return np.sin(x)
  return np.tan(x)
  return np.cos(x)
  l = (1-a)*f_l(L, U)
  return np.tan(x)
  return np.divide(x, y, o

  l = (1-a)*f_l(L, U)
  value = (1-coeff)*self.nodes[lower].value + coeff*self.nodes[upper].value
  l = (1-a)*f_l(L, U)
  L = (1-b)*x_l1 + b*x_u1
  L = (1-b)*x_l1 + b*x_u1
  u = a*f_u(L, U)
  u = a*f_u(L, U)
  L = (1-b)*x_l1 + b*x_u1
  u = a*f_u(L, U)
  U = (1-c)*x_l2 + c*x_u2
  U = (1-c)*x_l2 + c*x_u2
  U = (1-c)*x_l2 + c*x_u2
  u = a*f_u(L, U)
  u = a*f_u(L, U)
  u = a*f_u(L, U)
  return umr_sum(a, axis, dtype, out, keepdims)
  return umr_sum(a, axis, dtype, out, keepdims)
  return umr_sum(a, axis, dtype, out, keepdims)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0,
  output_errors = np.average((y_true - y_pred) ** 2, axis=0,
  output_errors = np.average((y_true - y_pred) ** 2, axis=0,
  return np.divide(x, y, out=np.copy(x), where=y!=0)
  return np.divide(x, y, out=np.copy(x), where=y!=0)
  return np.divide(x, y, out=np.copy(x), where=y!=0)
  ret = umr_sum(arr, axis, dtype, out, keepdims)
  ret = umr_sum(arr, axis, dtype, out, keepdims)
  ret = umr_sum(arr, axis, dtype

CPU times: user 8.27 s, sys: 4.19 s, total: 12.5 s
Wall time: 16h 50min 17s


# Vladislavleva 4

In [None]:
%%time

# Reset pygmo's global RNG so this benchmark starts from a fixed seed.
pg.set_global_rng_seed(seed = 42)
logs = run_parallel(data[6])
# Write through a context manager so the file is flushed and closed right away.
with open(f'{OUTPUT_FOLDER}vlad4_log', 'wb') as log_file:
    pickle.dump(logs, log_file)