- Default PSO parameters
- max_back=20

In [1]:
from joblib import Parallel, delayed
import multiprocessing

In [9]:
import random

import numpy as np
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns

import tengp
import symreg
from gpbenchmarks import get_data

from experiment_settings import nguyen7_funset, pagie_funset, keijzer_funset, korns12_funset, vlad_funset

def plot_results(name, outs):
    """Print summary statistics for a set of costs and draw their histogram.

    Parameters
    ----------
    name : label printed first and used as the figure title.
    outs : sequence of cost values; converted with ``np.array``.
    """
    print(name)
    costs = np.array(outs)

    summary = (
        ('min:', np.min),
        ('mean:', np.mean),
        ('median:', np.median),
        ('variance:', np.var),
    )
    for label, statistic in summary:
        print(label, statistic(costs))

    # Open a fresh figure so each call draws on its own canvas.
    plt.figure()
    plt.title(name)
    sns.distplot(costs, kde=False)
    

def run_experiment(data_item, cost_function):
    """Run 10 sequential PSO repetitions on a single benchmark.

    Parameters
    ----------
    data_item : tuple ``(name, (x_train, y_train, x_test, y_test), params)``.
        Only the name, the training split and ``params`` are used here.
    cost_function : class instantiated by ``run_experiment_instance`` as
        ``cost_function(X, Y, params, bounds)``.

    Returns
    -------
    (results, genes) : two lists with one entry per repetition, holding the
        champion fitness and the champion decision vector respectively.
    """
    name, (x_train, y_train, _, _), params = data_item
    print(name)

    # Copy the bounds of a freshly built individual; they are shared by all runs.
    bounds = tengp.individual.IndividualBuilder(params).create().bounds[:]

    # Seed pygmo's global RNG once, before the repetitions start.
    pg.set_global_rng_seed(42)

    results = []
    genes = []
    for i in range(10):
        # run_experiment_instance expects the run index as its first argument;
        # omitting it shifted every argument and raised TypeError.
        fit, gs = run_experiment_instance(i, cost_function, x_train, y_train, params, bounds)
        results.append(fit)
        genes.append(gs)

    return results, genes


def run_experiment_instance(i, cost_function, x_train, y_train, params, bounds):
    """Run one PSO optimisation and return its champion.

    Parameters
    ----------
    i : run index; only printed.
    cost_function : class called as ``cost_function(X, Y, params, bounds)`` and
        wrapped in ``pg.problem``.
    x_train, y_train : training inputs and targets.
    params, bounds : forwarded unchanged to ``cost_function``.

    Returns
    -------
    (fitness, genes) : first component of the champion fitness vector and the
        champion decision vector.
    """
    print(i)

    # Append a column of ones to the training inputs.
    inputs = np.c_[x_train, np.ones(len(x_train))]
    prob = pg.problem(cost_function(inputs, y_train, params, bounds))

    # PSO for 2000 generations on a population of 50.
    # (Call algo.set_verbosity(1) here to log progress.)
    algo = pg.algorithm(pg.pso(gen=2000))
    swarm = pg.population(prob, 50)
    swarm = algo.evolve(swarm)

    best_fitness = swarm.champion_f[0]
    best_genes = swarm.champion_x
    return best_fitness, best_genes
    

In [3]:
import pygmo as pg

class cost_function:
    """Problem object handed to ``pg.problem``: MSE of a tengp individual.

    Stores the data (``X``, ``Y``), the tengp ``params`` and the gene
    ``bounds``, and exposes the ``fitness`` / ``get_bounds`` methods.
    """

    def __init__(self, X, Y, params, bounds):
        self.X, self.Y = X, Y
        self.params, self.bounds = params, bounds

    def fitness(self, x):
        """Return ``[mse]`` for gene vector ``x``, or a large penalty on ValueError."""
        genes = list(x)
        individual = tengp.individual.NPIndividual(genes, self.bounds, self.params)
        pred = individual.transform(self.X)

        try:
            mse = mean_squared_error(pred, self.Y)
        except ValueError:
            # presumably raised for non-finite predictions — TODO confirm
            return [10000000000]
        return [mse]

    def get_bounds(self):
        """Return the bounds passed to the constructor."""
        return self.bounds

In [5]:
# Keyword settings shared by every tengp.Parameters instance below.
kw_params = {'real_valued': True, 'max_back': 20}

# One Parameters object per benchmark. Only the first positional argument and
# the function set differ between them.
# NOTE(review): the first argument is presumably the number of inputs, counting
# the column of ones that run_experiment_instance appends to x_train — TODO
# confirm, and check that 2 is the intended value for pagie1.
# NOTE(review): nguyen4 and nguyen10 reuse nguyen7_funset; no dedicated function
# sets are imported for them — confirm this is intended.
params_nguyen4 = tengp.Parameters(2, 1, 1, 50, nguyen7_funset, **kw_params)
params_nguyen7 = tengp.Parameters(2, 1, 1, 50, nguyen7_funset, **kw_params)
params_nguyen10 = tengp.Parameters(3, 1, 1, 50, nguyen7_funset, **kw_params)
params_pagie1  = tengp.Parameters(2, 1, 1, 50, pagie_funset, **kw_params)
params_keijzer6 = tengp.Parameters(2, 1, 1, 50, keijzer_funset, **kw_params)
params_korns = tengp.Parameters(6, 1, 1, 50, korns12_funset, **kw_params)
params_vlad = tengp.Parameters(6, 1, 1, 50, vlad_funset, **kw_params)

# NOTE(review): omits params_nguyen4 and params_nguyen10, and is not used by any
# other cell visible in this notebook.
all_params = [params_nguyen7, params_pagie1, params_keijzer6, params_korns, params_vlad]

In [6]:
import random 

# One entry per benchmark: (name, benchmark data, tengp parameters).
# run_experiment unpacks the middle element as (x_train, y_train, x_test, y_test).
# NOTE(review): the `random` module is passed as the first argument, presumably
# as the generator's random source; no seed is set in the visible cells, so the
# sampled data may differ between sessions — TODO confirm.
data = [
    ('nguyen4', symreg.get_benchmark_poly(random, 6), params_nguyen4),
    ('nguyen7', symreg.get_benchmark_nguyen7(random, None), params_nguyen7),
    ('nguyen10', symreg.get_benchmark_nguyen10(random, None), params_nguyen10),
    ('pagie1', symreg.get_benchmark_pagie1(random, None), params_pagie1),
    ('keijzer6', symreg.get_benchmark_keijzer(random, 6), params_keijzer6),
    ('korns12', symreg.get_benchmark_korns(random, 12), params_korns),
    ('vladislasleva4', symreg.get_benchmark_vladislasleva4(random, None), params_vlad)
]

In [7]:
# Leave one core free, but never drop below one worker: on a single-core
# machine cpu_count() - 1 is 0, and joblib's Parallel rejects n_jobs=0.
num_cores = max(1, multiprocessing.cpu_count() - 1)
num_cores

3

# Nguyen 4

In [None]:
%%time

# Sequential variant: run_experiment performs 10 PSO repetitions on data[0]
# ('nguyen4') and returns the champion fitnesses and champion gene vectors.
ng4_res, ng4_genes = run_experiment(data[0], cost_function)

In [None]:
%%time

# Parallel variant of the nguyen4 experiment: 10 independent PSO runs spread
# over num_cores joblib workers. Unlike run_experiment, no pygmo global seed
# is set here.
name, (x_train, y_train, x_test, y_test), params = data[0]
print(name)

# Copy of the gene bounds of a freshly built individual, shared by all runs.
bounds = tengp.individual.IndividualBuilder(params).create().bounds[:]

# Each element of `results` is the (fitness, genes) pair returned by one run.
results = Parallel(n_jobs=num_cores)(
    delayed(run_experiment_instance)(run_idx, cost_function, x_train, y_train, params, bounds)
    for run_idx in range(10)
)



In [None]:
import pickle

# Persist the nguyen4 fitnesses and champion genes. The context managers
# close (and flush) the files even if pickling fails.
with open('ng4_res_100_000evals', 'wb') as fh:
    pickle.dump(ng4_res, fh)
with open('ng4_genes_100_000evals', 'wb') as fh:
    pickle.dump(ng4_genes, fh)

In [None]:
# Print summary statistics and draw the histogram of the nguyen4 fitnesses
# returned by the sequential run_experiment cell.
plot_results('nguyen4', ng4_res)

# Nguyen 7


In [None]:
%%time

# Sequential variant: run_experiment performs 10 PSO repetitions on data[1]
# ('nguyen7') and returns the champion fitnesses and champion gene vectors.
ng7_res, ng7_genes = run_experiment(data[1], cost_function)

In [12]:
%%time

# Parallel variant of the nguyen7 experiment: 100 independent PSO runs spread
# over num_cores joblib workers. Unlike run_experiment, no pygmo global seed
# is set here.
name, (x_train, y_train, x_test, y_test), params = data[1]
print(name)

# Copy of the gene bounds of a freshly built individual, shared by all runs.
bounds = tengp.individual.IndividualBuilder(params).create().bounds[:]

# Each element of `results` is the (fitness, genes) pair returned by one run.
results = Parallel(n_jobs=num_cores)(
    delayed(run_experiment_instance)(run_idx, cost_function, x_train, y_train, params, bounds)
    for run_idx in range(100)
)

nguyen7
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
CPU times: user 4.47 s, sys: 1.19 s, total: 5.66 s
Wall time: 2h 4min 3s


In [16]:
import pickle

# `results` holds one (fitness, genes) pair per parallel run; store the two
# components in separate files. The context managers close (and flush) the
# files even if pickling fails.
with open('ng7_res_100_000evals', 'wb') as fh:
    pickle.dump([run[0] for run in results], fh)
with open('ng7_genes_100_000evals', 'wb') as fh:
    pickle.dump([run[1] for run in results], fh)

In [None]:
# NOTE(review): ng7_res is only assigned by the sequential run_experiment cell.
# The parallel nguyen7 cell stores its output in `results` instead, so after
# running only that cell this needs e.g. [r[0] for r in results].
plot_results('nguyen7', ng7_res)

# Nguyen 10

In [None]:
%%time

# Sequential variant: run_experiment performs 10 PSO repetitions on data[2]
# ('nguyen10') and returns the champion fitnesses and champion gene vectors.
ng10_res, ng10_genes = run_experiment(data[2], cost_function)

In [None]:
# Persist the nguyen10 fitnesses and champion genes. The context managers
# close (and flush) the files even if pickling fails.
with open('ng10_res_100_000evals', 'wb') as fh:
    pickle.dump(ng10_res, fh)
with open('ng10_genes_100_000evals', 'wb') as fh:
    pickle.dump(ng10_genes, fh)

In [None]:
# data[2][0] is the benchmark name ('nguyen10'); used as heading and plot title.
plot_results(data[2][0], ng10_res)

# Pagie

In [None]:
%%time

# Sequential variant: run_experiment performs 10 PSO repetitions on data[3]
# ('pagie1') and returns the champion fitnesses and champion gene vectors.
pag1_res, pag1_genes = run_experiment(data[3], cost_function)

In [None]:
# Persist the pagie1 fitnesses and champion genes. The context managers
# close (and flush) the files even if pickling fails.
with open('pag1_res_100_000evals', 'wb') as fh:
    pickle.dump(pag1_res, fh)
with open('pag1_genes_100_000evals', 'wb') as fh:
    pickle.dump(pag1_genes, fh)

In [None]:
# data[3][0] is the benchmark name ('pagie1'); used as heading and plot title.
plot_results(data[3][0], pag1_res)

# Keijzer 6

In [None]:
%%time

# Sequential variant: 10 PSO repetitions on data[4] ('keijzer6').
kei6_res, kei6_genes = run_experiment(data[4], cost_function)

# Persist fitnesses and champion genes. The context managers close (and flush)
# the files even if pickling fails.
with open('kei6_res_100_000evals', 'wb') as fh:
    pickle.dump(kei6_res, fh)
with open('kei6_genes_100_000evals', 'wb') as fh:
    pickle.dump(kei6_genes, fh)

# data[4][0] is the benchmark name; used as heading and plot title.
plot_results(data[4][0], kei6_res)

# Korns 12

In [None]:
%%time

# Sequential variant: 10 PSO repetitions on data[5] ('korns12').
korns12_res, korns12_genes = run_experiment(data[5], cost_function)

# Persist fitnesses and champion genes. The context managers close (and flush)
# the files even if pickling fails.
with open('korns12_res_100_000evals', 'wb') as fh:
    pickle.dump(korns12_res, fh)
with open('korns12_genes_100_000evals', 'wb') as fh:
    pickle.dump(korns12_genes, fh)

In [None]:
# data[5][0] is the benchmark name ('korns12'); used as heading and plot title.
plot_results(data[5][0], korns12_res)

# Vladislavleva 4

In [None]:
%%time

# Sequential variant: 10 PSO repetitions on data[6] ('vladislasleva4').
vlad4_res, vlad4_genes = run_experiment(data[6], cost_function)

# Persist fitnesses and champion genes. The context managers close (and flush)
# the files even if pickling fails.
with open('vlad4_res_100_000evals', 'wb') as fh:
    pickle.dump(vlad4_res, fh)
with open('vlad4_genes_100_000evals', 'wb') as fh:
    pickle.dump(vlad4_genes, fh)

In [None]:
# data[6][0] is the benchmark name ('vladislasleva4'); used as heading and plot title.
plot_results(data[6][0], vlad4_res)

# Rounding of results

In [None]:
# Re-evaluate champions after rounding their genes to the nearest integer and
# collect the training MSE per problem (first five entries of `data` only).
# NOTE(review): `champions` is not defined in the visible cells — presumably a
# flat list of champion gene vectors, `chunk` per problem in `data` order;
# TODO confirm before running.
rounded_results = []

chunk = 100
for problem_idx, (name, splits, params) in enumerate(data[:5]):
    x_train, y_train, x_test, y_test = splits

    # Same ones column that run_experiment_instance appends before optimising.
    x_train = np.c_[x_train, np.ones(len(x_train))]

    bounds = tengp.individual.IndividualBuilder(params).create().bounds

    print(name)

    first = chunk * problem_idx
    problem_results = []
    for genes in champions[first:first + chunk]:
        rounded_genes = np.round(genes)
        individual = params.individual_class(rounded_genes, bounds, params)
        output = individual.transform(x_train)

        try:
            mse = mean_squared_error(output, y_train)
        except ValueError:
            # Champions whose output cannot be scored are reported and skipped.
            print('value error')
        else:
            problem_results.append(mse)

    rounded_results.append(problem_results)
        
        

In [None]:
# Lowest rounded-genotype MSE for the problem at index 4 of `data` ('keijzer6').
np.min(rounded_results[4])