In [17]:
import tengp
import numpy as np
from gpbenchmarks import get_data
from sklearn.metrics import mean_squared_error
import random

def pdivide(x, y):
    """Protected element-wise division ``x / y``.

    Wherever the denominator ``y`` is zero the division is skipped and the
    corresponding element of ``x`` is returned unchanged, so the result never
    contains the inf/NaN values a plain division by zero would produce.

    Parameters
    ----------
    x : array_like
        Numerator. It is copied, never modified in place. The copy is used as
        the output buffer, so it should have a floating dtype.
    y : array_like
        Denominator.

    Returns
    -------
    numpy.ndarray
        ``x / y`` where ``y != 0``, and ``x`` elsewhere.
    """
    # The guard must look at the denominator: masking on ``x != 0`` (as the
    # previous version did) still divides by zero whenever y == 0 and x != 0.
    return np.divide(x, y, out=np.copy(x), where=y != 0)

def plog(x, _):
    """Protected natural logarithm with a two-argument (binary node) signature.

    Elements of ``x`` that are strictly positive are replaced by their natural
    log; all other elements are passed through unchanged. The second argument
    is ignored.
    """
    result = np.copy(x)
    positive = x > 0
    np.log(x, out=result, where=positive)
    return result

def psin(x, _):
    """Element-wise sine of ``x``; the second argument is accepted but ignored."""
    sine = np.sin(x)
    return sine

def pcos(x, _):
    """Element-wise cosine of ``x``; the second argument is accepted but ignored."""
    cosine = np.cos(x)
    return cosine

def pow2(x):
    """Return ``x`` raised to the second power (element-wise for arrays)."""
    return pow(x, 2)

def pow3(x):
    """Return ``x`` raised to the third power (element-wise for arrays)."""
    return pow(x, 3)


# Function set offered to the CGP nodes. Every primitive is registered with
# arity 2; plog/psin/pcos simply ignore their second argument.
funset = tengp.FunctionSet()
for _primitive in (np.add, np.subtract, np.multiply, pdivide, plog, psin, pcos):
    funset.add(_primitive, 2)

# Benchmark data, with a leading column of ones prepended to the inputs.
# NOTE(review): the positional arguments (20, -1, 1) look like sample count and
# input range -- confirm against gpbenchmarks.get_data.
X, y = get_data('nguyenf4', 20, -1, 1)
X = np.c_[np.ones(len(X)), X]

# NOTE(review): positional arguments are presumably
# (n_inputs, n_outputs, n_rows, n_columns) -- confirm against tengp.Parameters.
params = tengp.Parameters(2, 1, 1, 100, funset, real_valued=True)

# Build one throw-away individual only to obtain a copy of the per-gene bounds.
builder = tengp.individual.IndividualBuilder(params)
bounds = builder.create().bounds[:]

In [2]:
def cost_function(G):
    """Mean squared error of the individual encoded by genome ``G``.

    The genome is decoded with the module-level ``bounds`` and ``params`` and
    evaluated on the module-level ``X``; the prediction is scored against ``y``.

    Returns
    -------
    float
        The MSE, or ``np.inf`` when ``mean_squared_error`` raises ``ValueError``
        (presumably for non-finite predictions -- TODO confirm).
    """
    candidate = tengp.individual.NPIndividual(G, bounds, params)
    prediction = candidate.transform(X)

    try:
        return mean_squared_error(prediction, y)
    except ValueError:
        return np.inf


def cost_function_cma_old(G):
    """Fitness for DEAP's CMA-ES: clip the raw genome into bounds, then score it.

    Each gene is clamped to ``[0, bound]`` using the module-level ``bounds``
    before the individual is built and evaluated on ``X`` against ``y``.

    Returns
    -------
    tuple
        One-element tuple ``(mse,)``; ``(np.inf,)`` when ``mean_squared_error``
        raises ``ValueError``.
    """
    clipped = [
        0 if gene < 0 else (bound if gene > bound else gene)
        for gene, bound in zip(G, bounds)
    ]

    candidate = tengp.individual.NPIndividual(clipped, bounds, params)
    prediction = candidate.transform(X)

    try:
        error = mean_squared_error(prediction, y)
    except ValueError:
        error = np.inf
    return (error, )

def linscale(x, _min, _max, span=10):
    """Linearly map ``x`` from the interval ``[0, span]`` onto ``[_min, _max]``.

    Parameters
    ----------
    x : number or array_like
        Value in the source interval; values outside ``[0, span]`` are
        extrapolated, not clipped.
    _min, _max : number
        End points of the target interval.
    span : number, optional
        Width of the source interval. Defaults to 10, the constant that was
        previously hard-coded, so existing three-argument calls are unchanged.

    Returns
    -------
    number or numpy.ndarray
        ``_min + (_max - _min) * x / span``.
    """
    return _min + (_max - _min) * x / span

def cost_function_cma(G):
    """Fitness for DEAP's CMA-ES using a rescaled genome.

    Each raw gene is first mapped by ``linscale(gene, 0, bound)`` and then
    clamped to ``[0, bound]`` (module-level ``bounds``). The resulting
    individual is evaluated on ``X`` and scored against ``y``.

    Returns
    -------
    tuple
        One-element tuple ``(mse,)``; ``(np.inf,)`` when ``mean_squared_error``
        raises ``ValueError``.
    """
    clipped = []
    for raw, bound in zip(G, bounds):
        gene = linscale(raw, 0, bound)
        clipped.append(0 if gene < 0 else (bound if gene > bound else gene))

    candidate = tengp.individual.NPIndividual(clipped, bounds, params)
    prediction = candidate.transform(X)

    try:
        error = mean_squared_error(prediction, y)
    except ValueError:
        error = np.inf
    return (error, )

In [13]:
# from scipy.optimize import minimize

# lbfgs_bounds = [(0, b) for b in bounds]

# genes = [np.random.uniform(0, b) for b in bounds[:-params.n_outputs]]
# genes += [random.choice(list(range(b))) for b in bounds[-params.n_outputs:]]

# minimize(cost_function, [np.random.uniform(0, b) for b in bounds], method='L-BFGS-B', bounds=lbfgs_bounds)

In [16]:
from deap import creator, base, tools, cma, algorithms

# Single-objective fitness with weight -1.0, i.e. the objective is minimised.
# NOTE: creator.create registers classes globally; re-running this cell in the
# same kernel re-creates them (DEAP is expected to warn about that).
creator.create('FitnessMin', base.Fitness, weights=(-1.0,))
creator.create('Individual', list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
# The clipping-based cost function is used here, not the rescaling variant
# cost_function_cma.
toolbox.register('evaluate', cost_function_cma_old)

# Start the search from a random genome drawn uniformly within the gene bounds.
# No random seed is set, so every run starts from a different centroid.
strategy = cma.Strategy(centroid=[np.random.uniform(0, b) for b in bounds], sigma=0.05)

# eaGenerateUpdate drives the strategy through these two toolbox entries.
toolbox.register("generate", strategy.generate, creator.Individual)
toolbox.register("update", strategy.update)

# Keep the single best individual and log mean / minimum fitness per generation.
hof = tools.HallOfFame(1)
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.mean)
stats.register("min", np.min)

res = algorithms.eaGenerateUpdate(toolbox, ngen=250, stats=stats, halloffame=hof)

# Report the best genome found and re-evaluate it as a sanity check.
print(hof[0])
print(cost_function_cma_old(hof[0]))



gen	nevals	avg    	min   
0  	19    	6118.51	115.39
1  	19    	959.666	10.2785
2  	19    	735.332	5.53425
3  	19    	74.176 	4.68289
4  	19    	52.3233	3.6522 
5  	19    	17.6116	2.83428
6  	19    	5.21443	2.37268
7  	19    	3.89088	2.23958
8  	19    	3.7897 	2.55623
9  	19    	416596 	2.48458
10 	19    	3.54326	2.26884
11 	19    	5.17742	1.82025
12 	19    	3.30625	2.39032
13 	19    	3.62928	1.97446
14 	19    	3.57254	1.6909 
15 	19    	4.10875	2.05607
16 	19    	3.48169	1.74025
17 	19    	38.6769	1.94691
18 	19    	490.739	2.21922
19 	19    	2.97073	1.87886
20 	19    	3.17962	2.04908
21 	19    	5.11213	2.05063
22 	19    	3.90842	1.70803
23 	19    	4.1144 	1.83232
24 	19    	170.037	1.79279
25 	19    	12833.7	2.06669
26 	19    	9492.85	1.74348
27 	19    	5.00355	1.8685 
28 	19    	3.66435	1.8024 
29 	19    	110.408	1.35395
30 	19    	261.626	1.84413
31 	19    	4.97392	1.36524
32 	19    	350.453	1.20298
33 	19    	56432.8	1.09514
34 	19    	4555.95	1.09256
35 	19    	9.1501 	1.15678
36 

In [None]:
#from scipy.optimize import differential_evolution

Okrem CMA-ES ma este napadlo pouzit optimalizacne algoritmy, ktore sa bezne pouzivaju v neuronovych sietach. Niektore su implementovane napriklad v Kerase a mali by sa dat pouzit aj tu (pozri odkaz nizsie):

https://github.com/keras-team/keras/issues/369

Dalsia moznost by mohla byt vyskusat jednoduchu (1+lambda)-ES v style hillclimbera (z nejakeho dovodu mam pocit, ze vysledky CMA-ES su az prilis nestabilne).

Treba sa pozriet na distribucie vyslednych cisel, ktore moje nove mapovanie generuje. Mam totiz pocit, ze s poctom uzlov sa zvysuje aj priemerne vysledne cislo, ktore RVCGP jedinec generuje.

In [None]:
from scipy.optimize import minimize

In [None]:
# Conjugate-gradient run on cost_function, started from a random genome drawn
# uniformly within the gene bounds.
cg_start = [np.random.uniform(0, upper) for upper in bounds]
minimize(cost_function, cg_start, method='CG')

In [87]:
%%timeit


107 µs ± 6.36 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [86]:
%%timeit
# Baseline: cost of drawing one random genome with one np.random.uniform call
# per gene bound.
[np.random.uniform(0, b) for b in bounds]

269 µs ± 19.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [88]:
%%time
# Benchmark: build and evaluate 5000 random individuals end to end.
# `genes` and `individual` stay bound to the last iteration's values after the
# loop, so later cells that inspect `individual` see that final random sample.
for _ in range(5000):
    # Python's `random` module is used here rather than np.random.
    genes = [random.uniform(0, b) for b in bounds]
    individual = params.individual_class(genes, bounds, params)
    individual.transform(X)

CPU times: user 2min 50s, sys: 812 ms, total: 2min 50s
Wall time: 2min 57s


In [83]:
# Show which nodes of the current `individual` are active.
# NOTE(review): `individual` is a global left over from whichever cell ran last;
# the execution counts are out of order, so define it explicitly before relying
# on this output.
individual.active_nodes

{0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 25,
 26,
 28,
 29,
 38,
 39,
 88,
 89,
 102}