In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.pyplot as plt

import tengp
import numpy as np
from gpbenchmarks import get_data
from sklearn.metrics import mean_squared_error
import random


def pdivide(x, y):
    return np.divide(x, y, out=np.copy(x), where=x!=0)

def plog(x, _):
    return np.log(x, out=np.copy(x), where=x>0)

def psin(x, _):
    return np.sin(x)

def pcos(x, _):
    return np.cos(x)

def pow2(x):
    return x**2

def pow3(x):
    return x**3


funset = tengp.FunctionSet()
funset.add(np.add, 2)
funset.add(np.subtract, 2)
funset.add(np.multiply, 2)
funset.add(pdivide, 2)

X, y = get_data('nguyenf4', 20, -1, 1)
X = np.c_[np.ones(len(X)), X]

rv_params = tengp.Parameters(2, 1, 1, 10, funset, real_valued=True)
params = tengp.Parameters(2, 1, 1, 10, funset, real_valued=False)

builder = tengp.individual.IndividualBuilder(params)
bounds = builder.create().bounds[:]
genotypes = []

In [2]:
import math

def bound(x, a, b):
    return a + (b - a) * ( 1 - np.cos(np.pi * x/10))/2

def scale_a(x, a, b):
    return a + (b - a) * x / 10

def scale_b(x, a, b):
    return a + (b - a) * (x/10)**2

def scale_c(x, a, b):
    return a *(b/a)**(x/10)

In [3]:
def cost_function(rv_genes):
#    global genetypes
    bounded = bound(np.array(rv_genes), 0, 10)
    #print('before', rv_genes)
    scaled = [scale_a(x, 0, b) for x, b in zip(bounded, bounds)]
    #print('after', scaled)
    individual = tengp.individual.NPIndividual(scaled, bounds, rv_params)
    
    pred = individual.transform(X)
    
    try:
        out = mean_squared_error(pred, y)
    except ValueError:
        out = np.inf
        
 #   genotypes.append((scaled, out))
    return (out, )

In [4]:
from array import array
from deap import creator, base, tools, cma, algorithms

creator.create('FitnessMin', base.Fitness, weights=(-1.0,))
creator.create('Individual', array, typecode='f', fitness=creator.FitnessMin)

In [5]:
random.seed(42)
res = tengp.simple_es(X, y, mean_squared_error, params)

  del sys.path[0]
  current_node.value = current_node.fun(*values)
  return umr_sum(a, axis, dtype, out, keepdims)


In [6]:
print(res[0].active_nodes, res[0].fitness)

{0, 1, 2, 3, 4, 5, 9, 12} 0.005871946376804379


In [7]:
rv_genes = res[0].genes[:]

In [8]:
print(rv_genes[7], bounds[7])

1 3


In [23]:
rv_genes[7] = 0.9

In [24]:
mean_squared_error(rv_params.individual_class(rv_genes, bounds, rv_params).transform(X), y)

0.9888926670091461

In [28]:
strategy = cma.Strategy(centroid=res[0].genes[:], sigma=2)

genotypes= []

toolbox = base.Toolbox()
toolbox.register('evaluate', cost_function)
toolbox.register("generate", strategy.generate, creator.Individual)
toolbox.register("update", strategy.update)
hof = tools.HallOfFame(1)
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", np.mean)
stats.register("min", np.min)

pop, log = algorithms.eaGenerateUpdate(toolbox, ngen=250, stats=stats, halloffame=hof, verbose=True)

gen	nevals	avg   	min    
0  	14    	24.755	1.55764
1  	14    	39.4992	1.44955
2  	14    	39.0113	2.91147
3  	14    	25.1706	2.47459
4  	14    	35.3994	2.89529
5  	14    	33.1305	1.10893
6  	14    	20.1277	1.83652
7  	14    	49.4402	2.26618
8  	14    	24.1831	2.24623
9  	14    	24.0671	1.55367
10 	14    	17.6682	2.43557
11 	14    	18.158 	3.07014
12 	14    	31.7587	1.7385 
13 	14    	7.04107	2.40763
14 	14    	10.2671	1.24861
15 	14    	10.0782	1.4762 
16 	14    	37.8499	2.11564
17 	14    	9.24091	2.2792 
18 	14    	16.7594	2.36236
19 	14    	4.8309 	1.30215
20 	14    	12.4316	1.27074
21 	14    	21.8364	2.87313
22 	14    	11.7568	2.13337
23 	14    	35.1385	2.2779 
24 	14    	6.67472	1.19848
25 	14    	22.4123	2.79017
26 	14    	13.3651	2.79133
27 	14    	7.62755	1.8527 
28 	14    	12.6098	2.64176
29 	14    	7.91053	2.97961
30 	14    	6.91628	2.87216
31 	14    	9.67973	1.91132
32 	14    	6.734  	3.05582
33 	14    	4.79373	2.33861
34 	14    	7.85879	2.93786
35 	14    	3.60546	2.85401
36 

In [27]:
genes = np.array([x[0] for x in genotypes])
costs = np.array([x[1] for x in genotypes])

In [None]:
from sklearn.decomposition import PCA

In [None]:
pca = PCA(n_components=2, random_state=42)

In [None]:
X_r = pca.fit(genes).transform(genes)

In [None]:
_, ax = plt.subplots(figsize=(10, 10))
ax.scatter(X_r[:,0], X_r[:,1], c=costs)