In [1]:
import random

import numpy as np

In [2]:
from sklearn.utils import check_random_state

# Seeded generator, so the same samples are drawn on every run.
rng = check_random_state(0)

# Training set: 100 points drawn uniformly between -1 and 1,
# with the quartic target x^4 + x^3 + x^2 + x.
X_train = rng.uniform(low=-1, high=1, size=100)
y_train = X_train**4 + X_train**3 + X_train**2 + X_train

# Test set: a second draw of 100 points from the same generator,
# evaluated with the same target polynomial.
X_test = rng.uniform(low=-1, high=1, size=100)
y_test = X_test**4 + X_test**3 + X_test**2 + X_test

Import PyCGP's evolution routine, together with the single-mutation operator and the gem type and match strategy that go with it.

In [3]:
from pycgp.evolution import evolution
from pycgp.mutation import single_mutation
from pycgp.gems import GemSM, MatchSMStrategy
from pycgp.counter import Counter
from pycgp.params import DEFAULT_PARAMS

In [4]:
from sklearn.metrics import mean_squared_error

# Evolution settings; passed to `evolution` as its second argument.
# The 'gems' entry is overwritten by each experiment cell before it runs.
ev_params = dict(
    cost_func=mean_squared_error,
    target_fitness=0,
    gems=True,
    j_box_size=50,
    gem_type=GemSM,
    match_strategy=MatchSMStrategy,
    mutation=single_mutation,
)

# Number of independent evolution runs per experiment.
TRIALS = 50

In [5]:
# Override the imported defaults in place: a 1 x 15 grid with one input and one output.
# NOTE(review): assumes DEFAULT_PARAMS is a plain dict (supports .update) - confirm.
DEFAULT_PARAMS.update({
    'n_rows': 1,
    'n_cols': 15,
    'n_inputs': 1,
    'n_outputs': 1,
})

In [6]:
def print_evaluation(all_evals, all_bests, stats, scores):
    """Print a summary of a batch of evolution runs.

    Parameters
    ----------
    all_evals : sequence of numbers
        Evaluation count of each run.
    all_bests : sequence
        Best individual of each run; each item must expose a ``fitness``
        attribute.
    stats : sequence of dict
        One counter dict per run, with the keys ``'g_better'``,
        ``'g_worse'``, ``'g_same_as_parent'`` and ``'gens'``.
    scores : sequence of numbers
        Test-set score of each run.

    Returns
    -------
    None
        The summary is written to standard output.
    """
    def total(key):
        # Sum one counter over every run.
        return sum(run[key] for run in stats)

    print('Number of improving gem applications: {}'.format(total('g_better')))
    print('Number of detoriorating gem applications: {}'.format(total('g_worse')))
    print('Number of same as parent applications: {}'.format(total('g_same_as_parent')))

    # Average over the runs actually passed in rather than a global trial
    # count, so the figure stays correct for any batch size.
    n_runs = len(stats)
    sum_of_gens = total('gens')
    avg_gens = sum_of_gens / n_runs if n_runs else float('nan')
    print('Total generations: {}, avg: {}'.format(sum_of_gens, avg_gens))

    print('Average number of evaluations: {}\nAverage final fitness: {}'.format(
        np.average(all_evals), np.average([best.fitness for best in all_bests])))
    print('Average score on test: {}'.format(np.average(scores)))

In [7]:
%%time

# Baseline experiment: gems switched off.
random.seed(1)
ev_params['gems'] = False

# One entry per trial; these lists are handed to print_evaluation afterwards.
all_evals, all_bests, stats, scores = [], [], [], []

print('iteration:', end=' ')
for i in range(TRIALS):
    result = evolution(DEFAULT_PARAMS, ev_params, X_train.reshape(-1, 1), y_train)
    all_evals.append(result['evals'])

    best_individual = result['final'][0]
    all_bests.append(best_individual)

    # Keep a copy of the counter dict as it stands after this trial.
    stats.append(Counter.get().dict.copy())

    # Score the best individual of this trial on the test samples.
    output = best_individual.execute(X_test.reshape(-1, 1))
    score = mean_squared_error(output, y_test)
    scores.append(score)

    print(str(i) + ',', end=' ')
print('\n')

iteration: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 

CPU times: user 6min 18s, sys: 1.84 s, total: 6min 20s
Wall time: 6min 19s


In [8]:
# Summary of the runs collected with gems disabled.
print_evaluation(all_evals=all_evals, all_bests=all_bests, stats=stats, scores=scores)

Number of improving gem applications: 0
Number of detoriorating gem applications: 0
Number of same as parent applications: 0
Total generations: 62450, avg: 1249.0
Average number of evaluations: 5001.0
Average final fitness: 0.023260489355618908
Average score on test: 0.5907578026302437


In [9]:
%%time

# Gem experiment: same seed as the baseline, gems switched on.
random.seed(1)
ev_params['gems'] = True

# One entry per trial; these lists are handed to print_evaluation afterwards.
sm_all_evals, sm_all_bests, sm_stats, sm_scores = [], [], [], []

print('iteration:', end=' ')
for i in range(TRIALS):
    result = evolution(DEFAULT_PARAMS, ev_params, X_train.reshape(-1, 1), y_train)
    sm_all_evals.append(result['evals'])

    best_individual = result['final'][0]
    sm_all_bests.append(best_individual)

    # Keep a copy of the counter dict as it stands after this trial.
    sm_stats.append(Counter.get().dict.copy())

    # Score the best individual of this trial on the test samples.
    output = best_individual.execute(X_test.reshape(-1, 1))
    score = mean_squared_error(output, y_test)
    sm_scores.append(score)

    print(str(i) + ',', end=' ')
print('\n')

iteration: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 

CPU times: user 6min 11s, sys: 2.09 s, total: 6min 14s
Wall time: 6min 12s


In [10]:
# Summary of the runs collected with gems enabled.
print_evaluation(all_evals=sm_all_evals, all_bests=sm_all_bests, stats=sm_stats, scores=sm_scores)

Number of improving gem applications: 10449
Number of detoriorating gem applications: 16591
Number of same as parent applications: 0
Total generations: 55700, avg: 1114.0
Average number of evaluations: 5001.8
Average final fitness: 0.018112667044337356
Average score on test: 0.02005298602749865


## 2D symbolic regression

In [None]:
# NOTE(review): the heading above says "2D", but this cell still builds a
# one-variable quartic data set - TODO confirm the intended 2-D target.
# Seeded generator, so the same samples are drawn on every run.
rng = check_random_state(0)

# Training set: 100 points drawn uniformly between -1 and 1,
# with the target x^4 + x^3 + x^2 + x.
X_train = rng.uniform(low=-1, high=1, size=100)
y_train = X_train**4 + X_train**3 + X_train**2 + X_train

# Test set: a second draw of 100 points from the same generator,
# evaluated with the same target polynomial.
X_test = rng.uniform(low=-1, high=1, size=100)
y_test = X_test**4 + X_test**3 + X_test**2 + X_test