# Symbolic regression benchmark

In [1]:
from pycgp.benchmarks.symbolic import PARAMS, EV_PARAMS, X, y, target_function
from pycgp.evolution import evolution
from pycgp.counter import Counter
import random
import numpy as np

In [2]:
def run_experiment(params, ev_params, x, y, n_runs=10):
    """Run the evolution several times and summarise the outcomes.

    Each run calls ``evolution(params, ev_params, x, y)``, scores every
    individual of ``result['final']`` with ``ev_params['cost_func']`` and
    records three counters read from ``Counter.get().dict``.

    Args:
        params: Passed unchanged as the first argument of ``evolution``.
        ev_params: Passed unchanged as the second argument of ``evolution``;
            must provide a ``'cost_func'`` callable invoked as
            ``cost_func(y, prediction)``.
        x: Input data handed to ``evolution`` and to ``individual.execute``.
        y: Target values handed to ``evolution`` and to the cost function.
        n_runs: Number of ``evolution`` calls to perform. Defaults to 10,
            the value that used to be hard-coded.

    Returns:
        A list of 11 numbers, in this order: best fitness over all runs,
        mean and std of the fitness of all final individuals, mean and std
        of the per-run best fitness, then sum and per-run mean of the
        ``g_better``, ``g_worse`` and ``g_same_as_parent`` counters.

    Raises:
        ValueError: If ``n_runs`` is smaller than 1 (there would be nothing
            to aggregate).

    Side effects:
        Prints the index of each run as it starts and the final result list.
    """
    if n_runs < 1:
        raise ValueError('n_runs must be at least 1')

    rstat = []
    n_better = []
    n_worse = []
    n_same = []
    for i in range(n_runs):
        # Progress indicator; stays on one line so the summary follows it.
        print(i, end=', ')

        # Use the arguments that were passed in rather than the notebook
        # globals, so the function works for any configuration and data set.
        result = evolution(params, ev_params, x, y)

        rstat.append([ev_params['cost_func'](y, individual.execute(x)) for individual in result['final']])
        # NOTE(review): these values are taken as "counts for this run";
        # that presumes the counter is reset for every evolution() call --
        # confirm against pycgp.counter.
        n_better.append(Counter.get().dict['g_better'])
        n_worse.append(Counter.get().dict['g_worse'])
        n_same.append(Counter.get().dict['g_same_as_parent'])

    # best fitness, mean of last generation, std of last generation, mean of best individual, std of best individual
    # NOTE(review): the axis=1 reductions assume every run yields the same
    # number of final individuals (a rectangular rstat) -- confirm.
    results = [
        np.min(rstat), np.mean(rstat), np.std(rstat), np.mean(np.min(rstat, axis=1)), np.std(np.min(rstat, axis=1)),
        np.sum(n_better), np.mean(n_better),
        np.sum(n_worse), np.mean(n_worse),
        np.sum(n_same), np.mean(n_same)
    ]
    print(results)
    return results

## Point mutation

In [3]:
from pycgp.mutation import point_mutation
# Summary statistics of every configuration, keyed by a short label;
# filled in by the experiment cells below.
all_measurements = {}
# Applied once to the shared EV_PARAMS, so it stays in effect for all
# following cells.
# NOTE(review): presumably controls how long a gem is kept -- confirm
# against pycgp.
EV_PARAMS['expire_gems'] = 30

### PM, 10 nodes

In [4]:
%%time
# Seed Python's built-in `random` module so the cell starts from the same
# RNG state every time it is executed.
# NOTE(review): numpy's RNG is not seeded here -- confirm pycgp only draws
# from `random`.
random.seed(1)

# Configuration: n_cols = 10, point mutation, gems switched off.
# PARAMS and EV_PARAMS are shared objects, so these assignments persist
# into later cells.
PARAMS['n_cols']      = 10
EV_PARAMS['mutation'] = point_mutation
EV_PARAMS['gems']     = False

# Store the summary list returned by run_experiment under '<n_cols>,<gems>'.
all_measurements['10,false'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.20848560675127112, 218.1933562240682, 1451.879881959647, 0.38280884921087083, 0.089344825799990965, 0, 0.0, 0, 0.0, 0, 0.0]
CPU times: user 1min 10s, sys: 78.1 ms, total: 1min 10s
Wall time: 1min 10s


### PM, 50 nodes

In [5]:
%%time
# Seed Python's built-in `random` module so the cell starts from the same
# RNG state every time it is executed.
# NOTE(review): numpy's RNG is not seeded here -- confirm pycgp only draws
# from `random`.
random.seed(1)

# Configuration: n_cols = 50, point mutation, gems switched off.
# PARAMS and EV_PARAMS are shared objects, so these assignments persist
# into later cells.
PARAMS['n_cols']      = 50
EV_PARAMS['mutation'] = point_mutation
EV_PARAMS['gems']     = False

# Store the summary list returned by run_experiment under '<n_cols>,<gems>'.
all_measurements['50,false'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.0, 0.35926208522030106, 0.19971042144091447, 0.28841664544703211, 0.12516176200444809, 0, 0.0, 0, 0.0, 0, 0.0]
CPU times: user 2min 48s, sys: 93.8 ms, total: 2min 49s
Wall time: 2min 49s


### PM, 100 nodes

In [6]:
%%time
# Seed Python's built-in `random` module so the cell starts from the same
# RNG state every time it is executed.
# NOTE(review): numpy's RNG is not seeded here -- confirm pycgp only draws
# from `random`.
random.seed(1)

# Configuration: n_cols = 100, point mutation, gems switched off.
# PARAMS and EV_PARAMS are shared objects, so these assignments persist
# into later cells.
PARAMS['n_cols']      = 100
EV_PARAMS['mutation'] = point_mutation
EV_PARAMS['gems']     = False

# Store the summary list returned by run_experiment under '<n_cols>,<gems>'.
all_measurements['100,false'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.16776289758895796, 171.48393171528696, 1171.9858567558576, 0.32597922698990395, 0.077335513448730311, 0, 0.0, 0, 0.0, 0, 0.0]
CPU times: user 5min 57s, sys: 15.6 ms, total: 5min 57s
Wall time: 5min 58s


### PM, 10 nodes, gems

In [7]:
%%time
# Seed Python's built-in `random` module so the cell starts from the same
# RNG state every time it is executed.
# NOTE(review): numpy's RNG is not seeded here -- confirm pycgp only draws
# from `random`.
random.seed(1)

# Configuration: n_cols = 10, point mutation, gems switched on.
PARAMS['n_cols']      = 10
EV_PARAMS['mutation'] = point_mutation
EV_PARAMS['gems']     = True

# NOTE(review): the trailing '5' in the key presumably names the j-box
# size, but 'j_box_size' is not set in this cell; it uses whatever
# EV_PARAMS currently holds. Re-running this cell after a later cell has
# set 'j_box_size' to 10 would silently change the experiment -- confirm
# the default and consider setting it explicitly.
all_measurements['10,True,5'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.34006399358365186, 12253129.662436092, 85770702.505664796, 0.43348057659583822, 0.056040204700835157, 1470, 147.0, 279, 27.899999999999999, 387, 38.700000000000003]
CPU times: user 1min 12s, sys: 93.8 ms, total: 1min 12s
Wall time: 1min 13s


### PM, 50 nodes, gems

In [8]:
%%time
# Seed Python's built-in `random` module so the cell starts from the same
# RNG state every time it is executed.
# NOTE(review): numpy's RNG is not seeded here -- confirm pycgp only draws
# from `random`.
random.seed(1)

# Configuration: n_cols = 50, point mutation, gems switched on.
PARAMS['n_cols']      = 50
EV_PARAMS['mutation'] = point_mutation
EV_PARAMS['gems']     = True

# NOTE(review): the trailing '5' in the key presumably names the j-box
# size, but 'j_box_size' is not set in this cell; it uses whatever
# EV_PARAMS currently holds. Re-running this cell after a later cell has
# set 'j_box_size' to 10 would silently change the experiment -- confirm
# the default and consider setting it explicitly.
all_measurements['50,True,5'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.053798207185486842, 103.09866624658076, 717.00419778564515, 0.23704182304400004, 0.10814381636649698, 764, 76.400000000000006, 294, 29.399999999999999, 16513, 1651.3]
CPU times: user 2min 59s, sys: 188 ms, total: 3min
Wall time: 3min 1s


### PM, 100 nodes, gems

In [9]:
%%time
# Seed Python's built-in `random` module so the cell starts from the same
# RNG state every time it is executed.
# NOTE(review): numpy's RNG is not seeded here -- confirm pycgp only draws
# from `random`.
random.seed(1)

# Configuration: n_cols = 100, point mutation, gems switched on.
PARAMS['n_cols']      = 100
EV_PARAMS['mutation'] = point_mutation
EV_PARAMS['gems']     = True

# NOTE(review): the trailing '5' in the key presumably names the j-box
# size, but 'j_box_size' is not set in this cell; it uses whatever
# EV_PARAMS currently holds. Re-running this cell after a later cell has
# set 'j_box_size' to 10 would silently change the experiment -- confirm
# the default and consider setting it explicitly.
all_measurements['100,True,5'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.19858953760134257, 4.4879484684009949e+23, 3.1415639278806973e+24, 0.30860436115039203, 0.072115818177315508, 556, 55.600000000000001, 389, 38.899999999999999, 20193, 2019.3]
CPU times: user 5min 53s, sys: 15.6 ms, total: 5min 53s
Wall time: 5min 54s


### PM, 10 nodes, gems, jBox 10

In [10]:
%%time
# Seed Python's built-in `random` module so the cell starts from the same
# RNG state every time it is executed.
# NOTE(review): numpy's RNG is not seeded here -- confirm pycgp only draws
# from `random`.
random.seed(1)

# Configuration: n_cols = 10, point mutation, gems switched on,
# j_box_size = 10. The j_box_size entry stays in the shared EV_PARAMS
# after this cell, so any cell executed afterwards inherits it.
PARAMS['n_cols']      = 10
EV_PARAMS['mutation'] = point_mutation
EV_PARAMS['gems']     = True
EV_PARAMS['j_box_size'] = 10

# Store the summary list under '<n_cols>,<gems>,<j_box_size>'.
all_measurements['10,True,10'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.32712689389304961, 2.53219263328134, 13.620389827330442, 0.42453149044906413, 0.064379211034215011, 1432, 143.19999999999999, 425, 42.5, 5170, 517.0]
CPU times: user 1min 12s, sys: 109 ms, total: 1min 12s
Wall time: 1min 12s


### PM, 50 nodes, gems, jBox 10

In [11]:
%%time
# Seed Python's built-in `random` module so the cell starts from the same
# RNG state every time it is executed.
# NOTE(review): numpy's RNG is not seeded here -- confirm pycgp only draws
# from `random`.
random.seed(1)

# Configuration: n_cols = 50, point mutation, gems switched on,
# j_box_size = 10. The j_box_size entry stays in the shared EV_PARAMS
# after this cell, so any cell executed afterwards inherits it.
PARAMS['n_cols']      = 50
EV_PARAMS['mutation'] = point_mutation
EV_PARAMS['gems']     = True
EV_PARAMS['j_box_size'] = 10

# Store the summary list under '<n_cols>,<gems>,<j_box_size>'.
all_measurements['50,True,10'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.16487898252049579, 0.82791356260625459, 1.9950883718560541, 0.31653693769229946, 0.097482949538619593, 1122, 112.2, 447, 44.700000000000003, 15727, 1572.7]
CPU times: user 3min 8s, sys: 203 ms, total: 3min 8s
Wall time: 3min 9s


### PM, 100 nodes, gems, jBox 10

In [12]:
%%time
# Seed Python's built-in `random` module so the cell starts from the same
# RNG state every time it is executed.
# NOTE(review): numpy's RNG is not seeded here -- confirm pycgp only draws
# from `random`.
random.seed(1)

# Configuration: n_cols = 100, point mutation, gems switched on,
# j_box_size = 10. The j_box_size entry stays in the shared EV_PARAMS
# after this cell, so any cell executed afterwards inherits it.
PARAMS['n_cols']      = 100
EV_PARAMS['mutation'] = point_mutation
EV_PARAMS['gems']     = True
EV_PARAMS['j_box_size'] = 10

# Store the summary list under '<n_cols>,<gems>,<j_box_size>'.
all_measurements['100,True,10'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.19103649780659512, 2.9697216634858377, 11.961513816638092, 0.33876656180746562, 0.06131039096918095, 927, 92.700000000000003, 340, 34.0, 32064, 3206.4000000000001]
CPU times: user 5min 49s, sys: 156 ms, total: 5min 49s
Wall time: 5min 51s


In [13]:
import pandas as pd

# Labels for the 11 values returned by run_experiment, in the same order.
# NOTE: 'indvidiual' is misspelled; the label is kept unchanged so it keeps
# matching previously rendered output.
summary_columns = [
    'best fitness',
    'mean of last gen',
    'std of last gen',
    'mean of best individual',
    'std of best indvidiual',
    'g_better',
    'g_better avg',
    'g_worse',
    'g_worse avg',
    'g_same',
    'g_same avg',
]

# One row per experiment key, one column per summary value.
df = pd.DataFrame.from_dict(all_measurements, orient='index')
df.columns = summary_columns
# Bare expression so the notebook renders the table as the cell output.
df

Unnamed: 0,best fitness,mean of last gen,std of last gen,mean of best individual,std of best indvidiual,g_better,g_better avg,g_worse,g_worse avg,g_same,g_same avg
"10,false",0.208486,218.1934,1451.88,0.382809,0.089345,0,0.0,0,0.0,0,0.0
"50,false",0.0,0.3592621,0.1997104,0.288417,0.125162,0,0.0,0,0.0,0,0.0
"100,false",0.167763,171.4839,1171.986,0.325979,0.077336,0,0.0,0,0.0,0,0.0
"10,True,5",0.340064,12253130.0,85770700.0,0.433481,0.05604,1470,147.0,279,27.9,387,38.7
"50,True,5",0.053798,103.0987,717.0042,0.237042,0.108144,764,76.4,294,29.4,16513,1651.3
"100,True,5",0.19859,4.487948e+23,3.141564e+24,0.308604,0.072116,556,55.6,389,38.9,20193,2019.3
"10,True,10",0.327127,2.532193,13.62039,0.424531,0.064379,1432,143.2,425,42.5,5170,517.0
"50,True,10",0.164879,0.8279136,1.995088,0.316537,0.097483,1122,112.2,447,44.7,15727,1572.7
"100,True,10",0.191036,2.969722,11.96151,0.338767,0.06131,927,92.7,340,34.0,32064,3206.4
