# Symbolic regression benchmark, Single mutation

In [1]:
from pycgp.benchmarks.symbolic import PARAMS, EV_PARAMS, X, y, target_function
from pycgp.evolution import evolution
from pycgp.counter import Counter
import random
import numpy as np

In [2]:
def run_experiment(params, ev_params, x, y, n_runs=10):
    """Run the evolution repeatedly and summarise cost and gem counters.

    Every run calls ``evolution(params, ev_params, x, y)``, scores each
    individual in ``result['final']`` with ``ev_params['cost_func']`` and
    records the ``g_better``, ``g_worse`` and ``g_same_as_parent`` entries of
    ``Counter.get().dict`` as they stand right after that run.

    Parameters
    ----------
    params, ev_params
        Forwarded unchanged to ``evolution``. ``ev_params['cost_func']`` is
        called as ``cost_func(y, prediction)``.
    x
        Input data, passed to ``evolution`` and to ``individual.execute``.
    y
        Target values, passed to ``evolution`` and to the cost function.
    n_runs : int, optional
        How many times ``evolution`` is called. Defaults to 10, the value
        that used to be hard-coded.

    Returns
    -------
    list
        ``[best cost, mean cost of final populations, std of final
        populations, mean of the per-run best cost, std of the per-run best
        cost, sum g_better, mean g_better, sum g_worse, mean g_worse,
        sum g_same_as_parent, mean g_same_as_parent]``

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1 (there would be nothing to summarise).
    """
    if n_runs < 1:
        raise ValueError('n_runs must be at least 1')

    rstat = []      # one list of final-population costs per run
    n_better = []
    n_worse = []
    n_same = []
    for i in range(n_runs):
        # Progress indicator; kept on one line so the result list follows it.
        print(i, end=', ')

        # Use the arguments, not the notebook-level PARAMS / EV_PARAMS / X,
        # so the function honours whatever configuration the caller passes.
        result = evolution(params, ev_params, x, y)

        cost_func = ev_params['cost_func']
        rstat.append([cost_func(y, individual.execute(x)) for individual in result['final']])

        counters = Counter.get().dict
        n_better.append(counters['g_better'])
        n_worse.append(counters['g_worse'])
        n_same.append(counters['g_same_as_parent'])

    # Best cost inside each run; axis=1 requires every run to return the
    # same number of final individuals.
    best_per_run = np.min(rstat, axis=1)

    results = [
        np.min(rstat), np.mean(rstat), np.std(rstat), np.mean(best_per_run), np.std(best_per_run),
        np.sum(n_better), np.mean(n_better),
        np.sum(n_worse), np.mean(n_worse),
        np.sum(n_same), np.mean(n_same)
    ]
    print(results)
    return results

## Single mutation

In [3]:
# Mutation operator and gem classes plugged into the shared evolution settings.
from pycgp.mutation import single_mutation
from pycgp.gems import MatchByActiveStrategy, GemSM
# EV_PARAMS is the object imported from pycgp.benchmarks.symbolic; these item
# assignments change it in place, so they stay in effect for every later cell
# that passes EV_PARAMS on.
EV_PARAMS['gem_type'] = GemSM
EV_PARAMS['match_strategy'] = MatchByActiveStrategy
EV_PARAMS['mutation'] = single_mutation
# One result list per configuration, keyed by a '<n_cols>,<suffix>' string.
# Re-running this cell discards any measurements collected so far.
all_measurements = {}

### SM, 10 nodes

In [4]:
%%time
# Baseline run: 10 columns, gems switched off.
# NOTE(review): only the stdlib `random` generator is seeded; if pycgp also
# draws from numpy's RNG the run is not fully reproducible — confirm.
random.seed(1)

# PARAMS / EV_PARAMS are shared objects; the values set here persist into
# later cells until they are overwritten.
PARAMS['n_cols']      = 10
EV_PARAMS['gems']     = False

# Key is '<n_cols>,<suffix>'; the 0 suffix presumably marks "no gems" — TODO confirm.
all_measurements['10,0'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.16776289758895796, 1.584563250285287e+23, 1.109194275199701e+24, 0.40331497555146323, 0.11326392653235846, 0, 0.0, 0, 0.0, 0, 0.0]
CPU times: user 1min 36s, sys: 62.5 ms, total: 1min 36s
Wall time: 1min 36s


### SM, 50 nodes

In [5]:
%%time
# Baseline run: 50 columns, gems switched off.
# NOTE(review): only the stdlib `random` generator is seeded; if pycgp also
# draws from numpy's RNG the run is not fully reproducible — confirm.
random.seed(1)

# PARAMS / EV_PARAMS are shared objects; the values set here persist into
# later cells until they are overwritten.
PARAMS['n_cols']      = 50
EV_PARAMS['gems']     = False

# Key is '<n_cols>,<suffix>'; the 0 suffix presumably marks "no gems" — TODO confirm.
all_measurements['50,0'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.0, 0.82675346161707552, 1.198642971015931, 0.28172411662416363, 0.14874260493227323, 0, 0.0, 0, 0.0, 0, 0.0]
CPU times: user 2min 59s, sys: 78.1 ms, total: 2min 59s
Wall time: 2min 59s


### SM, 100 nodes

In [6]:
%%time
# Baseline run: 100 columns, gems switched off.
# NOTE(review): only the stdlib `random` generator is seeded; if pycgp also
# draws from numpy's RNG the run is not fully reproducible — confirm.
random.seed(1)

# PARAMS / EV_PARAMS are shared objects; the values set here persist into
# later cells until they are overwritten.
PARAMS['n_cols']      = 100
EV_PARAMS['gems']     = False

# Key is '<n_cols>,<suffix>'; the 0 suffix presumably marks "no gems" — TODO confirm.
all_measurements['100,0'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.12187932192563014, 1.7690668659154296e+23, 1.2383468061408008e+24, 0.28906797740086415, 0.094108171804113211, 0, 0.0, 0, 0.0, 0, 0.0]
CPU times: user 4min 31s, sys: 78.1 ms, total: 4min 31s
Wall time: 4min 32s


### SM, 10 nodes, gems

In [7]:
%%time
# Gems enabled, 10 columns. Same seed as the baseline cells.
# NOTE(review): only the stdlib `random` generator is seeded; if pycgp also
# draws from numpy's RNG the run is not fully reproducible — confirm.
random.seed(1)

PARAMS['n_cols']      = 10
EV_PARAMS['gems']     = True
# NOTE(review): 'j_box_size' is not set here, so this run uses whatever value
# EV_PARAMS currently holds. The ',5' in the key presumably refers to a
# default of 5 — confirm. Re-running this cell after a cell that sets
# EV_PARAMS['j_box_size'] = 10 would use 10 while still storing under ',5'.

all_measurements['10,5'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.19988257373530144, 6.3382530011411481e+23, 4.4367771007988034e+24, 0.36985902898252088, 0.10782344009256949, 1023, 102.3, 11890, 1189.0, 0, 0.0]
CPU times: user 1min 29s, sys: 46.9 ms, total: 1min 29s
Wall time: 1min 29s


### SM, 50 nodes, gems

In [8]:
%%time
# Gems enabled, 50 columns. Same seed as the baseline cells.
# NOTE(review): only the stdlib `random` generator is seeded; if pycgp also
# draws from numpy's RNG the run is not fully reproducible — confirm.
random.seed(1)

PARAMS['n_cols']      = 50
EV_PARAMS['gems']     = True
# NOTE(review): 'j_box_size' is not set here, so this run uses whatever value
# EV_PARAMS currently holds. The ',5' in the key presumably refers to a
# default of 5 — confirm. Re-running this cell after a cell that sets
# EV_PARAMS['j_box_size'] = 10 would use 10 while still storing under ',5'.

all_measurements['50,5'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.073258574530842571, 2.5353012004564592e+24, 1.7747108403195216e+25, 0.33049748631863285, 0.12958914824181172, 764, 76.400000000000006, 15786, 1578.5999999999999, 0, 0.0]
CPU times: user 2min 42s, sys: 31.2 ms, total: 2min 42s
Wall time: 2min 43s


### SM, 100 nodes, gems

In [9]:
%%time
# Gems enabled, 100 columns. Same seed as the baseline cells.
# NOTE(review): only the stdlib `random` generator is seeded; if pycgp also
# draws from numpy's RNG the run is not fully reproducible — confirm.
random.seed(1)

PARAMS['n_cols']      = 100
EV_PARAMS['gems']     = True
# NOTE(review): 'j_box_size' is not set here, so this run uses whatever value
# EV_PARAMS currently holds. The ',5' in the key presumably refers to a
# default of 5 — confirm. Re-running this cell after a cell that sets
# EV_PARAMS['j_box_size'] = 10 would use 10 while still storing under ',5'.

all_measurements['100,5'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.12226624476574706, 1.5167661019464638e+24, 1.0617362713625246e+25, 0.3459751531528461, 0.10795427397995508, 513, 51.299999999999997, 11630, 1163.0, 0, 0.0]
CPU times: user 3min 58s, sys: 797 ms, total: 3min 59s
Wall time: 4min 3s


### SM, 10 nodes, gems, jbox 10

In [10]:
%%time
# Gems enabled with j_box_size 10, 10 columns. Same seed as the other cells.
# NOTE(review): only the stdlib `random` generator is seeded; if pycgp also
# draws from numpy's RNG the run is not fully reproducible — confirm.
random.seed(1)

PARAMS['n_cols']      = 10
EV_PARAMS['gems']     = True
# This value stays in the shared EV_PARAMS afterwards; no cell resets it.
EV_PARAMS['j_box_size'] = 10

# Key is '<n_cols>,<j_box_size>'.
all_measurements['10,10'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.16776289758895796, 41238213.379289106, 288667431.58656788, 0.37513038935120518, 0.10282663565843006, 1835, 183.5, 11630, 1163.0, 0, 0.0]
CPU times: user 1min 3s, sys: 15.6 ms, total: 1min 3s
Wall time: 1min 3s


### SM, 50 nodes, gems, jbox 10

In [11]:
%%time
# Gems enabled with j_box_size 10, 50 columns. Same seed as the other cells.
# NOTE(review): only the stdlib `random` generator is seeded; if pycgp also
# draws from numpy's RNG the run is not fully reproducible — confirm.
random.seed(1)

PARAMS['n_cols']      = 50
EV_PARAMS['gems']     = True
# This value stays in the shared EV_PARAMS afterwards; no cell resets it.
EV_PARAMS['j_box_size'] = 10

# Key is '<n_cols>,<j_box_size>'.
all_measurements['50,10'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.073258574530842571, 5.7143653920061384e+23, 4.0000557744042974e+24, 0.34695498385899309, 0.12373179856705138, 737, 73.700000000000003, 17288, 1728.8, 0, 0.0]
CPU times: user 2min 9s, sys: 156 ms, total: 2min 9s
Wall time: 2min 9s


### SM, 100 nodes, gems, jbox 10

In [12]:
%%time
# Gems enabled with j_box_size 10, 100 columns. Same seed as the other cells.
# NOTE(review): only the stdlib `random` generator is seeded; if pycgp also
# draws from numpy's RNG the run is not fully reproducible — confirm.
random.seed(1)

PARAMS['n_cols']      = 100
EV_PARAMS['gems']     = True
# This value stays in the shared EV_PARAMS afterwards; no cell resets it.
EV_PARAMS['j_box_size'] = 10

# Key is '<n_cols>,<j_box_size>'.
all_measurements['100,10'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 

  output_errors = np.average((y_true - y_pred) ** 2, axis=0,


3, 4, 5, 6, 7, 8, 9, [0.26187475023799439, 5.3010941359160054e+27, 3.7107273129710895e+28, 0.35537053927680029, 0.053220856417509885, 688, 68.799999999999997, 11609, 1160.9000000000001, 0, 0.0]
CPU times: user 3min 22s, sys: 328 ms, total: 3min 22s
Wall time: 3min 23s


In [14]:
import pandas as pd

# One row per configuration key, one column per entry of the list returned by
# run_experiment (same order).
df = pd.DataFrame.from_dict(all_measurements, orient='index')
# NOTE(review): 'indvidiual' is a typo, kept byte-for-byte so that any lookup
# by column name elsewhere keeps working.
df.columns = (
    ['best fitness', 'mean of last gen', 'std of last gen',
     'mean of best individual', 'std of best indvidiual']
    # Each gem counter contributes a total column followed by its per-run average.
    + [label
       for counter in ('g_better', 'g_worse', 'g_same')
       for label in (counter, counter + ' avg')]
)
df

Unnamed: 0,best fitness,mean of last gen,std of last gen,mean of best individual,std of best indvidiual,g_better,g_better avg,g_worse,g_worse avg,g_same,g_same avg
"10,0",0.167763,1.584563e+23,1.109194e+24,0.403315,0.113264,0,0.0,0,0.0,0,0.0
"50,0",0.0,0.8267535,1.198643,0.281724,0.148743,0,0.0,0,0.0,0,0.0
"100,0",0.121879,1.769067e+23,1.238347e+24,0.289068,0.094108,0,0.0,0,0.0,0,0.0
"10,5",0.199883,6.338253e+23,4.436777e+24,0.369859,0.107823,1023,102.3,11890,1189.0,0,0.0
"50,5",0.073259,2.535301e+24,1.774711e+25,0.330497,0.129589,764,76.4,15786,1578.6,0,0.0
"100,5",0.122266,1.516766e+24,1.061736e+25,0.345975,0.107954,513,51.3,11630,1163.0,0,0.0
"10,10",0.167763,41238210.0,288667400.0,0.37513,0.102827,1835,183.5,11630,1163.0,0,0.0
"50,10",0.073259,5.714365e+23,4.000056e+24,0.346955,0.123732,737,73.7,17288,1728.8,0,0.0
"100,10",0.261875,5.301094e+27,3.710727e+28,0.355371,0.053221,688,68.8,11609,1160.9,0,0.0
