# Symbolic regression benchmark, Probabilistic mutation

In [1]:
from pycgp.benchmarks.symbolic import PARAMS, EV_PARAMS, X, y, target_function
from pycgp.evolution import evolution
from pycgp.counter import Counter
import random
import numpy as np

In [2]:
def run_experiment(params, ev_params, x, y, n_runs=10):
    """Run the evolution repeatedly and summarise the outcomes.

    Each run calls ``evolution(params, ev_params, x, y)``, scores every
    individual in ``result['final']`` with ``ev_params['cost_func']`` and
    reads the ``g_better`` / ``g_worse`` / ``g_same_as_parent`` entries of
    ``Counter.get().dict``.  The run index is printed as progress and the
    summary list is printed before it is returned.

    Args:
        params: Parameters forwarded to ``evolution`` as its first argument.
        ev_params: Evolution parameters forwarded to ``evolution``; must
            contain the ``'cost_func'`` key, called as ``cost_func(y, output)``.
        x: Input data passed to ``evolution`` and to ``individual.execute``.
        y: Target values passed to ``evolution`` and to the cost function.
        n_runs: Number of independent runs (defaults to 10).

    Returns:
        A list of 11 values, in this order:
        best cost over all runs; mean and std of the final-generation costs
        over all runs; mean and std of the per-run best cost; then the sum
        and per-run mean of ``g_better``, ``g_worse`` and
        ``g_same_as_parent``.
    """
    final_costs = []  # one row per run: cost of every individual in the final generation
    n_better = []
    n_worse = []
    n_same = []
    for i in range(n_runs):
        print(i, end=', ')

        # Use the arguments rather than the notebook-level globals so that
        # the function honours whatever configuration the caller passes in.
        result = evolution(params, ev_params, x, y)

        cost_func = ev_params['cost_func']
        final_costs.append([cost_func(y, individual.execute(x)) for individual in result['final']])
        # NOTE(review): the sums below assume these counters hold per-run
        # values (i.e. are reset for each evolution() call) — confirm.
        n_better.append(Counter.get().dict['g_better'])
        n_worse.append(Counter.get().dict['g_worse'])
        n_same.append(Counter.get().dict['g_same_as_parent'])

    # Best (minimum) cost reached in each run; needs every run to yield the
    # same number of final individuals so the rows form a 2-D array.
    best_per_run = np.min(final_costs, axis=1)
    results = [
        np.min(final_costs), np.mean(final_costs), np.std(final_costs),
        np.mean(best_per_run), np.std(best_per_run),
        np.sum(n_better), np.mean(n_better),
        np.sum(n_worse), np.mean(n_worse),
        np.sum(n_same), np.mean(n_same)
    ]
    print(results)
    return results

## Probabilistic mutation

In [3]:
from pycgp.mutation import probabilistic_mutation
from pycgp.gems import MatchByActiveStrategy, GemSM
# Shared configuration for every experiment cell below: these EV_PARAMS
# entries are set once here and are not changed again in this notebook.
EV_PARAMS['gem_type'] = GemSM
EV_PARAMS['match_strategy'] = MatchByActiveStrategy
EV_PARAMS['mutation'] = probabilistic_mutation
# Collects the summary list returned by run_experiment for each experiment,
# keyed by strings such as '10,0' or '100,10' assigned in the cells below.
all_measurements = {}

### ProbM, 10 nodes

In [4]:
%%time
# Experiment: n_cols = 10, gems disabled; summary stored under key '10,0'.
# NOTE(review): only Python's `random` module is seeded; if pycgp also draws
# from numpy's RNG the run is not fully reproducible — confirm.
random.seed(1)

PARAMS['n_cols']      = 10
EV_PARAMS['gems']     = False

all_measurements['10,0'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.11761416387514366, 332.44934324353596, 2256.6560028539516, 0.22964535027716329, 0.096359742757995553, 0, 0.0, 0, 0.0, 0, 0.0]
CPU times: user 1min 56s, sys: 141 ms, total: 1min 56s
Wall time: 1min 57s


### ProbM, 50 nodes

In [5]:
%%time
# Experiment: n_cols = 50, gems disabled; summary stored under key '50,0'.
# NOTE(review): only Python's `random` module is seeded; if pycgp also draws
# from numpy's RNG the run is not fully reproducible — confirm.
random.seed(1)

PARAMS['n_cols']      = 50
EV_PARAMS['gems']     = False

all_measurements['50,0'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.15450182862392237, 4.0841549712271243e+24, 1.9386971370959768e+25, 0.20542548052163764, 0.05086463543582681, 0, 0.0, 0, 0.0, 0, 0.0]
CPU times: user 3min 41s, sys: 359 ms, total: 3min 42s
Wall time: 3min 43s


### ProbM, 100 nodes

In [6]:
%%time
# Experiment: n_cols = 100, gems disabled; summary stored under key '100,0'.
# NOTE(review): only Python's `random` module is seeded; if pycgp also draws
# from numpy's RNG the run is not fully reproducible — confirm.
random.seed(1)

PARAMS['n_cols']      = 100
EV_PARAMS['gems']     = False

all_measurements['100,0'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 

  output_errors = np.average((y_true - y_pred) ** 2, axis=0,


4, 5, 6, 7, 8, 9, [0.16776289758895796, 3.0452327573746614e+23, 2.131662930162263e+24, 0.28097085666355393, 0.092455501443875737, 0, 0.0, 0, 0.0, 0, 0.0]
CPU times: user 5min 36s, sys: 125 ms, total: 5min 37s
Wall time: 5min 40s


### ProbM, 10 nodes, gems

In [7]:
%%time
# Experiment: n_cols = 10, gems enabled; summary stored under key '10,5'.
# NOTE(review): 'j_box_size' is not set in this cell, so the run uses whatever
# EV_PARAMS currently holds. The ',5' in the key suggests a default of 5 —
# confirm. Re-running this cell after a cell that sets j_box_size = 10 would
# silently use 10 instead.
random.seed(1)

PARAMS['n_cols']      = 10
EV_PARAMS['gems']     = True

all_measurements['10,5'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.0, 1.4222876424403884e+51, 9.9560134970827187e+51, 0.20136395949774105, 0.09549642253074421, 1203, 120.3, 7256, 725.60000000000002, 0, 0.0]
CPU times: user 1min 26s, sys: 62.5 ms, total: 1min 26s
Wall time: 1min 27s


### ProbM, 50 nodes, gems

In [8]:
%%time
# Experiment: n_cols = 50, gems enabled; summary stored under key '50,5'.
# NOTE(review): 'j_box_size' is not set in this cell, so the run uses whatever
# EV_PARAMS currently holds. The ',5' in the key suggests a default of 5 —
# confirm. Re-running this cell after a cell that sets j_box_size = 10 would
# silently use 10 instead.
random.seed(1)

PARAMS['n_cols']      = 50
EV_PARAMS['gems']     = True

all_measurements['50,5'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [7.7530235841252567e-32, 1.0544561763298678e+53, 7.3811932343090743e+53, 0.20386936617968546, 0.10684778153088174, 482, 48.200000000000003, 2924, 292.39999999999998, 0, 0.0]
CPU times: user 2min 46s, sys: 141 ms, total: 2min 46s
Wall time: 2min 47s


### ProbM, 100 nodes, gems

In [9]:
%%time
# Experiment: n_cols = 100, gems enabled; summary stored under key '100,5'.
# NOTE(review): 'j_box_size' is not set in this cell, so the run uses whatever
# EV_PARAMS currently holds. The ',5' in the key suggests a default of 5 —
# confirm. Re-running this cell after a cell that sets j_box_size = 10 would
# silently use 10 instead.
random.seed(1)

PARAMS['n_cols']      = 100
EV_PARAMS['gems']     = True

all_measurements['100,5'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 

  output_errors = np.average((y_true - y_pred) ** 2, axis=0,


2, 3, 4, 5, 6, 7, 8, 9, [0.09037321765074692, 1.8583279669534572e+78, 1.3008295768674197e+79, 0.25313248995815274, 0.090102140640471423, 484, 48.399999999999999, 2214, 221.40000000000001, 0, 0.0]
CPU times: user 4min 16s, sys: 219 ms, total: 4min 17s
Wall time: 4min 19s


### ProbM, 10 nodes, gems, jbox 10

In [10]:
%%time
# Experiment: n_cols = 10, gems enabled, j_box_size = 10; summary stored
# under key '10,10'.
# The j_box_size value stays in EV_PARAMS after this cell, so any cell run
# later in the same kernel inherits it unless it sets the key itself.
random.seed(1)

PARAMS['n_cols']      = 10
EV_PARAMS['gems']     = True
EV_PARAMS['j_box_size'] = 10

all_measurements['10,10'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.12095524544055436, 5.8638491288334979e+50, 4.1046943901834492e+51, 0.19832573341218868, 0.059125445847342535, 1794, 179.40000000000001, 10532, 1053.2, 0, 0.0]
CPU times: user 1min 29s, sys: 156 ms, total: 1min 29s
Wall time: 1min 31s


### ProbM, 50 nodes, gems, jbox 10

In [11]:
%%time
# Experiment: n_cols = 50, gems enabled, j_box_size = 10; summary stored
# under key '50,10'.
# The j_box_size value stays in EV_PARAMS after this cell, so any cell run
# later in the same kernel inherits it unless it sets the key itself.
random.seed(1)

PARAMS['n_cols']      = 50
EV_PARAMS['gems']     = True
EV_PARAMS['j_box_size'] = 10

all_measurements['50,10'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 

  output_errors = np.average((y_true - y_pred) ** 2, axis=0,


5, 6, 7, 8, 9, [0.1858275178994177, 4.6169107097413684e+50, 3.2318374968189577e+51, 0.26352114047113562, 0.072400437342059781, 948, 94.799999999999997, 5390, 539.0, 0, 0.0]
CPU times: user 2min 42s, sys: 78.1 ms, total: 2min 42s
Wall time: 2min 42s


### ProbM, 100 nodes, gems, jbox 10

In [12]:
%%time
# Experiment: n_cols = 100, gems enabled, j_box_size = 10; summary stored
# under key '100,10'.
# The j_box_size value stays in EV_PARAMS after this cell, so any cell run
# later in the same kernel inherits it unless it sets the key itself.
random.seed(1)

PARAMS['n_cols']      = 100
EV_PARAMS['gems']     = True
EV_PARAMS['j_box_size'] = 10

all_measurements['100,10'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 

  output_errors = np.average((y_true - y_pred) ** 2, axis=0,


2, 3, 4, 5, 6, 7, 8, 9, [0.17717909640147766, 1.8583279669534572e+78, 1.3008295768674197e+79, 0.29817659988355028, 0.10227418733834077, 592, 59.200000000000003, 4044, 404.39999999999998, 0, 0.0]
CPU times: user 4min 1s, sys: 156 ms, total: 4min 1s
Wall time: 4min 2s


In [13]:
import pandas as pd
# One row per experiment key in all_measurements; the column order must match
# the order of the list built at the end of run_experiment.
summary_columns = [
    'best fitness',
    'mean of last gen',
    'std of last gen',
    'mean of best individual',
    'std of best indvidiual',  # spelling kept as-is: this is the runtime column label
    'g_better',
    'g_better avg',
    'g_worse',
    'g_worse avg',
    'g_same',
    'g_same avg',
]
df = pd.DataFrame.from_dict(all_measurements, orient='index')
df.columns = summary_columns
df

Unnamed: 0,best fitness,mean of last gen,std of last gen,mean of best individual,std of best indvidiual,g_better,g_better avg,g_worse,g_worse avg,g_same,g_same avg
"10,0",0.1176142,332.4493,2256.656,0.229645,0.09636,0,0.0,0,0.0,0,0.0
"50,0",0.1545018,4.084155e+24,1.938697e+25,0.205425,0.050865,0,0.0,0,0.0,0,0.0
"100,0",0.1677629,3.045233e+23,2.131663e+24,0.280971,0.092456,0,0.0,0,0.0,0,0.0
"10,5",0.0,1.4222879999999999e+51,9.956013e+51,0.201364,0.095496,1203,120.3,7256,725.6,0,0.0
"50,5",7.753024000000001e-32,1.054456e+53,7.381193e+53,0.203869,0.106848,482,48.2,2924,292.4,0,0.0
"100,5",0.09037322,1.8583279999999997e+78,1.30083e+79,0.253132,0.090102,484,48.4,2214,221.4,0,0.0
"10,10",0.1209552,5.863849e+50,4.104694e+51,0.198326,0.059125,1794,179.4,10532,1053.2,0,0.0
"50,10",0.1858275,4.616911e+50,3.231837e+51,0.263521,0.0724,948,94.8,5390,539.0,0,0.0
"100,10",0.1771791,1.8583279999999997e+78,1.30083e+79,0.298177,0.102274,592,59.2,4044,404.4,0,0.0
