# Symbolic regression benchmark, Probabilistic mutation

In [1]:
from pycgp.benchmarks.symbolic import PARAMS, EV_PARAMS, X, y, target_function
from pycgp.evolution import evolution
from pycgp.counter import Counter
import random
import numpy as np

In [2]:
def run_experiment(params, ev_params, x, y):
    """Run the evolution 10 times and summarise the outcome.

    Args:
        params: Parameters forwarded unchanged to ``evolution``.
        ev_params: Evolution parameters forwarded to ``evolution``. Must
            provide ``'cost_func'``, which is called as
            ``cost_func(y, prediction)`` for every final individual.
        x: Input data, passed to ``evolution`` and to each final
            individual's ``execute``.
        y: Target values, passed to ``evolution`` and to the cost function.

    Returns:
        A list of 11 values, in this order: best fitness over all runs,
        mean and std of the fitness of all final individuals, mean and std
        of the per-run best fitness, then sum and mean over runs of the
        ``g_better``, ``g_worse`` and ``g_same_as_parent`` counters.
    """
    rstat = []
    n_better = []
    n_worse = []
    n_same = []
    cost_func = ev_params['cost_func']
    for i in range(10):
        # Progress indicator: one number per finished/starting run.
        print(i, end=', ')

        # Use the arguments rather than the notebook globals so the function
        # really runs the configuration it was handed.
        result = evolution(params, ev_params, x, y)

        rstat.append([cost_func(y, individual.execute(x)) for individual in result['final']])
        # NOTE(review): the counters are read after each run without being
        # reset here; whether they are per-run or cumulative depends on
        # evolution()/Counter - confirm.
        n_better.append(Counter.get().dict['g_better'])
        n_worse.append(Counter.get().dict['g_worse'])
        n_same.append(Counter.get().dict['g_same_as_parent'])

    # Best (lowest) fitness of each run; assumes every run returns the same
    # number of final individuals so that rstat is rectangular.
    best_per_run = np.min(rstat, axis=1)
    results = [
        np.min(rstat), np.mean(rstat), np.std(rstat), np.mean(best_per_run), np.std(best_per_run),
        np.sum(n_better), np.mean(n_better),
        np.sum(n_worse), np.mean(n_worse),
        np.sum(n_same), np.mean(n_same)
    ]
    print(results)
    return results

## Probabilistic mutation

In [3]:
from pycgp.mutation import probabilistic_mutation
from pycgp.gems import MatchSMStrategy, GemSM
# Configure the shared evolution parameters for this notebook: SM gem type
# and match strategy, with probabilistic mutation as the mutation operator.
# EV_PARAMS is modified in place, so these settings apply to every later cell.
EV_PARAMS['gem_type'] = GemSM
EV_PARAMS['match_strategy'] = MatchSMStrategy
EV_PARAMS['mutation'] = probabilistic_mutation
# Collects the summary list returned by run_experiment for each
# configuration, keyed by a label describing that configuration.
all_measurements = {}

### ProbM, 10 nodes

In [4]:
%%time
# Seed Python's `random` module before the experiment (NumPy's RNG is not
# seeded here).
random.seed(1)

# 10 columns, gems disabled. PARAMS / EV_PARAMS are mutated in place.
PARAMS['n_cols']      = 10
EV_PARAMS['gems']     = False

# Store the summary under the label '<n_cols>,<gems>'.
all_measurements['10,false'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.11761416387514366, 332.44934324353596, 2256.6560028539516, 0.22964535027716329, 0.096359742757995553, 0, 0.0, 0, 0.0, 0, 0.0]
CPU times: user 1min 56s, sys: 172 ms, total: 1min 56s
Wall time: 1min 57s


### ProbM, 50 nodes

In [5]:
%%time
# Seed Python's `random` module before the experiment (NumPy's RNG is not
# seeded here).
random.seed(1)

# 50 columns, gems disabled. PARAMS / EV_PARAMS are mutated in place.
PARAMS['n_cols']      = 50
EV_PARAMS['gems']     = False

# Store the summary under the label '<n_cols>,<gems>'.
all_measurements['50,false'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.15450182862392237, 4.0841549712271243e+24, 1.9386971370959768e+25, 0.20542548052163764, 0.05086463543582681, 0, 0.0, 0, 0.0, 0, 0.0]
CPU times: user 3min 39s, sys: 438 ms, total: 3min 40s
Wall time: 3min 41s


### ProbM, 100 nodes

In [6]:
%%time
# Seed Python's `random` module before the experiment (NumPy's RNG is not
# seeded here).
random.seed(1)

# 100 columns, gems disabled. PARAMS / EV_PARAMS are mutated in place.
PARAMS['n_cols']      = 100
EV_PARAMS['gems']     = False

# Store the summary under the label '<n_cols>,<gems>'.
all_measurements['100,false'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 

  output_errors = np.average((y_true - y_pred) ** 2, axis=0,


4, 5, 6, 7, 8, 9, [0.16776289758895796, 3.0452327573746614e+23, 2.131662930162263e+24, 0.28097085666355393, 0.092455501443875737, 0, 0.0, 0, 0.0, 0, 0.0]
CPU times: user 5min 35s, sys: 141 ms, total: 5min 35s
Wall time: 5min 39s


### ProbM, 10 nodes, gems

In [7]:
%%time
random.seed(1)

PARAMS['n_cols']      = 10
EV_PARAMS['gems']     = True

all_measurements['10,5'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.19858953760134257, 1.0826201469542883e+24, 5.3838183381464922e+24, 0.26128284461272588, 0.06540258641497973, 18, 1.8, 44, 4.4000000000000004, 0, 0.0]
CPU times: user 1min 31s, sys: 109 ms, total: 1min 31s
Wall time: 1min 32s


### ProbM, 50 nodes, gems

In [8]:
%%time
# Seed Python's `random` module before the experiment (NumPy's RNG is not
# seeded here).
random.seed(1)

# 50 columns, gems enabled. PARAMS / EV_PARAMS are mutated in place.
PARAMS['n_cols']      = 50
EV_PARAMS['gems']     = True

# NOTE(review): j_box_size is not set in this cell; the '5' in the label
# presumably is the default from pycgp.benchmarks.symbolic - confirm.
all_measurements['50,True,5'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.15450182862392237, 4.0841549712271243e+24, 1.9386971370959768e+25, 0.20542548052163764, 0.05086463543582681, 0, 0.0, 0, 0.0, 0, 0.0]
CPU times: user 2min 44s, sys: 109 ms, total: 2min 44s
Wall time: 2min 46s


### ProbM, 100 nodes, gems

In [9]:
%%time
# Seed Python's `random` module before the experiment (NumPy's RNG is not
# seeded here).
random.seed(1)

# 100 columns, gems enabled. PARAMS / EV_PARAMS are mutated in place.
PARAMS['n_cols']      = 100
EV_PARAMS['gems']     = True

# NOTE(review): j_box_size is not set in this cell; the '5' in the label
# presumably is the default from pycgp.benchmarks.symbolic - confirm.
all_measurements['100,True,5'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 

  output_errors = np.average((y_true - y_pred) ** 2, axis=0,


4, 5, 6, 7, 8, 9, [0.16776289758895796, 3.0452327573746614e+23, 2.131662930162263e+24, 0.28097085666355393, 0.092455501443875737, 0, 0.0, 0, 0.0, 0, 0.0]
CPU times: user 4min 18s, sys: 250 ms, total: 4min 18s
Wall time: 4min 21s


### ProbM, 10 nodes, gems, jbox 10

In [10]:
%%time
# Seed Python's `random` module before the experiment (NumPy's RNG is not
# seeded here).
random.seed(1)

# 10 columns, gems enabled, j_box_size set to 10. EV_PARAMS is mutated in
# place, so j_box_size stays 10 for any cell executed after this one.
PARAMS['n_cols']      = 10
EV_PARAMS['gems']     = True
EV_PARAMS['j_box_size'] = 10

# Store the summary under the label '<n_cols>,<gems>,<j_box_size>'.
all_measurements['10,True,10'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.12095524544055436, 9.9390976256697243, 25.86235130255811, 0.18980812559953381, 0.024113866814439215, 46, 4.5999999999999996, 71, 7.0999999999999996, 0, 0.0]
CPU times: user 1min 32s, sys: 109 ms, total: 1min 32s
Wall time: 1min 34s


### ProbM, 50 nodes, gems, jbox 10

In [11]:
%%time
# Seed Python's `random` module before the experiment (NumPy's RNG is not
# seeded here).
random.seed(1)

# 50 columns, gems enabled, j_box_size set to 10. EV_PARAMS is mutated in
# place, so j_box_size stays 10 for any cell executed after this one.
PARAMS['n_cols']      = 50
EV_PARAMS['gems']     = True
EV_PARAMS['j_box_size'] = 10

# Store the summary under the label '<n_cols>,<gems>,<j_box_size>'.
all_measurements['50,True,10'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.15450182862392237, 4.0841549712271243e+24, 1.9386971370959768e+25, 0.20542548052163764, 0.05086463543582681, 0, 0.0, 0, 0.0, 0, 0.0]
CPU times: user 2min 42s, sys: 156 ms, total: 2min 42s
Wall time: 2min 42s


### ProbM, 100 nodes, gems, jbox 10

In [12]:
%%time
# Seed Python's `random` module before the experiment (NumPy's RNG is not
# seeded here).
random.seed(1)

# 100 columns, gems enabled, j_box_size set to 10. EV_PARAMS is mutated in
# place, so j_box_size stays 10 for any cell executed after this one.
PARAMS['n_cols']      = 100
EV_PARAMS['gems']     = True
EV_PARAMS['j_box_size'] = 10

# Store the summary under the label '<n_cols>,<gems>,<j_box_size>'.
all_measurements['100,True,10'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 

  output_errors = np.average((y_true - y_pred) ** 2, axis=0,


4, 5, 6, 7, 8, 9, [0.16776289758895796, 3.0452327573746614e+23, 2.131662930162263e+24, 0.28097085666355393, 0.092455501443875737, 0, 0.0, 0, 0.0, 0, 0.0]
CPU times: user 3min 58s, sys: 78.1 ms, total: 3min 58s
Wall time: 3min 58s


In [13]:
import pandas as pd
# One row per experiment label, one column per summary statistic, in the
# order in which run_experiment returns them.
df = pd.DataFrame.from_dict(all_measurements, orient='index')
df.columns = ['best fitness', 'mean of last gen', 'std of last gen', 'mean of best individual', 'std of best individual',
             'g_better', 'g_better avg', 'g_worse', 'g_worse avg', 'g_same', 'g_same avg']
# Bare expression so the notebook renders the table as the cell output.
df

Unnamed: 0,best fitness,mean of last gen,std of last gen,mean of best individual,std of best indvidiual,g_better,g_better avg,g_worse,g_worse avg,g_same,g_same avg
"10,false",0.117614,332.4493,2256.656,0.229645,0.09636,0,0.0,0,0.0,0,0.0
"50,false",0.154502,4.084155e+24,1.938697e+25,0.205425,0.050865,0,0.0,0,0.0,0,0.0
"100,false",0.167763,3.045233e+23,2.131663e+24,0.280971,0.092456,0,0.0,0,0.0,0,0.0
105,0.19859,1.08262e+24,5.383818e+24,0.261283,0.065403,18,1.8,44,4.4,0,0.0
"50,True,5",0.154502,4.084155e+24,1.938697e+25,0.205425,0.050865,0,0.0,0,0.0,0,0.0
"100,True,5",0.167763,3.045233e+23,2.131663e+24,0.280971,0.092456,0,0.0,0,0.0,0,0.0
"10,True,10",0.120955,9.939098,25.86235,0.189808,0.024114,46,4.6,71,7.1,0,0.0
"50,True,10",0.154502,4.084155e+24,1.938697e+25,0.205425,0.050865,0,0.0,0,0.0,0,0.0
"100,True,10",0.167763,3.045233e+23,2.131663e+24,0.280971,0.092456,0,0.0,0,0.0,0,0.0
