# Symbolic regression benchmark, Probabilistic mutation

In [6]:
from pycgp.benchmarks.symbolic import PARAMS, EV_PARAMS, X, y, target_function
from pycgp.evolution import evolution
import random
import numpy as np

In [7]:
def run_experiment(params, ev_params, x, y):
    """Run the evolution ten times and summarise the final-generation costs.

    The arguments are forwarded as given; earlier revisions silently ignored
    ``params``, ``ev_params`` and ``x`` and read the notebook globals instead.

    Args:
        params: forwarded as the first argument of ``evolution``.
        ev_params: forwarded as the second argument of ``evolution``;
            ``ev_params['cost_func']`` is called as ``cost_func(y, output)``
            to score every individual of the final generation.
        x: inputs, forwarded to ``evolution`` and to ``individual.execute``.
        y: target values, forwarded to ``evolution`` and to the cost function.

    Returns:
        A list of five numbers:
        [lowest cost over all runs,
         mean cost over all final-generation individuals of all runs,
         standard deviation of that same pool,
         mean of the per-run lowest cost,
         standard deviation of the per-run lowest cost].
    """
    n_runs = 10
    rstat = []  # one row per run: cost of every individual in result['final']
    for i in range(n_runs):
        # Progress indicator, printed on a single line.
        print(i, end=', ')

        result = evolution(params, ev_params, x, y)

        cost_func = ev_params['cost_func']
        rstat.append([cost_func(y, individual.execute(x)) for individual in result['final']])

    # Lowest cost reached in each run (axis=1 reduces over the individuals).
    best_per_run = np.min(rstat, axis=1)
    results = [
        np.min(rstat), np.mean(rstat), np.std(rstat), np.mean(best_per_run), np.std(best_per_run)
    ]
    print(results)
    return results

## Probabilistic mutation

In [8]:
from pycgp.mutation import probabilistic_mutation
from pycgp.gems import MatchSMStrategy, GemSM
# Configure every following experiment to use probabilistic mutation together
# with the GemSM gem type and its matching strategy.
EV_PARAMS.update(
    gem_type=GemSM,
    match_strategy=MatchSMStrategy,
    mutation=probabilistic_mutation,
)
# Collects the result list of run_experiment for each configuration,
# keyed by a label such as '10,false' or '10,True,10'.
all_measurements = dict()

### ProbM, 10 nodes

In [9]:
%%time
random.seed(1)

PARAMS['n_cols']      = 10
EV_PARAMS['gems']     = False

all_measurements['10,false'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.11761416387514366, 332.44934324353596, 2256.6560028539516, 0.22964535027716329, 0.096359742757995553]
CPU times: user 2min 1s, sys: 203 ms, total: 2min 1s
Wall time: 2min 6s


### ProbM, 50 nodes

In [10]:
%%time
random.seed(1)

PARAMS['n_cols']      = 50
EV_PARAMS['gems']     = False

all_measurements['50,false'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.15450182862392237, 4.0841549712271243e+24, 1.9386971370959768e+25, 0.20542548052163764, 0.05086463543582681]
CPU times: user 3min 18s, sys: 219 ms, total: 3min 18s
Wall time: 3min 20s


### ProbM, 100 nodes

In [11]:
%%time
random.seed(1)

PARAMS['n_cols']      = 100
EV_PARAMS['gems']     = False

all_measurements['100,false'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 

  output_errors = np.average((y_true - y_pred) ** 2, axis=0,


4, 5, 6, 7, 8, 9, [0.16776289758895796, 3.0452327573746614e+23, 2.131662930162263e+24, 0.28097085666355393, 0.092455501443875737]
CPU times: user 4min 3s, sys: 578 ms, total: 4min 4s
Wall time: 4min 31s


### ProbM, 10 nodes, gems

In [12]:
%%time
random.seed(1)

PARAMS['n_cols']      = 10
EV_PARAMS['gems']     = True

all_measurements['10,True,5'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.19858953760134257, 1.0826201469542883e+24, 5.3838183381464922e+24, 0.26128284461272588, 0.06540258641497973]
CPU times: user 2min 7s, sys: 281 ms, total: 2min 7s
Wall time: 2min 23s


### ProbM, 50 nodes, gems

In [13]:
%%time
random.seed(1)

PARAMS['n_cols']      = 50
EV_PARAMS['gems']     = True

all_measurements['50,True,5'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.15450182862392237, 4.0841549712271243e+24, 1.9386971370959768e+25, 0.20542548052163764, 0.05086463543582681]
CPU times: user 3min 45s, sys: 500 ms, total: 3min 45s
Wall time: 3min 58s


### ProbM, 100 nodes, gems

In [14]:
%%time
random.seed(1)

PARAMS['n_cols']      = 100
EV_PARAMS['gems']     = True

all_measurements['100,True,5'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 

  output_errors = np.average((y_true - y_pred) ** 2, axis=0,


4, 5, 6, 7, 8, 9, [0.16776289758895796, 3.0452327573746614e+23, 2.131662930162263e+24, 0.28097085666355393, 0.092455501443875737]
CPU times: user 7min 16s, sys: 516 ms, total: 7min 17s
Wall time: 7min 43s


### ProbM, 10 nodes, gems, jbox 10

In [19]:
%%time
random.seed(1)

PARAMS['n_cols']      = 10
EV_PARAMS['gems']     = True
EV_PARAMS['j_box_size'] = 10

all_measurements['10,True,10'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.12095524544055436, 9.9390976256697243, 25.86235130255811, 0.18980812559953381, 0.024113866814439215]
CPU times: user 2min 23s, sys: 359 ms, total: 2min 23s
Wall time: 2min 35s


### ProbM, 50 nodes, gems, jbox 10

In [22]:
%%time
random.seed(1)

PARAMS['n_cols']      = 50
EV_PARAMS['gems']     = True
EV_PARAMS['j_box_size'] = 10

all_measurements['50,True,10'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, [0.15450182862392237, 4.0841549712271243e+24, 1.9386971370959768e+25, 0.20542548052163764, 0.05086463543582681]
CPU times: user 2min 58s, sys: 312 ms, total: 2min 58s
Wall time: 3min 3s


### ProbM, 100 nodes, gems, jbox 10

In [17]:
%%time
random.seed(1)

PARAMS['n_cols']      = 100
EV_PARAMS['gems']     = True
EV_PARAMS['j_box_size'] = 10

all_measurements['100,True,10'] = run_experiment(PARAMS, EV_PARAMS, X, y)

0, 1, 2, 3, 

  output_errors = np.average((y_true - y_pred) ** 2, axis=0,


4, 5, 6, 7, 8, 9, [0.16776289758895796, 3.0452327573746614e+23, 2.131662930162263e+24, 0.28097085666355393, 0.092455501443875737]
CPU times: user 7min 7s, sys: 766 ms, total: 7min 7s
Wall time: 7min 42s


In [23]:
import pandas as pd
# One row per configuration label; the five columns follow the order of the
# list returned by run_experiment.
df = pd.DataFrame.from_dict(
    all_measurements,
    orient='index',
    columns=[
        'best fitness',
        'mean of last gen',
        'std of last gen',
        'mean of best individual',
        'std of best indvidiual',
    ],
)
# Bare expression so the notebook renders the table as the cell output.
df

Unnamed: 0,best fitness,mean of last gen,std of last gen,mean of best individual,std of best indvidiual
"10,false",0.117614,332.4493,2256.656,0.229645,0.09636
"50,false",0.154502,4.084155e+24,1.938697e+25,0.205425,0.050865
"100,false",0.167763,3.045233e+23,2.131663e+24,0.280971,0.092456
"10,True,5",0.19859,1.08262e+24,5.383818e+24,0.261283,0.065403
"50,True,5",0.154502,4.084155e+24,1.938697e+25,0.205425,0.050865
"100,True,5",0.167763,3.045233e+23,2.131663e+24,0.280971,0.092456
"10,True,10",0.120955,9.939098,25.86235,0.189808,0.024114
"50,True,10",0.154502,4.084155e+24,1.938697e+25,0.205425,0.050865
"100,True,10",0.167763,3.045233e+23,2.131663e+24,0.280971,0.092456
