# Binary classification, Single mutation

In [1]:
from pycgp.benchmarks.classification import X_train, y_train, X_test, y_test, PARAMS, EV_PARAMS
from pycgp.evolution import evolution
import random
import numpy as np

Statistics we want to measure for the classification task:
- best train error achieved
- mean and std of best train errors
- best test error achieved
- mean and std of best test errors
- mean and std of last generation's train error
- mean and std of last generation's test error

In [2]:
def run_experiment(PARAMS, EV_PARAMS, n_runs=5):
    """Run ``evolution`` repeatedly and summarise train and test fitness.

    Every run calls ``evolution(PARAMS, EV_PARAMS, X_train, y_train)`` and reads
    the individuals stored under ``result['final']``. For each of them the
    training score is its ``fitness`` attribute and the test score is
    ``EV_PARAMS['cost_func'](y_test, individual.execute(X_test))``.

    ``X_train``, ``y_train``, ``X_test`` and ``y_test`` are taken from the
    notebook's global namespace, not from the arguments.

    Parameters
    ----------
    PARAMS, EV_PARAMS
        Passed through unchanged to ``evolution``. ``EV_PARAMS`` must also
        provide the ``'cost_func'`` entry used for test scoring.
    n_runs : int, optional
        Number of evolution runs to aggregate. Defaults to 5, the value that
        used to be hard-coded.

    Returns
    -------
    tuple of (list, list)
        ``(train_results, test_results)``. Each list contains, in order:
        the minimum score over all runs, the mean and the standard deviation
        over every final individual of every run, and the mean and the standard
        deviation of the per-run minimum.

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1 (there would be nothing to summarise).
    """
    if n_runs < 1:
        raise ValueError('n_runs must be at least 1, got {!r}'.format(n_runs))

    cost_func = EV_PARAMS['cost_func']
    train_stat = []
    test_stat = []
    for i in range(n_runs):
        # Progress marker; all runs share one output line.
        print(i, end=', ')
        result = evolution(PARAMS, EV_PARAMS, X_train, y_train)
        final_population = result['final']

        train_stat.append([ind.fitness for ind in final_population])
        test_stat.append(
            [cost_func(y_test, ind.execute(X_test)) for ind in final_population]
        )

    train_results = _summarize_runs(train_stat)
    test_results = _summarize_runs(test_stat)
    print('Train: ', train_results)
    print('Test: ', test_results)
    return train_results, test_results


def _summarize_runs(stat):
    """Reduce a runs-by-individuals table of scores to the five reported numbers.

    The minimum is treated as the best score, as in the original summary.
    NOTE(review): ``axis=1`` assumes every run produced the same number of final
    individuals, so that ``stat`` forms a rectangular 2-D table -- confirm.
    """
    best_per_run = np.min(stat, axis=1)
    return [
        np.min(stat),           # best score over all runs
        np.mean(stat),          # mean over all final individuals of all runs
        np.std(stat),           # std over all final individuals of all runs
        np.mean(best_per_run),  # mean of the best individual of each run
        np.std(best_per_run),   # std of the best individual of each run
    ]
        
        

In [3]:
# Result stores filled by the experiment cells: configuration key -> summary list
# returned by run_experiment (one dict for train, a separate one for test).
train_measurements, test_measurements = {}, {}

In [4]:
from pycgp.mutation import single_mutation
from pycgp.gems import GemSM, MatchSMStrategy
# Switch the evolution settings to the single-mutation variant: the gem type,
# the mutation operator and the match strategy are set together, in that order.
for option, value in (
    ('gem_type', GemSM),
    ('mutation', single_mutation),
    ('match_strategy', MatchSMStrategy),
):
    EV_PARAMS[option] = value

### BinClassification, SM, 10 nodes

In [5]:
%%time
# Reseed the stdlib `random` generator before the run.
# NOTE(review): whether pycgp draws all of its randomness from `random` is not
# visible in this notebook -- confirm if exact reproducibility matters.
random.seed(1)

# Set the configuration explicitly, as the later experiment cells do. Without
# this, re-running the cell after another configuration cell would measure
# whatever PARAMS / EV_PARAMS were last left at while still storing the result
# under '10,false'.
# Assumes the benchmark defaults are 10 columns with gems off, as this
# section's heading and key state -- TODO confirm.
PARAMS['n_cols'] = 10
EV_PARAMS['gems'] = False

key = '10,false'
results = run_experiment(PARAMS, EV_PARAMS)
train_measurements[key] = results[0]
test_measurements[key] = results[1]

0, 1, 2, 3, 4, Train:  [-0.91959798994974873, -0.7212060301507538, 0.19186700906604634, -0.89899497487437185, 0.027172876049438764]
Test:  [-0.90058479532163738, -0.71134502923976628, 0.18240275122314853, -0.87134502923976598, 0.034695665436704853]
CPU times: user 3min 59s, sys: 516 ms, total: 3min 59s
Wall time: 4min 24s


### BinClassification, SM, 50 nodes

In [6]:
%%time
# Reseed the stdlib `random` generator before the run.
random.seed(1)

# Configuration for this experiment: 50 columns, gems switched off.
PARAMS['n_cols'] = 50
EV_PARAMS['gems'] = False

# The key encodes "<n_cols>,<gems>"; train and test summaries share it.
key = '50,false'
results = run_experiment(PARAMS, EV_PARAMS)
train_measurements[key], test_measurements[key] = results[0], results[1]

0, 1, 2, 3, 4, Train:  [-0.94221105527638194, -0.71798994974874375, 0.22788407854368789, -0.92261306532663334, 0.023322213840949322]
Test:  [-0.92982456140350878, -0.69918128654970757, 0.21556720037845623, -0.88538011695906427, 0.032370415211925269]
CPU times: user 5min 20s, sys: 609 ms, total: 5min 20s
Wall time: 5min 35s


### BinClassification, SM, 100 nodes

In [7]:
%%time
# Reseed the stdlib `random` generator before the run.
random.seed(1)

# Configuration for this experiment: 100 columns, gems switched off.
PARAMS['n_cols'] = 100
EV_PARAMS['gems'] = False

# The key encodes "<n_cols>,<gems>"; train and test summaries share it.
key = '100,false'
results = run_experiment(PARAMS, EV_PARAMS)
train_measurements[key], test_measurements[key] = results[0], results[1]

0, 1, 2, 3, 4, Train:  [-0.95226130653266328, -0.75819095477386933, 0.24451176945886255, -0.93919597989949755, 0.0073511250443858032]
Test:  [-0.92397660818713445, -0.73754385964912272, 0.23027763622912378, -0.90877192982456134, 0.012596876741835105]
CPU times: user 5min 32s, sys: 266 ms, total: 5min 33s
Wall time: 5min 43s


### BinClassification, SM, 10 nodes, gems, j_box_size=5

In [8]:
%%time
# Reseed the stdlib `random` generator before the run.
random.seed(1)

# Configuration for this experiment: 10 columns, gems enabled, j_box_size 5.
PARAMS['n_cols'] = 10
EV_PARAMS['gems'] = True
EV_PARAMS['j_box_size'] = 5

# The key encodes "<n_cols>,<gems>"; train and test summaries share it.
key = '10,True'
results = run_experiment(PARAMS, EV_PARAMS)
train_measurements[key], test_measurements[key] = results[0], results[1]

0, 1, 2, 3, 4, Train:  [-0.91959798994974873, -0.6571859296482413, 0.16704152519543664, -0.88994974874371857, 0.0460670015532295]
Test:  [-0.92397660818713445, -0.65520467836257312, 0.15912004528101481, -0.87017543859649127, 0.050631253384592637]
CPU times: user 2min 8s, sys: 234 ms, total: 2min 8s
Wall time: 2min 12s


### BinClassification, SM, 50 nodes, gems, j_box_size=5

In [9]:
%%time
# Reseed the stdlib `random` generator before the run.
random.seed(1)

# Configuration for this experiment: 50 columns, gems enabled, j_box_size 5.
PARAMS['n_cols'] = 50
EV_PARAMS['gems'] = True
EV_PARAMS['j_box_size'] = 5

# The key encodes "<n_cols>,<gems>"; train and test summaries share it.
key = '50,True'
results = run_experiment(PARAMS, EV_PARAMS)
train_measurements[key], test_measurements[key] = results[0], results[1]

0, 1, 2, 3, 4, Train:  [-0.96733668341708545, -0.77427135678391967, 0.19860206884102607, -0.9206030150753769, 0.032744391070643031]
Test:  [-0.97660818713450293, -0.76491228070175443, 0.20195007036972054, -0.91461988304093578, 0.050115287217844577]
CPU times: user 3min 16s, sys: 125 ms, total: 3min 16s
Wall time: 51min 27s


### BinClassification, SM, 100 nodes, gems, j_box_size=5

In [10]:
%%time
# Reseed the stdlib `random` generator before the run.
random.seed(1)

# Configuration for this experiment: 100 columns, gems enabled, j_box_size 5.
PARAMS['n_cols'] = 100
EV_PARAMS['gems'] = True
EV_PARAMS['j_box_size'] = 5

# The key encodes "<n_cols>,<gems>"; train and test summaries share it.
key = '100,True'
results = run_experiment(PARAMS, EV_PARAMS)
train_measurements[key], test_measurements[key] = results[0], results[1]

0, 1, 2, 3, 4, Train:  [-0.95477386934673369, -0.76412060301507523, 0.19217715075232286, -0.93115577889447232, 0.025946393253116762]
Test:  [-0.91228070175438591, -0.74619883040935675, 0.17459446041578566, -0.89473684210526305, 0.027178830499701167]
CPU times: user 3min 37s, sys: 46.9 ms, total: 3min 37s
Wall time: 3min 38s


### BinClassification, SM, 10 nodes, gems, j_box_size=10

In [11]:
%%time
# Reseed the stdlib `random` generator before the run.
random.seed(1)

# Configuration for this experiment: 10 columns, gems enabled, j_box_size 10.
PARAMS['n_cols'] = 10
EV_PARAMS['gems'] = True
EV_PARAMS['j_box_size'] = 10

# The key encodes "<n_cols>,<gems>,<j_box_size>"; train and test summaries share it.
key = '10,True,10'
results = run_experiment(PARAMS, EV_PARAMS)
train_measurements[key], test_measurements[key] = results[0], results[1]

0, 1, 2, 3, 4, Train:  [-0.91959798994974873, -0.63678391959798997, 0.18988337358295315, -0.91105527638190953, 0.0094813880724186912]
Test:  [-0.92397660818713445, -0.63040935672514609, 0.18419474544861644, -0.88421052631578934, 0.020392509677383243]
CPU times: user 2min 38s, sys: 297 ms, total: 2min 38s
Wall time: 2min 44s


### BinClassification, SM, 50 nodes, gems, j_box_size=10

In [12]:
%%time
# Reseed the stdlib `random` generator before the run.
random.seed(1)

# Configuration for this experiment: 50 columns, gems enabled, j_box_size 10.
PARAMS['n_cols'] = 50
EV_PARAMS['gems'] = True
EV_PARAMS['j_box_size'] = 10

# The key encodes "<n_cols>,<gems>,<j_box_size>"; train and test summaries share it.
key = '50,True,10'
results = run_experiment(PARAMS, EV_PARAMS)
train_measurements[key], test_measurements[key] = results[0], results[1]

0, 1, 2, 3, 4, Train:  [-0.96733668341708545, -0.80371859296482417, 0.19514875943011642, -0.93919597989949755, 0.015682901039524872]
Test:  [-0.97660818713450293, -0.78994152046783628, 0.18825628683758125, -0.90760233918128663, 0.041251911318241873]
CPU times: user 3min 32s, sys: 156 ms, total: 3min 32s
Wall time: 3min 33s


### BinClassification, SM, 100 nodes, gems, j_box_size=10

In [13]:
%%time
# Reseed the stdlib `random` generator before the run.
random.seed(1)

# Configuration for this experiment: 100 columns, gems enabled, j_box_size 10.
PARAMS['n_cols'] = 100
EV_PARAMS['gems'] = True
EV_PARAMS['j_box_size'] = 10

# The key encodes "<n_cols>,<gems>,<j_box_size>"; train and test summaries share it.
key = '100,True,10'
results = run_experiment(PARAMS, EV_PARAMS)
train_measurements[key], test_measurements[key] = results[0], results[1]

0, 1, 2, 3, 4, Train:  [-0.957286432160804, -0.73899497487437171, 0.21563623860406231, -0.9402010050251256, 0.011720506321297069]
Test:  [-0.95906432748538006, -0.72912280701754384, 0.2062775952234428, -0.92046783625730977, 0.0257841025556124]
CPU times: user 4min 26s, sys: 78.1 ms, total: 4min 26s
Wall time: 4min 27s


In [14]:
import pandas as pd
# Tabulate the training summaries: one row per experiment key, one column per
# statistic, in the order run_experiment returns them.
# NOTE(review): 'indvidiual' in the last label is a typo; it is kept unchanged
# because the label is part of the displayed table.
summary_columns = [
    'best fitness',
    'mean of last gen',
    'std of last gen',
    'mean of best individual',
    'std of best indvidiual',
]
df = pd.DataFrame.from_dict(train_measurements, orient='index')
df.columns = summary_columns
df

Unnamed: 0,best fitness,mean of last gen,std of last gen,mean of best individual,std of best indvidiual
"10,false",-0.919598,-0.721206,0.191867,-0.898995,0.027173
"50,false",-0.942211,-0.71799,0.227884,-0.922613,0.023322
"100,false",-0.952261,-0.758191,0.244512,-0.939196,0.007351
"10,True",-0.919598,-0.657186,0.167042,-0.88995,0.046067
"50,True",-0.967337,-0.774271,0.198602,-0.920603,0.032744
"100,True",-0.954774,-0.764121,0.192177,-0.931156,0.025946
"10,True,10",-0.919598,-0.636784,0.189883,-0.911055,0.009481
"50,True,10",-0.967337,-0.803719,0.195149,-0.939196,0.015683
"100,True,10",-0.957286,-0.738995,0.215636,-0.940201,0.011721


In [15]:
import pandas as pd
# Tabulate the test summaries: one row per experiment key, one column per
# statistic, in the order run_experiment returns them. This rebinds `df`.
# NOTE(review): 'indvidiual' in the last label is a typo; it is kept unchanged
# because the label is part of the displayed table.
summary_columns = [
    'best fitness',
    'mean of last gen',
    'std of last gen',
    'mean of best individual',
    'std of best indvidiual',
]
df = pd.DataFrame.from_dict(test_measurements, orient='index')
df.columns = summary_columns
df

Unnamed: 0,best fitness,mean of last gen,std of last gen,mean of best individual,std of best indvidiual
"10,false",-0.900585,-0.711345,0.182403,-0.871345,0.034696
"50,false",-0.929825,-0.699181,0.215567,-0.88538,0.03237
"100,false",-0.923977,-0.737544,0.230278,-0.908772,0.012597
"10,True",-0.923977,-0.655205,0.15912,-0.870175,0.050631
"50,True",-0.976608,-0.764912,0.20195,-0.91462,0.050115
"100,True",-0.912281,-0.746199,0.174594,-0.894737,0.027179
"10,True,10",-0.923977,-0.630409,0.184195,-0.884211,0.020393
"50,True,10",-0.976608,-0.789942,0.188256,-0.907602,0.041252
"100,True,10",-0.959064,-0.729123,0.206278,-0.920468,0.025784
