In [1]:
from pycgp.benchmarks.classification import X_train, y_train, X_test, y_test, PARAMS, EV_PARAMS
from pycgp.evolution import evolution
import random
import numpy as np

Statistics we want to measure for the classification task:
- best train error achieved
- mean and std of best train errors
- best test error achieved
- mean and std of best test errors
- mean and std of last generation's train error
- mean and std of last generation's test error

In [2]:
def _summarize(stat):
    """Reduce a runs-by-individuals table of costs to five summary numbers.

    Returns, in order: the overall minimum, the mean and std over every
    entry, and the mean and std of the per-run minima. The minimum is
    treated as "best", which is how the rest of this notebook labels it.
    """
    # axis=1 needs a rectangular table, i.e. every run must contribute the
    # same number of individuals.
    best_per_run = np.min(stat, axis=1)
    return [
        np.min(stat),           # best fitness
        np.mean(stat),          # mean over all final individuals of all runs
        np.std(stat),           # std over all final individuals of all runs
        np.mean(best_per_run),  # mean of the best individual of each run
        np.std(best_per_run),   # std of the best individual of each run
    ]


def run_experiment(PARAMS, EV_PARAMS, n_runs=5):
    """Repeat the evolution several times and summarise train/test costs.

    Each repetition calls ``evolution(PARAMS, EV_PARAMS, X_train, y_train)``
    and reads the individuals stored under ``result['final']``. For every
    such individual the stored ``fitness`` is taken as its train cost, and
    ``EV_PARAMS['cost_func'](y_test, individual.execute(X_test))`` as its
    test cost.

    Parameters
    ----------
    PARAMS, EV_PARAMS
        Passed through unchanged to ``evolution``. ``EV_PARAMS`` must provide
        a ``'cost_func'`` callable taking ``(y_true, y_pred)``.
    n_runs : int, optional
        Number of independent repetitions. Defaults to 5, the value that was
        previously hard-coded.

    Returns
    -------
    tuple of (list, list)
        ``(train_results, test_results)``; each list holds
        ``[best, mean, std, mean of per-run best, std of per-run best]``.

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1 (there would be nothing to summarise).
    """
    if n_runs < 1:
        raise ValueError('n_runs must be at least 1')

    train_stat = []
    test_stat = []
    cost_func = EV_PARAMS['cost_func']
    for i in range(n_runs):
        # Lightweight progress indicator; the runs take minutes each.
        print(i, end=', ')
        result = evolution(PARAMS, EV_PARAMS, X_train, y_train)
        final_population = result['final']

        train_stat.append([ind.fitness for ind in final_population])
        test_stat.append([cost_func(y_test, ind.execute(X_test)) for ind in final_population])

    train_results = _summarize(train_stat)
    test_results = _summarize(test_stat)

    print('Train: ', train_results)
    print('Test: ', test_results)
    return train_results, test_results
        
        

In [3]:
# Summary statistics per experiment, keyed by a configuration label such as
# '10,false'; the experiment cells below fill these in.
train_measurements, test_measurements = dict(), dict()

### BinClassification, PM, 10 nodes

In [4]:
%%time
# NOTE(review): only Python's `random` module is seeded here; if pycgp also
# draws from numpy's global RNG the runs are not fully reproducible - confirm.
random.seed(1)

# Pin the baseline configuration explicitly. Later cells overwrite these
# entries of PARAMS / EV_PARAMS in place, so without this a re-run of this
# cell would silently reuse their settings under the '10,false' key.
# NOTE(review): assumes 10 columns / gems disabled are the library defaults
# this cell originally relied on - confirm against pycgp.benchmarks.
PARAMS['n_cols'] = 10
EV_PARAMS['gems'] = False

key = '10,false'
results = run_experiment(PARAMS, EV_PARAMS)
# run_experiment returns (train summary, test summary).
train_measurements[key] = results[0]
test_measurements[key] = results[1]

0, 1, 2, 3, 4, Train:  [-0.94221105527638194, -0.78954773869346739, 0.16938666156201132, -0.89899497487437185, 0.035190233969872566]
Test:  [-0.92982456140350878, -0.77590643274853799, 0.15604916626933713, -0.87836257309941534, 0.032956731704340068]
CPU times: user 4min 31s, sys: 891 ms, total: 4min 32s
Wall time: 5min 24s


### BinClassification, PM, 50 nodes

In [5]:
%%time
# Configuration for this run: n_cols = 50, gems switched off.
key = '50,false'
PARAMS['n_cols'] = 50
EV_PARAMS['gems'] = False

# Reseed Python's RNG right before the experiment starts.
random.seed(1)
results = run_experiment(PARAMS, EV_PARAMS)

# run_experiment returns the pair (train summary, test summary).
train_measurements[key], test_measurements[key] = results

0, 1, 2, 3, 4, Train:  [-0.95979899497487442, -0.85497487437185937, 0.18786110736851766, -0.93266331658291457, 0.020062778671032303]
Test:  [-0.92982456140350878, -0.82643274853801174, 0.17904839864828093, -0.89941520467836256, 0.02379764906359743]
CPU times: user 11min, sys: 3.16 s, total: 11min 3s
Wall time: 15min 45s


### BinClassification, PM, 100 nodes

In [6]:
%%time
# Configuration for this run: n_cols = 100, gems switched off.
key = '100,false'
PARAMS['n_cols'] = 100
EV_PARAMS['gems'] = False

# Reseed Python's RNG right before the experiment starts.
random.seed(1)
results = run_experiment(PARAMS, EV_PARAMS)

# run_experiment returns the pair (train summary, test summary).
train_measurements[key], test_measurements[key] = results

0, 1, 2, 3, 4, Train:  [-0.95477386934673369, -0.84090452261306525, 0.224377030670764, -0.93115577889447232, 0.021177193473070861]
Test:  [-0.93567251461988299, -0.81450292397660828, 0.20466512906258533, -0.89590643274853787, 0.031687057740687877]
CPU times: user 16min 35s, sys: 1.72 s, total: 16min 36s
Wall time: 17min 46s


### BinClassification, PM, 10 nodes, gems

In [7]:
%%time
# Configuration for this run: n_cols = 10, gems switched on.
# NOTE(review): j_box_size is not set here, so it is whatever EV_PARAMS
# currently holds; later cells change it to 10 without restoring it.
key = '10,True'
PARAMS['n_cols'] = 10
EV_PARAMS['gems'] = True

# Reseed Python's RNG right before the experiment starts.
random.seed(1)
results = run_experiment(PARAMS, EV_PARAMS)

# run_experiment returns the pair (train summary, test summary).
train_measurements[key], test_measurements[key] = results

0, 1, 2, 3, 4, Train:  [-0.92964824120603018, -0.76462311557788953, 0.20368529599742949, -0.88793969849246235, 0.032395500424695883]
Test:  [-0.89473684210526316, -0.74502923976608182, 0.18699556245600066, -0.85263157894736852, 0.026516453915215531]
CPU times: user 4min 37s, sys: 500 ms, total: 4min 37s
Wall time: 4min 54s


### BinClassification, PM, 50 nodes, gems

In [8]:
%%time
# Configuration for this run: n_cols = 50, gems switched on.
# NOTE(review): j_box_size is not set here, so it is whatever EV_PARAMS
# currently holds; later cells change it to 10 without restoring it.
key = '50,True'
PARAMS['n_cols'] = 50
EV_PARAMS['gems'] = True

# Reseed Python's RNG right before the experiment starts.
random.seed(1)
results = run_experiment(PARAMS, EV_PARAMS)

# run_experiment returns the pair (train summary, test summary).
train_measurements[key], test_measurements[key] = results

0, 1, 2, 3, 4, Train:  [-0.95477386934673369, -0.85356783919598, 0.16817965649466998, -0.93919597989949755, 0.012349955504969362]
Test:  [-0.92397660818713445, -0.82736842105263164, 0.14926946119719081, -0.89941520467836256, 0.025463790710031863]
CPU times: user 9min 21s, sys: 812 ms, total: 9min 21s
Wall time: 10min 9s


### BinClassification, PM, 100 nodes, gems

In [9]:
%%time
# Configuration for this run: n_cols = 100, gems switched on.
# NOTE(review): j_box_size is not set here, so it is whatever EV_PARAMS
# currently holds; later cells change it to 10 without restoring it.
key = '100,True'
PARAMS['n_cols'] = 100
EV_PARAMS['gems'] = True

# Reseed Python's RNG right before the experiment starts.
random.seed(1)
results = run_experiment(PARAMS, EV_PARAMS)

# run_experiment returns the pair (train summary, test summary).
train_measurements[key], test_measurements[key] = results

0, 1, 2, 3, 4, Train:  [-0.95477386934673369, -0.79748743718592963, 0.23579461620826334, -0.92814070351758793, 0.020876962892302512]
Test:  [-0.92397660818713445, -0.77964912280701748, 0.22514373811976476, -0.90175438596491231, 0.019360170008475841]
CPU times: user 16min 33s, sys: 1.58 s, total: 16min 34s
Wall time: 17min 29s


### BinClassification, PM, 10 nodes, gems, j_box_size 10

In [12]:
%%time
# Configuration for this run: n_cols = 10, gems switched on, j_box_size = 10.
key = '10,True,10'
PARAMS['n_cols'] = 10
EV_PARAMS['gems'] = True
EV_PARAMS['j_box_size'] = 10

# Reseed Python's RNG right before the experiment starts.
random.seed(1)
results = run_experiment(PARAMS, EV_PARAMS)

# run_experiment returns the pair (train summary, test summary).
train_measurements[key], test_measurements[key] = results

0, 1, 2, 3, 4, Train:  [-0.94723618090452266, -0.76653266331658287, 0.17617336523956406, -0.87939698492462315, 0.047751915651073784]
Test:  [-0.92982456140350878, -0.74619883040935675, 0.1673209686675981, -0.84561403508771915, 0.049289608200480645]
CPU times: user 5min 20s, sys: 453 ms, total: 5min 21s
Wall time: 5min 44s


In [None]:
%%time
# Configuration for this run: n_cols = 50, gems switched on, j_box_size = 10.
key = '50,True,10'
PARAMS['n_cols'] = 50
EV_PARAMS['gems'] = True
EV_PARAMS['j_box_size'] = 10

# Reseed Python's RNG right before the experiment starts.
random.seed(1)
results = run_experiment(PARAMS, EV_PARAMS)

# run_experiment returns the pair (train summary, test summary).
train_measurements[key], test_measurements[key] = results

0, 1, 2, 3, 4, 

In [None]:
%%time
# Configuration for this run: n_cols = 100, gems switched on, j_box_size = 10.
key = '100,True,10'
PARAMS['n_cols'] = 100
EV_PARAMS['gems'] = True
EV_PARAMS['j_box_size'] = 10

# Reseed Python's RNG right before the experiment starts.
random.seed(1)
results = run_experiment(PARAMS, EV_PARAMS)

# run_experiment returns the pair (train summary, test summary).
train_measurements[key], test_measurements[key] = results

In [10]:
import pandas as pd

# Train summary table: one row per experiment key, one column per statistic
# in the order run_experiment returns them.
df = pd.DataFrame.from_dict(train_measurements, orient='index')
df.columns = [
    'best fitness',
    'mean of last gen',
    'std of last gen',
    'mean of best individual',
    'std of best individual',  # label previously misspelled "indvidiual"
]
df

Unnamed: 0,best fitness,mean of last gen,std of last gen,mean of best individual,std of best indvidiual
"10,false",-0.942211,-0.789548,0.169387,-0.898995,0.03519
"50,false",-0.959799,-0.854975,0.187861,-0.932663,0.020063
"100,false",-0.954774,-0.840905,0.224377,-0.931156,0.021177
"10,True",-0.929648,-0.764623,0.203685,-0.88794,0.032396
"50,True",-0.954774,-0.853568,0.16818,-0.939196,0.01235
"100,True",-0.954774,-0.797487,0.235795,-0.928141,0.020877


In [11]:
import pandas as pd

# Test summary table: one row per experiment key, one column per statistic
# in the order run_experiment returns them.
df = pd.DataFrame.from_dict(test_measurements, orient='index')
df.columns = [
    'best fitness',
    'mean of last gen',
    'std of last gen',
    'mean of best individual',
    'std of best individual',  # label previously misspelled "indvidiual"
]
df

Unnamed: 0,best fitness,mean of last gen,std of last gen,mean of best individual,std of best indvidiual
"10,false",-0.929825,-0.775906,0.156049,-0.878363,0.032957
"50,false",-0.929825,-0.826433,0.179048,-0.899415,0.023798
"100,false",-0.935673,-0.814503,0.204665,-0.895906,0.031687
"10,True",-0.894737,-0.745029,0.186996,-0.852632,0.026516
"50,True",-0.923977,-0.827368,0.149269,-0.899415,0.025464
"100,True",-0.923977,-0.779649,0.225144,-0.901754,0.01936
