# Binary classification using probabilistic mutation

In [1]:
from pycgp.benchmarks.classification import X_train, y_train, X_test, y_test, PARAMS, EV_PARAMS
from pycgp.evolution import evolution
import random
import numpy as np

Statistics we want to measure for the classification task:
- best train error achieved
- mean and std of best train errors
- best test error achieved
- mean and std of best test errors
- mean and std of last generation's train error
- mean and std of last generation's test error

In [2]:
def _summarize_runs(stat):
    """Collapse per-run score lists into the five reported summary numbers.

    `stat` holds one list of scores per run (one score per individual of that
    run's final population). Lower is treated as better, so "best" is the
    minimum.
    """
    scores = np.asarray(stat)
    best_per_run = np.min(scores, axis=1)
    return [
        np.min(scores),         # best score over every run
        np.mean(scores),        # mean over all final-generation individuals
        np.std(scores),         # std over all final-generation individuals
        np.mean(best_per_run),  # mean of each run's best individual
        np.std(best_per_run),   # std of each run's best individual
    ]


def run_experiment(PARAMS, EV_PARAMS, n_runs=5):
    """Repeat `evolution` several times and summarise the final populations.

    Every run evolves on (X_train, y_train). Each individual in
    ``result['final']`` contributes its stored ``fitness`` to the train
    statistics, and ``EV_PARAMS['cost_func'](y_test, individual.execute(X_test))``
    to the test statistics.

    Parameters
    ----------
    PARAMS, EV_PARAMS
        Passed unchanged to `evolution`. `EV_PARAMS` must contain 'cost_func'.
    n_runs : int, default 5
        Number of independent evolution runs to aggregate (previously the
        hard-coded value 5).

    Returns
    -------
    (train_results, test_results)
        Two lists of five numbers each:
        [best, mean of last gen, std of last gen,
         mean of per-run best, std of per-run best].

    Raises
    ------
    ValueError
        If `n_runs` is smaller than 1, since there would be nothing to summarise.
    """
    if n_runs < 1:
        raise ValueError('n_runs must be at least 1, got {!r}'.format(n_runs))

    train_stat = []
    test_stat = []
    cost_func = EV_PARAMS['cost_func']
    for i in range(n_runs):
        print(i, end=', ')  # lightweight progress indicator
        final_population = evolution(PARAMS, EV_PARAMS, X_train, y_train)['final']

        train_stat.append([ind.fitness for ind in final_population])
        test_stat.append([cost_func(y_test, ind.execute(X_test)) for ind in final_population])

    train_results = _summarize_runs(train_stat)
    test_results = _summarize_runs(test_stat)

    print('Train: ', train_results)
    print('Test: ', test_results)
    return train_results, test_results
        
        

In [3]:
# Result tables filled in by the experiment cells: one entry per configuration
# key, holding the summary list returned by run_experiment.
train_measurements, test_measurements = {}, {}

In [4]:
from pycgp.gems import GemSM, MatchSMStrategy
from pycgp.mutation import probabilistic_mutation

# Select the probabilistic mutation operator together with the matching
# gem type and match strategy for every experiment in this notebook.
EV_PARAMS.update(
    gem_type=GemSM,
    mutation=probabilistic_mutation,
    match_strategy=MatchSMStrategy,
)

### BinClassification, PM, 10 nodes

In [5]:
%%time
random.seed(1)

# State the configuration explicitly. Later cells mutate PARAMS / EV_PARAMS in
# place, so without these two lines re-running this cell after any of them
# would record a different setup under the '10,false' key.
PARAMS['n_cols'] = 10
EV_PARAMS['gems'] = False

key = '10,false'
results = run_experiment(PARAMS, EV_PARAMS)
train_measurements[key] = results[0]
test_measurements[key] = results[1]

0, 1, 2, 3, 4, Train:  [-0.9346733668341709, -0.72522613065326635, 0.16940097248108835, -0.91859296482412067, 0.011524969790130377]
Test:  [-0.9064327485380117, -0.72070175438596507, 0.15832542516955778, -0.89824561403508763, 0.0070175438596491377]
CPU times: user 4min 24s, sys: 328 ms, total: 4min 25s
Wall time: 4min 46s


### BinClassification, PM, 50 nodes

In [6]:
%%time
# Seed Python's `random` module before the run.
random.seed(1)

# Configuration for this cell: n_cols = 50, gems switched off.
EV_PARAMS['gems'] = False
PARAMS['n_cols'] = 50

key = '50,false'
results = run_experiment(PARAMS, EV_PARAMS)
# run_experiment returns (train summary, test summary).
train_measurements[key], test_measurements[key] = results

0, 1, 2, 3, 4, Train:  [-0.95477386934673369, -0.64562814070351748, 0.20988967438213121, -0.92763819095477396, 0.020250695155386814]
Test:  [-0.92397660818713445, -0.63812865497076021, 0.19540440169672602, -0.89122807017543848, 0.017967592392675084]
CPU times: user 6min 30s, sys: 812 ms, total: 6min 31s
Wall time: 7min 23s


### BinClassification, PM, 100 nodes

In [7]:
%%time
# Seed Python's `random` module before the run.
random.seed(1)

# Configuration for this cell: n_cols = 100, gems switched off.
EV_PARAMS['gems'] = False
PARAMS['n_cols'] = 100

key = '100,false'
results = run_experiment(PARAMS, EV_PARAMS)
# run_experiment returns (train summary, test summary).
train_measurements[key], test_measurements[key] = results

0, 1, 2, 3, 4, Train:  [-0.95226130653266328, -0.67809045226130649, 0.16612478595615118, -0.93517587939698499, 0.020062778671032275]
Test:  [-0.93567251461988299, -0.66573099415204684, 0.15584706450049976, -0.90526315789473677, 0.033369222497513708]
CPU times: user 7min 14s, sys: 969 ms, total: 7min 15s
Wall time: 8min 1s


### BinClassification, PM, 10 nodes, gems

In [8]:
%%time
# Seed Python's `random` module before the run.
random.seed(1)

# Configuration for this cell: n_cols = 10, gems switched on.
EV_PARAMS['gems'] = True
PARAMS['n_cols'] = 10

key = '10,True'
results = run_experiment(PARAMS, EV_PARAMS)
# run_experiment returns (train summary, test summary).
train_measurements[key], test_measurements[key] = results

0, 1, 2, 3, 4, Train:  [-0.9346733668341709, -0.72522613065326635, 0.16940097248108835, -0.91859296482412067, 0.011524969790130377]
Test:  [-0.9064327485380117, -0.72070175438596507, 0.15832542516955778, -0.89824561403508763, 0.0070175438596491377]
CPU times: user 3min 11s, sys: 203 ms, total: 3min 11s
Wall time: 3min 24s


### BinClassification, PM, 50 nodes, gems

In [9]:
%%time
# Seed Python's `random` module before the run.
random.seed(1)

# Configuration for this cell: n_cols = 50, gems switched on.
EV_PARAMS['gems'] = True
PARAMS['n_cols'] = 50

key = '50,True'
results = run_experiment(PARAMS, EV_PARAMS)
# run_experiment returns (train summary, test summary).
train_measurements[key], test_measurements[key] = results

0, 1, 2, 3, 4, Train:  [-0.95477386934673369, -0.64562814070351748, 0.20988967438213121, -0.92763819095477396, 0.020250695155386814]
Test:  [-0.92397660818713445, -0.63812865497076021, 0.19540440169672602, -0.89122807017543848, 0.017967592392675084]
CPU times: user 4min 46s, sys: 344 ms, total: 4min 47s
Wall time: 4min 59s


### BinClassification, PM, 100 nodes, gems

In [10]:
%%time
# Seed Python's `random` module before the run.
random.seed(1)

# Configuration for this cell: n_cols = 100, gems switched on.
EV_PARAMS['gems'] = True
PARAMS['n_cols'] = 100

key = '100,True'
results = run_experiment(PARAMS, EV_PARAMS)
# run_experiment returns (train summary, test summary).
train_measurements[key], test_measurements[key] = results

0, 1, 2, 3, 4, Train:  [-0.95226130653266328, -0.67809045226130649, 0.16612478595615118, -0.93517587939698499, 0.020062778671032275]
Test:  [-0.93567251461988299, -0.66573099415204684, 0.15584706450049976, -0.90526315789473677, 0.033369222497513708]
CPU times: user 7min 21s, sys: 656 ms, total: 7min 22s
Wall time: 7min 58s


### BinClassification, PM, 10 nodes, gems, j_box_size 10

In [11]:
%%time
# Seed Python's `random` module before the run.
random.seed(1)

# Configuration for this cell: n_cols = 10, gems switched on, j_box_size = 10.
EV_PARAMS['j_box_size'] = 10
EV_PARAMS['gems'] = True
PARAMS['n_cols'] = 10

key = '10,True,10'
results = run_experiment(PARAMS, EV_PARAMS)
# run_experiment returns (train summary, test summary).
train_measurements[key], test_measurements[key] = results

0, 1, 2, 3, 4, Train:  [-0.9346733668341709, -0.72522613065326635, 0.16940097248108835, -0.91859296482412067, 0.011524969790130377]
Test:  [-0.9064327485380117, -0.72070175438596507, 0.15832542516955778, -0.89824561403508763, 0.0070175438596491377]
CPU times: user 3min 15s, sys: 453 ms, total: 3min 15s
Wall time: 3min 34s


In [12]:
%%time
# Seed Python's `random` module before the run.
random.seed(1)

# Configuration for this cell: n_cols = 50, gems switched on, j_box_size = 10.
EV_PARAMS['j_box_size'] = 10
EV_PARAMS['gems'] = True
PARAMS['n_cols'] = 50

key = '50,True,10'
results = run_experiment(PARAMS, EV_PARAMS)
# run_experiment returns (train summary, test summary).
train_measurements[key], test_measurements[key] = results

0, 1, 2, 3, 4, Train:  [-0.95477386934673369, -0.64562814070351748, 0.20988967438213121, -0.92763819095477396, 0.020250695155386814]
Test:  [-0.92397660818713445, -0.63812865497076021, 0.19540440169672602, -0.89122807017543848, 0.017967592392675084]
CPU times: user 5min 27s, sys: 734 ms, total: 5min 27s
Wall time: 6min


In [13]:
%%time
# Seed Python's `random` module before the run.
random.seed(1)

# Configuration for this cell: n_cols = 100, gems switched on, j_box_size = 10.
EV_PARAMS['j_box_size'] = 10
EV_PARAMS['gems'] = True
PARAMS['n_cols'] = 100

key = '100,True,10'
results = run_experiment(PARAMS, EV_PARAMS)
# run_experiment returns (train summary, test summary).
train_measurements[key], test_measurements[key] = results

0, 1, 2, 3, 4, Train:  [-0.95226130653266328, -0.67809045226130649, 0.16612478595615118, -0.93517587939698499, 0.020062778671032275]
Test:  [-0.93567251461988299, -0.66573099415204684, 0.15584706450049976, -0.90526315789473677, 0.033369222497513708]
CPU times: user 7min 15s, sys: 688 ms, total: 7min 16s
Wall time: 7min 57s


# Summary

In [14]:
import pandas as pd

# One row per experiment key, one column per train summary statistic.
df = pd.DataFrame.from_dict(train_measurements, orient='index')
df.columns = [
    'best fitness',
    'mean of last gen',
    'std of last gen',
    'mean of best individual',
    'std of best indvidiual',
]
df

Unnamed: 0,best fitness,mean of last gen,std of last gen,mean of best individual,std of best indvidiual
"10,false",-0.934673,-0.725226,0.169401,-0.918593,0.011525
"50,false",-0.954774,-0.645628,0.20989,-0.927638,0.020251
"100,false",-0.952261,-0.67809,0.166125,-0.935176,0.020063
"10,True",-0.934673,-0.725226,0.169401,-0.918593,0.011525
"50,True",-0.954774,-0.645628,0.20989,-0.927638,0.020251
"100,True",-0.952261,-0.67809,0.166125,-0.935176,0.020063
"10,True,10",-0.934673,-0.725226,0.169401,-0.918593,0.011525
"50,True,10",-0.954774,-0.645628,0.20989,-0.927638,0.020251
"100,True,10",-0.952261,-0.67809,0.166125,-0.935176,0.020063


In [15]:
import pandas as pd

# One row per experiment key, one column per test summary statistic.
df = pd.DataFrame.from_dict(test_measurements, orient='index')
df.columns = [
    'best fitness',
    'mean of last gen',
    'std of last gen',
    'mean of best individual',
    'std of best indvidiual',
]
df

Unnamed: 0,best fitness,mean of last gen,std of last gen,mean of best individual,std of best indvidiual
"10,false",-0.906433,-0.720702,0.158325,-0.898246,0.007018
"50,false",-0.923977,-0.638129,0.195404,-0.891228,0.017968
"100,false",-0.935673,-0.665731,0.155847,-0.905263,0.033369
"10,True",-0.906433,-0.720702,0.158325,-0.898246,0.007018
"50,True",-0.923977,-0.638129,0.195404,-0.891228,0.017968
"100,True",-0.935673,-0.665731,0.155847,-0.905263,0.033369
"10,True,10",-0.906433,-0.720702,0.158325,-0.898246,0.007018
"50,True,10",-0.923977,-0.638129,0.195404,-0.891228,0.017968
"100,True,10",-0.935673,-0.665731,0.155847,-0.905263,0.033369


In [16]:
from pycgp.counter import Counter

In [20]:
# Show which keys are present in the dict exposed by Counter.get().
Counter.get().dict.keys()

dict_keys(['g_better', 'g_worse', 'mean', 'best', 'gens', 'g_same_as_parent'])

In [23]:
# Look up the 'g_same_as_parent' entry of the counter dict.
# NOTE(review): presumably counts gem applications that left fitness equal to
# the parent's — confirm against pycgp.counter.
Counter.get().dict['g_same_as_parent']

0