# Gem stats analysis - symbolic regression, binary classification and Santa Fe

In [3]:
import os
import pickle
from itertools import product

import numpy as np
import pandas as pd

from pycgp import probabilistic_mutation, point_mutation, single_mutation
from pycgp.gems import MatchByActiveStrategy, MatchSMStrategy, MatchPMStrategy, MatchPhenotypeStrategy

from utils import DataIterator

In [4]:
# One DataIterator per results folder; the cells below walk each of them
# through iterate_folder().
symreg, bincls, santaf = (
    DataIterator(results_dir)
    for results_dir in (
        'scripts/symbolic_out/',
        'scripts/bin_class_out/',
        'scripts/santa_fe_out/',
    )
)

In [5]:
# Symbolic regression: build one summary row per item yielded by
# symreg.iterate_folder(), then average the rows that share a 'gems' value.
data = []

for m, s, g, c, d in symreg.iterate_folder():
    # d is iterated as a collection of records exposing 'best' and
    # 'mean_of_generation'; only g (the 'gems' column) is kept from the rest.
    best_fitnesses = [entry['best'].fitness for entry in d]
    last_generation_means = [entry['mean_of_generation'][-1] for entry in d]
    data.append([
        g,
        np.mean(best_fitnesses),
        # Median (not mean) across the records, as in the original analysis.
        np.median(last_generation_means),
    ])

df = pd.DataFrame(data)
df.columns = ['gems', 'best mean', 'last generation mean']
# Bare expression so the notebook renders the grouped table as cell output.
df.groupby('gems').mean()

gems,best mean,last generation mean
0,0.274295,1.409832
5,0.267794,3.053452
10,0.261354,3.818298


In [5]:
from pycgp.benchmarks.classification import cost_func, X_test, y_test

# Binary classification: build one summary row per item yielded by
# bincls.iterate_folder(), then average the rows that share a 'gems' value.
# NOTE(review): the train and last-generation columns are negated and scaled
# by 100, while the test column is only negated, so 'mean best test' is
# reported on a different scale from 'mean best train' - confirm this is
# intended.
data = []

for m, s, g, c, d in bincls.iterate_folder():
    train_scores = [-entry['best'].fitness * 100 for entry in d]
    # Re-evaluate each best individual on the held-out X_test / y_test.
    test_scores = [
        -cost_func(y_test, entry['best'].execute(X_test)) for entry in d
    ]
    last_generation_scores = [
        -entry['mean_of_generation'][-1] * 100 for entry in d
    ]
    data.append([
        g,
        np.mean(train_scores),
        np.mean(test_scores),
        np.mean(last_generation_scores),
    ])

df = pd.DataFrame(data)
df.columns = ['gems', 'mean best train', 'mean best test', 'last generation mean']
# Bare expression so the notebook renders the grouped table as cell output.
df.groupby('gems').mean()

gems,mean best train,mean best test,last generation mean
0,92.183417,0.893704,71.441039
5,92.239531,0.892495,70.858961
10,92.327471,0.895127,71.20134


In [6]:
# Santa Fe: build one summary row per item yielded by santaf.iterate_folder(),
# then average the rows that share a 'gems' value. Fitness values are negated
# before aggregation.
data = []

for m, s, g, c, d in santaf.iterate_folder():
    best_scores = [-entry['best'].fitness for entry in d]
    last_generation_scores = [-entry['mean_of_generation'][-1] for entry in d]
    # Both columns are plain means across the records of this item.
    data.append([g, np.mean(best_scores), np.mean(last_generation_scores)])

df = pd.DataFrame(data)
df.columns = ['gems', 'best mean', 'last generation mean']
# Bare expression so the notebook renders the grouped table as cell output.
df.groupby('gems').mean()

gems,best mean,last generation mean
0,53.606667,33.517333
5,56.003333,34.855333
10,55.61,34.954667
