# Thesis work--2
# A new method for multivariate regression problem for improving fitness in Genetic Programming

### Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from random import randrange, uniform
from sklearn.utils import check_random_state
from sklearn.metrics import mean_squared_error

### Global variables and functions for tuning

In [2]:
FEATURES = 2
NUMBER_OF_GENERATION = 20
ROWS = 300
NUMBER_OF_REGIONS = 3
formula = lambda X: X[:, 0]**2 - X[:, 1]**2 + X[:, 1] - 1

### Generating Training Samples

In [3]:
rng = check_random_state(0)
X = rng.uniform(-1, 1, ROWS).reshape(ROWS//FEATURES, FEATURES)
Y = formula(X)

### Splitting the dataset into the Training set and Test set

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)

### Applying PCA

In [5]:
from sklearn.decomposition import PCA
pca = PCA(n_components = 1)
#print(X_train)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)
explained_variance = pca.explained_variance_ratio_
#print(X_train_pca)
#print(X_test_pca)
# print(np.shape(X_train),np.shape(X_test))
#print(explained_variance)

### Calculating DV

In [6]:
# sort X_train_pca and y_train by index
sorted_indexes = np.argsort(X_train_pca,axis=0)
sorted_x_train_pca = X_train_pca[sorted_indexes]
sorted_y_train = y_train[sorted_indexes]

# Finding Change of Slope
slope1 = []
slope2 = []
for itr in range(1,len(sorted_x_train_pca)):
    slope1.append((sorted_y_train[itr]-sorted_y_train[itr-1])/(sorted_x_train_pca[itr]-sorted_x_train_pca[itr-1]))
for itr in range(1,len(slope1)):
    slope2.append((slope1[itr]-slope1[itr-1])/(sorted_x_train_pca[itr]-sorted_x_train_pca[itr-1]))

# normalize slope2 
normalized_slope2 = (slope2-min(slope2))/(max(slope2)-min(slope2))

# Calculating Quantiles
normalized_slope2 = np.reshape(normalized_slope2,len(normalized_slope2))
quantile_ranges = pd.qcut(normalized_slope2,NUMBER_OF_REGIONS,labels=False,retbins=True)
quantile_ranges = quantile_ranges[1]

# Adding the difficult Vectors
difficult_points = {}
for q_ind in range(NUMBER_OF_REGIONS):
    low = quantile_ranges[q_ind]
    high = quantile_ranges[q_ind+1]
    difficult_points[q_ind] = []
    for n_ind in range(0,len(normalized_slope2)):
        if normalized_slope2[n_ind] >= low and normalized_slope2[n_ind] <= high:
            difficult_points[q_ind].append(n_ind)
print(difficult_points)

{0: [2, 3, 7, 10, 12, 14, 19, 22, 24, 25, 29, 31, 33, 35, 37, 41, 44, 49, 51, 53, 54, 56, 62, 64, 67, 72, 74, 78, 79, 82, 84, 86, 90, 94, 97, 99, 101, 103, 110, 113], 1: [0, 1, 5, 9, 11, 17, 18, 27, 36, 40, 46, 47, 48, 58, 59, 60, 65, 66, 69, 70, 75, 76, 79, 81, 87, 88, 89, 91, 92, 95, 98, 105, 106, 107, 108, 111, 114, 115, 116, 117], 2: [4, 6, 8, 11, 13, 15, 16, 20, 21, 23, 26, 28, 30, 32, 34, 38, 39, 42, 43, 45, 50, 52, 55, 57, 61, 63, 68, 71, 73, 77, 80, 83, 85, 93, 96, 100, 102, 104, 109, 112]}


### Training of converted PCA

In [8]:
from gplearn.genetic import SymbolicRegressor
est_gp = SymbolicRegressor(population_size=200,
                           generations=NUMBER_OF_GENERATION, stopping_criteria=0.01,
                           p_crossover=0.7, p_subtree_mutation=0.1,
                           p_hoist_mutation=0.05, p_point_mutation=0.1,
                           max_samples=0.9, verbose=1,
                           parsimony_coefficient=0.01, random_state=0)


### Getting the different regions of difficulty level

In [9]:
# Hard to evolve points
hard_to_evolve_x = X_train_pca[difficult_points[0]]
hard_to_evolve_y = y_train[difficult_points[0]]

# Medium to evolve points
medium_to_evolve_x = X_train_pca[difficult_points[1]]
medium_to_evolve_y = y_train[difficult_points[1]]

# Easy to evolve points
easy_to_evolve_x = X_train_pca[difficult_points[2]]
easy_to_evolve_y = y_train[difficult_points[2]]


### Feed the system hard to evolve points first

In [10]:
est_gp.fit(hard_to_evolve_x,hard_to_evolve_y)
print(est_gp._program)

    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    46.36 103.95173308566565       15 0.5226602104681045 0.18657394344567546      3.97s
   1    11.86 23.322916748615256       33 0.3884718195129449 0.7602442076253353      4.63s
   2    11.55 5.540024311412967       35 0.39116604487612255 0.7285268396738209      4.71s
   3     9.02 1.7667541914578937       11 0.37055460998054696 0.7542954557966416      4.71s
   4     7.01 0.8683875072631347        9 0.3579271037837494 0.753341459143939      4.46s
   5     7.51 0.8936308508199103        9 0.3690778501839116 0.6529847415424794      4.27s
   6     7.03 0.8814993504082793        7 0.369011178876816 0.6078763650522377      4.03s
   7     5.42 0.7360323971326159        5 0.3550027430328087 0.8006980076021338      3.80s
   8     5.47 0.75857284449

### Feed the system Medium to evolve points keeping the current modal state appending generation by current generation

In [11]:
est_gp.set_params(generations=NUMBER_OF_GENERATION*2, warm_start=True)
est_gp.fit(medium_to_evolve_x,medium_to_evolve_y)
print(est_gp._program)

    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
  20      5.4 0.8903543725132039        5 0.3393817360350701 0.7455350791063368      0.24s
  21     5.08 0.7943857101105892        5 0.3416565270180202 0.7250619602597863      0.51s
  22     5.54 0.6894790128054473        5 0.32458809052611953 0.8786778886868923      0.70s
  23     5.57 0.8629974275560174        5 0.3406840738790801 0.6312661117174043      0.85s
  24     5.91 1.000437640319763        5 0.33425546474095513 0.7916715207533717      0.97s
  25     5.46 0.6374379955687229        5 0.3283484783056425 0.8448343986711856      1.15s
  26     5.14 0.7438375907032123        5 0.3356697665269631 0.7789428046793003      1.23s
  27     5.05 0.6642443282170993        5 0.3188566175517007 0.827713218663819      1.21s
  28     5.34 1.39118788767

### Same process but for easy to evolve points and this is our result

In [12]:
est_gp.set_params(generations=NUMBER_OF_GENERATION*3, warm_start=True)
est_gp.fit(easy_to_evolve_x,easy_to_evolve_y)
print(est_gp._program)

    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
  40     5.05 0.7225369142325638        5 0.41481548031694193 0.9397840236417476      0.19s
  41     4.86 0.798482772282206        5 0.4120304407060219 0.9648493801400273      0.29s
  42     5.29 1.4473423551602473        5 0.417675859943919 0.9140406069989542      0.43s
  43     4.85 0.8995415782111235        5 0.41958015843496965 0.8969019205794977      0.47s
  44     5.14 0.9849580389314643        5 0.4260040859823168 0.8390865726533734      0.51s
  45     3.84 0.8126980226047249        5 0.39874060087898244 1.0844579385833826      0.55s
  46     4.47 0.7722303318209905        5 0.40226990838292437 1.0526941710479052      0.58s
  47     4.32 0.7457366675863559        5 0.40233619357999334 1.0520976042742842      0.57s
  48     4.27 0.81291747

### Training of data without PCA without DV

In [13]:
# Generating random data
rng = check_random_state(0)
X = rng.uniform(-1, 1, ROWS).reshape(ROWS//FEATURES, FEATURES)
Y = formula(X)
# Dividing it into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)
# Training the system
est_gp = SymbolicRegressor(population_size=400,
                           generations=NUMBER_OF_GENERATION, stopping_criteria=0.01,
                           p_crossover=0.7, p_subtree_mutation=0.1,
                           p_hoist_mutation=0.05, p_point_mutation=0.1,
                           max_samples=0.9, verbose=1,
                           parsimony_coefficient=0.01, random_state=0)
est_gp.fit(X_train,y_train)
print(est_gp._program)

    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    37.62 3240.640395038094        7 0.6545326193170877 0.6616696381861505      6.37s
   1     10.7 92.47048856168368       33 0.4709350621474633 0.2862887087816357      8.87s
   2     8.14 1.273082989831628        9 0.3569102194188221 0.3925020603803262      8.99s
   3     5.94 0.9222795634594203        9 0.28938466071396307 0.22662414357543156      8.46s
   4     7.12 0.9018641283795599        9 0.27050030819879933 0.39658331621190523      8.16s
   5      7.2 1.4341901879856875       13 0.2544773141806192 0.42613402596172145      8.04s
   6     8.99 1.2355242305923293        9 0.253047676152755 0.5536570046263042      7.50s
   7     9.06 0.7637524505958402       11 0.2545534634939961 0.3196812755892966      7.07s
   8     9.14 0.7732535419

### Calculate fitness 

In [14]:
x0 = X_test[:,0]
x1 = X_test[:,1]
predicted_formula_result_y = ((x0*x0)-(((x1/x1)-x1)+(x1*x1)))
fitness = mean_squared_error(y_test, predicted_formula_result_y)
print('Fitness of Training of data without PCA without DV: '+str(fitness))

Fitness of Training of data without PCA without DV: 3.4923529658221876e-33


### Training of data with PCA without DV

In [19]:
# Generating random data
rng = check_random_state(0)
X = rng.uniform(-1, 1, ROWS).reshape(ROWS//FEATURES, FEATURES)
Y = formula(X)

# Dividing it into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)

# Convert it to PCA
pca = PCA(n_components = 1)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Training the system
est_gp = SymbolicRegressor(population_size=400,
                           generations=NUMBER_OF_GENERATION, stopping_criteria=0.01,
                           p_crossover=0.7, p_subtree_mutation=0.1,
                           p_hoist_mutation=0.05, p_point_mutation=0.1,
                           max_samples=0.9, verbose=1,
                           parsimony_coefficient=0.01, random_state=0)
est_gp.fit(X_train_pca,y_train)
print(est_gp._program)

    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    46.62  73.561646844872       15 0.45794064423252584 0.5832914875042755      8.85s
   1    12.11 2.2732534282980037        7 0.37893692918293836 0.5132012503244892     10.14s
   2      8.2 1.199192283177446        7 0.37817371139549316 0.5200702104114959     10.20s
   3     6.06 0.9396369646395795        7 0.38028213399020167 0.42934383959468914      9.32s
   4     5.74 0.9382336536739111        7 0.3675312186046419 0.6158526455291579      8.79s
   5     7.14 0.9837011059871148        7 0.3669262113467895 0.621297710849829      8.56s
   6     7.04 1.1911653009775511        7 0.35647760357587505 0.7153351807880591      7.88s
   7     7.15 1.3965400296965695        7 0.36429674944117313 0.6449628680003767      7.44s
   8      7.2 1.21795504

In [20]:
# Multiple runs still produces this result

### Calculate Fitness

In [22]:
x0 = X_test[:,0]
x1 = X_test[:,1]
predicted_formula_result_y = (x0-1)
fitness = mean_squared_error(y_test, predicted_formula_result_y)
print('Training of data with PCA without DV: '+str(fitness))

Training of data with PCA without DV: 1.1264060303374657


### Training of data with PCA with DV

In [49]:
def train_pca_dv(seed_value):
    # Generating random data
    rng = check_random_state(seed_value)
    X = rng.uniform(-1, 1, ROWS).reshape(ROWS//FEATURES, FEATURES)
    Y = formula(X)

    # Dividing it into training and test set
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)
    list_of_x_test.append(X_test)
    list_of_y_test.append(y_test)
    # Convert it to PCA
    pca = PCA(n_components = 1)
    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)

    # Applying DV
    # sort X_train_pca and y_train by index
    sorted_indexes = np.argsort(X_train_pca,axis=0)
    sorted_x_train_pca = X_train_pca[sorted_indexes]
    sorted_y_train = y_train[sorted_indexes]

    # Finding Change of Slope
    slope1 = []
    slope2 = []
    for itr in range(1,len(sorted_x_train_pca)):
        slope1.append((sorted_y_train[itr]-sorted_y_train[itr-1])/(sorted_x_train_pca[itr]-sorted_x_train_pca[itr-1]))
    for itr in range(1,len(slope1)):
        slope2.append((slope1[itr]-slope1[itr-1])/(sorted_x_train_pca[itr]-sorted_x_train_pca[itr-1]))

    # normalize slope2 
    normalized_slope2 = (slope2-min(slope2))/(max(slope2)-min(slope2))

    # Calculating Quantiles
    normalized_slope2 = np.reshape(normalized_slope2,len(normalized_slope2))
    quantile_ranges = pd.qcut(normalized_slope2,NUMBER_OF_REGIONS,labels=False,retbins=True)
    quantile_ranges = quantile_ranges[1]

    # Adding the difficult Vectors
    difficult_points = {}
    for q_ind in range(NUMBER_OF_REGIONS):
        low = quantile_ranges[q_ind]
        high = quantile_ranges[q_ind+1]
        difficult_points[q_ind] = []
        for n_ind in range(0,len(normalized_slope2)):
            if normalized_slope2[n_ind] >= low and normalized_slope2[n_ind] <= high:
                difficult_points[q_ind].append(n_ind)
    # Hard to evolve points
    hard_to_evolve_x = X_train_pca[difficult_points[0]]
    hard_to_evolve_y = y_train[difficult_points[0]]

    # Medium to evolve points
    medium_to_evolve_x = X_train_pca[difficult_points[1]]
    medium_to_evolve_y = y_train[difficult_points[1]]

    # Easy to evolve points
    easy_to_evolve_x = X_train_pca[difficult_points[2]]
    easy_to_evolve_y = y_train[difficult_points[2]]

    # Training the system
    est_gp = SymbolicRegressor(population_size=400,
                               generations=NUMBER_OF_GENERATION, stopping_criteria=0.01,
                               p_crossover=0.7, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.1,
                               max_samples=0.9, verbose=1,
                               parsimony_coefficient=0.01, 
                               random_state=0, 
                               init_depth=(2,17),
#                                init_method='half and half'
                              )
    est_gp.fit(hard_to_evolve_x,hard_to_evolve_y)
#     print(est_gp._program)
    est_gp.set_params(generations=NUMBER_OF_GENERATION*2, warm_start=True)
    est_gp.fit(medium_to_evolve_x,medium_to_evolve_y)
#     print(est_gp._program)
    est_gp.set_params(generations=NUMBER_OF_GENERATION*3, warm_start=True)
    est_gp.fit(easy_to_evolve_x,easy_to_evolve_y)
    print(est_gp._program)
    list_of_est_program.append(str(est_gp._program))

In [50]:
list_of_est_program = []
list_of_x_test = []
list_of_y_test = []
for itr in range(0,20):
    print('Run :',itr+1)
    train_pca_dv(itr) 

Run : 1
    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0  34438.9 1.1207478835364914e+56        7 0.6415020848222629 0.8565181100260962     66.05m
   1    20.46 1.9791146456120372        7 0.4516293623188091 0.5385158797199848     32.14m
   2      8.3 1.5232141056159176        5 0.38509015737798596 0.7548553836238179     20.55m
   3     7.24 8.50713673663683       11 0.3443478711539696 0.9108005944479538     14.97m
   4     5.85 1.2343594113532774       11 0.3834076910299042 0.5592622155645431     11.37m
   5     6.06 3.9579073733524943        5 0.355481330777238 1.02133482303055      9.02m
   6      5.3 1.7054419961940102        5 0.3572065149295345 1.0058081656598816      7.43m
   7     5.39 4.069511418643701        5 0.364536872022341 0.9398349518246225      6.17m
   8     6.16 1.07989

  19      1.7 19.193045555117227        1 0.3597325428646895 1.0496613126830856      0.00s
    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
  20     1.56 0.556563287072702        1 0.3712311903496276 1.4749293265583898      4.43s
  21     2.88 3.6060176015416867        1 0.40582289872218547 1.1636039512053689     15.22s
  22     2.54 9.111250863619206        1 0.40359239872289276 1.1836784511990033     19.88s
  23     1.42 0.5875884115968358        1 0.3867089521208598 1.3356294706172993     22.63s
  24     1.54 1.0242656821036888        1 0.40974635699349776 1.128292826763558     23.70s
  25     2.45 305316.06966201076        1 0.3801131151233208 1.3949920035951506     23.83s
  26     2.18 1.5786804482141477        1 0.3878344616021539 1.3254998852856528     23.86s
  27     1.54 0.71123640708

  39     3.46 1.2352826421502738        3 0.29711398907426506 0.7050733343639235      0.00s
    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
  40     4.28 0.8666327623606555        3 0.4205480231797404 0.8048573408963697      3.42s
  41     3.71 1.0091967007158442        3 0.41066813522863876 0.8937763324562845      6.80s
  42     3.47 0.726554631379158        3 0.417778243546947 0.8297853575915108      9.13s
  43     3.24 0.7580706135481943        3 0.4160918591182095 0.8449628174501478     10.24s
  44     3.72 2.442480460707094        5 0.39402911525506007 0.45520530690977357     11.30s
  45     3.53 0.7621783380024604        5 0.35184173780910677 0.8348917039233527     12.23s
  46     4.26 0.988263127739282        5 0.3324285716351905 1.0096101994885993     12.08s
  47     5.44 2.7283614094

  58     3.74 0.7716430883908708        3 0.3123525127382629 0.7402928537922591      2.38s
  59     3.84 0.9321753706405239        3 0.31658347753308586 0.7022141706388529      0.00s
add(-0.974, X0)
Run : 5
    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0  34438.9 2.444310580832821e+67        7 0.6283777133042467 0.4502364321986234     71.92m
   1    21.89 1.8904594173032172        3 0.4901872165430656 0.8915185832983346     34.93m
   2     5.92 1.1473058891470211        5 0.4654087623633697 0.9830079797721102     22.33m
   3     5.52 3.7268865790920542        5 0.4676766948736532 0.9625965871795592     16.21m
   4     4.51 0.90342786994773        5 0.4554792797263541 1.0723733235052513     12.32m
   5     5.03 1.9999055887270965        5 0.46066346441569966 1.0257156613011411      9.75m


  17     2.64 16.101418588369015        1 0.4287033293840514 1.2161234586934384     33.73s
  18     5.14 4.36531963921835        1 0.4410519915138646 1.1049854995251194     16.21s
  19      1.7 28.193860060980832        1 0.4334908958555004 1.1730353604503971      0.00s
    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
  20     1.56 0.5488431460870925        1 0.42743061623937184 0.9980269529827006      5.09s
  21     2.88 3.0847947900437136        3 0.37722533585549756 0.6813246693518942     17.06s
  22     2.62 6.403260948589679        3 0.3555219222348913 0.8766553919373505     21.50s
  23     2.37 0.5824924831316594        3 0.33416357270309766 1.0688805377234933     24.42s
  24     3.38 1.0094984881519584        3 0.3345693114199871 1.0652288892714885     25.32s
  25     4.28 172.745404107

  36     1.66 0.7088708527357042        1 0.4292301369225422 1.34018127759054     10.12s
  37     1.54 0.67848990983341        1 0.42545781226460433 1.3741321995119804      6.76s
  38      3.0 1.0825066865325508        1 0.42827208518332954 1.3488037432434536      3.48s
  39      1.6 1.066882157177605        1 0.4214263223247372 1.4104156089707844      0.00s
    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
  40     2.44 0.6971350686024792        1 0.3854417251698155 1.12641638899858      3.55s
  41     1.97 0.8059450654835072        1 0.3990379801474417 1.0040500941999446      7.18s
  42     1.72 0.5710172059566238        1 0.3942744683979903 1.0469216999450075      9.67s
  43     1.45 0.5668024822940803        1 0.3825932322431236 1.1520528253388078     10.74s
  44     1.88 0.884079272217493 

  56     7.18 1.4116340565572012        5 0.39502403983024187 1.0600826981477929      6.75s
  57     5.52 0.8144901381190331        5 0.3853166724732605 1.0889405377991437      4.78s
  58     5.59 0.7549724979184097        5 0.37763268531400457 1.1580964222324472      2.42s
  59     5.73 0.9022023096937036        5 0.3936644088160125 1.0138109107143767      0.00s
add(-0.922, add(X0, -0.232))
Run : 9
    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0  34438.9 5.3871607276320605e+53        7 0.5162663230211522 0.5091816894625708     75.40m
   1    19.54 2.6897071869908706        7 0.4115298409918557 0.4449856195879005     36.70m
   2     6.46 1.2057775763996696       15 0.3521218590319399 0.7305402558199388     23.45m
   3     6.65 5.622719488583563       19 0.34096200189451475 0.499628459639

  15     6.45 1.8875922023952245        3 0.2814873464731618 0.6604255576882201      1.29m
  16      3.9 0.679796288223358        3 0.27219976068531315 0.7440138297788583     55.26s
  17     4.43 11.715406474795065        3 0.27876893841136413 0.6848912302443996     35.21s
  18     6.98 10.526579915687675        3 0.2704501832731634 0.759760026488206     16.91s
  19     3.54 14.53623030010532        3 0.2729265294418041 0.7374729109704393      0.00s
    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
  20     3.47 0.5404909162713833        3 0.2637946078356646 0.7985030244120082      4.81s
  21     4.66 4.263690605252251        3 0.2712756929564202 0.7311732583252082     15.77s
  22     4.36 14.084065294782453        3 0.2755442599648579 0.692756155249269     19.98s
  23     3.26 0.61332255732132

  35     3.69 2.231017720291566        3 0.3585286822481854 0.8064122709405592     11.78s
  36     3.52 0.785220558313152        3 0.3436894326349621 0.9399655174595687      9.29s
  37     3.36 0.7651228657802649        3 0.3622483332424864 0.7729354119918499      6.21s
  38     4.76 3.2341561539084918        3 0.35335050081439273 0.8530159038446925      3.21s
  39     3.46 1.9155344009965865        3 0.36107797645152656 0.7834686231104886      0.00s
    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
  40     4.28 0.8791678185039621        3 0.33242166535466466 0.8499963078808488      3.46s
  41     3.71 0.9832329839803836        3 0.3008378230461663 0.8131221220898373      6.95s
  42     3.47 0.6212686400331701        3 0.30293637556628017 0.7942351494088127      9.34s
  43     3.24 0.633540345

  54     3.79 0.7741271869292629        3 0.3410370642264462 0.8178921811359776      8.64s
  55     4.14 0.8479811358122646        3 0.3369308807978069 0.8548478319937309      7.44s
  56     5.38 1.4426388469248865        3 0.34431925664158136 0.7883524493997607      5.86s
  57     3.55 0.7125651676674803        3 0.3427466065347764 0.8025063003610055      4.18s
  58     3.74 0.6726322838362703        3 0.3372775697483039 0.8668929622099439      2.11s
  59     3.84 0.892664789849959        3 0.3461167932481539 0.7721746199406079      0.00s
add(-0.922, X0)
Run : 13
    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0  34438.9 7.834609059225135e+69        7 0.5694383657913098 0.838829295198541     65.81m
   1    21.33 1.8632881643306005        3 0.41481972654717914 0.27222064508668187     32.00

  13     4.55 4.441889116223956        1 0.4577176624104389 1.1601710005965258      1.99m
  14     1.58 1.709826494548398        1 0.4467167537032419 1.2591791789612987      1.57m
  15     4.72 1.2853744106445466        1 0.4595396542424678 1.1437730741082657      1.19m
  16     2.13 0.7184328905634743        1 0.44322261714389183 1.290626407995449     51.32s
  17     2.64 1.3768299352751878        1 0.43590689355174966 1.3564679203247285     32.70s
  18     5.14 7.0716259018500205        1 0.44943996895438826 1.2346702417009816     15.70s
  19      1.7 19.338778322370946        1 0.4461790252527326 1.2640187350158822      0.00s
    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
  20     1.56 0.6171331507395834        1 0.46068082906549307 1.367409832652475      4.60s
  21     2.88 3.60516525616

  33     5.04 1.4746182207339273        3 0.36052431367976373 0.8078057840717324     16.36s
  34     3.95 1.0503046463821901        3 0.3622780111376391 0.791333804450849     14.05s
  35     3.69 4.098489070173824        3 0.35924052480888913 0.8193598839096039     11.69s
  36     3.52 0.7684472336556333        3 0.3574463827466736 0.834818459969538      9.23s
  37     3.36 0.7392402078833679        3 0.36471757700868923 0.7693777116113971      6.17s
  38     4.76 1.2099785972924317        3 0.36039213346470333 0.8089954060072759      3.19s
  39     3.46 0.8925839651220758        3 0.3688045929926995 0.7332832702553107      0.00s
    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
  40     4.28 0.9116989381626885        3 0.3893404698546207 0.7621682584741556      3.40s
  41     3.71 1.0489458308

  53     3.44 52.26903302361204        3 0.45949819187845997 1.2910039395195823      9.25s
  54     3.62 1.4913794851790692        3 0.4403945960836306 1.4629363016730463      8.36s
  55     4.12 1.2771727462867741        3 0.4744384684294292 1.1565414505608582      7.22s
  56     5.37 1.1279168780062836        3 0.4748343305595135 1.1529786913900995      5.68s
  57     3.55 1.1280871814959856        3 0.4386611308701565 1.4785374885943128      4.03s
  58     3.74 1.088798180577835        3 0.4745564248030919 1.1554798431978943      2.04s
  59     3.84 1.5428246819680334        3 0.4615383666220423 1.2726423668273408      0.00s
sub(-0.985, X0)
Run : 17
    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0  34438.9 1.23946099622042e+63        7 0.5502554234831221 1.3572478655280098     65.12m
 

  12     3.56 1.4661121415224665        3 0.38718661056281295 0.90276831033107      2.49m
  13     6.34 56.72584568620212        3 0.3945268960131746 0.8367057412778153      2.03m
  14     3.42 1.7111376652416743        3 0.4000026656067373 0.7874238149357509      1.60m
  15     6.45 1.086881117113076        3 0.39315599431488985 0.8490438565623782      1.22m
  16      3.9 0.8085294498784585        3 0.3939505919859924 0.8418924775224553     52.38s
  17     4.43 22.494205212432153        3 0.3894938203182522 0.882003422532117     33.40s
  18     6.98 6.005969285703444        3 0.38660253575310377 0.9080249836184526     16.05s
  19     3.54 12.377819547194722        3 0.38620398835598785 0.9116119101924963      0.00s
    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
  20     3.47 0.6814822058401

  32     3.18 0.5542680180759139        3 0.24442195746514483 0.6566933416517596     17.86s
  33     5.04 1.0364104382951982        3 0.24271014526977108 0.6720996514101238     16.35s
  34     3.95 0.7485214686733397        3 0.24251775880533305 0.6738311295900659     14.01s
  35     3.69 1.5928454284918445        3 0.2536032907566612 0.5740613420281118     11.67s
  36     3.52 0.6039966082123246        3 0.23508185307507162 0.7407542811624184      9.21s
  37     3.36 0.7320878617819822        3 0.2380468998079627 0.7140688605663984      6.16s
  38     4.76 1.5528843917471147        3 0.24854334088858832 0.6196008908407684      3.18s
  39     3.46 0.6609223280252801        3 0.2465097942278092 0.6379028107877808      0.00s
    |    Population Average   |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
  40     4.28 0.834403

  52     3.45 0.8235039546044627        3 0.4803357295885118 1.1377789665723772     10.58s
  53      3.8 0.9075969064155717        3 0.47195113720415827 1.2132402980315593      9.70s
  54     3.79 0.9076408695492176        3 0.47683169363162126 1.1693152901843926      8.75s
  55     4.14 0.9730616485037973        3 0.4758880536555343 1.1778080499691743      7.53s
  56     5.38 1.1383218463939695        3 0.4677905484747778 1.2506855965959836      5.92s
  57     3.55 0.7810150643641728        3 0.4842274002768741 1.1027539303771168      4.19s
  58     3.74 0.7534813954653649        3 0.4864180942562791 1.0830376845624716      2.12s
  59     3.84 0.8370801996494314        3 0.4742465974553508 1.1925811557708266      0.00s
add(X0, -0.974)


In [51]:
print('Predicted functions of ' +str(len(list_of_est_program)) +' Runs with tree depth (2,17)')
for ind in range(0,len(list_of_est_program)):
    print('Function '+str(ind)+' '+list_of_est_program[ind])

Predicted functions of 20 Runs with tree depth (2,17)
Function 0 add(-0.848, sub(X0, mul(X0, X0)))
Function 1 -0.723
Function 2 add(-0.993, add(X0, -0.197))
Function 3 add(-0.974, X0)
Function 4 -0.824
Function 5 add(X0, -0.974)
Function 6 -0.765
Function 7 add(-0.922, add(X0, -0.232))
Function 8 -0.823
Function 9 sub(-0.911, X0)
Function 10 add(-0.993, X0)
Function 11 add(-0.922, X0)
Function 12 sub(-0.986, X0)
Function 13 -0.823
Function 14 sub(X0, sub(0.888, -0.335))
Function 15 sub(-0.985, X0)
Function 16 sub(-0.902, X0)
Function 17 add(-0.922, X0)
Function 18 -0.697
Function 19 add(X0, -0.974)


In [38]:
print('Predicted functions of ' +str(len(list_of_est_program)) +' Runs with tree depth (6,10)')
for ind in range(0,len(list_of_est_program)):
    print('Function '+str(ind)+' '+list_of_est_program[ind])

Predicted functions of 20 Runs with tree depth (6,10)
Function 0 sub(sub(X0, 0.778), mul(X0, X0))
Function 1 -0.801
Function 2 sub(X0, div(-0.957, -0.885))
Function 3 sub(sub(X0, 0.778), mul(X0, X0))
Function 4 -0.871
Function 5 sub(X0, div(X0, X0))
Function 6 -0.781
Function 7 sub(X0, sub(0.699, -0.444))
Function 8 sub(mul(X0, -0.458), 0.898)
Function 9 sub(-0.986, X0)
Function 10 sub(X0, 0.991)
Function 11 sub(X0, 0.902)
Function 12 sub(sub(0.081, X0), div(0.278, 0.231))
Function 13 -0.926
Function 14 sub(X0, add(0.729, 0.341))
Function 15 -0.785
Function 16 sub(mul(X0, 0.224), sub(X0, -0.947))
Function 17 add(-0.974, X0)
Function 18 -0.769
Function 19 add(-0.929, X0)


In [26]:
print('Predicted functions of ' +str(len(list_of_est_program)) +' Runs with tree depth (2,6)')
for ind in range(0,len(list_of_est_program)):
    print('Function '+str(ind)+' '+list_of_est_program[ind])

Predicted functions of 20 Runs with tree depth (2,6)
Function 0 sub(-0.749, sub(mul(X0, X0), X0))
Function 1 -0.713
Function 2 add(sub(X0, 0.159), -0.965)
Function 3 sub(add(-0.834, X0), mul(X0, X0))
Function 4 -0.803
Function 5 sub(-0.931, mul(X0, -0.606))
Function 6 -0.765
Function 7 sub(sub(X0, 0.159), 0.971)
Function 8 -0.931
Function 9 sub(-0.931, X0)
Function 10 add(-0.986, X0)
Function 11 sub(sub(X0, 0.834), mul(X0, X0))
Function 12 sub(sub(-0.440, X0), 0.671)
Function 13 -0.749
Function 14 sub(sub(X0, 0.159), 0.946)
Function 15 -0.781
Function 16 sub(-0.781, add(X0, 0.366))
Function 17 add(mul(0.674, X0), -0.965)
Function 18 -0.637
Function 19 add(X0, -0.974)


### Calculate fitness

In [30]:
x0 = X_test[:,0]
x1 = X_test[:,1]
predicted_formula_result_y = (x0-.877)-(x0*x0)
fitness = mean_squared_error(y_test, predicted_formula_result_y)
print('Training of data with PCA with DV: '+str(fitness))

Training of data with PCA with DV: 1.6638831596176011
