### Libraries

In [12]:
from gplearn.genetic import SymbolicTransformer
from gplearn.fitness import make_fitness
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MaxAbsScaler
from sklearn.datasets import load_boston
from sklearn.utils import check_random_state

In [13]:
# cufflinks is bound to `cf`; go_offline() presumably switches its plotly
# integration to offline (in-notebook) rendering — confirm against the
# cufflinks documentation.
# NOTE(review): this import lives outside the main imports cell above.
import cufflinks as cf
cf.go_offline()

### Load

In [14]:
# Load the Boston housing data and shuffle its rows with a fixed seed so the
# ordering is reproducible across runs.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn — confirm the pinned version.
boston = load_boston()
rng = check_random_state(0)
perm = rng.permutation(boston.target.size)

# Apply the same permutation to features and target. The Bunch attributes are
# overwritten as well, so `boston.data` / `boston.target` refer to the same
# shuffled arrays as `X` / `Y`.
X = boston.data = boston.data[perm]
Y = boston.target = boston.target[perm]

In [15]:
# Hold out 20% of the rows for testing (fixed seed for a reproducible split).
x_train1, x_test1, y_train1, y_test1 = train_test_split(
    X, Y, test_size=0.2, random_state=5
)

# Feature scaling: the scaler is fitted on the training rows only and then
# reused, unchanged, for the test rows.
scaler = MaxAbsScaler()
scaler.fit(x_train1)
X_train = scaler.transform(x_train1)
X_test = scaler.transform(x_test1)

# Target scaling: the target is reshaped to a single column for the scaler
# and flattened back to 1-D afterwards.
scaler1 = MaxAbsScaler()
scaler1.fit(y_train1.reshape(-1, 1))
y_train = scaler1.transform(y_train1.reshape(-1, 1)).reshape(-1)
y_test = scaler1.transform(y_test1.reshape(-1, 1)).reshape(-1)

### Functions

In [16]:
# custom metric
def _mape(y, y_pred, w):
    diffs = np.abs(np.divide((np.maximum(0.001, y) - np.maximum(0.001, y_pred)),np.maximum(0.001, y)))
    
    return 100. * np.average(diffs, weights=w)

# Wrap _mape as a gplearn fitness object; lower values are better.
# `function` is passed by keyword: current gplearn documentation uses the
# keyword form and newer releases appear to declare these arguments
# keyword-only (confirm against the installed version). The keyword form
# also works on older releases.
mape = make_fitness(function=_mape, greater_is_better=False)

### Model

In [19]:
# Primitive operations the evolved programs may be built from.
function_set = [
    'add', 'sub', 'mul', 'div',
    'sqrt', 'log', 'abs', 'neg', 'inv',
    'max', 'min',
]

# Symbolic transformer scored with the custom MAPE fitness. Crossover and
# mutation probabilities are not set here, so gplearn's defaults apply.
# NOTE(review): 40 generations x 20000 programs is a long run (the verbose
# log estimates ~17 minutes); consider caching the fitted model.
model = SymbolicTransformer(
    generations=40,
    population_size=20000,
    hall_of_fame=100,
    n_components=10,
    init_method="half and half",
    function_set=function_set,
    metric=mape,
    parsimony_coefficient=0.0005,
    max_samples=0.9,
    verbose=1,
    random_state=0,
)
model.fit(X_train, y_train)

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    11.35          2333.88       19          21.1003          20.8942     17.32m
   1     8.70          159.484       19          20.3613          27.4366     16.90m
   2     8.47          108.667       16          17.9577           21.492     15.63m
   3    10.65           100.03       14          15.7383          10.3593     15.44m
   4    14.69          122.732       14          14.6474          20.0172     16.64m
   5    18.61          104.792       18           14.636           19.093     17.23m
   6    21.24          80.2295       39          13.8813           17.884     16.26m
   7    27.81          83.3482       35          14.1482          19.3171     17.59m
   8    35.60          83.0146       47          13.6098          12.5427  

SymbolicTransformer(const_range=(-1.0, 1.0), feature_names=None,
                    function_set=['add', 'sub', 'mul', 'div', 'sqrt', 'log',
                                  'abs', 'neg', 'inv', 'max', 'min'],
                    generations=40, hall_of_fame=100, init_depth=(2, 6),
                    init_method='half and half', low_memory=False,
                    max_samples=0.9,
                    metric=<gplearn.fitness._Fitness object at 0x000001CE2ACA7748>,
                    n_components=10, n_jobs=1, p_crossover=0.9,
                    p_hoist_mutation=0.01, p_point_mutation=0.01,
                    p_point_replace=0.05, p_subtree_mutation=0.01,
                    parsimony_coefficient=0.0005, population_size=20000,
                    random_state=0, stopping_criteria=1.0, tournament_size=20,
                    verbose=1, warm_start=False)

In [None]:
# Print the first entry of the fitted transformer's program list.
# NOTE(review): `_best_programs` is a private gplearn attribute and may
# change between releases.
best_program = model._best_programs[0]
print(best_program)

In [24]:
# Build the GP features for the full data set. The transformer was fitted on
# MaxAbs-scaled inputs (X_train), so the raw features must go through the
# same fitted `scaler` first; feeding unscaled data would evaluate the
# evolved programs on a different input scale than they were trained on.
gp_features = model.transform(scaler.transform(boston.data))

# Append the GP features to the original (unscaled) feature columns.
new_boston = np.hstack((X, gp_features))

In [29]:
# Persist the augmented feature matrix as CSV (the default integer index is
# written as the first column).
# NOTE(review): the target directory must already exist relative to the
# notebook's working directory.
new_boston_df = pd.DataFrame(new_boston)
new_boston_df.to_csv("..//Data//new_boston.csv")