In [1]:
import numpy as np
import pandas as pd
from gplearn.genetic import SymbolicRegressor
from sklearn.model_selection import train_test_split
from sympy import *

In [2]:
# Synthetic 1-D regression problem.
# Noise-free signal: y_true = 0.002*x + 10*sin(x) + 0.001*(x - 5)**3 + 5
# Observed target:   y = y_true + Gaussian noise with standard deviation `sig`.
nsample = 400
sig = 0.2
NOISE_SEED = 0  # fixed seed so the data (and everything fitted to it) is reproducible

x = np.linspace(-50, 50, nsample)
# Design matrix: one column per term of the signal (linear, sine, shifted cubic, offset).
X = np.column_stack((x/5, 10*np.sin(x), (x-5)**3, np.ones(nsample)))
beta = [0.01, 1, 0.001, 5.]
y_true = np.dot(X, beta)

# Use a local generator rather than the global NumPy RNG: a fresh kernel then always
# produces the same noise, and no other code's random state is touched.
noise_rng = np.random.RandomState(NOISE_SEED)
y = y_true + sig * noise_rng.normal(size=nsample)

# Build the frame in one step instead of growing an empty DataFrame column by column.
df = pd.DataFrame({'x': x, 'y': y})

In [3]:
# Model inputs: a one-column feature frame and the target series taken from `df`.
X = df[['x']]

# NOTE(review): `y_true` is bound to the same (noisy) series as `y` here, which replaces
# the noise-free signal computed when the data was generated — confirm this is intended.
y = y_true = df['y']

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

In [4]:
# First run: search using only primitives referred to by their gplearn names.
function_set = [
    'add', 'sub', 'mul', 'div',
    'cos', 'sin', 'neg', 'inv',
]

est_gp = SymbolicRegressor(
    # search budget and early-stop threshold
    population_size=5000,
    generations=40,
    stopping_criteria=0.01,
    # primitives available to the evolved programs
    function_set=function_set,
    # p_* settings
    p_crossover=0.7,
    p_subtree_mutation=0.1,
    p_hoist_mutation=0.05,
    p_point_mutation=0.1,
    # sampling, size penalty, logging and seeding
    max_samples=0.9,
    parsimony_coefficient=0.01,
    verbose=1,
    random_state=0,
    # label program inputs with the training frame's column names
    feature_names=X_train.columns,
)

In [5]:
# Lookup table handed to sympify() as `locals`: each key is a function name that can
# appear in a printed program, each value builds the matching expression.
# NOTE(review): these are the plain arithmetic forms; if the regressor's own div/inv/sqrt
# are guarded variants, the symbolic expression may differ near singular points — confirm.
converter = dict(
    # binary arithmetic
    sub=lambda x, y: x - y,
    div=lambda x, y: x/y,
    mul=lambda x, y: x*y,
    add=lambda x, y: x + y,
    # negation and general power
    neg=lambda x: -x,
    pow=lambda x, y: x**y,
    # trigonometric functions (resolved from the names `sin` / `cos` in scope at call time)
    sin=lambda x: sin(x),
    cos=lambda x: cos(x),
    # reciprocal, square root and cube
    inv=lambda x: 1/x,
    sqrt=lambda x: x**0.5,
    pow3=lambda x: x**3,
)

In [6]:
# Train the symbolic regressor on the training split, then report its score on the
# held-out rows (printed under the label "R2").
est_gp.fit(X_train, y_train)
print('R2:',est_gp.score(X_test,y_test))

# Turn the fitted program into a SymPy expression. The program is not a string, so it is
# converted explicitly: sympify()'s implicit str() fallback for arbitrary objects is
# deprecated. `converter` supplies the meaning of each function name in that string.
next_e = sympify(str(est_gp._program), locals=converter)
next_e

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    15.94           3204.3       26          16.2791          24.3567      4.07m
   1    11.71          514.702       11          15.9268          26.2175      2.97m
   2     9.02          78.0172        5          15.6357           34.183      2.62m
   3     8.75          64.8027        8          15.7165          27.7958      2.59m
   4     8.60          49.3552        9          15.5209          30.6865      2.47m
   5     9.19          58.6209       10          15.2393          33.5307      3.65m
   6     9.41          48.4439       23           15.644          28.6679      2.79m
   7     9.67          69.3363       18          15.5823          28.5027      2.32m
   8     9.19           48.812        8          15.4549          30.1227  

1.14942528735632*x - 9.06060606060606*sin(0.112*x) + 6.06060606060606*sin(0.184*x) - sin(0.202*x) + 9.62141378290447*sin(x) - sin(1.12107623318386*x) + 6.06060606060606*sin(0.0509778943750324*x/sin(0.112*x)) - 6.06060606060606*sin(sin(0.112*x)) + 6.06060606060606*cos(0.112*x) - 3.03668292396704

In [7]:
# Keep the current estimator's test-set predictions and its test-set score for later use.
y_gp, score_gp = est_gp.predict(X_test), est_gp.score(X_test, y_test)

In [8]:
from gplearn.functions import make_function

In [9]:
def pow_3(x1):
    """Return ``x1`` raised to the third power.

    Only the ``**`` operator is applied, so scalars and NumPy arrays
    (element-wise) are both accepted.
    """
    return x1 ** 3
# Register the cube under the name 'pow3' (the same key `converter` uses).
# Note that `pow_3` is rebound here: from the plain function to the object
# returned by make_function.
pow_3 = make_function(function=pow_3, name='pow3', arity=1)

# Second run: the same named primitives as the first run, plus the custom cube.
function_set = [
    'add', 'sub', 'mul', 'div',
    'cos', 'sin', 'neg', 'inv',
    pow_3,
]

# Same settings as the first run except for generations=45.
est_gp = SymbolicRegressor(
    population_size=5000,
    function_set=function_set,
    generations=45,
    stopping_criteria=0.01,
    p_crossover=0.7,
    p_subtree_mutation=0.1,
    p_hoist_mutation=0.05,
    p_point_mutation=0.1,
    max_samples=0.9,
    verbose=1,
    parsimony_coefficient=0.01,
    random_state=0,
    feature_names=X_train.columns,
)
# Fit the regressor that includes the custom 'pow3' primitive and report its score on
# the held-out rows (printed under the label "R2").
est_gp.fit(X_train, y_train)
print('R2:',est_gp.score(X_test,y_test))

# Turn the fitted program into a SymPy expression. The program is not a string, so it is
# converted explicitly: sympify()'s implicit str() fallback for arbitrary objects is
# deprecated. `converter` supplies the meaning of each function name, including 'pow3'.
next_e = sympify(str(est_gp._program), locals=converter)
next_e

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    14.13       2.8592e+61       22           16.284          27.9367      5.56m
   1    10.69      2.35374e+42       20          13.8693          16.2199      5.13m
   2     9.06      8.16763e+13       19          13.5223          21.5509      3.45m
   3     8.19      6.89542e+55       20          13.6531          18.1653      2.80m
   4     9.50      3.44649e+21       22          13.2693          21.0334      2.89m
   5    14.80      1.10938e+46       25          13.1966          13.4552      3.16m
   6    20.30      1.09321e+46       19          11.3174          12.2777      3.67m
   7    22.35      5.19681e+45       25           10.991           10.632      3.38m
   8    23.77     4.92458e+132       17           10.576          12.7059  

0.0169991811784987*x + 0.0169991811784987*(0.390339208439988*x - 0.390339208439988*sin(0.952 - 1/(cos(x*sin(x)/(0.390339208439988*x - 1.7865856305669)**3) - 3.65000787401575)) - 1.53676853716531)**3 + 9.58901018958925*sin(x) + cos(0.126/x) + cos(0.255/x) + 2.71267419544457