In [None]:
import numpy as np
import matplotlib.pyplot as plt

from gplearn.genetic import SymbolicRegressor
from sklearn.utils.random import check_random_state
import graphviz

## Simulate data

In [None]:
# Evaluate the target surface y = x0^2 - x1^2 + x1 - 1 on a regular grid.
# Both axes share the same sample points: start at -1, step 0.1, stop before 1.
grid_axis = np.arange(-1, 1, 0.1)
x0, x1 = np.meshgrid(grid_axis, grid_axis)
y_truth = np.square(x0) - np.square(x1) + x1 - 1

In [None]:
# Render the ground-truth surface over the grid as a translucent 3-D plot.
fig3d = plt.figure()
ax = fig3d.add_subplot(projection='3d')
ax.set(xlim=(-1, 1), ylim=(-1, 1))
surf = ax.plot_surface(
    x0, x1, y_truth,
    rstride=1, cstride=1,
    color='green', alpha=0.5,
)
plt.tight_layout()
plt.show()

In [None]:
def _target_surface(X):
    """Evaluate x0**2 - x1**2 + x1 - 1 for samples stored as (x0, x1) columns of X."""
    first, second = X[:, 0], X[:, 1]
    return first**2 - second**2 + second - 1


rng = check_random_state(0)

# Draw order is part of the reproducible stream: training samples first, then test samples.
X_train = rng.uniform(-10, 10, size=(500, 2))
y_train = _target_surface(X_train)

X_test = rng.uniform(-10, 10, size=(500, 2))
y_test = _target_surface(X_test)

## Fit model

In [None]:
# Configure the symbolic regressor; nothing is trained until .fit() is called.
est_gp = SymbolicRegressor(
    # Size and length of the search
    population_size=5000,
    generations=20,
    stopping_criteria=0.01,
    # Probabilities of the individual genetic operators
    p_crossover=0.7,
    p_subtree_mutation=0.1,
    p_hoist_mutation=0.05,
    p_point_mutation=0.1,
    # Subsampling, program-size penalty, logging and seed
    max_samples=0.9,
    parsimony_coefficient=0.01,
    verbose=1,
    random_state=0,
)

In [None]:
# Evolve programs on the simulated training set; progress is printed because verbose=1.
est_gp.fit(X=X_train, y=y_train)

In [None]:
# Score the fitted estimator on the held-out samples.
y_pred = est_gp.predict(X=X_test)

In [None]:
# Predicted vs. true targets on the held-out set; points on the orange
# identity line are perfect predictions.
plt.scatter(y_pred, y_test)

# Span the identity line over BOTH arrays so it reaches every plotted point,
# not just the range covered by the predictions.
diag_lo = min(y_pred.min(), y_test.min())
diag_hi = max(y_pred.max(), y_test.max())
plt.plot([diag_lo, diag_hi], [diag_lo, diag_hi], color='C1')

# Label the axes so the figure can be read without looking at the code.
plt.xlabel('predicted y')
plt.ylabel('true y')
plt.title('Simulated surface: prediction vs. truth (test set)')
plt.show()

In [None]:
# Pearson correlation between predictions and held-out targets:
# the off-diagonal entry of the 2x2 correlation matrix.
corr_matrix = np.corrcoef(y_pred, y_test)
corr_matrix[0, 1]

[Source - gplearn](https://gplearn.readthedocs.io/en/stable/examples.html)

## Newtonian gravity

In [None]:
# Synthetic data for Newton's law of gravitation, F = G * m1 * m2 / r**2.
# The regression is posed in log10 space, where the law becomes linear:
#   log10(F) = log10(G) + log10(m1) + log10(m2) - 2 * log10(r)
G = 6.6743 * 10 ** -11
n = 10 ** 3

# Dedicated, seeded generator so the dataset is identical on every
# Restart & Run All (the global np.random state is unseeded).
gravity_rng = np.random.default_rng(0)

# Masses: log-uniform, exponents drawn from [0, 8); two masses per sample.
m_train = 10 ** gravity_rng.uniform(0, 8, (n, 2))
m_test = 10 ** gravity_rng.uniform(0, 8, (n, 2))

# Distances: log-uniform, exponents drawn from [0, 4); one distance per sample.
r_train = 10 ** gravity_rng.uniform(0, 4, (n, 1))
r_test = 10 ** gravity_rng.uniform(0, 4, (n, 1))

# Feature matrices with columns (m1, m2, r).
train = np.hstack((m_train, r_train))
test = np.hstack((m_test, r_test))

train_log = np.log10(train)
test_log = np.log10(test)

# Standard-normal noise, applied below to the log10 training target only.
noise = gravity_rng.standard_normal(n)

f_train = G * train[:, 0] * train[:, 1] / train[:, 2] ** 2
f_test = G * test[:, 0] * test[:, 1] / test[:, 2] ** 2

# The training target is noisy; the test target stays exact so evaluation
# measures how well the underlying law was recovered.
f_train_log = np.log10(f_train) + noise
f_test_log = np.log10(f_test)

In [None]:
# Only add, sub and div are offered to the search. The targets are in log10
# space, where the gravity law is linear in the log-features.
fun_set = ('add', 'sub', 'div')
est_gp = SymbolicRegressor(population_size=10000,
                           generations=50, stopping_criteria=0.75,
                           function_set=fun_set,
                           p_crossover=0.55, p_subtree_mutation=0.15,
                           p_hoist_mutation=0.1, p_point_mutation=0.15,
                           max_samples=0.9, verbose=1,
                           # Fixed seed so the evolutionary search is reproducible,
                           # matching the estimator fitted on the simulated surface.
                           parsimony_coefficient=0.01, random_state=0)

In [None]:
# Fit in log10 space: log-features in, noisy log-force out.
est_gp.fit(X=train_log, y=f_train_log)

In [None]:
# Export the estimator's fitted program in Graphviz DOT form and wrap it for rendering.
fitted_program = est_gp._program
dot_data = fitted_program.export_graphviz()
graph = graphviz.Source(dot_data)

In [None]:
# Bare expression: the notebook displays the graphviz.Source object built from the fitted program.
graph

In [None]:
# Predict log10 force for the held-out log-features (overwrites the earlier y_pred).
y_pred = est_gp.predict(X=test_log)

In [None]:
# Predicted vs. true log10 force on the test set; points on the orange
# identity line are perfect predictions.
plt.scatter(y_pred, f_test_log)

# Span the identity line over BOTH arrays so it reaches every plotted point,
# not just the range covered by the predictions.
diag_lo = min(y_pred.min(), f_test_log.min())
diag_hi = max(y_pred.max(), f_test_log.max())
plt.plot([diag_lo, diag_hi], [diag_lo, diag_hi], color='C1')

# Label the axes so the figure can be read without looking at the code.
plt.xlabel('predicted log10(F)')
plt.ylabel('true log10(F)')
plt.title('Newtonian gravity: prediction vs. truth (test set)')
plt.show()

In [None]:
# Pearson correlation between predicted and true log10 force:
# the off-diagonal entry of the 2x2 correlation matrix.
corr_matrix = np.corrcoef(y_pred, f_test_log)
corr_matrix[0, 1]

In [None]:
# Undo the log10 transform on both sides and take the signed residual (predicted minus true).
pred_linear = np.power(10, y_pred)
true_linear = np.power(10, f_test_log)
resid = pred_linear - true_linear

In [None]:
# Residuals are signed, so work with magnitudes before anything logarithmic:
# log10 of a negative value is NaN, which a log-scaled axis silently drops and
# which makes the histogram's automatic range non-finite.
abs_resid = np.abs(resid)
# Exact zeros have no finite log10, so leave them out of the histogram.
nonzero_abs_resid = abs_resid[abs_resid > 0]

fig, axs = plt.subplots(1, 2, figsize=[10, 5])

# Left: residual magnitude per test sample on a log-scaled axis.
axs[0].plot(abs_resid)
axs[0].set_yscale('log')
axs[0].set_xlabel('test sample index')
axs[0].set_ylabel('|residual|')

# Right: distribution of the residual magnitudes in decades.
axs[1].hist(np.log10(nonzero_abs_resid), bins=50)
axs[1].set_xlabel('log10 |residual|')
axs[1].set_ylabel('count')

plt.tight_layout()