In [1]:
import sympy
import numpy as np
from matplotlib import pyplot as plt
from pysr import PySRRegressor
from sklearn.model_selection import train_test_split
from scipy.special import jn,j0, factorial

Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython


**Goal**: Recover the Taylor series expansion from synthetic data generated with the cosine function and the Bessel function $J_0$.

$$
\begin{aligned}
\cos(x) &= 1 - \frac{1}{2}x^2 + \frac{1}{24}x^4 - \frac{1}{720}x^6 + \frac{1}{40320}x^8 - \cdots
\end{aligned}
$$


In [2]:
# Synthetic cos(x) samples on [-2, 2] with small additive Gaussian noise.
np.random.seed(0)  # fixed seed so the noise realisation is repeatable
x = np.linspace(-2, 2, num=500)
noise = np.random.randn(x.size) * 0.001
y = noise + np.cos(x)

In [3]:
# Baseline PySR settings: number of populations and the model-selection strategy.
default_pysr_params = {
    "populations": 30,
    "model_selection": "best",
}

In [4]:
# Symbolic regression on the noisy cos(x) samples, restricted to sums and products
# plus two custom unary operators (x^2/2 and x^6).
# populations / model_selection come from the shared `default_pysr_params` instead of
# being hard-coded again; the resulting estimator settings are unchanged.
model = PySRRegressor(
    niterations=300,
    binary_operators=["+", "*"],
    unary_operators=["sq(x) = x^2/2", "exa(x) = x^6"],
    # SymPy counterparts of the custom operators, used when exporting via model.sympy().
    extra_sympy_mappings={"sq": lambda x: x**2 / 2, "exa": lambda x: x**6},
    # Complexity cap stated explicitly; 30 is the value reported for this run.
    maxsize=30,
    **default_pysr_params,
)
# x is 1-D, so reshape it into a single-feature column matrix before fitting.
model.fit(x.reshape(-1, 1), y)

Compiling Julia backend...
[ Info: Started!



Expressions evaluated per second: 2.430e+05
Progress: 1385 / 9000 total iterations (15.389%)
════════════════════════════════════════════════════════════════════════════════════════════════════
───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           1.998e-01  0.000e+00  y = 0.4528
5           2.083e-02  5.652e-01  y = sq(sq(x₀) + -1.4798)
6           1.816e-03  2.440e+00  y = (sq(x₀) * -0.74316) + 0.9503
7           1.816e-03  2.384e-07  y = ((x₀ * x₀) * -0.37158) + 0.9503
8           1.816e-03  1.669e-06  y = (sq(x₀ + 7.3007e-05) * -0.74316) + 0.9503
9           1.816e-03  2.980e-07  y = ((x₀ + 0.00016237) * (x₀ * -0.37158)) + 0.9503
10          1.615e-03  1.172e-01  y = exa((x₀ * (x₀ * -0.0047209)) + 1.6871) + -22.096
11          2.961e-06  6.302e+00  y = (((sq(x₀) * 0.1386) + -0.98171) * sq(x₀)) + 0.9982
14          1.249e-06  2.877e-01  y = (sq(x₀ * (((x₀ * x₀) * -0.056143) + 1.4298

[ Info: Final population:
[ Info: Results saved to:


0,1,2
,model_selection,'best'
,binary_operators,"['+', '*']"
,unary_operators,"['sq(x) = x^2/2', 'exa(x) = x^6']"
,expression_spec,
,niterations,300
,populations,30
,population_size,27
,max_evals,
,maxsize,30
,maxdepth,


  - outputs/20250926_103811_tsde74/hall_of_fame.csv


In [5]:
# Show the fitted equation as a SymPy expression.
model.sympy()

(x0**2*(1.3602041 + (x0**2/2)*(-0.106374405))**2/2 - 1.8619431)*(-0.53669125)

In [6]:
# Symbol named like the model's single input feature
# (kept in the namespace; the statements below do not reference it).
x0 = sympy.Symbol('x0')

# Simplify the fitted expression ...
expr = sympy.simplify(model.sympy())

# ... then multiply it out so the coefficient of each power of x0 can be read off.
expanded_expr = sympy.expand(expr)
expanded_expr

-0.000759117171754911*x0**6 + 0.0388271704796444*x0**4 - 0.496481051788833*x0**2 + 0.999288569767875

In [7]:
# Magnitudes of the exact cos(x) Taylor coefficients for x^6 and x^4 (1/6! and 1/4!),
# for comparison with the fitted polynomial; the x^6 term of the series is negative.
1/factorial(6), 1/factorial(4)

(0.001388888888888889, 0.041666666666666664)

In [8]:
# LaTeX source of the expanded polynomial.
sympy.latex(expanded_expr)

'- 0.000759117171754911 x_{0}^{6} + 0.0388271704796444 x_{0}^{4} - 0.496481051788833 x_{0}^{2} + 0.999288569767875'

<p>Results around $x = 0$ in the range $\left[-1, +1\right]$:</p>
<ul>
<li> obtained for niter=100, maxsize=20, noise=0.001 and fitting with sq: $$0.0404522973487644 x_{0}^{4} - 0.49966508442707 x_{0}^{2} + 0.9999080045205$$</li>
<li> obtained for niter=100, maxsize=20, noise=0.001 and fitting with only + and *: $$0.04024306 x_{0}^{4} - 0.49952888 x_{0}^{2} + 0.99990594$$</li>
</ul>
<p>Increasing maxsize and widening the range to $\left[-2, +2\right]$:</p>
<ul>
<li> obtained for niter=300, maxsize=30, noise=0.001 and fitting with sq and exa: $$- 0.000759117171754911 x_{0}^{6} + 0.0388271704796444 x_{0}^{4} - 0.496481051788833 x_{0}^{2} + 0.999288569767875$$</li>
<li> obtained for niter=100, maxsize=30, noise=0.001 and fitting with only + and *: $$- 0.0011299928 x_{0}^{6} + 0.04083892 x_{0}^{4} - 0.49913806 x_{0}^{2} + 0.9997868$$</li>
</ul>

# Bessel Function

$$J_0(x) = 1 - \frac{1}{4}x^2 + \frac{1}{64}x^4 - \frac{1}{2304}x^6 + \frac{1}{147456}x^8 - \cdots $$

In [20]:
# Synthetic J_0(x) samples on [-3, 3] with small additive Gaussian noise.
np.random.seed(0)  # same seed as before, so the noise draw is repeatable
x = np.linspace(-3, 3, num=500)
noise = np.random.randn(x.size) * 0.001
y = noise + jn(0, x)

In [21]:
# Symbolic regression on the noisy J_0(x) samples using only "+" and "*"
# (no custom unary operators for this run).
# populations / model_selection come from the shared `default_pysr_params` instead of
# being hard-coded again; the resulting estimator settings are unchanged.
model = PySRRegressor(
    niterations=300,
    binary_operators=["+", "*"],
    # Complexity cap stated explicitly; 30 is the value reported for this run.
    maxsize=30,
    **default_pysr_params,
)
# x is 1-D, so reshape it into a single-feature column matrix before fitting.
model.fit(x.reshape(-1, 1), y)

[ Info: Started!



Expressions evaluated per second: 2.810e+05
Progress: 1560 / 9000 total iterations (17.333%)
════════════════════════════════════════════════════════════════════════════════════════════════════
───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           1.712e-01  0.000e+00  y = 0.46105
7           4.727e-03  5.983e-01  y = ((x₀ * x₀) * -0.15146) + 0.91726
9           4.727e-03  4.470e-07  y = (((x₀ + 0.00018733) * -0.15146) * x₀) + 0.91726
11          4.727e-03  8.941e-08  y = ((((x₀ + 0.00023739) * x₀) + 0.11857) * -0.15147) + 0....
                                      93522
13          2.225e-05  2.679e+00  y = ((((x₀ * 0.011027) * x₀) + -0.23686) * (x₀ * x₀)) + 0....
                                      99443
15          2.225e-05  9.505e-05  y = ((((x₀ * 0.011026) * x₀) + -0.23686) * ((x₀ + 0.000210...
                                      27) * x₀)) + 0.99442
17          2.225e-05  3

[ Info: Final population:
[ Info: Results saved to:


0,1,2
,model_selection,'best'
,binary_operators,"['+', '*']"
,unary_operators,
,expression_spec,
,niterations,300
,populations,30
,population_size,27
,max_evals,
,maxsize,30
,maxdepth,


  - outputs/20250926_105001_SzqzwM/hall_of_fame.csv


In [22]:
# Show the equation fitted to the Bessel data as a SymPy expression.
model.sympy()

x0*x0*(x0*x0*(x0*x0*(-0.00032506048) + 0.015031787) - 0.24892703) + 0.9996159

In [23]:
# Symbol named like the model's single input feature
# (kept in the namespace; nothing below refers to it).
x0 = sympy.Symbol('x0')

# Simplify the expression fitted to the Bessel data.
expr = sympy.simplify(model.sympy())

# Expand the nested products into a polynomial in x0 and display it.
expanded_expr = sympy.expand(expr)
expanded_expr

-0.00032506048*x0**6 + 0.015031787*x0**4 - 0.24892703*x0**2 + 0.9996159

In [24]:
# LaTeX source of the expanded polynomial.
sympy.latex(expanded_expr)

'- 0.00032506048 x_{0}^{6} + 0.015031787 x_{0}^{4} - 0.24892703 x_{0}^{2} + 0.9996159'

In [25]:
# Reference values from the J_0 series for comparison with the fit: the x^6 entry is the
# magnitude 1/2304 (its series coefficient is negative); the x^4 and x^2 entries are signed.
1/2304, 1/64, -1/4

(0.00043402777777777775, 0.015625, -0.25)

<p>Results around $x = 0$ in the range $\left[-1, +1\right]$:</p>
<ul>
<li> obtained for niter=300, maxsize=20, noise=0.001 and fitting with only + and *: $$0.015487796 x_{0}^{4} - 0.24995881 x_{0}^{2} + 0.9999266$$</li>
</ul>
<p>Increasing maxsize and widening the range to $\left[-3, +3\right]$:</p>
<ul>
<li> obtained for niter=300, maxsize=30, noise=0.001 and fitting with only + and *: $$- 0.00032506048 x_{0}^{6} + 0.015031787 x_{0}^{4} - 0.24892703 x_{0}^{2} + 0.9996159$$</li>
</ul>