In [1]:
import optuna
from pysr import PySRRegressor
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

  from .autonotebook import tqdm as notebook_tqdm


Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython


In [4]:
# 1. Prepare toy data
X = np.random.randn(200, 5)
y = 2.5 * np.cos(X[:, 2]) + X[:, 0]**2 - 0.3 * X[:, 1]

X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 2. Define Optuna objective
def objective(trial: optuna.Trial) -> float:
    # Suggest PySR hyperparameters
    pop_size = trial.suggest_int("population_size", 50, 500)
    n_iters = trial.suggest_int("niterations", 10, 100)
    parsimony = trial.suggest_float("parsimony", 0.001, 0.1, log=True)
    optimizer_restarts = trial.suggest_int("optimizer_nrestarts", 1, 10)

    model = PySRRegressor(
        population_size=pop_size,
        niterations=n_iters,
        parsimony=parsimony,
        optimizer_nrestarts=optimizer_restarts,
        # you can add more PySR settings here...
    )

    # Fit and evaluate
    model.fit(X_train, y_train)
    y_pred = model.predict(X_valid)
    return mean_squared_error(y_valid, y_pred)

# 3. Create and run the study
sampler = optuna.samplers.TPESampler()  # default TPE sampler :contentReference[oaicite:2]{index=2}
pruner = optuna.pruners.MedianPruner()  # stop unpromising trials early :contentReference[oaicite:3]{index=3}

study = optuna.create_study(
    storage = "sqlite:///db.sqlite3",
    study_name = "pysr-test_study",
    direction="minimize",
    sampler=sampler,
    pruner=pruner
)
study.optimize(objective, n_trials=50, timeout=600)

print("Best MSE:", study.best_value)
print("Best params:", study.best_params)

[I 2025-04-27 11:03:37,426] A new study created in RDB with name: pysr-test_study
[ Info: Started!



Expressions evaluated per second: 1.610e+05
Progress: 63 / 341 total iterations (18.475%)
════════════════════════════════════════════════════════════════════════════════════════════════════
───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           2.521e+00  1.594e+01  y = 2.5392
5           1.246e+00  1.761e-01  y = (x₀ * x₀) + 1.5219
7           1.178e+00  2.828e-02  y = (x₀ * (x₀ / 1.2258)) + 1.7094
9           1.154e+00  1.024e-02  y = (x₁ * -0.3257) + ((x₀ * x₀) + 1.5293)
11          1.089e+00  2.902e-02  y = ((x₁ * -0.31956) + ((x₀ * 0.82043) * x₀)) - -1.7118
15          5.849e-01  1.553e-01  y = (-0.32566 * (x₂ * x₂)) + ((x₀ * (0.88948 * x₀)) - (-1....
                                      5293 / 0.90682))
───────────────────────────────────────────────────────────────────────────────────────────────────
══════════════════════════════════════════════════════════════════════════════

[ Info: Final population:
[ Info: Results saved to:
[I 2025-04-27 11:04:03,597] Trial 0 finished with value: 0.020423910166040882 and parameters: {'population_size': 283, 'niterations': 11, 'parsimony': 0.03237795731318641, 'optimizer_nrestarts': 10}. Best is trial 0 with value: 0.020423910166040882.
[ Info: Started!


  - outputs\20250427_110339_RLwBcB\hall_of_fame.csv

Expressions evaluated per second: 2.480e+05
Progress: 94 / 2821 total iterations (3.332%)
════════════════════════════════════════════════════════════════════════════════════════════════════
───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           2.521e+00  1.594e+01  y = 2.5392
5           1.246e+00  1.761e-01  y = (x₀ * x₀) + 1.5221
7           1.178e+00  2.828e-02  y = ((x₀ * 0.81575) * x₀) + 1.7095
9           1.154e+00  1.024e-02  y = ((x₀ * x₀) - -1.5293) - (x₁ * 0.32563)
11          1.263e-01  1.106e+00  y = (x₀ * x₀) + ((x₂ * (x₂ * -0.83902)) + 2.3284)
13          1.235e-01  1.119e-02  y = (x₀ * (x₀ + 0.052602)) + ((x₂ * (x₂ * -0.83625)) + 2.3...
                                      286)
15          1.235e-01  5.960e-08  y = ((x₀ * (x₀ + 0.052607)) - 0.19482) + ((x₂ * (x₂ * -0.8...
                                      3625)) +

[ Info: Final population:
[ Info: Results saved to:


───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           2.521e+00  1.594e+01  y = 2.5392
5           1.246e+00  1.761e-01  y = (x₀ * x₀) + 1.5221
7           1.178e+00  2.828e-02  y = ((x₀ * 0.81575) * x₀) + 1.7095
9           1.675e-01  9.751e-01  y = (x₀ * x₀) - ((x₂ * x₂) + -2.4832)
11          1.263e-01  1.413e-01  y = (x₀ * x₀) + ((x₂ * (x₂ * -0.83902)) + 2.3284)
13          9.496e-02  1.425e-01  y = (x₀ * x₀) + ((x₁ * -0.28852) - ((x₂ * x₂) + -2.4895))
15          5.064e-02  3.144e-01  y = ((x₂ * x₂) * -0.83302) + (((x₁ / -3.3931) - -2.3292) +...
                                       (x₀ * x₀))
17          4.483e-02  6.095e-02  y = ((x₁ / (x₂ + -3.4459)) + ((x₂ * x₂) * -0.85232)) + ((x...
                                      ₀ * x₀) - -2.3416)
19          4.204e-02  3.209e-02  y = ((x₂ * x₂) * -0.87321) + (((x₁ / ((x₂ + -2.9076) * 1.3...
                                      731))

[I 2025-04-27 11:06:15,938] Trial 1 finished with value: 0.011345767261082029 and parameters: {'population_size': 362, 'niterations': 91, 'parsimony': 0.04964004847139617, 'optimizer_nrestarts': 6}. Best is trial 1 with value: 0.011345767261082029.
[ Info: Started!


  - outputs\20250427_110405_QYohNr\hall_of_fame.csv

Expressions evaluated per second: 1.890e+05
Progress: 461 / 527 total iterations (87.476%)
════════════════════════════════════════════════════════════════════════════════════════════════════
───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           2.521e+00  1.594e+01  y = 2.5392
5           1.246e+00  1.761e-01  y = (x₀ * x₀) + 1.5222
7           1.178e+00  2.828e-02  y = ((x₀ * x₀) + 2.0957) * 0.81572
9           1.675e-01  9.751e-01  y = ((x₀ * x₀) + 2.4832) - (x₂ * x₂)
11          1.263e-01  1.413e-01  y = ((2.7752 - (x₂ * x₂)) * 0.83901) + (x₀ * x₀)
13          9.496e-02  1.425e-01  y = (((x₀ * x₀) + (x₁ * -0.28852)) - -2.4895) - (x₂ * x₂)
15          6.901e-02  1.596e-01  y = (x₀ * x₀) + ((((x₁ * -0.31367) - -2.7145) - (x₂ * x₂))...
                                       * 0.91852)
17          5.044e-02  1.567e-01  y = (x₀ * x₀) +

[ Info: Final population:
[ Info: Results saved to:


───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           2.521e+00  1.594e+01  y = 2.5392
5           1.246e+00  1.761e-01  y = (x₀ * x₀) + 1.5222
7           1.178e+00  2.828e-02  y = ((x₀ * x₀) + 2.0957) * 0.81572
9           1.675e-01  9.751e-01  y = ((x₀ * x₀) + 2.4832) - (x₂ * x₂)
11          1.263e-01  1.413e-01  y = ((2.7752 - (x₂ * x₂)) * 0.83901) + (x₀ * x₀)
13          9.496e-02  1.425e-01  y = (((x₀ * x₀) + (x₁ * -0.28852)) - -2.4895) - (x₂ * x₂)
15          5.064e-02  3.144e-01  y = (x₀ * x₀) + ((((x₁ * -0.35372) - -2.7961) - (x₂ * x₂))...
                                       * 0.83302)
17          5.032e-02  3.182e-03  y = (x₀ * x₀) + ((((x₁ * -0.35512) - -2.7967) - ((x₂ + 0.0...
                                      21987) * x₂)) * 0.83294)
19          5.016e-02  1.567e-03  y = (((((x₀ * x₀) + 2.8324) - ((x₂ * x₂) * 0.84247)) / 1.0...
                                      1

[I 2025-04-27 11:06:25,414] Trial 2 finished with value: 0.02042033920711284 and parameters: {'population_size': 57, 'niterations': 17, 'parsimony': 0.015140863114284343, 'optimizer_nrestarts': 8}. Best is trial 1 with value: 0.011345767261082029.
[ Info: Started!



Expressions evaluated per second: 1.860e+05
Progress: 68 / 1023 total iterations (6.647%)
════════════════════════════════════════════════════════════════════════════════════════════════════
───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           2.521e+00  1.594e+01  y = 2.5392
5           1.246e+00  1.761e-01  y = (x₀ * x₀) + 1.5219
7           1.178e+00  2.828e-02  y = ((x₀ * x₀) / 1.226) + 1.7096
9           1.675e-01  9.751e-01  y = (x₀ * x₀) + (2.4832 - (x₂ * x₂))
11          1.263e-01  1.413e-01  y = ((x₀ * x₀) - ((x₂ * 0.83901) * x₂)) + 2.3284
13          1.235e-01  1.119e-02  y = (x₀ * (x₀ - -0.052607)) - (((x₂ * x₂) / 1.1959) - 2.32...
                                      86)
15          5.437e-02  4.101e-01  y = ((x₀ * x₀) - -2.3627) - ((x₂ * (x₂ / 1.1961)) - (x₁ / ...
                                      -2.8421))
────────────────────────────────────────────────────────────

[ Info: Final population:
[ Info: Results saved to:
[I 2025-04-27 11:07:30,069] Trial 3 finished with value: 0.00015436753875577814 and parameters: {'population_size': 437, 'niterations': 33, 'parsimony': 0.018866426742401243, 'optimizer_nrestarts': 3}. Best is trial 3 with value: 0.00015436753875577814.
[ Info: Started!



Expressions evaluated per second: 1.870e+05
Progress: 57 / 3038 total iterations (1.876%)
════════════════════════════════════════════════════════════════════════════════════════════════════
───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           2.521e+00  1.594e+01  y = 2.5392
5           1.246e+00  1.761e-01  y = (x₀ * x₀) + 1.5221
7           1.178e+00  2.828e-02  y = ((x₀ * x₀) * 0.81577) + 1.7095
9           1.675e-01  9.751e-01  y = ((x₀ * x₀) - (x₂ * x₂)) + 2.4832
11          1.345e-01  1.097e-01  y = (((x₂ / -1.1588) * x₂) + (x₀ * x₀)) + 2.437
13          1.263e-01  3.159e-02  y = (x₂ * ((x₂ / -1.1919) - 0.0025722)) + ((x₀ * x₀) + 2.3...
                                      284)
15          1.162e-01  4.151e-02  y = 0.666 + ((x₀ * x₀) + (((x₁ * -0.15686) - -1.899) - (x₂...
                                       * x₂)))
17          5.064e-02  4.153e-01  y = (((x₂ * (x₂ / -0.9280

[ Info: Final population:
[ Info: Results saved to:


───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           2.521e+00  1.594e+01  y = 2.5392
5           1.246e+00  1.761e-01  y = (x₀ * x₀) + 1.5221
7           1.178e+00  2.828e-02  y = ((x₀ * x₀) + 2.0961) * 0.81564
9           1.675e-01  9.751e-01  y = (x₀ * x₀) + (2.4832 - (x₂ * x₂))
11          1.263e-01  1.413e-01  y = ((x₀ * x₀) - -2.3284) - ((x₂ * 0.83902) * x₂)
13          9.496e-02  1.425e-01  y = ((x₁ / -3.466) - ((x₂ - x₀) * (x₀ + x₂))) - -2.4895
15          5.064e-02  3.144e-01  y = (((x₀ * x₀) + (x₁ * -0.29473)) + 2.3292) + (x₂ * (x₂ *...
                                       -0.83302))
17          4.837e-02  2.296e-02  y = ((x₂ * (x₂ * -0.83054)) + (((x₀ - -0.047415) * x₀) + (...
                                      x₁ * -0.29371))) + 2.3293
19          4.502e-02  3.579e-02  y = (x₁ * -0.29189) + ((((x₂ * x₂) * (-0.83804 - (x₀ * -0....
                                      0

[I 2025-04-27 11:11:33,424] Trial 4 finished with value: 1.142247087914398e-07 and parameters: {'population_size': 386, 'niterations': 98, 'parsimony': 0.004604403381777251, 'optimizer_nrestarts': 9}. Best is trial 4 with value: 1.142247087914398e-07.
[ Info: Started!



Expressions evaluated per second: 1.310e+05
Progress: 60 / 1736 total iterations (3.456%)
════════════════════════════════════════════════════════════════════════════════════════════════════
───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           2.521e+00  1.594e+01  y = 2.5392
5           1.246e+00  1.761e-01  y = (x₀ * x₀) + 1.5221
7           1.178e+00  2.828e-02  y = (x₀ * (x₀ / 1.2259)) - -1.7095
9           1.154e+00  1.024e-02  y = ((x₁ * -0.32565) + 1.5293) + (x₀ * x₀)
11          1.263e-01  1.106e+00  y = 2.3284 - (((x₂ * 0.83902) * x₂) - (x₀ * x₀))
13          1.235e-01  1.119e-02  y = 2.3286 - (((x₂ / 1.1958) * x₂) - ((x₀ + 0.052629) * x₀...
                                      ))
17          5.017e-02  2.252e-01  y = ((((x₂ * x₂) * 0.84046) - (x₀ * x₀)) * -0.98471) - ((x...
                                      ₁ * 0.29372) + -2.3403)
───────────────────────────────────────

[ Info: Final population:
[ Info: Results saved to:
[I 2025-04-27 11:13:06,460] Trial 5 finished with value: 0.0001543608617401771 and parameters: {'population_size': 274, 'niterations': 56, 'parsimony': 0.0013126993641312664, 'optimizer_nrestarts': 9}. Best is trial 4 with value: 1.142247087914398e-07.
[ Info: Started!



Expressions evaluated per second: 1.460e+05
Progress: 88 / 2914 total iterations (3.020%)
════════════════════════════════════════════════════════════════════════════════════════════════════
───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           2.521e+00  1.594e+01  y = 2.5392
5           1.246e+00  1.761e-01  y = (x₀ * x₀) + 1.5221
7           1.178e+00  2.828e-02  y = (x₀ * (x₀ * 0.81575)) + 1.7095
9           1.675e-01  9.751e-01  y = (x₀ * x₀) + (2.4832 - (x₂ * x₂))
11          1.263e-01  1.413e-01  y = ((x₀ * x₀) - (x₂ * (x₂ / 1.1919))) + 2.3284
13          1.235e-01  1.119e-02  y = ((x₀ * (x₀ + 0.052603)) - (x₂ * (x₂ / 1.1958))) - -2.3...
                                      286
15          1.235e-01  -0.000e+00  y = (x₀ * (x₀ + 0.052569)) + (((0.24397 - (x₂ * x₂)) / 1....
                                       1958) - -2.1245)
17          5.064e-02  4.456e-01  y = (((x₀ * x₀) +

[ Info: Final population:
[ Info: Results saved to:
[I 2025-04-27 11:15:03,106] Trial 6 finished with value: 0.00015436025648980035 and parameters: {'population_size': 237, 'niterations': 94, 'parsimony': 0.0027556508691657182, 'optimizer_nrestarts': 4}. Best is trial 4 with value: 1.142247087914398e-07.


Best MSE: 1.142247087914398e-07
Best params: {'population_size': 386, 'niterations': 98, 'parsimony': 0.004604403381777251, 'optimizer_nrestarts': 9}


In [3]:
study.best_params

{'population_size': 407,
 'niterations': 77,
 'parsimony': 0.08919810761494844,
 'optimizer_nrestarts': 6}