### Symbolic Regression with PySR


In [7]:
import pandas as pd
import numpy as np
import sys
import os

# Make the parent of the current working directory importable so that the
# `src` package (imported just below) can be resolved.
# NOTE(review): this assumes the notebook is run from a direct sub-folder of
# the repository root — confirm the working directory before running.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
# Guard the append so re-running this cell does not pile up duplicate entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
from src.sampling.sampler import get_polarisation_curve_samples, build_fixed_parameters, sample_parameters, PARAMETER_RANGES

In [2]:
# Load the pickled dataset used by the rest of the notebook.
# Later cells call `data.explode(['ifc', 'Ucell'])`, so the object must be a
# DataFrame whose 'ifc' and 'Ucell' columns hold list-like values.
# NOTE(review): unpickling can execute arbitrary code — only load files that
# were produced by this project / a trusted source.
data = pd.read_pickle('../data/raw/N10_levels4_seed42.pkl')

In [4]:
# Number of exploded rows kept for a quick run.
# (The previous comment said 1000, but the slice has always kept 100 rows.)
N_ROWS = 100

# Expand the list-like 'ifc' / 'Ucell' entries so that every element gets its
# own row (the other columns and the index label are repeated for each
# element), then keep only the first N_ROWS rows by position.
# Built in a single expression from `data`, so re-running the cell always
# yields the same frame.
# NOTE(review): taking the *first* rows keeps only the leading rows of `data`
# after expansion, so few distinct parameter combinations may be present —
# confirm this is intended for the quick run.
exploaded_df = data.explode(['ifc', 'Ucell']).iloc[:N_ROWS]

In [8]:
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from pysr import PySRRegressor

# --- Cross-validation settings -------------------------------------------
N_SPLITS = 5   # number of cross-validation folds
CV_SEED = 42   # seed for the fold shuffling (PySR itself is not seeded here)

# Input columns: every key of PARAMETER_RANGES plus the 'ifc' column.
feature_names = [*PARAMETER_RANGES.keys(), 'ifc']
# NOTE(review): `vars` shadows the builtin of the same name; it is kept only
# as an alias in case later cells still read it. Prefer `feature_names`.
vars = feature_names

# 'ifc' and 'Ucell' come out of DataFrame.explode as object-dtype columns, so
# cast explicitly to float instead of relying on downstream conversion.
X = exploaded_df[feature_names].to_numpy(dtype=float)
y = exploaded_df['Ucell'].to_numpy(dtype=float)

# PySR settings are identical for every fold, so define them once.
# `elementwise_loss` and `ncycles_per_iteration` are the current names of the
# options formerly spelled `loss` and `ncyclesperiteration`.
# NOTE(review): no `random_state` is passed to PySR, so the discovered
# equations (and the reported MSE) can differ between runs.
pysr_settings = dict(
    niterations=100,
    binary_operators=["+", "-", "*", "/"],
    unary_operators=["exp", "log", "sqrt", "square", "cube"],
    elementwise_loss="loss(x, y) = (x - y)^2",
    model_selection="best",
    maxsize=20,
    maxdepth=5,
    ncycles_per_iteration=30,
    verbosity=1,
    output_directory="models/pysr_equations",
)

# NOTE(review): rows produced from the same row of `data` share all parameter
# values (explode repeats them), and a shuffled row-wise KFold can put such
# rows in both the training and the test fold. If an estimate for unseen
# parameter sets is wanted, consider a grouped split keyed on
# `exploaded_df.index` (e.g. sklearn's GroupKFold) — confirm the intent.
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=CV_SEED)
mse_scores = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # A fresh regressor per fold so no state leaks between folds.
    model = PySRRegressor(**pysr_settings)
    model.fit(X_train, y_train)

    # Score the equation picked by `model_selection` on the held-out fold.
    y_pred = model.predict(X_test)
    mse_scores.append(mean_squared_error(y_test, y_pred))

print(f"Average MSE: {np.mean(mse_scores):.4f} ± {np.std(mse_scores):.4f}")


[juliapkg] Found dependencies: c:\users\dejvi\documents\github\sensitivity-analysis-and-surrogate-modeling-of-pem-fuel-cells\alphapem_env\lib\site-packages\pysr\juliapkg.json
[juliapkg] Found dependencies: c:\users\dejvi\documents\github\sensitivity-analysis-and-surrogate-modeling-of-pem-fuel-cells\alphapem_env\lib\site-packages\juliacall\juliapkg.json
[juliapkg] Found dependencies: c:\users\dejvi\documents\github\sensitivity-analysis-and-surrogate-modeling-of-pem-fuel-cells\alphapem_env\lib\site-packages\juliapkg\juliapkg.json
[juliapkg] Locating Julia =1.10.0, ^1.10.3
[juliapkg] Querying Julia versions from https://julialang-s3.julialang.org/bin/versions.json
[juliapkg]   If you use juliapkg in more than one environment, you are likely to
[juliapkg]   have Julia installed in multiple locations. It is recommended to
[juliapkg]   install JuliaUp (https://github.com/JuliaLang/juliaup) or Julia
[juliapkg]   (https://julialang.org/downloads) yourself.
[juliapkg] Downloading Julia from htt

Compiling Julia backend...
[ Info: Started!



Expressions evaluated per second: 1.550e+05
Progress: 2180 / 3100 total iterations (70.323%)
════════════════════════════════════════════════════════════════════════════════════════════════════
───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           9.795e+00  0.000e+00  y = 3.8124
3           3.926e+00  4.571e-01  y = x₁ * 2.6471e-05
4           3.181e+00  2.105e-01  y = square(x₂) * 1.5022e-10
5           8.840e-01  1.280e+00  y = (x₁ * 3.3905e-05) - x₁₇
7           3.228e-01  5.037e-01  y = (x₁₇ * -1.652) - (x₁ * -3.8752e-05)
9           2.230e-01  1.849e-01  y = (x₁₇ * -1.8217) - ((x₂ * -0.014384) / x₀)
11          1.806e-01  1.056e-01  y = (x₀ * (x₁₇ * -0.0053531)) - ((x₂ * -0.014345) / x₀)
13          1.604e-01  5.934e-02  y = (((x₀ * x₁₇) * -0.025856) - (x₂ * -4.2322e-05)) + (x₁₇...
                                       * 6.9733)
15          1.604e-01  5.960e-08  y = (x₀ * (x₁₇ *

[ Info: Final population:
[ Info: Results saved to:


  - models/pysr_equations\20250618_185137_nhZ8ed\hall_of_fame.csv

Expressions evaluated per second: 1.480e+05
Progress: 2355 / 3100 total iterations (75.968%)
════════════════════════════════════════════════════════════════════════════════════════════════════
───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           8.798e+00  0.000e+00  y = 3.759
3           3.745e+00  4.270e-01  y = x₁ * 2.5912e-05
5           9.143e-01  7.050e-01  y = (x₁ * 3.3801e-05) - x₁₇
7           3.142e-01  5.340e-01  y = (x₂ * 4.3191e-05) + (x₁₇ * -1.858)
9           3.110e-01  5.110e-03  y = (x₂ * 4.4109e-05) + ((x₁₇ * -1.8282) + -0.19731)
10          3.100e-01  3.472e-03  y = ((x₁₇ * -1.818) + (sqrt(x₂) * 0.035265)) - 7.0592
11          2.539e-01  1.996e-01  y = ((x₂ * 4.3409e-05) + (x₁₇ * (x₀ * -0.0054431))) - 0.06...
                                      6785
12          1.876e-01  3.024e-01  y = (sqrt(x₂) *

[ Info: Started!
[ Info: Final population:
[ Info: Results saved to:
[ Info: Started!



Expressions evaluated per second: 1.230e+05
Progress: 2666 / 3100 total iterations (86.000%)
════════════════════════════════════════════════════════════════════════════════════════════════════
───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           9.056e+00  0.000e+00  y = 3.9015
3           3.676e+00  4.508e-01  y = x₁ * 2.6352e-05
5           8.536e-01  7.301e-01  y = (x₁ * 3.4012e-05) - x₁₇
7           3.438e-01  4.547e-01  y = (x₁ * 3.8932e-05) - (x₁₇ / 0.60889)
9           3.203e-01  3.544e-02  y = ((x₁ * 3.6855e-05) - (x₁₇ / 0.57492)) + 0.52109
11          3.203e-01  2.384e-07  y = (x₁ * 3.6858e-05) - (((x₁₇ - x₁₆) - -0.70109) / 0.5750...
                                      3)
13          3.124e-01  1.248e-02  y = cube(log(x₁) + -10.58) - ((x₈ + 1.4502) * (x₁₇ - 2.006...
                                      2))
14          2.702e-01  1.450e-01  y = cube(log(x₁) + -10.582) - ((

[ Info: Final population:
[ Info: Results saved to:


  - models/pysr_equations\20250618_185203_U7QG4U\hall_of_fame.csv


[ Info: Started!



Expressions evaluated per second: 1.370e+05
Progress: 2473 / 3100 total iterations (79.774%)
════════════════════════════════════════════════════════════════════════════════════════════════════
───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           8.917e+00  0.000e+00  y = 3.7621
3           3.932e+00  4.094e-01  y = x₁ * 2.5886e-05
4           3.286e+00  1.795e-01  y = square(x₁ / 89473)
5           1.065e+00  1.126e+00  y = (x₁ * 3.3687e-05) - x₁₇
7           3.377e-01  5.744e-01  y = (x₂ * 4.3911e-05) - (x₁₇ + x₁₇)
9           2.840e-01  8.658e-02  y = (x₂ * 4.4196e-05) - (x₀ * (x₁₇ / x₁₁))
11          2.761e-01  1.411e-02  y = (x₂ * 4.3453e-05) - ((x₁₇ / x₁₁) * (x₀ + -14.717))
13          2.607e-01  2.861e-02  y = (x₂ * 4.5288e-05) - ((x₀ + -22.622) * ((x₁₇ / x₁₁) - -...
                                      0.0011956))
15          1.462e-01  2.894e-01  y = (16.984 - (x₀ * 0.047872

[ Info: Final population:
[ Info: Results saved to:
[ Info: Started!
[ Info: Final population:
[ Info: Results saved to:


  - models/pysr_equations\20250618_185210_Rkg4rc\hall_of_fame.csv

Expressions evaluated per second: 1.650e+05
Progress: 2314 / 3100 total iterations (74.645%)
════════════════════════════════════════════════════════════════════════════════════════════════════
───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           9.444e+00  0.000e+00  y = 3.9312
3           3.667e+00  4.730e-01  y = x₁ * 2.7529e-05
4           2.767e+00  2.815e-01  y = square(x₁ * 1.1598e-05)
5           9.125e-01  1.109e+00  y = (x₁ * 3.45e-05) - x₁₇
7           3.467e-01  4.839e-01  y = (x₂ * 4.3556e-05) - (x₁₇ * 1.8927)
9           3.386e-01  1.182e-02  y = ((x₂ * 2.319e-05) - x₁₇) * (2.1517 - x₈)
10          3.384e-01  6.831e-04  y = (sqrt(x₂) * 0.035614) + (-7.1477 - (x₁₇ * 1.8424))
11          3.351e-01  9.884e-03  y = (((x₂ * 2.4243e-05) - x₁₇) * (2.1127 - x₈)) - 0.21888
12          1.777e-01  6.341e-01  y = (sqr