
Commit

Merge ff874cd into ccbe668
MilesCranmer committed Mar 22, 2023
2 parents ccbe668 + ff874cd commit 47d11a1
Showing 4 changed files with 51 additions and 9 deletions.
1 change: 1 addition & 0 deletions docs/param_groupings.yml
@@ -11,6 +11,7 @@
- ncyclesperiteration
- The Objective:
- loss
- full_objective
- model_selection
- Working with Complexities:
- parsimony
36 changes: 32 additions & 4 deletions pysr/sr.py
@@ -320,9 +320,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
argument is constrained.
Default is `None`.
loss : str
String of Julia code specifying the loss function. Can either
be a loss from LossFunctions.jl, or your own loss written as a
function. Examples of custom written losses include:
String of Julia code specifying an elementwise loss function.
Can either be a loss from LossFunctions.jl, or your own loss
written as a function. Examples of custom written losses include:
`myloss(x, y) = abs(x-y)` for non-weighted, or
`myloss(x, y, w) = w*abs(x-y)` for weighted.
The included losses include:
@@ -335,6 +335,26 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
`ModifiedHuberLoss()`, `L2MarginLoss()`, `ExpLoss()`,
`SigmoidLoss()`, `DWDMarginLoss(q)`.
Default is `"L2DistLoss()"`.
full_objective : str
Alternatively, you can specify the full objective function as
a snippet of Julia code, including any custom evaluation
(such as symbolic manipulations beforehand) and any loss
function or regularization. The default `full_objective`
used in SymbolicRegression.jl is roughly equivalent to:
```julia
function eval_loss(tree, dataset::Dataset{T}, options)::T where T
    prediction, flag = eval_tree_array(tree, dataset.X, options)
    if !flag
        return T(Inf)
    end
    sum((prediction .- dataset.y) .^ 2) / dataset.n
end
```
where the loss in this example is the mean squared error.
You may pass any function with the same arguments (the name
of the function does not matter). Here, both `prediction` and
`dataset.y` are 1D arrays of length `dataset.n`.
Default is `None`.
complexity_of_operators : dict[str, float]
If you would like to use a complexity other than 1 for an
operator, specify the complexity here. For example,
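To make the two options documented above concrete, here is a minimal usage sketch (not part of this commit). The parameter names `loss` and `full_objective` come from the diff; the data and the `my_loss` / `my_objective` snippets are hypothetical illustrations, with the objective mirroring the default shown in the docstring above.

```python
import numpy as np
from pysr import PySRRegressor

X = np.random.randn(100, 2)
y = X[:, 0] ** 2 + np.cos(X[:, 1])

# Option 1: a custom elementwise loss, given as a Julia snippet.
model_elementwise = PySRRegressor(
    loss="my_loss(x, y) = abs(x - y)",  # applied per sample, then aggregated
)

# Option 2: a full objective, controlling evaluation and aggregation directly;
# this hypothetical one computes the mean absolute error over the dataset.
model_full = PySRRegressor(
    full_objective="""
    function my_objective(tree, dataset::Dataset{T}, options)::T where T
        prediction, flag = eval_tree_array(tree, dataset.X, options)
        !flag && return T(Inf)
        return sum(abs.(prediction .- dataset.y)) / dataset.n
    end
    """,
)

# Either model could then be fit as usual, e.g. model_full.fit(X, y).
```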
@@ -678,7 +698,8 @@ def __init__(
timeout_in_seconds=None,
constraints=None,
nested_constraints=None,
loss="L2DistLoss()",
loss=None,
full_objective=None,
complexity_of_operators=None,
complexity_of_constants=1,
complexity_of_variables=1,
@@ -765,6 +786,7 @@ def __init__(
self.early_stop_condition = early_stop_condition
# - Loss parameters
self.loss = loss
self.full_objective = full_objective
self.complexity_of_operators = complexity_of_operators
self.complexity_of_constants = complexity_of_constants
self.complexity_of_variables = complexity_of_variables
@@ -1219,6 +1241,9 @@ def _validate_and_set_init_params(self):
"to True and `procs` to 0 will result in non-deterministic searches. "
)

if self.loss is not None and self.full_objective is not None:
raise ValueError("You cannot set both `loss` and `objective`.")

# NotImplementedError - Values that could be supported at a later time
if self.optimizer_algorithm not in VALID_OPTIMIZER_ALGORITHMS:
raise NotImplementedError(
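For context (not part of the commit), a hedged sketch of the configuration the new mutual-exclusion check rejects; the error is raised during parameter validation when `.fit` is called, and the snippets below are hypothetical placeholders.

```python
from pysr import PySRRegressor

# Hypothetical misuse: both an elementwise loss and a full objective are set.
model = PySRRegressor(
    loss="my_loss(x, y) = abs(x - y)",
    full_objective="my_objective(tree, dataset, options) = zero(eltype(dataset.y))",
)
# With some training data X, y:
# model.fit(X, y)  # -> ValueError: You cannot set both `loss` and `full_objective`.
```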
@@ -1548,6 +1573,8 @@ def _run(self, X, y, mutated_params, weights, seed):
complexity_of_operators = Main.eval(complexity_of_operators_str)

custom_loss = Main.eval(self.loss)
custom_full_objective = Main.eval(self.full_objective)

early_stop_condition = Main.eval(
str(self.early_stop_condition) if self.early_stop_condition else None
)
@@ -1576,6 +1603,7 @@ def _run(self, X, y, mutated_params, weights, seed):
complexity_of_variables=self.complexity_of_variables,
nested_constraints=nested_constraints,
elementwise_loss=custom_loss,
loss_function=custom_full_objective,
maxsize=int(self.maxsize),
output_file=_escape_filename(self.equation_file_),
npopulations=int(self.populations),
21 changes: 17 additions & 4 deletions pysr/test/test.py
@@ -72,26 +72,39 @@ def test_linear_relation_weighted(self):
        print(model.equations_)
        self.assertLessEqual(model.get_best()["loss"], 1e-4)

    def test_multiprocessing_turbo(self):
    def test_multiprocessing_turbo_custom_objective(self):
        rstate = np.random.RandomState(0)
        y = self.X[:, 0]
        y += rstate.randn(*y.shape) * 1e-4
        model = PySRRegressor(
            **self.default_test_kwargs,
            # Turbo needs to work with unsafe operators:
            unary_operators=["sqrt"],
            procs=2,
            multithreading=False,
            turbo=True,
            early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
            early_stop_condition="stop_if(loss, complexity) = loss < 1e-10 && complexity == 1",
            full_objective="""
            function my_objective(tree::Node{T}, dataset::Dataset{T}, options::Options) where T
                prediction, flag = eval_tree_array(tree, dataset.X, options)
                !flag && return T(Inf)
                abs3(x) = abs(x) ^ 3
                return sum(abs3, prediction .- dataset.y) / length(prediction)
            end
            """,
        )
        model.fit(self.X, y)
        print(model.equations_)
        self.assertLessEqual(model.equations_.iloc[-1]["loss"], 1e-4)
        best_loss = model.equations_.iloc[-1]["loss"]
        self.assertLessEqual(best_loss, 1e-10)
        self.assertGreaterEqual(best_loss, 0.0)

    def test_high_precision_search(self):
    def test_high_precision_search_custom_loss(self):
        y = 1.23456789 * self.X[:, 0]
        model = PySRRegressor(
            **self.default_test_kwargs,
            early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 3",
            loss="my_loss(prediction, target) = (prediction - target)^2",
            precision=64,
            parsimony=0.01,
            warm_start=True,
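As a side note (not part of the commit), the custom objective in the test above amounts to a mean cubed absolute error. A small numpy sketch of the same computation, for reference; the function name is hypothetical:

```python
import numpy as np

def mean_abs_cubed_error(prediction, target):
    # Same aggregation as the Julia `my_objective` in the test:
    # sum(|prediction - target|^3) / length(prediction)
    return np.sum(np.abs(prediction - target) ** 3) / len(prediction)
```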
2 changes: 1 addition & 1 deletion pysr/version.py
@@ -1,2 +1,2 @@
__version__ = "0.12.0"
__version__ = "0.12.1"
__symbolic_regression_jl_version__ = "0.16.1"
