
Commit

Merge ff874cd into ccbe668
MilesCranmer committed Mar 22, 2023
2 parents ccbe668 + ff874cd commit 47d11a1
Showing 4 changed files with 51 additions and 9 deletions.
1 change: 1 addition & 0 deletions docs/param_groupings.yml
@@ -11,6 +11,7 @@
- ncyclesperiteration
- The Objective:
- loss
- full_objective
- model_selection
- Working with Complexities:
- parsimony
36 changes: 32 additions & 4 deletions pysr/sr.py
@@ -320,9 +320,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
argument is constrained.
Default is `None`.
loss : str
String of Julia code specifying the loss function. Can either
be a loss from LossFunctions.jl, or your own loss written as a
function. Examples of custom written losses include:
String of Julia code specifying an elementwise loss function.
Can either be a loss from LossFunctions.jl, or your own loss
written as a function. Examples of custom written losses include:
`myloss(x, y) = abs(x-y)` for non-weighted, or
`myloss(x, y, w) = w*abs(x-y)` for weighted.
The included losses include:
@@ -335,6 +335,26 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
`ModifiedHuberLoss()`, `L2MarginLoss()`, `ExpLoss()`,
`SigmoidLoss()`, `DWDMarginLoss(q)`.
Default is `"L2DistLoss()"`.
full_objective : str
Alternatively, you can specify the full objective function as
a snippet of Julia code, including any custom evaluation
(such as symbolic manipulations beforehand) and any loss
function or regularization. The default `full_objective`
used in SymbolicRegression.jl is roughly equivalent to:
```julia
function eval_loss(tree, dataset::Dataset{T}, options)::T where T
    prediction, flag = eval_tree_array(tree, dataset.X, options)
    if !flag
        return T(Inf)
    end
    sum((prediction .- dataset.y) .^ 2) / dataset.n
end
```
where the loss in this example is the mean squared error.
You may pass any function with the same arguments (the name
of the function does not matter). Here, both `prediction` and
`dataset.y` are 1D arrays of length `dataset.n`.
Default is `None`.
complexity_of_operators : dict[str, float]
If you would like to use a complexity other than 1 for an
operator, specify the complexity here. For example,
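To make the two options documented above concrete, here is a minimal usage sketch (not part of this commit). The parameter names `loss` and `full_objective` come from the diff; the data and the `my_loss` / `my_objective` snippets are hypothetical illustrations, with the objective mirroring the default shown in the docstring above.

```python
import numpy as np
from pysr import PySRRegressor

X = np.random.randn(100, 2)
y = X[:, 0] ** 2 + np.cos(X[:, 1])

# Option 1: a custom elementwise loss, given as a Julia snippet.
model_elementwise = PySRRegressor(
    loss="my_loss(x, y) = abs(x - y)",  # applied per sample, then aggregated
)

# Option 2: a full objective, controlling evaluation and aggregation directly;
# this hypothetical one computes the mean absolute error over the dataset.
model_full = PySRRegressor(
    full_objective="""
    function my_objective(tree, dataset::Dataset{T}, options)::T where T
        prediction, flag = eval_tree_array(tree, dataset.X, options)
        !flag && return T(Inf)
        return sum(abs.(prediction .- dataset.y)) / dataset.n
    end
    """,
)

# Either model could then be fit as usual, e.g. model_full.fit(X, y).
```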
@@ -678,7 +698,8 @@ def __init__(
timeout_in_seconds=None,
constraints=None,
nested_constraints=None,
loss="L2DistLoss()",
loss=None,
full_objective=None,
complexity_of_operators=None,
complexity_of_constants=1,
complexity_of_variables=1,
@@ -765,6 +786,7 @@ def __init__(
self.early_stop_condition = early_stop_condition
# - Loss parameters
self.loss = loss
self.full_objective = full_objective
self.complexity_of_operators = complexity_of_operators
self.complexity_of_constants = complexity_of_constants
self.complexity_of_variables = complexity_of_variables
@@ -1219,6 +1241,9 @@ def _validate_and_set_init_params(self):
"to True and `procs` to 0 will result in non-deterministic searches. "
)

if self.loss is not None and self.full_objective is not None:
raise ValueError("You cannot set both `loss` and `objective`.")

# NotImplementedError - Values that could be supported at a later time
if self.optimizer_algorithm not in VALID_OPTIMIZER_ALGORITHMS:
raise NotImplementedError(
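For context (not part of the commit), a hedged sketch of the configuration the new mutual-exclusion check rejects; the error is raised during parameter validation when `.fit` is called, and the snippets below are hypothetical placeholders.

```python
from pysr import PySRRegressor

# Hypothetical misuse: both an elementwise loss and a full objective are set.
model = PySRRegressor(
    loss="my_loss(x, y) = abs(x - y)",
    full_objective="my_objective(tree, dataset, options) = zero(eltype(dataset.y))",
)
# With some training data X, y:
# model.fit(X, y)  # -> ValueError: You cannot set both `loss` and `full_objective`.
```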
@@ -1548,6 +1573,8 @@ def _run(self, X, y, mutated_params, weights, seed):
complexity_of_operators = Main.eval(complexity_of_operators_str)

custom_loss = Main.eval(self.loss)
custom_full_objective = Main.eval(self.full_objective)

early_stop_condition = Main.eval(
str(self.early_stop_condition) if self.early_stop_condition else None
)
@@ -1576,6 +1603,7 @@ def _run(self, X, y, mutated_params, weights, seed):
complexity_of_variables=self.complexity_of_variables,
nested_constraints=nested_constraints,
elementwise_loss=custom_loss,
loss_function=custom_full_objective,
maxsize=int(self.maxsize),
output_file=_escape_filename(self.equation_file_),
npopulations=int(self.populations),
21 changes: 17 additions & 4 deletions pysr/test/test.py
@@ -72,26 +72,39 @@ def test_linear_relation_weighted(self):
        print(model.equations_)
        self.assertLessEqual(model.get_best()["loss"], 1e-4)

    def test_multiprocessing_turbo(self):
    def test_multiprocessing_turbo_custom_objective(self):
        rstate = np.random.RandomState(0)
        y = self.X[:, 0]
        y += rstate.randn(*y.shape) * 1e-4
        model = PySRRegressor(
            **self.default_test_kwargs,
            # Turbo needs to work with unsafe operators:
            unary_operators=["sqrt"],
            procs=2,
            multithreading=False,
            turbo=True,
            early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 1",
            early_stop_condition="stop_if(loss, complexity) = loss < 1e-10 && complexity == 1",
            full_objective="""
            function my_objective(tree::Node{T}, dataset::Dataset{T}, options::Options) where T
                prediction, flag = eval_tree_array(tree, dataset.X, options)
                !flag && return T(Inf)
                abs3(x) = abs(x) ^ 3
                return sum(abs3, prediction .- dataset.y) / length(prediction)
            end
            """,
        )
        model.fit(self.X, y)
        print(model.equations_)
        self.assertLessEqual(model.equations_.iloc[-1]["loss"], 1e-4)
        best_loss = model.equations_.iloc[-1]["loss"]
        self.assertLessEqual(best_loss, 1e-10)
        self.assertGreaterEqual(best_loss, 0.0)

    def test_high_precision_search(self):
    def test_high_precision_search_custom_loss(self):
        y = 1.23456789 * self.X[:, 0]
        model = PySRRegressor(
            **self.default_test_kwargs,
            early_stop_condition="stop_if(loss, complexity) = loss < 1e-4 && complexity == 3",
            loss="my_loss(prediction, target) = (prediction - target)^2",
            precision=64,
            parsimony=0.01,
            warm_start=True,
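As a side note (not part of the commit), the custom objective in the test above amounts to a mean cubed absolute error. A small numpy sketch of the same computation, for reference; the function name is hypothetical:

```python
import numpy as np

def mean_abs_cubed_error(prediction, target):
    # Same aggregation as the Julia `my_objective` in the test:
    # sum(|prediction - target|^3) / length(prediction)
    return np.sum(np.abs(prediction - target) ** 3) / len(prediction)
```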
2 changes: 1 addition & 1 deletion pysr/version.py
@@ -1,2 +1,2 @@
__version__ = "0.12.0"
__version__ = "0.12.1"
__symbolic_regression_jl_version__ = "0.16.1"
