From 18c1f9cc18ec32e1c5e5f1d61d6a21be8b231877 Mon Sep 17 00:00:00 2001
From: Miles Cranmer <miles.cranmer@gmail.com>
Date: Tue, 12 Jul 2022 20:12:14 -0400
Subject: [PATCH 01/40] Center formatting for README

---
 README.md | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 9d2e72e27..a3630781c 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,24 @@
 [//]: # (Logo:)
 
-<img src="https://raw.githubusercontent.com/MilesCranmer/PySR/master/docs/images/pysr_logo.svg" width="400" />
+<div align="center">
 
+<img src="https://raw.githubusercontent.com/MilesCranmer/PySR/master/docs/images/pysr_logo.svg" width="200" />
+    
 # PySR: High-Performance Symbolic Regression in Python
 
+</div>
+
+
 PySR is built on an extremely optimized pure-Julia backend, and uses regularized evolution, simulated annealing, and gradient-free optimization to search for equations that fit your data.
 
+<div align="center">
+
 | **Docs** | **colab** | **pip** | **conda** | **Stats** |
 |---|---|---|---|---|
 |[![Documentation](https://github.com/MilesCranmer/PySR/actions/workflows/docs.yml/badge.svg)](https://astroautomata.com/PySR/)|[![Colab](https://img.shields.io/badge/colab-notebook-yellow)](https://colab.research.google.com/github/MilesCranmer/PySR/blob/master/examples/pysr_demo.ipynb)|[![PyPI version](https://badge.fury.io/py/pysr.svg)](https://badge.fury.io/py/pysr)|[![Conda Version](https://img.shields.io/conda/vn/conda-forge/pysr.svg)](https://anaconda.org/conda-forge/pysr)|[![Downloads](https://pepy.tech/badge/pysr)](https://badge.fury.io/py/pysr)|
 
+</div>
+
 
 (pronounced like *py* as in python, and then *sur* as in surface)
 
@@ -17,7 +26,10 @@ If you find PySR useful, please cite it using the citation information given in
 If you've finished a project with PySR, please submit a PR to showcase your work on the [Research Showcase page](https://astroautomata.com/PySR/#/papers)!
 
 
-### Test status:
+<div align="center">
+
+### Test status
+
 | **Linux** | **Windows** | **macOS (intel)** |
 |---|---|---|
 |[![Linux](https://github.com/MilesCranmer/PySR/actions/workflows/CI.yml/badge.svg)](https://github.com/MilesCranmer/PySR/actions/workflows/CI.yml)|[![Windows](https://github.com/MilesCranmer/PySR/actions/workflows/CI_Windows.yml/badge.svg)](https://github.com/MilesCranmer/PySR/actions/workflows/CI_Windows.yml)|[![macOS](https://github.com/MilesCranmer/PySR/actions/workflows/CI_mac.yml/badge.svg)](https://github.com/MilesCranmer/PySR/actions/workflows/CI_mac.yml)|
@@ -25,6 +37,8 @@ If you've finished a project with PySR, please submit a PR to showcase your work
 |[![Docker](https://github.com/MilesCranmer/PySR/actions/workflows/CI_docker.yml/badge.svg)](https://github.com/MilesCranmer/PySR/actions/workflows/CI_docker.yml)|[![conda-forge](https://github.com/MilesCranmer/PySR/actions/workflows/CI_conda_forge.yml/badge.svg)](https://github.com/MilesCranmer/PySR/actions/workflows/CI_conda_forge.yml)|[![Coverage Status](https://coveralls.io/repos/github/MilesCranmer/PySR/badge.svg?branch=master&service=github)](https://coveralls.io/github/MilesCranmer/PySR)|
 
 
+</div>
+
 Check out [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl) for
 the pure-Julia backend of this package.
 
@@ -58,10 +72,14 @@ python interface.
 
 # Installation
 
+<div align="center">
+
 | pip (macOS, Linux, Windows) | conda (macOS - only Intel, Linux) |
 |---|---|
 | 1. Install Julia manually (see [downloads](https://julialang.org/downloads/))<br>2. `pip install pysr`<br>3. `python -c 'import pysr; pysr.install()'` | 1. `conda install -c conda-forge pysr`<br>2. `python -c 'import pysr; pysr.install()'`|
 
+</div>
+
 This last step will install and update the required Julia packages, including
 `PyCall.jl`.
 

From ba3d14088548e5f3a747e090ffec6df4bb2cf602 Mon Sep 17 00:00:00 2001
From: Saurav Maheshkar <sauravvmaheshkar@gmail.com>
Date: Mon, 18 Jul 2022 01:28:30 +0530
Subject: [PATCH 02/40] feat(docker): Add opencontainers image-spec to
 `Dockerfile`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR makes a few tiny changes to improve the overall quality of the Docker image 🐳. For reference, more annotations can be found [here](https://github.com/opencontainers/image-spec/blob/main/annotations.md)
---
 Dockerfile | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index e3382f3bd..216b2885a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,6 +6,12 @@ ARG VERSION=latest
 
 FROM --platform=$ARCH julia:$VERSION
 
+# metainformation
+LABEL org.opencontainers.image.version = "0.9.0"
+LABEL org.opencontainers.image.authors = "Miles Cranmer"
+LABEL org.opencontainers.image.source = "https://github.com/MilesCranmer/PySR"
+LABEL org.opencontainers.image.licenses = "Apache License 2.0"
+
 # Need to use ARG after FROM, otherwise it won't get passed through.
 ARG PYVERSION=3.9.10
 

From 36e49b9581dc6c792e84cb3a1171b0d52fd38864 Mon Sep 17 00:00:00 2001
From: Saurav Maheshkar <sauravvmaheshkar@gmail.com>
Date: Mon, 18 Jul 2022 02:09:15 +0530
Subject: [PATCH 03/40] feat: add `ARG` for package version

---
 Dockerfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 216b2885a..e38351b41 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,11 +3,12 @@
 
 ARG ARCH=linux/amd64
 ARG VERSION=latest
+ARG PKGVERSION=0.9.0
 
 FROM --platform=$ARCH julia:$VERSION
 
 # metainformation
-LABEL org.opencontainers.image.version = "0.9.0"
+LABEL org.opencontainers.image.version = $PKGVERSION
 LABEL org.opencontainers.image.authors = "Miles Cranmer"
 LABEL org.opencontainers.image.source = "https://github.com/MilesCranmer/PySR"
 LABEL org.opencontainers.image.licenses = "Apache License 2.0"

From 7fa5f44ea9c0d3be65b9022b24e0b07b47e121a7 Mon Sep 17 00:00:00 2001
From: Miles Cranmer <miles.cranmer@gmail.com>
Date: Sun, 17 Jul 2022 17:02:25 -0400
Subject: [PATCH 04/40] Update Dockerfile

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index e38351b41..9a45c39db 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,7 +3,7 @@
 
 ARG ARCH=linux/amd64
 ARG VERSION=latest
-ARG PKGVERSION=0.9.0
+ARG PKGVERSION=0.9.5
 
 FROM --platform=$ARCH julia:$VERSION
 

From ccf71e9b9eb54f0bdb5cca88414cfb34639ec7f8 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 20 Jul 2022 14:28:04 -0400
Subject: [PATCH 05/40] `load` function to init model from saved equations

---
 pysr/__init__.py |  1 +
 pysr/sr.py       | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
 test/test.py     | 28 +++++++++++++++++-
 3 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/pysr/__init__.py b/pysr/__init__.py
index e303becb2..210e85cb7 100644
--- a/pysr/__init__.py
+++ b/pysr/__init__.py
@@ -6,6 +6,7 @@
     best_tex,
     best_callable,
     best_row,
+    load,
 )
 from .julia_helpers import install
 from .feynman_problems import Problem, FeynmanProblem
diff --git a/pysr/sr.py b/pysr/sr.py
index 3e2112975..ec12fe877 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -2034,3 +2034,77 @@ def run_feature_selection(X, y, select_k_features, random_state=None):
         clf, threshold=-np.inf, max_features=select_k_features, prefit=True
     )
     return selector.get_support(indices=True)
+
+
+def load(
+    equation_file,
+    *,
+    binary_operators,
+    unary_operators,
+    n_features_in,
+    feature_names_in=None,
+    selection_mask=None,
+    nout=1,
+    **pysr_kwargs,
+):
+    """
+    Create a model from equations stored as a csv file
+
+    Parameters
+    ----------
+    equation_file : str
+        Path to a csv file containing equations.
+
+    binary_operators : list[str], default=["+", "-", "*", "/"]
+        The same binary operators used when creating the model.
+
+    unary_operators : list[str], default=None
+        The same unary operators used when creating the model.
+
+    n_features_in : int
+        Number of features passed to the model.
+
+    feature_names_in : list[str], default=None
+        Names of the features passed to the model.
+
+    selection_mask : list[bool], default=None
+        If using select_k_features, you must pass `model.selection_mask_` here.
+
+    nout : int, default=1
+        Number of outputs of the model.
+
+    pysr_kwargs : dict
+        Any other keyword arguments to initialize the PySRRegressor object.
+
+    Returns
+    -------
+    model : PySRRegressor
+        The model with fitted equations.
+    """
+
+    # TODO: copy .bkup file if exists.
+    model = PySRRegressor(
+        equation_file=equation_file,
+        binary_operators=binary_operators,
+        unary_operators=unary_operators,
+        **pysr_kwargs,
+    )
+
+    model.equation_file_ = equation_file
+    model.nout_ = nout
+    model.n_features_in_ = n_features_in
+
+    if feature_names_in is None:
+        model.feature_names_in_ = [f"x{i}" for i in range(n_features_in)]
+    else:
+        assert len(feature_names_in) == n_features_in
+        model.feature_names_in_ = feature_names_in
+
+    if selection_mask is None:
+        model.selection_mask_ = np.ones(n_features_in, dtype=bool)
+    else:
+        model.selection_mask_ = selection_mask
+
+    model.refresh()
+
+    return model
diff --git a/test/test.py b/test/test.py
index 4c82a17e1..1c581b7fe 100644
--- a/test/test.py
+++ b/test/test.py
@@ -4,7 +4,7 @@
 import unittest
 import numpy as np
 from sklearn import model_selection
-from pysr import PySRRegressor
+from pysr import PySRRegressor, load
 from pysr.sr import run_feature_selection, _handle_feature_selection
 from sklearn.utils.estimator_checks import check_estimator
 import sympy
@@ -280,6 +280,32 @@ def test_high_dim_selection_early_stop(self):
         model.fit(X.values, y.values, Xresampled=Xresampled.values)
         self.assertLess(np.average((model.predict(X.values) - y.values) ** 2), 1e-4)
 
+    def test_load_model(self):
+        """See if we can load a ran model from the equation file."""
+        csv_file_data = """
+        Complexity|MSE|Equation
+        1|0.19951081|1.9762075
+        3|0.12717344|(f0 + 1.4724599)
+        4|0.104823045|pow_abs(2.2683423, cos(f3))"""
+        # Strip the indents:
+        csv_file_data = "\n".join([l.strip() for l in csv_file_data.split("\n")])
+        with open("equation_file.csv", "w") as f:
+            f.write(csv_file_data)
+        with open("equation_file.csv.bkup", "w") as f:
+            f.write(csv_file_data)
+        model = load(
+            "equation_file.csv",
+            n_features_in=5,
+            feature_names_in=["f0", "f1", "f2", "f3", "f4"],
+            binary_operators=["+", "*", "/", "-", "^"],
+            unary_operators=["cos"],
+        )
+        X = self.rstate.rand(100, 5)
+        y_truth = 2.2683423 ** np.cos(X[:, 3])
+        y_test = model.predict(X, 2)
+
+        np.testing.assert_allclose(y_truth, y_test)
+
 
 class TestBest(unittest.TestCase):
     def setUp(self):

From e5b4869851bd5a5f7b6fa783cf6d0f8ff10ca8a5 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 20 Jul 2022 14:32:27 -0400
Subject: [PATCH 06/40] Call `refresh` in load function

---
 pysr/sr.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index ec12fe877..07b34cd31 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -2090,7 +2090,6 @@ def load(
         **pysr_kwargs,
     )
 
-    model.equation_file_ = equation_file
     model.nout_ = nout
     model.n_features_in_ = n_features_in
 
@@ -2105,6 +2104,6 @@ def load(
     else:
         model.selection_mask_ = selection_mask
 
-    model.refresh()
+    model.refresh(checkpoint_file=equation_file)
 
     return model

From 179fef6351ee7d5356ec2a1ce9efcda8241dd935 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 24 Jul 2022 22:42:59 -0400
Subject: [PATCH 07/40] Correctly set path names

---
 test/test.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/test/test.py b/test/test.py
index 1c581b7fe..59c7d76bc 100644
--- a/test/test.py
+++ b/test/test.py
@@ -12,6 +12,7 @@
 import warnings
 import pickle as pkl
 import tempfile
+from pathlib import Path
 
 DEFAULT_PARAMS = inspect.signature(PySRRegressor.__init__).parameters
 DEFAULT_NITERATIONS = DEFAULT_PARAMS["niterations"].default
@@ -289,12 +290,14 @@ def test_load_model(self):
         4|0.104823045|pow_abs(2.2683423, cos(f3))"""
         # Strip the indents:
         csv_file_data = "\n".join([l.strip() for l in csv_file_data.split("\n")])
-        with open("equation_file.csv", "w") as f:
+        rand_dir = Path(tempfile.mkdtemp())
+        equation_filename = rand_dir / "equation.csv"
+        with open(equation_filename, "w") as f:
             f.write(csv_file_data)
-        with open("equation_file.csv.bkup", "w") as f:
+        with open(equation_filename + ".bkup", "w") as f:
             f.write(csv_file_data)
         model = load(
-            "equation_file.csv",
+            equation_filename,
             n_features_in=5,
             feature_names_in=["f0", "f1", "f2", "f3", "f4"],
             binary_operators=["+", "*", "/", "-", "^"],

From 85371bb899ddc546f448adec34e5b2977f080f9d Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 24 Jul 2022 22:44:41 -0400
Subject: [PATCH 08/40] Allow pickling without equations_ stored

---
 pysr/sr.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 07b34cd31..67c11f58c 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -883,7 +883,9 @@ def __getstate__(self):
             key: None if key == "raw_julia_state_" else value
             for key, value in state.items()
         }
-        if "equations_" in pickled_state:
+        if ("equations_" in pickled_state) and (
+            pickled_state["equations_"] is not None
+        ):
             pickled_state["output_torch_format"] = False
             pickled_state["output_jax_format"] = False
             if self.nout_ == 1:

From dde0ef7e3c2606415b9d7c03c56370402c398e3e Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 24 Jul 2022 22:45:13 -0400
Subject: [PATCH 09/40] Remove extra_sympy_mappings from pickle file

---
 pysr/sr.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 67c11f58c..e147d08a7 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -562,6 +562,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     equation_file_contents_ : list[pandas.DataFrame]
         Contents of the equation file output by the Julia backend.
 
+    show_pickle_warnings_ : bool
+        Whether to show warnings about what attributes can be pickled.
+
     Notes
     -----
     Most default parameters have been tuned over several example equations,
@@ -873,14 +876,26 @@ def __getstate__(self):
         from the pickled instance.
         """
         state = self.__dict__
-        if "raw_julia_state_" in state:
+        show_pickle_warning = not (
+            "show_pickle_warnings_" in state and not state["show_pickle_warnings_"]
+        )
+        if "raw_julia_state_" in state and show_pickle_warning:
             warnings.warn(
                 "raw_julia_state_ cannot be pickled and will be removed from the "
                 "serialized instance. This will prevent a `warm_start` fit of any "
                 "model that is deserialized via `pickle.load()`."
             )
+        state_keys_containing_lambdas = ["extra_sympy_mappings", "extra_torch_mappings"]
+        for state_key in state_keys_containing_lambdas:
+            if state[state_key] is not None and show_pickle_warning:
+                warnings.warn(
+                    f"`{state_key}` cannot be pickled and will be removed from the "
+                    "serialized instance. When loading the model, please redefine "
+                    f"`{state_key}` at runtime."
+                )
+        state_keys_to_clear = ["raw_julia_state_"] + state_keys_containing_lambdas
         pickled_state = {
-            key: None if key == "raw_julia_state_" else value
+            key: (None if key in state_keys_to_clear else value)
             for key, value in state.items()
         }
         if ("equations_" in pickled_state) and (

From b16d9efb3c83ced7870fbb7641fa97cfd9452a2d Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 24 Jul 2022 22:45:42 -0400
Subject: [PATCH 10/40] Automatically pickle file at initialization

---
 pysr/sr.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pysr/sr.py b/pysr/sr.py
index e147d08a7..63d74fe61 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -1623,6 +1623,11 @@ def fit(
             y,
         )
 
+        # Save model state:
+        self.show_pickle_warnings_ = False
+        with open(str(self.equation_file_) + ".pkl", "wb") as f:
+            pkl.dump(self, f)
+        self.show_pickle_warnings_ = True
         # Fitting procedure
         return self._run(X, y, mutated_params, weights=weights, seed=seed)
 

From 5c0ad5569248da926a646aeb6194ad9d03fc9844 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 24 Jul 2022 22:46:11 -0400
Subject: [PATCH 11/40] Allow loading from pickle file

---
 pysr/sr.py | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 63d74fe61..cd851c16b 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -2061,9 +2061,9 @@ def run_feature_selection(X, y, select_k_features, random_state=None):
 def load(
     equation_file,
     *,
-    binary_operators,
-    unary_operators,
-    n_features_in,
+    binary_operators=None,
+    unary_operators=None,
+    n_features_in=None,
     feature_names_in=None,
     selection_mask=None,
     nout=1,
@@ -2097,12 +2097,33 @@ def load(
 
     pysr_kwargs : dict
         Any other keyword arguments to initialize the PySRRegressor object.
+        These will overwrite those stored in the pickle file.
 
     Returns
     -------
     model : PySRRegressor
         The model with fitted equations.
     """
+    # Try to load model from <equation_file>.pkl
+    print(f"Checking if {equation_file}.pkl exists...")
+    if os.path.exists(str(equation_file) + ".pkl"):
+        assert binary_operators is None
+        assert unary_operators is None
+        assert n_features_in is None
+        with open(str(equation_file) + ".pkl", "rb") as f:
+            model = pkl.load(f)
+        model.set_params(**pysr_kwargs)
+        model.refresh()
+        return model
+
+    # Else, we re-create it.
+    print(
+        f"{equation_file}.pkl does not exist, "
+        "so we must create the model from scratch."
+    )
+    assert binary_operators is not None
+    assert unary_operators is not None
+    assert n_features_in is not None
 
     # TODO: copy .bkup file if exists.
     model = PySRRegressor(

From dc1d66378e25d7a8ef5d45811e0a67d0b00c449e Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 24 Jul 2022 22:46:43 -0400
Subject: [PATCH 12/40] Add pickle files to gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 23004701d..f0daf5e88 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
 *.csv
 *.csv.out*
 *.bkup
+*.pkl
 performance*txt
 *.out
 trials*

From 4ae8a5c2380b0fa6dd34f2f56207d2dc3970a362 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 24 Jul 2022 22:47:00 -0400
Subject: [PATCH 13/40] Add missing pickle import

---
 pysr/sr.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pysr/sr.py b/pysr/sr.py
index cd851c16b..c093d3619 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -8,6 +8,7 @@
 import tempfile
 import shutil
 from pathlib import Path
+import pickle as pkl
 from datetime import datetime
 import warnings
 from multiprocessing import cpu_count

From 78cdb0e736efe0575ecd7797e7ad7e07b6ecd447 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 24 Jul 2022 22:47:15 -0400
Subject: [PATCH 14/40] Add test for loading from pickle file

---
 test/test.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/test/test.py b/test/test.py
index 59c7d76bc..33999e087 100644
--- a/test/test.py
+++ b/test/test.py
@@ -309,6 +309,33 @@ def test_load_model(self):
 
         np.testing.assert_allclose(y_truth, y_test)
 
+    def test_load_model_simple(self):
+        # Test that we can simply load a model from its equation file.
+        y = self.X[:, [0, 1]] ** 2
+        model = PySRRegressor(
+            # Test that passing a single operator works:
+            unary_operators="sq(x) = x^2",
+            binary_operators="plus",
+            extra_sympy_mappings={"sq": lambda x: x**2},
+            **self.default_test_kwargs,
+            procs=0,
+            denoise=True,
+            early_stop_condition="stop_if(loss, complexity) = loss < 0.05 && complexity == 2",
+        )
+        rand_dir = Path(tempfile.mkdtemp())
+        equation_file = rand_dir / "equations.csv"
+        model.set_params(temp_equation_file=False)
+        model.set_params(equation_file=equation_file)
+        model.fit(self.X, y)
+
+        # lambda functions are removed from the pickling, so we need
+        # to pass it during the loading:
+        model2 = load(
+            model.equation_file_, extra_sympy_mappings={"sq": lambda x: x**2}
+        )
+
+        np.testing.assert_allclose(model.predict(self.X), model2.predict(self.X))
+
 
 class TestBest(unittest.TestCase):
     def setUp(self):

From 8da5000dfc58c1a035c623ebd6c6e2acea472134 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 24 Jul 2022 22:57:40 -0400
Subject: [PATCH 15/40] Improve error message for missing operator mappings

---
 pysr/sr.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 3e2112975..736be313e 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -1694,7 +1694,8 @@ def predict(self, X, index=None):
             raise ValueError(
                 "Failed to evaluate the expression. "
                 "If you are using a custom operator, make sure to define it in :param`extra_sympy_mappings`, "
-                "e.g., `model.set_params(extra_sympy_mappings={'inv': lambda x: 1 / x})`."
+                "e.g., `model.set_params(extra_sympy_mappings={'inv': lambda x: 1 / x})`. You can then "
+                "run `model.refresh()` to re-load the expressions."
             ) from error
 
     def sympy(self, index=None):

From 214744b5ce5f1a375f5af936b774ca3a3b26bdd4 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 24 Jul 2022 22:59:41 -0400
Subject: [PATCH 16/40] Fix filename concat in test

---
 test/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test.py b/test/test.py
index 33999e087..0e1108400 100644
--- a/test/test.py
+++ b/test/test.py
@@ -294,7 +294,7 @@ def test_load_model(self):
         equation_filename = rand_dir / "equation.csv"
         with open(equation_filename, "w") as f:
             f.write(csv_file_data)
-        with open(equation_filename + ".bkup", "w") as f:
+        with open(str(equation_filename) + ".bkup", "w") as f:
             f.write(csv_file_data)
         model = load(
             equation_filename,

From 58e25a9d7261ceb4509125d6f8d102b68bfe5fd4 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 24 Jul 2022 23:30:52 -0400
Subject: [PATCH 17/40] Test both with and without `bkup` file

---
 test/test.py | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/test/test.py b/test/test.py
index 0e1108400..30a50d1cc 100644
--- a/test/test.py
+++ b/test/test.py
@@ -290,24 +290,24 @@ def test_load_model(self):
         4|0.104823045|pow_abs(2.2683423, cos(f3))"""
         # Strip the indents:
         csv_file_data = "\n".join([l.strip() for l in csv_file_data.split("\n")])
-        rand_dir = Path(tempfile.mkdtemp())
-        equation_filename = rand_dir / "equation.csv"
-        with open(equation_filename, "w") as f:
-            f.write(csv_file_data)
-        with open(str(equation_filename) + ".bkup", "w") as f:
-            f.write(csv_file_data)
-        model = load(
-            equation_filename,
-            n_features_in=5,
-            feature_names_in=["f0", "f1", "f2", "f3", "f4"],
-            binary_operators=["+", "*", "/", "-", "^"],
-            unary_operators=["cos"],
-        )
-        X = self.rstate.rand(100, 5)
-        y_truth = 2.2683423 ** np.cos(X[:, 3])
-        y_test = model.predict(X, 2)
 
-        np.testing.assert_allclose(y_truth, y_test)
+        for from_backup in [False, True]:
+            rand_dir = Path(tempfile.mkdtemp())
+            equation_filename = str(rand_dir / "equation.csv")
+            with open(equation_filename + (".bkup" if from_backup else ""), "w") as f:
+                f.write(csv_file_data)
+            model = load(
+                equation_filename,
+                n_features_in=5,
+                feature_names_in=["f0", "f1", "f2", "f3", "f4"],
+                binary_operators=["+", "*", "/", "-", "^"],
+                unary_operators=["cos"],
+            )
+            X = self.rstate.rand(100, 5)
+            y_truth = 2.2683423 ** np.cos(X[:, 3])
+            y_test = model.predict(X, 2)
+
+            np.testing.assert_allclose(y_truth, y_test)
 
     def test_load_model_simple(self):
         # Test that we can simply load a model from its equation file.

From 1f019764c8af52477ed2f066b50e17e7474ec26e Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 24 Jul 2022 23:31:28 -0400
Subject: [PATCH 18/40] Don't check for `equation_file_` until after
 checkpoint_file set

---
 pysr/sr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index c093d3619..be9224893 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -1642,10 +1642,10 @@ def refresh(self, checkpoint_file=None):
         checkpoint_file : str, default=None
             Path to checkpoint hall of fame file to be loaded.
         """
-        check_is_fitted(self, attributes=["equation_file_"])
         if checkpoint_file:
             self.equation_file_ = checkpoint_file
             self.equation_file_contents_ = None
+        check_is_fitted(self, attributes=["equation_file_"])
         self.equations_ = self.get_hof()
 
     def predict(self, X, index=None):

From f1ac7043f981a5d2fa1202d172986ab9ee261f99 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 24 Jul 2022 23:32:00 -0400
Subject: [PATCH 19/40] Allow both `bkup` and `csv` file

---
 pysr/sr.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index be9224893..b53d222a0 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -1834,10 +1834,10 @@ def _read_equation_file(self):
             if self.nout_ > 1:
                 all_outputs = []
                 for i in range(1, self.nout_ + 1):
-                    df = pd.read_csv(
-                        str(self.equation_file_) + f".out{i}" + ".bkup",
-                        sep="|",
-                    )
+                    cur_filename = str(self.equation_file_) + f".out{i}" + ".bkup"
+                    if not os.path.exists(cur_filename):
+                        cur_filename = str(self.equation_file_) + f".out{i}"
+                    df = pd.read_csv(cur_filename, sep="|")
                     # Rename Complexity column to complexity:
                     df.rename(
                         columns={
@@ -1850,7 +1850,10 @@ def _read_equation_file(self):
 
                     all_outputs.append(df)
             else:
-                all_outputs = [pd.read_csv(str(self.equation_file_) + ".bkup", sep="|")]
+                filename = str(self.equation_file_) + ".bkup"
+                if not os.path.exists(filename):
+                    filename = str(self.equation_file_)
+                all_outputs = [pd.read_csv(filename, sep="|")]
                 all_outputs[-1].rename(
                     columns={
                         "Complexity": "complexity",

From c6902b714c3a993d00d41a90100cc6de79c5f50f Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Sun, 24 Jul 2022 23:32:41 -0400
Subject: [PATCH 20/40] Additional logging messages during load

---
 pysr/sr.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pysr/sr.py b/pysr/sr.py
index b53d222a0..d669147e2 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -2111,6 +2111,7 @@ def load(
     # Try to load model from <equation_file>.pkl
     print(f"Checking if {equation_file}.pkl exists...")
     if os.path.exists(str(equation_file) + ".pkl"):
+        print(f"Loading model from {equation_file}.pkl.")
         assert binary_operators is None
         assert unary_operators is None
         assert n_features_in is None

From 6501ca074793bde8af2ef943394c4e03ef43a775 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Mon, 1 Aug 2022 14:24:05 -0400
Subject: [PATCH 21/40] Checkpoint model before and after fit

---
 pysr/sr.py | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index d669147e2..2d8e5463b 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -924,6 +924,16 @@ def __getstate__(self):
                 ]
         return pickled_state
 
+    def _checkpoint(self):
+        """Saves the model's current state to a checkpoint file.
+        
+        This should only be used internally by PySRRegressor."""
+        # Save model state:
+        self.show_pickle_warnings_ = False
+        with open(str(self.equation_file_) + ".pkl", "wb") as f:
+            pkl.dump(self, f)
+        self.show_pickle_warnings_ = True
+
     @property
     def equations(self):  # pragma: no cover
         warnings.warn(
@@ -1624,13 +1634,18 @@ def fit(
             y,
         )
 
-        # Save model state:
-        self.show_pickle_warnings_ = False
-        with open(str(self.equation_file_) + ".pkl", "wb") as f:
-            pkl.dump(self, f)
-        self.show_pickle_warnings_ = True
-        # Fitting procedure
-        return self._run(X, y, mutated_params, weights=weights, seed=seed)
+        # Initially, just save model parameters, so that
+        # it can be loaded from an early exit:
+        self._checkpoint()
+
+        # Perform the search:
+        self._run(X, y, mutated_params, weights=weights, seed=seed)
+
+        # Then, after fit, we save again, so the pickle file contains
+        # the equations:
+        self._checkpoint()
+
+        return self
 
     def refresh(self, checkpoint_file=None):
         """

From b53e7fafda3ee0a06d9e8ee56f98cc46bd7ddd57 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Mon, 1 Aug 2022 14:40:12 -0400
Subject: [PATCH 22/40] Add additional test for loading from pickle file

---
 pysr/sr.py   |  8 ++++++--
 test/test.py | 10 ++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 2d8e5463b..f99180dd2 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -926,7 +926,7 @@ def __getstate__(self):
 
     def _checkpoint(self):
         """Saves the model's current state to a checkpoint file.
-        
+
         This should only be used internally by PySRRegressor."""
         # Save model state:
         self.show_pickle_warnings_ = False
@@ -2132,8 +2132,12 @@ def load(
         assert n_features_in is None
         with open(str(equation_file) + ".pkl", "rb") as f:
             model = pkl.load(f)
+        # Update any parameters if necessary, such as
+        # extra_sympy_mappings:
         model.set_params(**pysr_kwargs)
-        model.refresh()
+        if "equations_" not in model.__dict__ or model.equations_ is None:
+            model.refresh()
+
         return model
 
     # Else, we re-create it.
diff --git a/test/test.py b/test/test.py
index 30a50d1cc..f5e84570e 100644
--- a/test/test.py
+++ b/test/test.py
@@ -336,6 +336,16 @@ def test_load_model_simple(self):
 
         np.testing.assert_allclose(model.predict(self.X), model2.predict(self.X))
 
+        # Try again, but using only the pickle file:
+        for file_to_delete in [str(equation_file), str(equation_file) + ".bkup"]:
+            if os.path.exists(file_to_delete):
+                os.remove(file_to_delete)
+
+        model3 = load(
+            model.equation_file_, extra_sympy_mappings={"sq": lambda x: x**2}
+        )
+        np.testing.assert_allclose(model.predict(self.X), model3.predict(self.X))
+
 
 class TestBest(unittest.TestCase):
     def setUp(self):

From b8a97f177e29858c39aaeabd1d998b5207be2c95 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Thu, 4 Aug 2022 15:23:41 -0400
Subject: [PATCH 23/40] Use .pkl instead of .csv.pkl

---
 pysr/sr.py   | 38 ++++++++++++++++++++++++++++----------
 test/test.py | 21 ++++++++++++++++++++-
 2 files changed, 48 insertions(+), 11 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index f99180dd2..d1a209cd6 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -930,7 +930,7 @@ def _checkpoint(self):
         This should only be used internally by PySRRegressor."""
         # Save model state:
         self.show_pickle_warnings_ = False
-        with open(str(self.equation_file_) + ".pkl", "wb") as f:
+        with open(_csv_filename_to_pkl_filename(self.equation_file_), "wb") as f:
             pkl.dump(self, f)
         self.show_pickle_warnings_ = True
 
@@ -1636,14 +1636,16 @@ def fit(
 
         # Initially, just save model parameters, so that
         # it can be loaded from an early exit:
-        self._checkpoint()
+        if not self.temp_equation_file:
+            self._checkpoint()
 
         # Perform the search:
         self._run(X, y, mutated_params, weights=weights, seed=seed)
 
         # Then, after fit, we save again, so the pickle file contains
         # the equations:
-        self._checkpoint()
+        if not self.temp_equation_file:
+            self._checkpoint()
 
         return self
 
@@ -2077,6 +2079,17 @@ def run_feature_selection(X, y, select_k_features, random_state=None):
     return selector.get_support(indices=True)
 
 
+def _csv_filename_to_pkl_filename(csv_filename) -> str:
+    # Assume that the csv filename is of the form "foo.csv"
+    dirname = str(os.path.dirname(csv_filename))
+    basename = str(os.path.basename(csv_filename))
+    base = str(os.path.splitext(basename)[0])
+
+    pkl_basename = base + ".pkl"
+
+    return os.path.join(dirname, pkl_basename)
+
+
 def load(
     equation_file,
     *,
@@ -2094,7 +2107,8 @@ def load(
     Parameters
     ----------
     equation_file : str
-        Path to a csv file containing equations.
+        Path to a csv file containing equations, or a pickle file
+        containing the model.
 
     binary_operators : list[str], default=["+", "-", "*", "/"]
         The same binary operators used when creating the model.
@@ -2123,14 +2137,19 @@ def load(
     model : PySRRegressor
         The model with fitted equations.
     """
+    if os.path.splitext(equation_file)[1] != ".pkl":
+        pkl_filename = _csv_filename_to_pkl_filename(equation_file)
+    else:
+        pkl_filename = equation_file
+
     # Try to load model from <equation_file>.pkl
-    print(f"Checking if {equation_file}.pkl exists...")
-    if os.path.exists(str(equation_file) + ".pkl"):
-        print(f"Loading model from {equation_file}.pkl.")
+    print(f"Checking if {pkl_filename} exists...")
+    if os.path.exists(pkl_filename):
+        print(f"Loading model from {pkl_filename}")
         assert binary_operators is None
         assert unary_operators is None
         assert n_features_in is None
-        with open(str(equation_file) + ".pkl", "rb") as f:
+        with open(pkl_filename, "rb") as f:
             model = pkl.load(f)
         # Update any parameters if necessary, such as
         # extra_sympy_mappings:
@@ -2142,8 +2161,7 @@ def load(
 
     # Else, we re-create it.
     print(
-        f"{equation_file}.pkl does not exist, "
-        "so we must create the model from scratch."
+        f"{equation_file} does not exist, " "so we must create the model from scratch."
     )
     assert binary_operators is not None
     assert unary_operators is not None
diff --git a/test/test.py b/test/test.py
index f5e84570e..dd07c601f 100644
--- a/test/test.py
+++ b/test/test.py
@@ -5,7 +5,11 @@
 import numpy as np
 from sklearn import model_selection
 from pysr import PySRRegressor, load
-from pysr.sr import run_feature_selection, _handle_feature_selection
+from pysr.sr import (
+    run_feature_selection,
+    _handle_feature_selection,
+    _csv_filename_to_pkl_filename,
+)
 from sklearn.utils.estimator_checks import check_estimator
 import sympy
 import pandas as pd
@@ -341,6 +345,7 @@ def test_load_model_simple(self):
             if os.path.exists(file_to_delete):
                 os.remove(file_to_delete)
 
+        pickle_file = rand_dir / "equations.pkl"
         model3 = load(
             model.equation_file_, extra_sympy_mappings={"sq": lambda x: x**2}
         )
@@ -430,6 +435,20 @@ def test_feature_selection_handler(self):
 class TestMiscellaneous(unittest.TestCase):
     """Test miscellaneous functions."""
 
+    def test_csv_to_pkl_conversion(self):
+        """Test that csv filename to pkl filename works as expected."""
+        tmpdir = Path(tempfile.mkdtemp())
+        equation_file = tmpdir / "equations.389479384.28378374.csv"
+        expected_pkl_file = tmpdir / "equations.389479384.28378374.pkl"
+
+        # First, test inputting the paths:
+        test_pkl_file = _csv_filename_to_pkl_filename(equation_file)
+        self.assertEqual(test_pkl_file, str(expected_pkl_file))
+
+        # Next, test inputting the strings.
+        test_pkl_file = _csv_filename_to_pkl_filename(str(equation_file))
+        self.assertEqual(test_pkl_file, str(expected_pkl_file))
+
     def test_deprecation(self):
         """Ensure that deprecation works as expected.
 

From a6bed2c01177ba435c141e1cd540409c3d3e34ec Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Thu, 4 Aug 2022 18:25:42 -0400
Subject: [PATCH 24/40] Fix bug with inplace editing of equation_file_contents_

---
 pysr/sr.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index d1a209cd6..3a5415554 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -1,3 +1,4 @@
+import copy
 import os
 import sys
 import numpy as np
@@ -1928,7 +1929,9 @@ def get_hof(self):
 
         ret_outputs = []
 
-        for output in self.equation_file_contents_:
+        equation_file_contents = copy.deepcopy(self.equation_file_contents_)
+
+        for output in equation_file_contents:
 
             scores = []
             lastMSE = None

From f5577eac29e49fe913ce12a214b88cba787f2e6d Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Thu, 4 Aug 2022 18:57:30 -0400
Subject: [PATCH 25/40] Reduce precision of tests

---
 test/test.py     | 6 +++---
 test/test_jax.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/test.py b/test/test.py
index dd07c601f..dd1ece29f 100644
--- a/test/test.py
+++ b/test/test.py
@@ -140,7 +140,7 @@ def test_multioutput_weighted_with_callable_temp_equation(self):
         # These tests are flaky, so don't fail test:
         try:
             np.testing.assert_almost_equal(
-                model.predict(X.copy())[:, 0], X[:, 0] ** 2, decimal=4
+                model.predict(X.copy())[:, 0], X[:, 0] ** 2, decimal=3
             )
         except AssertionError:
             print("Error in test_multioutput_weighted_with_callable_temp_equation")
@@ -149,7 +149,7 @@ def test_multioutput_weighted_with_callable_temp_equation(self):
 
         try:
             np.testing.assert_almost_equal(
-                model.predict(X.copy())[:, 1], X[:, 1] ** 2, decimal=4
+                model.predict(X.copy())[:, 1], X[:, 1] ** 2, decimal=3
             )
         except AssertionError:
             print("Error in test_multioutput_weighted_with_callable_temp_equation")
@@ -401,7 +401,7 @@ def test_best_lambda(self):
         X = self.X
         y = self.y
         for f in [self.model.predict, self.equations_.iloc[-1]["lambda_format"]]:
-            np.testing.assert_almost_equal(f(X), y, decimal=4)
+            np.testing.assert_almost_equal(f(X), y, decimal=3)
 
 
 class TestFeatureSelection(unittest.TestCase):
diff --git a/test/test_jax.py b/test/test_jax.py
index 58d1a6067..e885a8d3b 100644
--- a/test/test_jax.py
+++ b/test/test_jax.py
@@ -76,7 +76,7 @@ def test_pipeline(self):
         np.testing.assert_almost_equal(
             np.array(jformat["callable"](jnp.array(X), jformat["parameters"])),
             np.square(np.cos(X[:, 1])),  # Select feature 1
-            decimal=4,
+            decimal=3,
         )
 
     def test_feature_selection_custom_operators(self):

From 34f4e3f83fb1f1dd691ad5b57572bf1e7673125e Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 5 Aug 2022 00:22:07 -0400
Subject: [PATCH 26/40] Change model load to classmethod

---
 pysr/__init__.py |   1 -
 pysr/sr.py       | 217 ++++++++++++++++++++++++-----------------------
 test/test.py     |   8 +-
 3 files changed, 117 insertions(+), 109 deletions(-)

diff --git a/pysr/__init__.py b/pysr/__init__.py
index 210e85cb7..e303becb2 100644
--- a/pysr/__init__.py
+++ b/pysr/__init__.py
@@ -6,7 +6,6 @@
     best_tex,
     best_callable,
     best_row,
-    load,
 )
 from .julia_helpers import install
 from .feynman_problems import Problem, FeynmanProblem
diff --git a/pysr/sr.py b/pysr/sr.py
index 3a5415554..e98d36b67 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -810,6 +810,119 @@ def __init__(
                         f"{k} is not a valid keyword argument for PySRRegressor."
                     )
 
+    @classmethod
+    def from_file(
+        cls,
+        equation_file,
+        *,
+        binary_operators=None,
+        unary_operators=None,
+        n_features_in=None,
+        feature_names_in=None,
+        selection_mask=None,
+        nout=1,
+        **pysr_kwargs,
+    ):
+        """
+        Create a model from a saved model checkpoint or equation file.
+
+        Parameters
+        ----------
+        equation_file : str
+            Path to a pickle file containing a saved model, or a csv file
+            containing equations.
+
+        binary_operators : list[str]
+            The same binary operators used when creating the model.
+            Not needed if loading from a pickle file.
+
+        unary_operators : list[str]
+            The same unary operators used when creating the model.
+            Not needed if loading from a pickle file.
+
+        n_features_in : int
+            Number of features passed to the model.
+            Not needed if loading from a pickle file.
+
+        feature_names_in : list[str]
+            Names of the features passed to the model.
+            Not needed if loading from a pickle file.
+
+        selection_mask : list[bool]
+            If using select_k_features, you must pass `model.selection_mask_` here.
+            Not needed if loading from a pickle file.
+
+        nout : int, default=1
+            Number of outputs of the model.
+            Not needed if loading from a pickle file.
+
+        pysr_kwargs : dict
+            Any other keyword arguments to initialize the PySRRegressor object.
+            These will overwrite those stored in the pickle file.
+            Not needed if loading from a pickle file.
+
+        Returns
+        -------
+        model : PySRRegressor
+            The model with fitted equations.
+        """
+        if os.path.splitext(equation_file)[1] != ".pkl":
+            pkl_filename = _csv_filename_to_pkl_filename(equation_file)
+        else:
+            pkl_filename = equation_file
+
+        # Try to load model from <equation_file>.pkl
+        print(f"Checking if {pkl_filename} exists...")
+        if os.path.exists(pkl_filename):
+            print(f"Loading model from {pkl_filename}")
+            assert binary_operators is None
+            assert unary_operators is None
+            assert n_features_in is None
+            with open(pkl_filename, "rb") as f:
+                model = pkl.load(f)
+            # Update any parameters if necessary, such as
+            # extra_sympy_mappings:
+            model.set_params(**pysr_kwargs)
+            if "equations_" not in model.__dict__ or model.equations_ is None:
+                model.refresh()
+
+            return model
+
+        # Else, we re-create it.
+        print(
+            f"{equation_file} does not exist, "
+            "so we must create the model from scratch."
+        )
+        assert binary_operators is not None
+        assert unary_operators is not None
+        assert n_features_in is not None
+
+        # TODO: copy .bkup file if exists.
+        model = cls(
+            equation_file=equation_file,
+            binary_operators=binary_operators,
+            unary_operators=unary_operators,
+            **pysr_kwargs,
+        )
+
+        model.nout_ = nout
+        model.n_features_in_ = n_features_in
+
+        if feature_names_in is None:
+            model.feature_names_in_ = [f"x{i}" for i in range(n_features_in)]
+        else:
+            assert len(feature_names_in) == n_features_in
+            model.feature_names_in_ = feature_names_in
+
+        if selection_mask is None:
+            model.selection_mask_ = np.ones(n_features_in, dtype=bool)
+        else:
+            model.selection_mask_ = selection_mask
+
+        model.refresh(checkpoint_file=equation_file)
+
+        return model
+
     def __repr__(self):
         """
         Prints all current equations fitted by the model.
@@ -2091,107 +2204,3 @@ def _csv_filename_to_pkl_filename(csv_filename) -> str:
     pkl_basename = base + ".pkl"
 
     return os.path.join(dirname, pkl_basename)
-
-
-def load(
-    equation_file,
-    *,
-    binary_operators=None,
-    unary_operators=None,
-    n_features_in=None,
-    feature_names_in=None,
-    selection_mask=None,
-    nout=1,
-    **pysr_kwargs,
-):
-    """
-    Create a model from equations stored as a csv file
-
-    Parameters
-    ----------
-    equation_file : str
-        Path to a csv file containing equations, or a pickle file
-        containing the model.
-
-    binary_operators : list[str], default=["+", "-", "*", "/"]
-        The same binary operators used when creating the model.
-
-    unary_operators : list[str], default=None
-        The same unary operators used when creating the model.
-
-    n_features_in : int
-        Number of features passed to the model.
-
-    feature_names_in : list[str], default=None
-        Names of the features passed to the model.
-
-    selection_mask : list[bool], default=None
-        If using select_k_features, you must pass `model.selection_mask_` here.
-
-    nout : int, default=1
-        Number of outputs of the model.
-
-    pysr_kwargs : dict
-        Any other keyword arguments to initialize the PySRRegressor object.
-        These will overwrite those stored in the pickle file.
-
-    Returns
-    -------
-    model : PySRRegressor
-        The model with fitted equations.
-    """
-    if os.path.splitext(equation_file)[1] != ".pkl":
-        pkl_filename = _csv_filename_to_pkl_filename(equation_file)
-    else:
-        pkl_filename = equation_file
-
-    # Try to load model from <equation_file>.pkl
-    print(f"Checking if {pkl_filename} exists...")
-    if os.path.exists(pkl_filename):
-        print(f"Loading model from {pkl_filename}")
-        assert binary_operators is None
-        assert unary_operators is None
-        assert n_features_in is None
-        with open(pkl_filename, "rb") as f:
-            model = pkl.load(f)
-        # Update any parameters if necessary, such as
-        # extra_sympy_mappings:
-        model.set_params(**pysr_kwargs)
-        if "equations_" not in model.__dict__ or model.equations_ is None:
-            model.refresh()
-
-        return model
-
-    # Else, we re-create it.
-    print(
-        f"{equation_file} does not exist, " "so we must create the model from scratch."
-    )
-    assert binary_operators is not None
-    assert unary_operators is not None
-    assert n_features_in is not None
-
-    # TODO: copy .bkup file if exists.
-    model = PySRRegressor(
-        equation_file=equation_file,
-        binary_operators=binary_operators,
-        unary_operators=unary_operators,
-        **pysr_kwargs,
-    )
-
-    model.nout_ = nout
-    model.n_features_in_ = n_features_in
-
-    if feature_names_in is None:
-        model.feature_names_in_ = [f"x{i}" for i in range(n_features_in)]
-    else:
-        assert len(feature_names_in) == n_features_in
-        model.feature_names_in_ = feature_names_in
-
-    if selection_mask is None:
-        model.selection_mask_ = np.ones(n_features_in, dtype=bool)
-    else:
-        model.selection_mask_ = selection_mask
-
-    model.refresh(checkpoint_file=equation_file)
-
-    return model
diff --git a/test/test.py b/test/test.py
index dd1ece29f..fcde9ff9e 100644
--- a/test/test.py
+++ b/test/test.py
@@ -4,7 +4,7 @@
 import unittest
 import numpy as np
 from sklearn import model_selection
-from pysr import PySRRegressor, load
+from pysr import PySRRegressor
 from pysr.sr import (
     run_feature_selection,
     _handle_feature_selection,
@@ -300,7 +300,7 @@ def test_load_model(self):
             equation_filename = str(rand_dir / "equation.csv")
             with open(equation_filename + (".bkup" if from_backup else ""), "w") as f:
                 f.write(csv_file_data)
-            model = load(
+            model = PySRRegressor.from_file(
                 equation_filename,
                 n_features_in=5,
                 feature_names_in=["f0", "f1", "f2", "f3", "f4"],
@@ -334,7 +334,7 @@ def test_load_model_simple(self):
 
         # lambda functions are removed from the pickling, so we need
         # to pass it during the loading:
-        model2 = load(
+        model2 = PySRRegressor.from_file(
             model.equation_file_, extra_sympy_mappings={"sq": lambda x: x**2}
         )
 
@@ -346,7 +346,7 @@ def test_load_model_simple(self):
                 os.remove(file_to_delete)
 
         pickle_file = rand_dir / "equations.pkl"
-        model3 = load(
+        model3 = PySRRegressor.from_file(
             model.equation_file_, extra_sympy_mappings={"sq": lambda x: x**2}
         )
         np.testing.assert_allclose(model.predict(self.X), model3.predict(self.X))

From 07217e13f141a25acf9b50e793ac84f399a45ecc Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 5 Aug 2022 00:26:20 -0400
Subject: [PATCH 27/40] Add assertion for csv filename

---
 pysr/sr.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pysr/sr.py b/pysr/sr.py
index e98d36b67..198867f15 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -2197,6 +2197,8 @@ def run_feature_selection(X, y, select_k_features, random_state=None):
 
 def _csv_filename_to_pkl_filename(csv_filename) -> str:
     # Assume that the csv filename is of the form "foo.csv"
+    assert csv_filename.endswith(".csv")
+
     dirname = str(os.path.dirname(csv_filename))
     basename = str(os.path.basename(csv_filename))
     base = str(os.path.splitext(basename)[0])

From f5a5c8e7b1a58651532a7d8dd2d0322f949ccce0 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Fri, 5 Aug 2022 16:16:04 -0400
Subject: [PATCH 28/40] Fix assertion on csv filenames

---
 pysr/sr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 198867f15..8956d0960 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -2197,7 +2197,7 @@ def run_feature_selection(X, y, select_k_features, random_state=None):
 
 def _csv_filename_to_pkl_filename(csv_filename) -> str:
     # Assume that the csv filename is of the form "foo.csv"
-    assert csv_filename.endswith(".csv")
+    assert str(csv_filename).endswith(".csv")
 
     dirname = str(os.path.dirname(csv_filename))
     basename = str(os.path.basename(csv_filename))

From 9433a8315f8134697e259f5c881f5e3d15ad36bd Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 10 Aug 2022 10:30:24 +0300
Subject: [PATCH 29/40] Add README example for from_file

---
 README.md | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a3630781c..fa620155d 100644
--- a/README.md
+++ b/README.md
@@ -162,7 +162,15 @@ This arrow in the `pick` column indicates which equation is currently selected b
 SymPy format (`sympy_format` - which you can also get with `model.sympy()`), and even JAX and PyTorch format 
 (both of which are differentiable - which you can get with `model.jax()` and `model.pytorch()`).
 
-Note that `PySRRegressor` stores the state of the last search, and will restart from where you left off the next time you call `.fit()`. This will cause problems if significant changes are made to the search parameters (like changing the operators). You can run `model.reset()` to reset the state.
+Note that `PySRRegressor` stores the state of the last search, and will restart from where you left off the next time you call `.fit()`, assuming you have set `warm_start=True`.
+This will cause problems if significant changes are made to the search parameters (like changing the operators). You can run `model.reset()` to reset the state.
+
+You will notice that PySR will save two files: `hall_of_fame...csv` and `hall_of_fame...pkl`.
+The csv file is a list of equations and their losses, and the pkl file is a saved state of the model.
+You may load the model from the `pkl` file with:
+```python
+model = PySRRegressor.from_file("hall_of_fame.2022-08-10_100832.281.pkl")
+``` 
 
 There are several other useful features such as denoising (e.g., `denoising=True`),
 feature selection (e.g., `select_k_features=3`).

From 87750391b3c312f7e5398e7c4d4940ab96574e08 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 10 Aug 2022 11:17:14 +0300
Subject: [PATCH 30/40] Add notes about model loading to options page

---
 docs/options.md | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/docs/options.md b/docs/options.md
index 942f7af7c..914d7490f 100644
--- a/docs/options.md
+++ b/docs/options.md
@@ -16,6 +16,7 @@ may find useful include:
 - LaTeX, SymPy
 - Callable exports: numpy, pytorch, jax
 - `loss`
+- Model loading
 
 These are described below
 
@@ -252,3 +253,20 @@ Can also uses these losses for weighted (weighted-average):
 model = PySRRegressor(..., weights=weights, loss="LPDistLoss{3}()")
 model.fit(..., weights=weights)
 ```
+
+## Model loading
+
+PySR will automatically save a pickle file of the model state
+when you call `model.fit`, once before the search starts,
+and again after the search finishes. The filename will
+have the same base name as the equation (`.csv`) file, but with a `.pkl` extension.
+You can load the saved model state with:
+```python
+model = PySRRegressor.from_file(pickle_filename)
+```
+If you have a long-running job and would like to load the model
+before completion, you can also do this. In this case, the model
+loading will use the `csv` file to load the equations, since the
+`csv` file is continually updated during the search. Once
+the search completes, the model including its equations will
+be saved to the pickle file, overwriting the existing version.
\ No newline at end of file

From 45bf2c2f0a433b802675fb52621338f79edca7b5 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 10 Aug 2022 11:44:29 +0300
Subject: [PATCH 31/40] Assume normal csv format in reads

---
 pysr/sr.py      | 8 ++++----
 pysr/version.py | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 34a1f5365..1a850bf1e 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -1969,12 +1969,12 @@ def _read_equation_file(self):
                     cur_filename = str(self.equation_file_) + f".out{i}" + ".bkup"
                     if not os.path.exists(cur_filename):
                         cur_filename = str(self.equation_file_) + f".out{i}"
-                    df = pd.read_csv(cur_filename, sep="|")
+                    df = pd.read_csv(cur_filename)
                     # Rename Complexity column to complexity:
                     df.rename(
                         columns={
                             "Complexity": "complexity",
-                            "MSE": "loss",
+                            "Loss": "loss",
                             "Equation": "equation",
                         },
                         inplace=True,
@@ -1985,11 +1985,11 @@ def _read_equation_file(self):
                 filename = str(self.equation_file_) + ".bkup"
                 if not os.path.exists(filename):
                     filename = str(self.equation_file_)
-                all_outputs = [pd.read_csv(filename, sep="|")]
+                all_outputs = [pd.read_csv(filename)]
                 all_outputs[-1].rename(
                     columns={
                         "Complexity": "complexity",
-                        "MSE": "loss",
+                        "Loss": "loss",
                         "Equation": "equation",
                     },
                     inplace=True,
diff --git a/pysr/version.py b/pysr/version.py
index 63083e35d..dfe483cc5 100644
--- a/pysr/version.py
+++ b/pysr/version.py
@@ -1,2 +1,2 @@
 __version__ = "0.9.5"
-__symbolic_regression_jl_version__ = "0.9.7"
+__symbolic_regression_jl_version__ = "0.10.0"

From 6bd5a10033db15ed9daebf79453607329621d0fc Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 10 Aug 2022 11:45:12 +0300
Subject: [PATCH 32/40] Update docs on expected csv file

---
 pysr/sr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 1a850bf1e..8d535c77b 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -469,7 +469,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         Whether to use a progress bar instead of printing to stdout.
 
     equation_file : str, default=None
-        Where to save the files (.csv separated by |).
+        Where to save the files (with `.csv` extension).
 
     temp_equation_file : bool, default=False
         Whether to put the hall of fame file in the temp directory.

From 593c6741708b84f23705cb1748f072d07c5322c0 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 10 Aug 2022 11:46:10 +0300
Subject: [PATCH 33/40] Bump version with new csv format

---
 pysr/version.py    |  2 +-
 test/test.py       | 10 +++++-----
 test/test_jax.py   | 12 ++++++------
 test/test_torch.py | 18 +++++++++---------
 4 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/pysr/version.py b/pysr/version.py
index dfe483cc5..9429c9aa6 100644
--- a/pysr/version.py
+++ b/pysr/version.py
@@ -1,2 +1,2 @@
-__version__ = "0.9.5"
+__version__ = "0.10.0"
 __symbolic_regression_jl_version__ = "0.10.0"
diff --git a/test/test.py b/test/test.py
index fcde9ff9e..76fe22c87 100644
--- a/test/test.py
+++ b/test/test.py
@@ -288,10 +288,10 @@ def test_high_dim_selection_early_stop(self):
     def test_load_model(self):
         """See if we can load a ran model from the equation file."""
         csv_file_data = """
-        Complexity|MSE|Equation
-        1|0.19951081|1.9762075
-        3|0.12717344|(f0 + 1.4724599)
-        4|0.104823045|pow_abs(2.2683423, cos(f3))"""
+        Complexity,Loss,Equation
+        1,0.19951081,"1.9762075"
+        3,0.12717344,"(f0 + 1.4724599)"
+        4,0.104823045,"pow_abs(2.2683423, cos(f3))\""""
         # Strip the indents:
         csv_file_data = "\n".join([l.strip() for l in csv_file_data.split("\n")])
 
@@ -379,7 +379,7 @@ def setUp(self):
         self.model.selection_mask_ = None
         self.model.feature_names_in_ = np.array(["x0", "x1"], dtype=object)
         equations["complexity loss equation".split(" ")].to_csv(
-            "equation_file.csv.bkup", sep="|"
+            "equation_file.csv.bkup"
         )
 
         self.model.refresh()
diff --git a/test/test_jax.py b/test/test_jax.py
index e885a8d3b..eb649d4fd 100644
--- a/test/test_jax.py
+++ b/test/test_jax.py
@@ -34,13 +34,13 @@ def test_pipeline_pandas(self):
         equations = pd.DataFrame(
             {
                 "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
-                "MSE": [1.0, 0.1, 1e-5],
+                "Loss": [1.0, 0.1, 1e-5],
                 "Complexity": [1, 2, 3],
             }
         )
 
-        equations["Complexity MSE Equation".split(" ")].to_csv(
-            "equation_file.csv.bkup", sep="|"
+        equations["Complexity Loss Equation".split(" ")].to_csv(
+            "equation_file.csv.bkup"
         )
 
         model.refresh(checkpoint_file="equation_file.csv")
@@ -61,13 +61,13 @@ def test_pipeline(self):
         equations = pd.DataFrame(
             {
                 "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
-                "MSE": [1.0, 0.1, 1e-5],
+                "Loss": [1.0, 0.1, 1e-5],
                 "Complexity": [1, 2, 3],
             }
         )
 
-        equations["Complexity MSE Equation".split(" ")].to_csv(
-            "equation_file.csv.bkup", sep="|"
+        equations["Complexity Loss Equation".split(" ")].to_csv(
+            "equation_file.csv.bkup"
         )
 
         model.refresh(checkpoint_file="equation_file.csv")
diff --git a/test/test_torch.py b/test/test_torch.py
index 66fd2757f..c58dc1c96 100644
--- a/test/test_torch.py
+++ b/test/test_torch.py
@@ -49,13 +49,13 @@ def test_pipeline_pandas(self):
         equations = pd.DataFrame(
             {
                 "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
-                "MSE": [1.0, 0.1, 1e-5],
+                "Loss": [1.0, 0.1, 1e-5],
                 "Complexity": [1, 2, 3],
             }
         )
 
-        equations["Complexity MSE Equation".split(" ")].to_csv(
-            "equation_file.csv.bkup", sep="|"
+        equations["Complexity Loss Equation".split(" ")].to_csv(
+            "equation_file.csv.bkup"
         )
 
         model.refresh(checkpoint_file="equation_file.csv")
@@ -82,13 +82,13 @@ def test_pipeline(self):
         equations = pd.DataFrame(
             {
                 "Equation": ["1.0", "cos(x1)", "square(cos(x1))"],
-                "MSE": [1.0, 0.1, 1e-5],
+                "Loss": [1.0, 0.1, 1e-5],
                 "Complexity": [1, 2, 3],
             }
         )
 
-        equations["Complexity MSE Equation".split(" ")].to_csv(
-            "equation_file.csv.bkup", sep="|"
+        equations["Complexity Loss Equation".split(" ")].to_csv(
+            "equation_file.csv.bkup"
         )
 
         model.refresh(checkpoint_file="equation_file.csv")
@@ -133,13 +133,13 @@ def test_custom_operator(self):
         equations = pd.DataFrame(
             {
                 "Equation": ["1.0", "mycustomoperator(x1)"],
-                "MSE": [1.0, 0.1],
+                "Loss": [1.0, 0.1],
                 "Complexity": [1, 2],
             }
         )
 
-        equations["Complexity MSE Equation".split(" ")].to_csv(
-            "equation_file_custom_operator.csv.bkup", sep="|"
+        equations["Complexity Loss Equation".split(" ")].to_csv(
+            "equation_file_custom_operator.csv.bkup"
         )
 
         model.set_params(

From 9351408c08cef2751fa632e6d334dd7cd09f0d6f Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 10 Aug 2022 12:36:25 +0300
Subject: [PATCH 34/40] Change "best" model_selection to apply loss threshold

---
 pysr/sr.py | 63 ++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 40 insertions(+), 23 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 8d535c77b..bc6cb6d0e 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -205,10 +205,16 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     Parameters
     ----------
     model_selection : str, default="best"
-        Model selection criterion. Can be 'accuracy' or 'best'.
-        `"accuracy"` selects the candidate model with the lowest loss
-        (highest accuracy). `"best"` selects the candidate model with
-        the lowest sum of normalized loss and complexity.
+        Model selection criterion. Can be 'accuracy', 'best', or 'score'.
+        - `"accuracy"` selects the candidate model with the lowest loss
+          (highest accuracy).
+        - `"score"` selects the candidate model with the highest score.
+          Score is defined as the derivative of the log-loss with
+          respect to complexity - if an expression has a much better 
+          oss at a slightly higher complexity, it is preferred.
+        - `"best"` selects the candidate model with the highest score
+          among expressions with a loss better than at least 1.5x the
+          most accurate model.
 
     binary_operators : list[str], default=["+", "-", "*", "/"]
         List of strings giving the binary operators in Julia's Base.
@@ -469,7 +475,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         Whether to use a progress bar instead of printing to stdout.
 
     equation_file : str, default=None
-        Where to save the files (with `.csv` extension).
+        Where to save the files (.csv extension).
 
     temp_equation_file : bool, default=False
         Whether to put the hall of fame file in the temp directory.
@@ -943,12 +949,7 @@ def __repr__(self):
 
         for i, equations in enumerate(all_equations):
             selected = ["" for _ in range(len(equations))]
-            if self.model_selection == "accuracy":
-                chosen_row = -1
-            elif self.model_selection == "best":
-                chosen_row = equations["score"].idxmax()
-            else:
-                raise NotImplementedError
+            chosen_row = idx_model_selection(equations, self.model_selection)
             selected[chosen_row] = ">>>>"
             repr_equations = pd.DataFrame(
                 dict(
@@ -1091,18 +1092,14 @@ def get_best(self, index=None):
                 return [eq.iloc[i] for eq, i in zip(self.equations_, index)]
             return self.equations_.iloc[index]
 
-        if self.model_selection == "accuracy":
-            if isinstance(self.equations_, list):
-                return [eq.iloc[-1] for eq in self.equations_]
-            return self.equations_.iloc[-1]
-        elif self.model_selection == "best":
-            if isinstance(self.equations_, list):
-                return [eq.iloc[eq["score"].idxmax()] for eq in self.equations_]
-            return self.equations_.iloc[self.equations_["score"].idxmax()]
-        else:
-            raise NotImplementedError(
-                f"{self.model_selection} is not a valid model selection strategy."
-            )
+        if isinstance(self.equations_, list):
+            return [
+                eq.iloc[idx_model_selection(eq, self.model_selection)]
+                for eq in self.equations_
+            ]
+        return self.equations_.iloc[
+            idx_model_selection(self.equations_, self.model_selection)
+        ]
 
     def _setup_equation_file(self):
         """
@@ -2149,6 +2146,26 @@ def get_hof(self):
         return ret_outputs[0]
 
 
+def idx_model_selection(equations: pd.DataFrame, model_selection: str) -> int:
+    """
+    Return the index of the selected expression, given a dataframe of
+    equations and a model selection.
+    """
+    if model_selection == "accuracy":
+        chosen_idx = equations["loss"].idxmin()
+    elif model_selection == "best":
+        threshold = 1.5 * equations["loss"].min()
+        filtered_equations = equations.query(f"loss < {threshold}")
+        chosen_idx = filtered_equations["score"].idxmax()
+    elif model_selection == "score":
+        chosen_idx = equations["score"].idxmax()
+    else:
+        raise NotImplementedError(
+            f"{model_selection} is not a valid model selection strategy."
+        )
+    return chosen_idx
+
+
 def _denoise(X, y, Xresampled=None, random_state=None):
     """Denoise the dataset using a Gaussian process"""
     from sklearn.gaussian_process import GaussianProcessRegressor

From a15823e310bb5be868748a069eb77de626401b65 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 10 Aug 2022 12:38:21 +0300
Subject: [PATCH 35/40] Reduce precision of JAX tests

---
 test/test_jax.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/test_jax.py b/test/test_jax.py
index eb649d4fd..6dccd5fe7 100644
--- a/test/test_jax.py
+++ b/test/test_jax.py
@@ -49,7 +49,7 @@ def test_pipeline_pandas(self):
         np.testing.assert_almost_equal(
             np.array(jformat["callable"](jnp.array(X), jformat["parameters"])),
             np.square(np.cos(X.values[:, 1])),  # Select feature 1
-            decimal=4,
+            decimal=3,
         )
 
     def test_pipeline(self):
@@ -110,5 +110,5 @@ def test_feature_selection_custom_operators(self):
         np_output = np_prediction(X.values)
         jax_output = jax_prediction(X.values)
 
-        np.testing.assert_almost_equal(y.values, np_output, decimal=4)
-        np.testing.assert_almost_equal(y.values, jax_output, decimal=4)
+        np.testing.assert_almost_equal(y.values, np_output, decimal=3)
+        np.testing.assert_almost_equal(y.values, jax_output, decimal=3)

From 74c500587cf31b51501c9692e987b38181e0335a Mon Sep 17 00:00:00 2001
From: "deepsource-autofix[bot]"
 <62050782+deepsource-autofix[bot]@users.noreply.github.com>
Date: Wed, 10 Aug 2022 09:41:50 +0000
Subject: [PATCH 36/40] Format code with black

---
 pysr/sr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index bc6cb6d0e..2572953b2 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -210,7 +210,7 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
           (highest accuracy).
         - `"score"` selects the candidate model with the highest score.
           Score is defined as the derivative of the log-loss with
-          respect to complexity - if an expression has a much better 
+          respect to complexity - if an expression has a much better
           oss at a slightly higher complexity, it is preferred.
         - `"best"` selects the candidate model with the highest score
           among expressions with a loss better than at least 1.5x the

From 8575fba66aad9ff98fedad3fcab11dcf33539e5a Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 10 Aug 2022 12:46:53 +0300
Subject: [PATCH 37/40] Improve docstring

---
 pysr/sr.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index bc6cb6d0e..27c285ed2 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -205,7 +205,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     Parameters
     ----------
     model_selection : str, default="best"
-        Model selection criterion. Can be 'accuracy', 'best', or 'score'.
+        Model selection criterion when selecting a final expression from
+        the list of best expressions at each complexity.
+        Can be 'accuracy', 'best', or 'score'.
         - `"accuracy"` selects the candidate model with the lowest loss
           (highest accuracy).
         - `"score"` selects the candidate model with the highest score.

From ed5b70a1ee515f53dcfd0b5e147338a6e4712366 Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 10 Aug 2022 14:08:02 +0300
Subject: [PATCH 38/40] Fix model selection for loss=0

---
 pysr/sr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index fdba4ce76..3f71b60bb 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -2157,7 +2157,7 @@ def idx_model_selection(equations: pd.DataFrame, model_selection: str) -> int:
         chosen_idx = equations["loss"].idxmin()
     elif model_selection == "best":
         threshold = 1.5 * equations["loss"].min()
-        filtered_equations = equations.query(f"loss < {threshold}")
+        filtered_equations = equations.query(f"loss <= {threshold}")
         chosen_idx = filtered_equations["score"].idxmax()
     elif model_selection == "score":
         chosen_idx = equations["score"].idxmax()

From 175b0245886720b58227fb4710cc90c17ddb944d Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 10 Aug 2022 14:23:00 +0300
Subject: [PATCH 39/40] Add unit-test for all selection strategies

---
 test/test.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/test/test.py b/test/test.py
index 76fe22c87..f50013cd3 100644
--- a/test/test.py
+++ b/test/test.py
@@ -9,6 +9,7 @@
     run_feature_selection,
     _handle_feature_selection,
     _csv_filename_to_pkl_filename,
+    idx_model_selection,
 )
 from sklearn.utils.estimator_checks import check_estimator
 import sympy
@@ -403,6 +404,20 @@ def test_best_lambda(self):
         for f in [self.model.predict, self.equations_.iloc[-1]["lambda_format"]]:
             np.testing.assert_almost_equal(f(X), y, decimal=3)
 
+    def test_all_selection_strategies(self):
+        equations = pd.DataFrame(
+            dict(
+                loss=[1.0, 0.1, 0.01, 0.001 * 1.4, 0.001],
+                score=[0.5, 1.0, 0.5, 0.5, 0.3],
+            )
+        )
+        idx_accuracy = idx_model_selection(equations, "accuracy")
+        self.assertEqual(idx_accuracy, 4)
+        idx_best = idx_model_selection(equations, "best")
+        self.assertEqual(idx_best, 3)
+        idx_score = idx_model_selection(equations, "score")
+        self.assertEqual(idx_score, 1)
+
 
 class TestFeatureSelection(unittest.TestCase):
     def setUp(self):

From 73d0a989bb10ece021a625cfbf706019d44b39ce Mon Sep 17 00:00:00 2001
From: MilesCranmer <miles.cranmer@gmail.com>
Date: Wed, 10 Aug 2022 14:55:53 +0300
Subject: [PATCH 40/40] Clean up docstring

---
 pysr/sr.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 3f71b60bb..6b8e2c613 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -211,9 +211,9 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         - `"accuracy"` selects the candidate model with the lowest loss
           (highest accuracy).
         - `"score"` selects the candidate model with the highest score.
-          Score is defined as the derivative of the log-loss with
+          Score is defined as the negated derivative of the log-loss with
           respect to complexity - if an expression has a much better
-          oss at a slightly higher complexity, it is preferred.
+          loss at a slightly higher complexity, it is preferred.
         - `"best"` selects the candidate model with the highest score
           among expressions with a loss better than at least 1.5x the
           most accurate model.