From 018c3a129eb0a4e7b9ea3c61082b8fce99191068 Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:47:55 +0100
Subject: [PATCH 01/14] refactor: sympy typing issue

---
 pysr/export_sympy.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pysr/export_sympy.py b/pysr/export_sympy.py
index eeb50471..d5d9bba8 100644
--- a/pysr/export_sympy.py
+++ b/pysr/export_sympy.py
@@ -30,8 +30,8 @@
     "acosh": lambda x: sympy.acosh(x),
     "acosh_abs": lambda x: sympy.acosh(abs(x) + 1),
     "asinh": sympy.asinh,
-    "atanh": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - 1),
-    "atanh_clip": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - 1),
+    "atanh": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - sympy.S(1)),
+    "atanh_clip": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - sympy.S(1)),
     "abs": abs,
     "mod": sympy.Mod,
     "erf": sympy.erf,

From 4bcc28064ae1b5dd328bb21a7d7eb29b5e67ac76 Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:48:20 +0100
Subject: [PATCH 02/14] refactor: create `ArrayLike` type for type checking

---
 pysr/utils.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pysr/utils.py b/pysr/utils.py
index ca000aae..1b166642 100644
--- a/pysr/utils.py
+++ b/pysr/utils.py
@@ -1,10 +1,16 @@
 import os
 import re
+from pathlib import Path
+from typing import Any, List, TypeVar, Union

-from sklearn.utils.validation import _check_feature_names_in
+from numpy.typing import NDArray
+from sklearn.utils.validation import _check_feature_names_in  # type: ignore

+T = TypeVar("T", bound=Any)
+ArrayLike = Union[NDArray[T], List[T]]

-def _csv_filename_to_pkl_filename(csv_filename: str) -> str:
+
+def _csv_filename_to_pkl_filename(csv_filename: Union[str, Path]) -> Union[str, Path]:
     if os.path.splitext(csv_filename)[1] == ".pkl":
         return csv_filename

From cb76a8101f2cc1619d03bcf3baa6e828120c4cbd Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:48:39 +0100
Subject: [PATCH 03/14] refactor: declare julia as `Any` to avoid typing issues

---
 pysr/julia_import.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pysr/julia_import.py b/pysr/julia_import.py
index dc881bf2..50a58fa2 100644
--- a/pysr/julia_import.py
+++ b/pysr/julia_import.py
@@ -1,6 +1,7 @@
 import os
 import sys
 import warnings
+from typing import Any

 # Check if JuliaCall is already loaded, and if so, warn the user
 # about the relevant environment variables. If not loaded,
@@ -37,6 +38,9 @@

 from juliacall import Main as jl  # type: ignore

+jl: Any = jl  # type: ignore
+
+
 jl_version = (jl.VERSION.major, jl.VERSION.minor, jl.VERSION.patch)

 # Next, automatically load the juliacall extension if we're in a Jupyter notebook

From dca10d6fc6a9f3237095bdea2a363770b1152f01 Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:49:17 +0100
Subject: [PATCH 04/14] fix: potential issue with non-standard random states

---
 pysr/sr.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index cb3a05c6..e3fc8449 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -1848,9 +1848,6 @@ def fit(
         self.X_units_ = None
         self.y_units_ = None

-        random_state = check_random_state(self.random_state)  # For np random
-        seed = random_state.get_state()[1][0]  # For julia random
-
         self._setup_equation_file()

         mutated_params = self._validate_and_set_init_params()
@@ -1878,6 +1875,9 @@ def fit(
                 "More datapoints will lower the search speed."
             )

+        random_state = check_random_state(self.random_state)  # For np random
+        seed = random_state.randint(0, 2**32)  # For julia random
+
         # Pre transformations (feature selection and denoising)
         X, y, variable_names, X_units, y_units = self._pre_transform_training_data(
             X, y, Xresampled, variable_names, X_units, y_units, random_state

From 2bd7782a0363ad1a42aedf13f95d44c5c8d984ad Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:50:47 +0100
Subject: [PATCH 05/14] refactor: improved type inference in return values

---
 pysr/sr.py | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index e3fc8449..a3f8ff9a 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -2006,11 +2006,13 @@ def predict(self, X, index=None):
         X = self._validate_data(X, reset=False)

         try:
-            if self.nout_ > 1:
+            if isinstance(best_equation, list):
+                assert self.nout_ > 1
                 return np.stack(
                     [eq["lambda_format"](X) for eq in best_equation], axis=1
                 )
-            return best_equation["lambda_format"](X)
+            else:
+                return best_equation["lambda_format"](X)
         except Exception as error:
             raise ValueError(
                 "Failed to evaluate the expression. "
@@ -2040,9 +2042,11 @@ def sympy(self, index=None):
         """
         self.refresh()
         best_equation = self.get_best(index=index)
-        if self.nout_ > 1:
+        if isinstance(best_equation, list):
+            assert self.nout_ > 1
             return [eq["sympy_format"] for eq in best_equation]
-        return best_equation["sympy_format"]
+        else:
+            return best_equation["sympy_format"]

     def latex(self, index=None, precision=3):
         """
@@ -2102,9 +2106,11 @@ def jax(self, index=None):
         self.set_params(output_jax_format=True)
         self.refresh()
         best_equation = self.get_best(index=index)
-        if self.nout_ > 1:
+        if isinstance(best_equation, list):
+            assert self.nout_ > 1
             return [eq["jax_format"] for eq in best_equation]
-        return best_equation["jax_format"]
+        else:
+            return best_equation["jax_format"]

     def pytorch(self, index=None):
         """
@@ -2132,9 +2138,10 @@ def pytorch(self, index=None):
         self.set_params(output_torch_format=True)
         self.refresh()
         best_equation = self.get_best(index=index)
-        if self.nout_ > 1:
+        if isinstance(best_equation, pd.Series):
+            return best_equation["torch_format"]
+        else:
             return [eq["torch_format"] for eq in best_equation]
-        return best_equation["torch_format"]

     def _read_equation_file(self):
         """Read the hall of fame file created by `SymbolicRegression.jl`."""
@@ -2233,10 +2240,8 @@ def get_hof(self):
         lastComplexity = 0
         sympy_format = []
         lambda_format = []
-        if self.output_jax_format:
-            jax_format = []
-        if self.output_torch_format:
-            torch_format = []
+        jax_format = []
+        torch_format = []

         for _, eqn_row in output.iterrows():
             eqn = pysr2sympy(
@@ -2348,7 +2353,7 @@ def latex_table(
         """
         self.refresh()

-        if self.nout_ > 1:
+        if isinstance(self.equations_, list):
             if indices is not None:
                 assert isinstance(indices, list)
                 assert isinstance(indices[0], list)
@@ -2357,7 +2362,7 @@ def latex_table(
             table_string = sympy2multilatextable(
                 self.equations_, indices=indices, precision=precision, columns=columns
             )
-        else:
+        elif isinstance(self.equations_, pd.DataFrame):
             if indices is not None:
                 assert isinstance(indices, list)
                 assert isinstance(indices[0], int)
@@ -2365,6 +2370,11 @@ def latex_table(
             table_string = sympy2latextable(
                 self.equations_, indices=indices, precision=precision, columns=columns
             )
+        else:
+            raise ValueError(
+                "Invalid type for equations_ to pass to `latex_table`. "
+                "Expected a DataFrame or a list of DataFrames."
+            )

         preamble_string = [
             r"\usepackage{breqn}",

From 7909e90241a9ebb3b46ce1a0360f51718b1fa30b Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:52:32 +0100
Subject: [PATCH 06/14] refactor: more type declarations

---
 pysr/sr.py | 73 ++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 51 insertions(+), 22 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index a3f8ff9a..fd870d3c 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -21,9 +21,12 @@
 import numpy as np
 import pandas as pd
+from numpy import ndarray
+from numpy.typing import NDArray
 from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin
 from sklearn.utils import check_array, check_consistent_length, check_random_state
-from sklearn.utils.validation import _check_feature_names_in, check_is_fitted
+from sklearn.utils.validation import _check_feature_names_in  # type: ignore
+from sklearn.utils.validation import check_is_fitted

 from .denoising import denoise, multi_denoise
 from .deprecated import DEPRECATED_KWARGS
@@ -179,6 +182,21 @@ def _check_assertions(


 class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
+    equations_: Optional[Union[pd.DataFrame, List[pd.DataFrame]]]
+    n_features_in_: int
+    feature_names_in_: ArrayLike[str]
+    display_feature_names_in_: ArrayLike[str]
+    X_units_: Optional[ArrayLike[str]]
+    y_units_: Optional[Union[str, ArrayLike[str]]]
+    nout_: int
+    selection_mask_: Optional[NDArray[np.bool_]]
+    tempdir_: Path
+    equation_file_: Union[str, Path]
+    julia_state_stream_: Optional[NDArray[np.uint8]]
+    julia_options_stream_: Optional[NDArray[np.uint8]]
+    equation_file_contents_: Optional[List[pd.DataFrame]]
+    show_pickle_warnings_: bool
+
     """
     High-performance symbolic regression algorithm.

@@ -603,22 +621,17 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         Units of each variable in the training dataset, `y`.
     nout_ : int
         Number of output dimensions.
-    selection_mask_ : list[int] of length `select_k_features`
-        List of indices for input features that are selected when
-        `select_k_features` is set.
+    selection_mask_ : ndarray of shape (`n_features_in_`,)
+        Mask of which features of `X` to use when `select_k_features` is set.
     tempdir_ : Path
         Path to the temporary equations directory.
-    equation_file_ : str
+    equation_file_ : Union[str, Path]
         Output equation file name produced by the julia backend.
     julia_state_stream_ : ndarray
         The serialized state for the julia SymbolicRegression.jl backend (after fitting),
         stored as an array of uint8, produced by Julia's Serialization.serialize function.
-    julia_state_
-        The deserialized state.
     julia_options_stream_ : ndarray
         The serialized julia options, stored as an array of uint8,
-    julia_options_
-        The deserialized julia options.
     equation_file_contents_ : list[pandas.DataFrame]
         Contents of the equation file output by the Julia backend.
     show_pickle_warnings_ : bool
@@ -926,7 +939,7 @@ def from_file(
             Names of the features passed to the model.
             Not needed if loading from a pickle file.
         selection_mask : list[bool]
-            If using select_k_features, you must pass `model.selection_mask_` here.
+            If using `select_k_features`, you must pass `model.selection_mask_` here.
             Not needed if loading from a pickle file.
         nout : int
             Number of outputs of the model.
@@ -1124,10 +1137,12 @@ def equations(self):  # pragma: no cover

     @property
     def julia_options_(self):
+        """The deserialized julia options."""
         return jl_deserialize(self.julia_options_stream_)

     @property
     def julia_state_(self):
+        """The deserialized state."""
         return jl_deserialize(self.julia_state_stream_)

     @property
@@ -1140,7 +1155,7 @@ def raw_julia_state_(self):
         )
         return self.julia_state_

-    def get_best(self, index=None):
+    def get_best(self, index=None) -> Union[pd.Series, List[pd.Series]]:
         """
         Get best equation using `model_selection`.
@@ -1316,7 +1331,15 @@ def _validate_and_set_init_params(self):

     def _validate_and_set_fit_params(
         self, X, y, Xresampled, weights, variable_names, X_units, y_units
-    ):
+    ) -> Tuple[
+        ndarray,
+        ndarray,
+        Optional[ndarray],
+        Optional[ndarray],
+        ndarray,
+        Optional[ArrayLike[str]],
+        Optional[Union[str, ArrayLike[str]]],
+    ]:
         """
         Validate the parameters passed to the :term`fit` method.
@@ -1336,7 +1359,7 @@ def _validate_and_set_fit_params(
             Weight array of the same shape as `y`.
             Each element is how to weight the mean-square-error loss
             for that particular element of y.
-        variable_names : list[str] of length n_features
+        variable_names : ndarray of length n_features
             Names of each variable in the training dataset, `X`.
         X_units : list[str] of length n_features
             Units of each variable in the training dataset, `X`.
@@ -1392,7 +1415,7 @@ def _validate_and_set_fit_params(
         if weights is not None:
             weights = check_array(weights, ensure_2d=False)
             check_consistent_length(weights, y)
-        X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
+        X, y = self._validate_data_X_y(X, y)
         self.feature_names_in_ = _safe_check_feature_names_in(
             self, variable_names, generate_names=False
         )
@@ -1402,10 +1425,10 @@ def _validate_and_set_fit_params(
             self.display_feature_names_in_ = np.array(
                 [f"x{_subscriptify(i)}" for i in range(X.shape[1])]
             )
+            variable_names = self.feature_names_in_
         else:
             self.display_feature_names_in_ = self.feature_names_in_
-
-        variable_names = self.feature_names_in_
+            variable_names = self.feature_names_in_

         # Handle multioutput data
         if len(y.shape) == 1 or (len(y.shape) == 2 and y.shape[1] == 1):
@@ -1420,6 +1443,12 @@ def _validate_and_set_fit_params(

         return X, y, Xresampled, weights, variable_names, X_units, y_units

+    def _validate_data_X_y(self, X, y) -> Tuple[ndarray, ndarray]:
+        return self._validate_data(X=X, y=y, reset=True, multi_output=True)  # type: ignore
+
+    def _validate_data_X(self, X) -> Tuple[ndarray]:
+        return self._validate_data(X=X, reset=False)  # type: ignore
+
     def _pre_transform_training_data(
         self, X, y, Xresampled, variable_names, X_units, y_units, random_state
     ):
@@ -1489,7 +1518,7 @@ def _pre_transform_training_data(
             self.X_units_ = copy.deepcopy(X_units)

         # Re-perform data validation and feature name updating
-        X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
+        X, y = self._validate_data_X_y(X, y)
         # Update feature names with selected variable names
         self.feature_names_in_ = _check_feature_names_in(self, variable_names)
         self.display_feature_names_in_ = self.feature_names_in_
@@ -1506,7 +1535,7 @@ def _pre_transform_training_data(

         return X, y, variable_names, X_units, y_units

-    def _run(self, X, y, mutated_params, weights, seed):
+    def _run(self, X, y, mutated_params, weights, seed: int):
         """
         Run the symbolic regression fitting process on the julia backend.
@@ -1784,9 +1813,9 @@ def fit(
         y,
         Xresampled=None,
         weights=None,
-        variable_names: Optional[List[str]] = None,
-        X_units: Optional[List[str]] = None,
-        y_units: Optional[List[str]] = None,
+        variable_names: Optional[ArrayLike[str]] = None,
+        X_units: Optional[ArrayLike[str]] = None,
+        y_units: Optional[Union[str, ArrayLike[str]]] = None,
     ) -> "PySRRegressor":
         """
         Search for equations to fit the dataset and store them in `self.equations_`.

From 7113eed25ccf1a4af7712b87cfef366d98ac464f Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:53:14 +0100
Subject: [PATCH 07/14] style: use pandas indexing for return values

---
 pysr/sr.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index fd870d3c..98219fba 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -47,6 +47,7 @@
 )
 from .julia_import import SymbolicRegression, jl
 from .utils import (
+    ArrayLike,
     _csv_filename_to_pkl_filename,
     _preprocess_julia_floats,
     _safe_check_feature_names_in,
 )
@@ -1037,7 +1038,7 @@ def __repr__(self):
             all_equations = equations

         for i, equations in enumerate(all_equations):
-            selected = ["" for _ in range(len(equations))]
+            selected = pd.Series([""] * len(equations), index=equations.index)
             chosen_row = idx_model_selection(equations, self.model_selection)
             selected[chosen_row] = ">>>>"
             repr_equations = pd.DataFrame(
@@ -1191,12 +1192,13 @@ def get_best(self, index=None) -> Union[pd.Series, List[pd.Series]]:

         if isinstance(self.equations_, list):
             return [
-                eq.iloc[idx_model_selection(eq, self.model_selection)]
+                eq.loc[idx_model_selection(eq, self.model_selection)]
                 for eq in self.equations_
             ]
-        return self.equations_.iloc[
-            idx_model_selection(self.equations_, self.model_selection)
-        ]
+        else:
+            return self.equations_.loc[
+                idx_model_selection(self.equations_, self.model_selection)
+            ]

     def _setup_equation_file(self):
         """

From 583beafc250c6ba4a6dccbc4f66f337492aa70d0 Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:53:29 +0100
Subject: [PATCH 08/14] refactor: typings for sympy export

---
 pysr/export_sympy.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pysr/export_sympy.py b/pysr/export_sympy.py
index d5d9bba8..314a2cbd 100644
--- a/pysr/export_sympy.py
+++ b/pysr/export_sympy.py
@@ -5,6 +5,8 @@
 import sympy
 from sympy import sympify

+from .utils import ArrayLike
+
 sympy_mappings = {
     "div": lambda x, y: x / y,
     "mult": lambda x, y: x * y,
@@ -60,13 +62,13 @@
 def create_sympy_symbols_map(
-    feature_names_in: List[str],
+    feature_names_in: ArrayLike[str],
 ) -> Dict[str, sympy.Symbol]:
     return {variable: sympy.Symbol(variable) for variable in feature_names_in}


 def create_sympy_symbols(
-    feature_names_in: List[str],
+    feature_names_in: ArrayLike[str],
 ) -> List[sympy.Symbol]:
     return [sympy.Symbol(variable) for variable in feature_names_in]


 def pysr2sympy(
     equation: str,
     *,
-    feature_names_in: Optional[List[str]] = None,
+    feature_names_in: Optional[ArrayLike[str]] = None,
     extra_sympy_mappings: Optional[Dict[str, Callable]] = None,
 ):
     if feature_names_in is None:

From c2379a14c8ff55ee77a9de0ce24a1e4478ffba96 Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:54:21 +0100
Subject: [PATCH 09/14] build: add mypy to dev environment

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index f2c465e1..72cb167f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,4 +34,5 @@ profile = "black"
 dev-dependencies = [
     "pre-commit>=3.7.0",
     "ipython>=8.23.0",
+    "mypy>=1.10.0",
 ]

From 47ad683300c6ddd5817b4959e8dac1d345f68994 Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:58:58 +0100
Subject: [PATCH 10/14] refactor: NDArray to ndarray for string type

---
 pysr/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pysr/utils.py b/pysr/utils.py
index 1b166642..91fdffc0 100644
--- a/pysr/utils.py
+++ b/pysr/utils.py
@@ -3,11 +3,11 @@
 from pathlib import Path
 from typing import Any, List, TypeVar, Union

-from numpy.typing import NDArray
+from numpy import ndarray
 from sklearn.utils.validation import _check_feature_names_in  # type: ignore

 T = TypeVar("T", bound=Any)
-ArrayLike = Union[NDArray[T], List[T]]
+ArrayLike = Union[ndarray, List[T]]


 def _csv_filename_to_pkl_filename(csv_filename: Union[str, Path]) -> Union[str, Path]:

From 04aa23c6e49f2f6758ff24667c556ede141b9b64 Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 08:00:11 +0100
Subject: [PATCH 11/14] style: move attribute types to after docstring

---
 pysr/sr.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 98219fba..0b218df5 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -183,21 +183,6 @@ def _check_assertions(
 class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
-
     """
     High-performance symbolic regression algorithm.
@@ -679,6 +664,21 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     ```
     """

+    equations_: Optional[Union[pd.DataFrame, List[pd.DataFrame]]]
+    n_features_in_: int
+    feature_names_in_: ArrayLike[str]
+    display_feature_names_in_: ArrayLike[str]
+    X_units_: Optional[ArrayLike[str]]
+    y_units_: Optional[Union[str, ArrayLike[str]]]
+    nout_: int
+    selection_mask_: Optional[NDArray[np.bool_]]
+    tempdir_: Path
+    equation_file_: Union[str, Path]
+    julia_state_stream_: Optional[NDArray[np.uint8]]
+    julia_options_stream_: Optional[NDArray[np.uint8]]
+    equation_file_contents_: Optional[List[pd.DataFrame]]
+    show_pickle_warnings_: bool
+
     def __init__(
         self,
         model_selection: Literal["best", "accuracy", "score"] = "best",

From a5eaab9fefaf6afd4eaf3df223deeeb0dc4e0a3d Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 08:08:56 +0100
Subject: [PATCH 12/14] refactor: help with type inference of `get_best`

---
 pysr/sr.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 0b218df5..48fd739f 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -2169,10 +2169,10 @@ def pytorch(self, index=None):
         self.set_params(output_torch_format=True)
         self.refresh()
         best_equation = self.get_best(index=index)
-        if isinstance(best_equation, pd.Series):
-            return best_equation["torch_format"]
-        else:
+        if isinstance(best_equation, list):
             return [eq["torch_format"] for eq in best_equation]
+        else:
+            return best_equation["torch_format"]

     def _read_equation_file(self):
         """Read the hall of fame file created by `SymbolicRegression.jl`."""

From 526d334020fc925284e94aeb4df997760853435a Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 08:29:15 +0100
Subject: [PATCH 13/14] fix: type inference issue in return value of get_best

---
 pysr/sr.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 48fd739f..54382a24 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -1179,8 +1179,6 @@ def get_best(self, index=None) -> Union[pd.Series, List[pd.Series]]:
             Raised when an invalid model selection strategy is provided.
         """
         check_is_fitted(self, attributes=["equations_"])
-        if self.equations_ is None:
-            raise ValueError("No equations have been generated yet.")

         if index is not None:
             if isinstance(self.equations_, list):
                 assert isinstance(
                     index, list
                 ), "With multiple output features, index must be a list."
                 return [eq.iloc[i] for eq, i in zip(self.equations_, index)]
-            return self.equations_.iloc[index]
+            elif isinstance(self.equations_, pd.DataFrame):
+                return self.equations_.iloc[index]
+            else:
+                raise ValueError("No equations have been generated yet.")

         if isinstance(self.equations_, list):
             return [
                 eq.loc[idx_model_selection(eq, self.model_selection)]
                 for eq in self.equations_
             ]
-        else:
+        elif isinstance(self.equations_, pd.DataFrame):
             return self.equations_.loc[
                 idx_model_selection(self.equations_, self.model_selection)
             ]
+        else:
+            raise ValueError("No equations have been generated yet.")

     def _setup_equation_file(self):
         """

From 96e5a0f2b7dec74f44278bb470be2e4d3b909508 Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 08:42:28 +0100
Subject: [PATCH 14/14] fix: upper bound of randint for windows

---
 pysr/sr.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 54382a24..f1a254fd 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -1910,7 +1910,7 @@ def fit(
             )

         random_state = check_random_state(self.random_state)  # For np random
-        seed = random_state.randint(0, 2**32)  # For julia random
+        seed = random_state.randint(0, 2**31 - 1)  # For julia random

         # Pre transformations (feature selection and denoising)
         X, y, variable_names, X_units, y_units = self._pre_transform_training_data(
             X, y, Xresampled, variable_names, X_units, y_units, random_state
@@ -1962,7 +1962,7 @@ def fit(

         return self

-    def refresh(self, checkpoint_file=None):
+    def refresh(self, checkpoint_file=None) -> None:
         """
         Update self.equations_ with any new options passed.