From 018c3a129eb0a4e7b9ea3c61082b8fce99191068 Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:47:55 +0100
Subject: [PATCH 01/14] refactor: sympy typing issue

---
 pysr/export_sympy.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pysr/export_sympy.py b/pysr/export_sympy.py
index eeb50471..d5d9bba8 100644
--- a/pysr/export_sympy.py
+++ b/pysr/export_sympy.py
@@ -30,8 +30,8 @@
     "acosh": lambda x: sympy.acosh(x),
     "acosh_abs": lambda x: sympy.acosh(abs(x) + 1),
     "asinh": sympy.asinh,
-    "atanh": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - 1),
-    "atanh_clip": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - 1),
+    "atanh": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - sympy.S(1)),
+    "atanh_clip": lambda x: sympy.atanh(sympy.Mod(x + 1, 2) - sympy.S(1)),
     "abs": abs,
     "mod": sympy.Mod,
     "erf": sympy.erf,

From 4bcc28064ae1b5dd328bb21a7d7eb29b5e67ac76 Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:48:20 +0100
Subject: [PATCH 02/14] refactor: create `ArrayLike` type for type checking

---
 pysr/utils.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pysr/utils.py b/pysr/utils.py
index ca000aae..1b166642 100644
--- a/pysr/utils.py
+++ b/pysr/utils.py
@@ -1,10 +1,16 @@
 import os
 import re
+from pathlib import Path
+from typing import Any, List, TypeVar, Union

-from sklearn.utils.validation import _check_feature_names_in
+from numpy.typing import NDArray
+from sklearn.utils.validation import _check_feature_names_in  # type: ignore

+T = TypeVar("T", bound=Any)
+ArrayLike = Union[NDArray[T], List[T]]

-def _csv_filename_to_pkl_filename(csv_filename: str) -> str:
+
+def _csv_filename_to_pkl_filename(csv_filename: Union[str, Path]) -> Union[str, Path]:
     if os.path.splitext(csv_filename)[1] == ".pkl":
         return csv_filename

From cb76a8101f2cc1619d03bcf3baa6e828120c4cbd Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:48:39 +0100
Subject: [PATCH 03/14] refactor: declare julia as `Any` to avoid typing issues

---
 pysr/julia_import.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pysr/julia_import.py b/pysr/julia_import.py
index dc881bf2..50a58fa2 100644
--- a/pysr/julia_import.py
+++ b/pysr/julia_import.py
@@ -1,6 +1,7 @@
 import os
 import sys
 import warnings
+from typing import Any

 # Check if JuliaCall is already loaded, and if so, warn the user
 # about the relevant environment variables. If not loaded,
@@ -37,6 +38,9 @@

 from juliacall import Main as jl  # type: ignore

+jl: Any = jl  # type: ignore
+
+
 jl_version = (jl.VERSION.major, jl.VERSION.minor, jl.VERSION.patch)

 # Next, automatically load the juliacall extension if we're in a Jupyter notebook

From dca10d6fc6a9f3237095bdea2a363770b1152f01 Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:49:17 +0100
Subject: [PATCH 04/14] fix: potential issue with non-standard random states

---
 pysr/sr.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index cb3a05c6..e3fc8449 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -1848,9 +1848,6 @@ def fit(
         self.X_units_ = None
         self.y_units_ = None

-        random_state = check_random_state(self.random_state)  # For np random
-        seed = random_state.get_state()[1][0]  # For julia random
-
         self._setup_equation_file()

         mutated_params = self._validate_and_set_init_params()
@@ -1878,6 +1875,9 @@ def fit(
                 "More datapoints will lower the search speed."
             )

+        random_state = check_random_state(self.random_state)  # For np random
+        seed = random_state.randint(0, 2**32)  # For julia random
+
         # Pre transformations (feature selection and denoising)
         X, y, variable_names, X_units, y_units = self._pre_transform_training_data(
             X, y, Xresampled, variable_names, X_units, y_units, random_state

From 2bd7782a0363ad1a42aedf13f95d44c5c8d984ad Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:50:47 +0100
Subject: [PATCH 05/14] refactor: improved type inference in return values

---
 pysr/sr.py | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index e3fc8449..a3f8ff9a 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -2006,11 +2006,13 @@ def predict(self, X, index=None):
         X = self._validate_data(X, reset=False)

         try:
-            if self.nout_ > 1:
+            if isinstance(best_equation, list):
+                assert self.nout_ > 1
                 return np.stack(
                     [eq["lambda_format"](X) for eq in best_equation], axis=1
                 )
-            return best_equation["lambda_format"](X)
+            else:
+                return best_equation["lambda_format"](X)
         except Exception as error:
             raise ValueError(
                 "Failed to evaluate the expression. "
@@ -2040,9 +2042,11 @@ def sympy(self, index=None):
         """
         self.refresh()
         best_equation = self.get_best(index=index)
-        if self.nout_ > 1:
+        if isinstance(best_equation, list):
+            assert self.nout_ > 1
             return [eq["sympy_format"] for eq in best_equation]
-        return best_equation["sympy_format"]
+        else:
+            return best_equation["sympy_format"]

     def latex(self, index=None, precision=3):
         """
@@ -2102,9 +2106,11 @@ def jax(self, index=None):
         self.set_params(output_jax_format=True)
         self.refresh()
         best_equation = self.get_best(index=index)
-        if self.nout_ > 1:
+        if isinstance(best_equation, list):
+            assert self.nout_ > 1
             return [eq["jax_format"] for eq in best_equation]
-        return best_equation["jax_format"]
+        else:
+            return best_equation["jax_format"]

     def pytorch(self, index=None):
         """
@@ -2132,9 +2138,10 @@ def pytorch(self, index=None):
         self.set_params(output_torch_format=True)
         self.refresh()
         best_equation = self.get_best(index=index)
-        if self.nout_ > 1:
+        if isinstance(best_equation, pd.Series):
+            return best_equation["torch_format"]
+        else:
             return [eq["torch_format"] for eq in best_equation]
-        return best_equation["torch_format"]

     def _read_equation_file(self):
         """Read the hall of fame file created by `SymbolicRegression.jl`."""
@@ -2233,10 +2240,8 @@ def get_hof(self):
         lastComplexity = 0
         sympy_format = []
         lambda_format = []
-        if self.output_jax_format:
-            jax_format = []
-        if self.output_torch_format:
-            torch_format = []
+        jax_format = []
+        torch_format = []

         for _, eqn_row in output.iterrows():
             eqn = pysr2sympy(
@@ -2348,7 +2353,7 @@ def latex_table(
         """
         self.refresh()

-        if self.nout_ > 1:
+        if isinstance(self.equations_, list):
             if indices is not None:
                 assert isinstance(indices, list)
                 assert isinstance(indices[0], list)
@@ -2357,7 +2362,7 @@ def latex_table(
             table_string = sympy2multilatextable(
                 self.equations_, indices=indices, precision=precision, columns=columns
             )
-        else:
+        elif isinstance(self.equations_, pd.DataFrame):
             if indices is not None:
                 assert isinstance(indices, list)
                 assert isinstance(indices[0], int)
@@ -2365,6 +2370,11 @@ def latex_table(
             table_string = sympy2latextable(
                 self.equations_, indices=indices, precision=precision, columns=columns
             )
+        else:
+            raise ValueError(
+                "Invalid type for equations_ to pass to `latex_table`. "
+                "Expected a DataFrame or a list of DataFrames."
+            )

         preamble_string = [
             r"\usepackage{breqn}",

From 7909e90241a9ebb3b46ce1a0360f51718b1fa30b Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:52:32 +0100
Subject: [PATCH 06/14] refactor: more type declarations

---
 pysr/sr.py | 73 ++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 51 insertions(+), 22 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index a3f8ff9a..fd870d3c 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -21,9 +21,12 @@
 import numpy as np
 import pandas as pd
+from numpy import ndarray
+from numpy.typing import NDArray
 from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin
 from sklearn.utils import check_array, check_consistent_length, check_random_state
-from sklearn.utils.validation import _check_feature_names_in, check_is_fitted
+from sklearn.utils.validation import _check_feature_names_in  # type: ignore
+from sklearn.utils.validation import check_is_fitted

 from .denoising import denoise, multi_denoise
 from .deprecated import DEPRECATED_KWARGS
@@ -179,6 +182,21 @@ def _check_assertions(


 class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
+    equations_: Optional[Union[pd.DataFrame, List[pd.DataFrame]]]
+    n_features_in_: int
+    feature_names_in_: ArrayLike[str]
+    display_feature_names_in_: ArrayLike[str]
+    X_units_: Optional[ArrayLike[str]]
+    y_units_: Optional[Union[str, ArrayLike[str]]]
+    nout_: int
+    selection_mask_: Optional[NDArray[np.bool_]]
+    tempdir_: Path
+    equation_file_: Union[str, Path]
+    julia_state_stream_: Optional[NDArray[np.uint8]]
+    julia_options_stream_: Optional[NDArray[np.uint8]]
+    equation_file_contents_: Optional[List[pd.DataFrame]]
+    show_pickle_warnings_: bool
+
     """
     High-performance symbolic regression algorithm.

@@ -603,22 +621,17 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
         Units of each variable in the training dataset, `y`.
     nout_ : int
         Number of output dimensions.
-    selection_mask_ : list[int] of length `select_k_features`
-        List of indices for input features that are selected when
-        `select_k_features` is set.
+    selection_mask_ : ndarray of shape (`n_features_in_`,)
+        Mask of which features of `X` to use when `select_k_features` is set.
     tempdir_ : Path
         Path to the temporary equations directory.
-    equation_file_ : str
+    equation_file_ : Union[str, Path]
         Output equation file name produced by the julia backend.
     julia_state_stream_ : ndarray
         The serialized state for the julia SymbolicRegression.jl backend (after fitting),
         stored as an array of uint8, produced by Julia's Serialization.serialize function.
-    julia_state_
-        The deserialized state.
     julia_options_stream_ : ndarray
         The serialized julia options, stored as an array of uint8,
-    julia_options_
-        The deserialized julia options.
     equation_file_contents_ : list[pandas.DataFrame]
         Contents of the equation file output by the Julia backend.
     show_pickle_warnings_ : bool
@@ -926,7 +939,7 @@ def from_file(
             Names of the features passed to the model.
             Not needed if loading from a pickle file.
         selection_mask : list[bool]
-            If using select_k_features, you must pass `model.selection_mask_` here.
+            If using `select_k_features`, you must pass `model.selection_mask_` here.
             Not needed if loading from a pickle file.
         nout : int
             Number of outputs of the model.
@@ -1124,10 +1137,12 @@ def equations(self):  # pragma: no cover

     @property
     def julia_options_(self):
+        """The deserialized julia options."""
         return jl_deserialize(self.julia_options_stream_)

     @property
     def julia_state_(self):
+        """The deserialized state."""
         return jl_deserialize(self.julia_state_stream_)

     @property
@@ -1140,7 +1155,7 @@ def raw_julia_state_(self):
         )
         return self.julia_state_

-    def get_best(self, index=None):
+    def get_best(self, index=None) -> Union[pd.Series, List[pd.Series]]:
         """
         Get best equation using `model_selection`.
@@ -1316,7 +1331,15 @@ def _validate_and_set_init_params(self):

     def _validate_and_set_fit_params(
         self, X, y, Xresampled, weights, variable_names, X_units, y_units
-    ):
+    ) -> Tuple[
+        ndarray,
+        ndarray,
+        Optional[ndarray],
+        Optional[ndarray],
+        ndarray,
+        Optional[ArrayLike[str]],
+        Optional[Union[str, ArrayLike[str]]],
+    ]:
         """
         Validate the parameters passed to the :term`fit` method.
@@ -1336,7 +1359,7 @@ def _validate_and_set_fit_params(
             Weight array of the same shape as `y`.
             Each element is how to weight the mean-square-error loss
             for that particular element of y.
-        variable_names : list[str] of length n_features
+        variable_names : ndarray of length n_features
             Names of each variable in the training dataset, `X`.
         X_units : list[str] of length n_features
             Units of each variable in the training dataset, `X`.
@@ -1392,7 +1415,7 @@ def _validate_and_set_fit_params(
         if weights is not None:
             weights = check_array(weights, ensure_2d=False)
             check_consistent_length(weights, y)
-        X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
+        X, y = self._validate_data_X_y(X, y)
         self.feature_names_in_ = _safe_check_feature_names_in(
             self, variable_names, generate_names=False
         )
@@ -1402,10 +1425,10 @@ def _validate_and_set_fit_params(
             self.display_feature_names_in_ = np.array(
                 [f"x{_subscriptify(i)}" for i in range(X.shape[1])]
             )
+            variable_names = self.feature_names_in_
         else:
             self.display_feature_names_in_ = self.feature_names_in_
-
-        variable_names = self.feature_names_in_
+            variable_names = self.feature_names_in_

         # Handle multioutput data
         if len(y.shape) == 1 or (len(y.shape) == 2 and y.shape[1] == 1):
@@ -1420,6 +1443,12 @@ def _validate_and_set_fit_params(

         return X, y, Xresampled, weights, variable_names, X_units, y_units

+    def _validate_data_X_y(self, X, y) -> Tuple[ndarray, ndarray]:
+        return self._validate_data(X=X, y=y, reset=True, multi_output=True)  # type: ignore
+
+    def _validate_data_X(self, X) -> Tuple[ndarray]:
+        return self._validate_data(X=X, reset=False)  # type: ignore
+
     def _pre_transform_training_data(
         self, X, y, Xresampled, variable_names, X_units, y_units, random_state
     ):
@@ -1489,7 +1518,7 @@ def _pre_transform_training_data(
             self.X_units_ = copy.deepcopy(X_units)

         # Re-perform data validation and feature name updating
-        X, y = self._validate_data(X=X, y=y, reset=True, multi_output=True)
+        X, y = self._validate_data_X_y(X, y)
         # Update feature names with selected variable names
         self.feature_names_in_ = _check_feature_names_in(self, variable_names)
         self.display_feature_names_in_ = self.feature_names_in_
@@ -1506,7 +1535,7 @@ def _pre_transform_training_data(

         return X, y, variable_names, X_units, y_units

-    def _run(self, X, y, mutated_params, weights, seed):
+    def _run(self, X, y, mutated_params, weights, seed: int):
         """
         Run the symbolic regression fitting process on the julia backend.
@@ -1784,9 +1813,9 @@ def fit(
         y,
         Xresampled=None,
         weights=None,
-        variable_names: Optional[List[str]] = None,
-        X_units: Optional[List[str]] = None,
-        y_units: Optional[List[str]] = None,
+        variable_names: Optional[ArrayLike[str]] = None,
+        X_units: Optional[ArrayLike[str]] = None,
+        y_units: Optional[Union[str, ArrayLike[str]]] = None,
     ) -> "PySRRegressor":
         """
         Search for equations to fit the dataset and store them in `self.equations_`.

From 7113eed25ccf1a4af7712b87cfef366d98ac464f Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:53:14 +0100
Subject: [PATCH 07/14] style: use pandas indexing for return values

---
 pysr/sr.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index fd870d3c..98219fba 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -47,6 +47,7 @@
 )
 from .julia_import import SymbolicRegression, jl
 from .utils import (
+    ArrayLike,
     _csv_filename_to_pkl_filename,
     _preprocess_julia_floats,
     _safe_check_feature_names_in,
 )
@@ -1037,7 +1038,7 @@ def __repr__(self):
             all_equations = equations

         for i, equations in enumerate(all_equations):
-            selected = ["" for _ in range(len(equations))]
+            selected = pd.Series([""] * len(equations), index=equations.index)
             chosen_row = idx_model_selection(equations, self.model_selection)
             selected[chosen_row] = ">>>>"
             repr_equations = pd.DataFrame(
@@ -1191,12 +1192,13 @@ def get_best(self, index=None) -> Union[pd.Series, List[pd.Series]]:

         if isinstance(self.equations_, list):
             return [
-                eq.iloc[idx_model_selection(eq, self.model_selection)]
+                eq.loc[idx_model_selection(eq, self.model_selection)]
                 for eq in self.equations_
             ]
-        return self.equations_.iloc[
-            idx_model_selection(self.equations_, self.model_selection)
-        ]
+        else:
+            return self.equations_.loc[
+                idx_model_selection(self.equations_, self.model_selection)
+            ]

     def _setup_equation_file(self):
         """

From 583beafc250c6ba4a6dccbc4f66f337492aa70d0 Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:53:29 +0100
Subject: [PATCH 08/14] refactor: typings for sympy export

---
 pysr/export_sympy.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pysr/export_sympy.py b/pysr/export_sympy.py
index d5d9bba8..314a2cbd 100644
--- a/pysr/export_sympy.py
+++ b/pysr/export_sympy.py
@@ -5,6 +5,8 @@
 import sympy
 from sympy import sympify

+from .utils import ArrayLike
+
 sympy_mappings = {
     "div": lambda x, y: x / y,
     "mult": lambda x, y: x * y,
@@ -60,13 +62,13 @@
 def create_sympy_symbols_map(
-    feature_names_in: List[str],
+    feature_names_in: ArrayLike[str],
 ) -> Dict[str, sympy.Symbol]:
     return {variable: sympy.Symbol(variable) for variable in feature_names_in}


 def create_sympy_symbols(
-    feature_names_in: List[str],
+    feature_names_in: ArrayLike[str],
 ) -> List[sympy.Symbol]:
     return [sympy.Symbol(variable) for variable in feature_names_in]


 def pysr2sympy(
     equation: str,
     *,
-    feature_names_in: Optional[List[str]] = None,
+    feature_names_in: Optional[ArrayLike[str]] = None,
     extra_sympy_mappings: Optional[Dict[str, Callable]] = None,
 ):
     if feature_names_in is None:

From c2379a14c8ff55ee77a9de0ce24a1e4478ffba96 Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:54:21 +0100
Subject: [PATCH 09/14] build: add mypy to dev environment

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index f2c465e1..72cb167f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,4 +34,5 @@ profile = "black"
 dev-dependencies = [
     "pre-commit>=3.7.0",
     "ipython>=8.23.0",
+    "mypy>=1.10.0",
 ]

From 47ad683300c6ddd5817b4959e8dac1d345f68994 Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 07:58:58 +0100
Subject: [PATCH 10/14] refactor: NDArray to ndarray for string type

---
 pysr/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pysr/utils.py b/pysr/utils.py
index 1b166642..91fdffc0 100644
--- a/pysr/utils.py
+++ b/pysr/utils.py
@@ -3,11 +3,11 @@
 from pathlib import Path
 from typing import Any, List, TypeVar, Union

-from numpy.typing import NDArray
+from numpy import ndarray
 from sklearn.utils.validation import _check_feature_names_in  # type: ignore

 T = TypeVar("T", bound=Any)
-ArrayLike = Union[NDArray[T], List[T]]
+ArrayLike = Union[ndarray, List[T]]


 def _csv_filename_to_pkl_filename(csv_filename: Union[str, Path]) -> Union[str, Path]:

From 04aa23c6e49f2f6758ff24667c556ede141b9b64 Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 08:00:11 +0100
Subject: [PATCH 11/14] style: move attribute types to after docstring

---
 pysr/sr.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 98219fba..0b218df5 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -183,21 +183,6 @@ def _check_assertions(
 class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
-
     """
     High-performance symbolic regression algorithm.
@@ -679,6 +664,21 @@ class PySRRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
     ```
     """

+    equations_: Optional[Union[pd.DataFrame, List[pd.DataFrame]]]
+    n_features_in_: int
+    feature_names_in_: ArrayLike[str]
+    display_feature_names_in_: ArrayLike[str]
+    X_units_: Optional[ArrayLike[str]]
+    y_units_: Optional[Union[str, ArrayLike[str]]]
+    nout_: int
+    selection_mask_: Optional[NDArray[np.bool_]]
+    tempdir_: Path
+    equation_file_: Union[str, Path]
+    julia_state_stream_: Optional[NDArray[np.uint8]]
+    julia_options_stream_: Optional[NDArray[np.uint8]]
+    equation_file_contents_: Optional[List[pd.DataFrame]]
+    show_pickle_warnings_: bool
+
     def __init__(
         self,
         model_selection: Literal["best", "accuracy", "score"] = "best",

From a5eaab9fefaf6afd4eaf3df223deeeb0dc4e0a3d Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 08:08:56 +0100
Subject: [PATCH 12/14] refactor: help with type inference of `get_best`

---
 pysr/sr.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 0b218df5..48fd739f 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -2169,10 +2169,10 @@ def pytorch(self, index=None):
         self.set_params(output_torch_format=True)
         self.refresh()
         best_equation = self.get_best(index=index)
-        if isinstance(best_equation, pd.Series):
-            return best_equation["torch_format"]
-        else:
+        if isinstance(best_equation, list):
             return [eq["torch_format"] for eq in best_equation]
+        else:
+            return best_equation["torch_format"]

     def _read_equation_file(self):
         """Read the hall of fame file created by `SymbolicRegression.jl`."""

From 526d334020fc925284e94aeb4df997760853435a Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 08:29:15 +0100
Subject: [PATCH 13/14] fix: type inference issue in return value of get_best

---
 pysr/sr.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 48fd739f..54382a24 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -1179,8 +1179,6 @@ def get_best(self, index=None) -> Union[pd.Series, List[pd.Series]]:
             Raised when an invalid model selection strategy is provided.
         """
         check_is_fitted(self, attributes=["equations_"])
-        if self.equations_ is None:
-            raise ValueError("No equations have been generated yet.")

         if index is not None:
             if isinstance(self.equations_, list):
                 assert isinstance(
                     index, list
                 ), "With multiple output features, index must be a list."
                 return [eq.iloc[i] for eq, i in zip(self.equations_, index)]
-            return self.equations_.iloc[index]
+            elif isinstance(self.equations_, pd.DataFrame):
+                return self.equations_.iloc[index]
+            else:
+                raise ValueError("No equations have been generated yet.")

         if isinstance(self.equations_, list):
             return [
                 eq.loc[idx_model_selection(eq, self.model_selection)]
                 for eq in self.equations_
             ]
-        else:
+        elif isinstance(self.equations_, pd.DataFrame):
             return self.equations_.loc[
                 idx_model_selection(self.equations_, self.model_selection)
             ]
+        else:
+            raise ValueError("No equations have been generated yet.")

     def _setup_equation_file(self):
         """

From 96e5a0f2b7dec74f44278bb470be2e4d3b909508 Mon Sep 17 00:00:00 2001
From: MilesCranmer
Date: Sun, 28 Apr 2024 08:42:28 +0100
Subject: [PATCH 14/14] fix: upper bound of randint for windows

---
 pysr/sr.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pysr/sr.py b/pysr/sr.py
index 54382a24..f1a254fd 100644
--- a/pysr/sr.py
+++ b/pysr/sr.py
@@ -1910,7 +1910,7 @@ def fit(
             )

         random_state = check_random_state(self.random_state)  # For np random
-        seed = random_state.randint(0, 2**32)  # For julia random
+        seed = random_state.randint(0, 2**31 - 1)  # For julia random

         # Pre transformations (feature selection and denoising)
         X, y, variable_names, X_units, y_units = self._pre_transform_training_data(
             X, y, Xresampled, variable_names, X_units, y_units, random_state
@@ -1962,7 +1962,7 @@ def fit(

         return self

-    def refresh(self, checkpoint_file=None):
+    def refresh(self, checkpoint_file=None) -> None:
         """
         Update self.equations_ with any new options passed.