Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix TypeError when a variable name matches a built-in Python function #558

Merged
merged 7 commits into from
Mar 22, 2024
16 changes: 14 additions & 2 deletions pysr/export_sympy.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,17 +57,29 @@
}


def create_sympy_symbols_map(
    feature_names_in: List[str],
) -> Dict[str, sympy.Symbol]:
    """Build a lookup from each feature name to a SymPy symbol of that name.

    Args:
        feature_names_in: Names of the input features.

    Returns:
        A dict whose keys are the given names and whose values are
        ``sympy.Symbol`` objects created from those same names. If a name
        appears more than once, the entry from its last occurrence is kept.
    """
    symbols_by_name = {}
    for name in feature_names_in:
        symbols_by_name[name] = sympy.Symbol(name)
    return symbols_by_name


def create_sympy_symbols(
    feature_names_in: List[str],
) -> List[sympy.Symbol]:
    """Create one SymPy symbol per feature name, preserving input order.

    Args:
        feature_names_in: Names of the input features.

    Returns:
        A list of ``sympy.Symbol`` objects, one for each entry of
        ``feature_names_in`` and in the same order.
    """
    return list(map(sympy.Symbol, feature_names_in))


def pysr2sympy(
equation: str, *, extra_sympy_mappings: Optional[Dict[str, Callable]] = None
equation: str,
*,
feature_names_in: Optional[List[str]] = None,
extra_sympy_mappings: Optional[Dict[str, Callable]] = None,
):
if feature_names_in is None:
feature_names_in = []
local_sympy_mappings = {
**(extra_sympy_mappings if extra_sympy_mappings else {}),
**create_sympy_symbols_map(feature_names_in),
**(extra_sympy_mappings if extra_sympy_mappings is not None else {}),
**sympy_mappings,
}

Expand Down
1 change: 1 addition & 0 deletions pysr/sr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2226,6 +2226,7 @@ def get_hof(self):
for _, eqn_row in output.iterrows():
eqn = pysr2sympy(
eqn_row["equation"],
feature_names_in=self.feature_names_in_,
extra_sympy_mappings=self.extra_sympy_mappings,
)
sympy_format.append(eqn)
Expand Down
7 changes: 5 additions & 2 deletions pysr/test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def test_warm_start_set_at_init(self):
regressor = PySRRegressor(warm_start=True, max_evals=10)
regressor.fit(self.X, y)

def test_noisy(self):
def test_noisy_builtin_variable_names(self):
y = self.X[:, [0, 1]] ** 2 + self.rstate.randn(self.X.shape[0], 1) * 0.05
model = PySRRegressor(
# Test that passing a single operator works:
Expand All @@ -289,9 +289,12 @@ def test_noisy(self):
model.set_params(model_selection="best")
# Also try without a temp equation file:
model.set_params(temp_equation_file=False)
model.fit(self.X, y)
# We also test builtin variable names
model.fit(self.X, y, variable_names=["exec", "hash", "x3", "x4", "x5"])
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
self.assertLessEqual(model.get_best()[1]["loss"], 1e-2)
self.assertIn("exec", model.latex()[0])
self.assertIn("hash", model.latex()[1])

def test_pandas_resample_with_nested_constraints(self):
X = pd.DataFrame(
Expand Down