From 3759dc82a54c203a647151ea609dbe2d5a1c0587 Mon Sep 17 00:00:00 2001 From: ZlaTanskY Date: Fri, 25 Nov 2022 15:52:55 +0100 Subject: [PATCH 1/2] fix: mutable objects copied locally --- cobra/preprocessing/preprocessor.py | 3 +++ tests/preprocessing/test_preprocessor.py | 31 ++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/cobra/preprocessing/preprocessor.py b/cobra/preprocessing/preprocessor.py index e03d352..f40ab5e 100644 --- a/cobra/preprocessing/preprocessor.py +++ b/cobra/preprocessing/preprocessor.py @@ -293,6 +293,9 @@ def transform(self, data: pd.DataFrame, continuous_vars: list, start = time.time() + # Ensure to operate on separate copy of data + data = data.copy() + if not self._is_fitted: msg = ("This {} instance is not fitted yet. Call 'fit' with " "appropriate arguments before using this method.") diff --git a/tests/preprocessing/test_preprocessor.py b/tests/preprocessing/test_preprocessor.py index 0dd6694..a21e03e 100644 --- a/tests/preprocessing/test_preprocessor.py +++ b/tests/preprocessing/test_preprocessor.py @@ -1,9 +1,11 @@ from contextlib import contextmanager from typing import Any +from unittest.mock import MagicMock import pytest import numpy as np import pandas as pd +from pytest_mock import MockerFixture from cobra.preprocessing.preprocessor import PreProcessor @@ -146,3 +148,32 @@ def test_get_variable_list(self, continuous_vars: list, discrete_vars) assert actual == expected + + @staticmethod + def mock_transform(df: pd.DataFrame, args): + """Mock the transform method.""" + df["new_column"] = "Hello World" + return df + + def test_mutable_train_data_fit_transform(self, mocker: MockerFixture): + """Test if the train_data input is not changed when performing fit_transform.""" + train_data = pd.DataFrame([[1, "2", 3], [10, "20", 30], [100, "200", 300]], columns=["foo", "bar", "baz"]) + preprocessor = PreProcessor.from_params( + model_type="classification", + n_bins=10, + weight= 0.8 + ) + preprocessor._categorical_data_processor = MagicMock() + preprocessor._categorical_data_processor.transform = self.mock_transform + preprocessor._discretizer = MagicMock() + preprocessor._discretizer.transform = self.mock_transform + preprocessor._target_encoder = MagicMock() + preprocessor._target_encoder.transform = self.mock_transform + + _ = preprocessor.fit_transform( + train_data, + continuous_vars=["foo"], + discrete_vars=["bar"], + target_column_name=["baz"] + ) + assert "new_column" not in train_data.columns From 1ffe9ed7b8ecf5a8592e7359369d8f0de69130a3 Mon Sep 17 00:00:00 2001 From: ZlaTanskY Date: Fri, 25 Nov 2022 15:56:40 +0100 Subject: [PATCH 2/2] chore: assert the actual result --- tests/preprocessing/test_preprocessor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/preprocessing/test_preprocessor.py b/tests/preprocessing/test_preprocessor.py index a21e03e..7d4d46f 100644 --- a/tests/preprocessing/test_preprocessor.py +++ b/tests/preprocessing/test_preprocessor.py @@ -170,10 +170,11 @@ def test_mutable_train_data_fit_transform(self, mocker: MockerFixture): preprocessor._target_encoder = MagicMock() preprocessor._target_encoder.transform = self.mock_transform - _ = preprocessor.fit_transform( + result = preprocessor.fit_transform( train_data, continuous_vars=["foo"], discrete_vars=["bar"], target_column_name=["baz"] ) assert "new_column" not in train_data.columns + assert "new_column" in result.columns