Handle floating point values more accurately #277
Conversation
```diff
@@ -151,7 +151,7 @@ def __init__(self, model):

     def _assemble_tree(self, tree):
         if "leaf" in tree:
-            return ast.NumVal(tree["leaf"])
+            return ast.NumVal(tree["leaf"], dtype=np.float32)
```
Interestingly, `weight` and `bias` are also `float` internally. But on the Python side they are loaded into a `double` numpy array:
https://github.com/dmlc/xgboost/blob/12110c900eff0aaa06045ecf717e6c5a36a164d5/python-package/xgboost/sklearn.py#L717-L718
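A minimal sketch (not from the PR) of what this widening looks like; the leaf value `0.3` is an arbitrary illustration:

```python
import numpy as np

# The native library stores the leaf weight as a 32-bit float...
leaf_f32 = np.float32(0.3)
# ...but the Python wrapper hands it back inside a float64 array.
leaf_f64 = np.asarray([leaf_f32], dtype=np.float64)[0]

# Widening float32 -> float64 is exact, so the values still compare equal...
assert leaf_f64 == leaf_f32
# ...yet the widened value differs from the double parsed from the
# decimal literal "0.3", which is where the mismatches come from.
assert leaf_f64 != np.float64(0.3)
```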
```diff
-# all thresholds into float32.
-threshold_num_val = ast.NumVal(threshold, dtype=np.float32)
+threshold_num_val = ast.NumVal(self._tree.threshold[node_id])
```
Refer to #190 (review). Now the threshold matches the original type in scikit-learn (`double`).
```python
def format_float(value):
    return np.format_float_positional(value, unique=True, trim="0")
```
Maybe `format_float_scientific` would be better: https://numpy.org/doc/stable/reference/generated/numpy.format_float_scientific.html. But I'm not sure how many target languages support scientific notation.
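For reference, a quick comparison of the two formatters (the input values here are illustrative):

```python
import numpy as np

def format_float(value):
    # Shortest decimal string that uniquely round-trips to the same float.
    return np.format_float_positional(value, unique=True, trim="0")

print(format_float(np.float32(0.1)))    # 0.1
print(format_float(np.float64(1) / 3))  # 0.3333333333333333

# The scientific variant emits an exponent, which not every target
# language's numeric-literal syntax accepts out of the box.
print(np.format_float_scientific(np.float64(1) / 3, unique=True))
```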
```python
y_pred_executed = np.array(
    y_pred_executed, dtype=y_pred_true.dtype, copy=False)
```
Quite often, packages not only cast input values in the `predict` method but also return results with a different type. For instance, XGBoost always returns `float`: https://github.com/dmlc/xgboost/blob/12110c900eff0aaa06045ecf717e6c5a36a164d5/python-package/xgboost/core.py#L1373
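A hedged sketch of why normalizing the dtype matters when comparing predictions; the values and dtype below are made up for illustration:

```python
import numpy as np

# Suppose the reference library returned float32 predictions...
y_pred_true = np.array([0.1, 0.2], dtype=np.float32)
# ...while the generated code's output was parsed into Python doubles.
y_pred_executed = [0.1, 0.2]

# Casting to the reference dtype makes the comparison fair:
y_pred_executed = np.array(y_pred_executed, dtype=y_pred_true.dtype)
assert y_pred_executed.dtype == y_pred_true.dtype
assert np.allclose(y_pred_executed, y_pred_true)
```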
```python
if isinstance(estimator, (BaseDecisionTree, BaseForest)):
    self.X_test = self.X_test.astype(np.float32, copy=False)
```
`BaseForest` also casts inputs to `float32`: https://github.com/scikit-learn/scikit-learn/blob/4a60ec129d3088d095b30cf54a670fd596ca4cc8/sklearn/ensemble/_forest.py#L421 (same as for `BaseDecisionTree`).
Interestingly, XGBoost also casts all inputs to `float` during `predict`. But including it here makes a lot of tests fail...
I'm a little concerned that here we treat a symptom, not the cause. By casting the input vector to float32 and then passing it as strings into the estimators, we don't exactly reproduce the actual environment, where the cast values will be transformed back to doubles because of the `score` function signature. What do you think? Am I overthinking this?
> I'm a little concerned that here we treat a symptom, not the cause.

Yeah, you're absolutely right! To fix the root cause we should support multiple floating-point types in the target languages. In this PR I just propose making our tests a little fairer. Native libraries do a `double -> float` conversion, while in our tests we perform `double -> float -> double`. Unfortunately, I'm not a numerical expert, but it seems that `float -> double` is a safe conversion: https://stackoverflow.com/questions/29648271/convert-float-double-float.
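A quick check of that claim (not part of the PR): widening `float -> double` is lossless, while narrowing `double -> float` generally is not.

```python
import numpy as np

# float -> double is safe: every float32 survives the round trip exactly.
xs = np.random.default_rng(0).standard_normal(1000).astype(np.float32)
assert np.all(xs.astype(np.float64).astype(np.float32) == xs)

# double -> float is lossy: narrowing generally changes the value.
x = np.float64(0.1)
assert np.float64(np.float32(x)) != x
```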
```python
elif isinstance(estimator, BaseLibSVM):
    self.X_test = self.X_test.astype(np.float64, copy=False)
```
```diff
     return np.float64(items[0])
 else:
-    return [float(i) for i in items]
+    return [np.float64(i) for i in items]
```
Let's use numpy types across the whole codebase for consistency.
This is some amazing investigation (as always)! I'll take a look soon. Thank you 👍
Looks great overall, but I don't understand why the numbers in the tests shifted so dramatically.
tests/assemblers/test_xgboost.py (Outdated)
```python
ast.IfExpr(
    ast.CompExpr(
        ast.FeatureRef(5),
        ast.NumVal(6.79699993),
ast.FeatureRef(12),
```
Not sure I understand why the feature index changed here.
Oh, all tests have changed due to the new train/test splitting routine, which is now done by a scikit-learn function (`train_test_split`). I found it easier to change the `random_state` param in only one place, compared to manual `shuffle`-ing in multiple places (see the diff in my opening comment).
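A minimal sketch of such a split via scikit-learn; the dataset and parameter values below are illustrative, not the PR's actual ones:

```python
import numpy as np
from sklearn.model_selection import train_test_split

X = np.arange(20).reshape(10, 2)
y = np.arange(10)

# A single random_state in one place controls the whole shuffle + split,
# instead of calling shuffle manually before every test.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=13)

assert len(X_train) == 8 and len(X_test) == 2
```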
Oh, stupid me. I overlooked that change. It's a bit harder to compare apples to apples because of this though. Do you by chance remember whether there were any changes to expected values before you updated the splitting logic? I'm trying to identify the impact of this update.
Sorry, updating the splitting logic was the first step, as I wanted to play around with `random_state`. Let's choose the easier way: let me split this PR into two.
Done in aeb4301.
Thank you, this is so much better!
Awesome, this looks great. Thanks a lot 👍
I ran all tests with an increased test dataset fraction (`0.6`) and compared the results with ones obtained from `master` (but with the changed dataset splitting routine, to ensure that the inputs are the same in both cases). It seems that at least it doesn't make things worse, and sometimes it even decreases the number of failed tests.