# deselected_tests.yaml

# This file lists node ids (in pytest sense) of sklearn tests that
# are to be deselected during test discovery step.
#
# Deselection can be predicated on the version of scikit-learn used.
# Use - node_id cond, or - node_id cond1,cond2  where cond is OPver.
# Supported OPs are >=, <=, ==, !=, >, <
# For example,
#    - tests/test_isotonic.py::test_permutation_invariance >0.18,<=0.19
#  will deselect the test only when the scikit-learn version satisfies both
#  conditions (newer than 0.18 and not newer than 0.19), e.g. 0.18.1 and 0.18.2

# Sequence of pytest node ids to deselect; an entry may be followed by a space
# and comma-separated version conditions (OPver) restricting when it applies.
deselected_tests:

  # Deselecting SVC related tests where duplicate samples end up being support vectors
  # See: https://github.com/scikit-learn/scikit-learn/issues/12738
  # NOTE(review): this comment used to say "5" tests, but only 4 are listed below - confirm none is missing
  - svm/tests/test_svm.py::test_svc_clone_with_callable_kernel
  - svm/tests/test_svm.py::test_precomputed
  - svm/tests/test_sparse.py::test_sparse_realdata
  - svm/tests/test_sparse.py::test_svc_iris

  # These tests pass starting with DAAL 2020.4. Remove these entries after that release.
  - ensemble/tests/test_bagging.py::test_sparse_classification
  - svm/tests/test_sparse.py::test_unsorted_indices

  # These tests compare printed probabilities bit-for-bit.
  # NOTE: the entry below has no ::test suffix, so it names the whole test module.
  - metrics/tests/test_classification.py

  # Max absolute difference: 0.04 for rocauc, and 0.01 for precision_recall
  - metrics/tests/test_ranking.py::test_roc_curve_hard
  - metrics/tests/test_ranking.py::test_precision_recall_curve

  # TODO: document why this test is deselected
  - model_selection/tests/test_search.py::test_search_cv_results_rank_tie_breaking

  # test_k_means_fit_predict is known to sporadically fail for float32 inputs in multithreaded runs
  # See: https://github.com/IntelPython/daal4py/issues/25
  - cluster/tests/test_k_means.py::test_k_means_fit_predict

  # test_nonuniform_strategies fails due to differences in handling of vacuous clusters after update
  # See: https://github.com/IntelPython/daal4py/issues/69
  - preprocessing/tests/test_discretization.py::test_nonuniform_strategies >=0.20.3
  - cluster/tests/test_k_means.py::test_relocated_clusters
  - cluster/tests/test_k_means.py::test_kmeans_relocated_clusters

  # DAAL does not check convergence for tol == 0.0 for ease of benchmarking
  - cluster/tests/test_k_means.py::test_kmeans_convergence
  - cluster/tests/test_k_means.py::test_kmeans_verbose

  # For Newton-CG solver, solution computed in float32 disagrees with that of float64 a little more than
  # the test expects, see https://github.com/scikit-learn/scikit-learn/pull/13645
  - linear_model/tests/test_logistic.py::test_dtype_match

  # _hist_gradient_boosting tests (added in 0.21.0) time out in CircleCI
  - ensemble/_hist_gradient_boosting/tests >=0.21.0,<0.21.1

  # See: https://github.com/scikit-learn/scikit-learn/pull/15857
  - ensemble/tests/test_stacking.py::test_stacking_cv_influence >=0.22

  # This fails on certain platforms. Weighted data do not go through DAAL,
  # unweighted do. Since convergence is not reached (a comment in the test
  # suggests that), coefficients are slightly different, resulting in 1 prediction
  # disagreement.
  - ensemble/tests/test_stacking.py::test_stacking_with_sample_weight >=0.22.1

  # Test is unwarranted, but upstream refuses to change it
  # See: https://github.com/scikit-learn/scikit-learn/pull/15856
  - svm/tests/test_svm.py::test_svm_equivalence_sample_weight_C >=0.22

  # TODO: document why this test is deselected
  - tests/test_pipeline.py::test_pipeline_memory

  # docstrings do not document daal_model_ attribute set by fit for
  # LinearRegression, Ridge and SVC
  - tests/test_docstring_parameters.py::test_fit_docstring_attributes

  # Insufficient accuracy of "coefs" and "intercept" in Elastic Net for multi-target problems
  # See: https://github.com/oneapi-src/oneDAL/issues/494
  - linear_model/tests/test_coordinate_descent.py::test_enet_multitarget >=0.21

  # Insufficient accuracy of the objective function in Elastic Net when warm_start is used
  # See: https://github.com/oneapi-src/oneDAL/issues/495
  - linear_model/tests/test_coordinate_descent.py::test_warm_start_convergence_with_regularizer_decrement >=0.21

  # DAAL doesn't support sample_weight (falls back to scikit-learn); insufficient accuracy (similar to previous cases)
  - linear_model/tests/test_coordinate_descent.py::test_enet_sample_weight_consistency >=0.23

  # log_proba differs from np.log(y_proba) (max absolute difference: 1.97215226e-31).
  # The likely cause is IDP NumPy, which is built with more aggressive compiler optimization of NumPy UFunc loops
  - neural_network/tests/test_mlp.py::test_predict_proba_multilabel

  # On small datasets, the regression coefficients for multi-target problems differ from scikit-learn;
  # the coefficients match for the first label only. For big data the coefficients are close.
  # See: https://github.com/IntelPython/daal4py/issues/275
  - linear_model/tests/test_ridge.py::test_ridge_cv_individual_penalties

  # Different number of iterations because of instability of kmeans
  # See: https://github.com/IntelPython/daal4py/issues/277
  - cluster/tests/test_k_means.py::test_kmeans_elkan_results