
Suppress model-generated warnings in integration tests #1115

Merged · 9 commits · May 19, 2024
3 changes: 2 additions & 1 deletion Project.toml
@@ -84,8 +84,9 @@ PartitionedLS = "19f41c5e-8610-11e9-2f2a-0d67e7c5027f"
SIRUS = "cdeec39e-fb35-4959-aadb-a1dd5dede958"
SelfOrganizingMaps = "ba4b7379-301a-4be0-bee6-171e4e152787"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["BetaML", "CatBoost", "EvoLinear", "EvoTrees", "Imbalance", "InteractiveUtils", "LightGBM", "MLJClusteringInterface", "MLJDecisionTreeInterface", "MLJFlux", "MLJGLMInterface", "MLJLIBSVMInterface", "MLJLinearModels", "MLJMultivariateStatsInterface", "MLJNaiveBayesInterface", "MLJScikitLearnInterface", "MLJTSVDInterface", "MLJTestInterface", "MLJTestIntegration", "MLJText", "MLJXGBoostInterface", "Markdown", "NearestNeighborModels", "OneRule", "OutlierDetectionNeighbors", "OutlierDetectionPython", "ParallelKMeans", "PartialLeastSquaresRegressor", "PartitionedLS", "SelfOrganizingMaps", "SIRUS", "SymbolicRegression", "StableRNGs", "Test"]
test = ["BetaML", "CatBoost", "EvoLinear", "EvoTrees", "Imbalance", "InteractiveUtils", "LightGBM", "MLJClusteringInterface", "MLJDecisionTreeInterface", "MLJFlux", "MLJGLMInterface", "MLJLIBSVMInterface", "MLJLinearModels", "MLJMultivariateStatsInterface", "MLJNaiveBayesInterface", "MLJScikitLearnInterface", "MLJTSVDInterface", "MLJTestInterface", "MLJTestIntegration", "MLJText", "MLJXGBoostInterface", "Markdown", "NearestNeighborModels", "OneRule", "OutlierDetectionNeighbors", "OutlierDetectionPython", "ParallelKMeans", "PartialLeastSquaresRegressor", "PartitionedLS", "SelfOrganizingMaps", "SIRUS", "SymbolicRegression", "StableRNGs", "Suppressor","Test"]
120 changes: 73 additions & 47 deletions test/integration.jl
@@ -1,10 +1,22 @@
using MLJTestIntegration, MLJModels, MLJ, Test, Markdown
import MLJTestIntegration as MTI
import Pkg.TOML as TOML
using Suppressor

const JULIA_TEST_LEVEL = 4
const OTHER_TEST_LEVEL = 3

# # IMPORTANT

# There are two main ways to flag a problem model for integration test purposes.

# - Adding to `FILTER_GIVEN_ISSUE` means the model is allowed to fail silently; if its
#   tests nevertheless pass, that fact is reported in the log (illustrated below).

# - Adding to `PATHOLOGIES` completely excludes the model from testing.

# Obviously the first method is strongly preferred.
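
# For illustration only (hypothetical package, model, and issue URL): a
# `FILTER_GIVEN_ISSUE` entry pairs a tracking-issue key with a predicate
# selecting the affected model(s), e.g.
#
#     "https://github.com/Example/FooPkg.jl/issues/1" =>
#         model -> model.name == "FooRegressor" &&
#             model.package_name == "FooPkg",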


# # RECORD OF OUTSTANDING ISSUES

@@ -15,10 +27,6 @@ FILTER_GIVEN_ISSUE = Dict(
model.package_name == "DecisionTree") ||
(model.name == "COFDetector" &&
model.package_name == "OutlierDetectionNeighbors"),
"https://github.com/JuliaAI/CatBoost.jl/pull/28 (waiting for 0.3.3 release)" =>
model -> model.name == "CatBoostRegressor",
"LOCIDetector too slow to train!" =>
model -> model.name == "LOCIDetector",
"https://github.com/JuliaML/LIBSVM.jl/issues/98" =>
model -> model.name == "LinearSVC" &&
model.package_name == "LIBSVM",
@@ -30,24 +38,27 @@ FILTER_GIVEN_ISSUE = Dict(
"https://github.com/sylvaticus/BetaML.jl/issues/65" =>
model -> model.name in ["KMeans", "KMedoids"] &&
model.package_name == "BetaML",
"https://github.com/JuliaAI/MLJTSVDInterface.jl/pull/17" =>
model -> model.name == "TSVDTransformer",
"https://github.com/JuliaAI/MLJ.jl/issues/1074" =>
model -> model.name == "AutoEncoderMLJ",
"https://github.com/sylvaticus/BetaML.jl/issues/64" =>
model -> model.name =="GaussianMixtureClusterer" && model.package_name=="BetaML",
"https://github.com/rikhuijzer/SIRUS.jl/issues/78" =>
model -> model.package_name == "SIRUS",
"https://github.com/lalvim/PartialLeastSquaresRegressor.jl/issues/29 "*
"(still need release > 2.2.0)" =>
model -> model.package_name == "PartialLeastSquaresRegressor",
"MLJScikitLearnInterface - multiple issues, hangs tests, WIP" =>
model -> model.package_name == "MLJScikitLearnInterface",
"MLJScikitLearnInterface - multiple issues, WIP" =>
[Inline review comments on this line]

Member: Do we currently have issues with Scikitlearn?

Member (Author): Actually only the ones flagged under "PATHOLOGIES". But running the tests for all the sk models doubles the test time (something like 4 hours).

Member (Author): As noted above, I'll open an issue flagging these as disabled.

model -> model.package_name == "MLJScikitLearnInterface" &&
model.name in [
"MultiTaskElasticNetCVRegressor",
"MultiTaskElasticNetRegressor",
"MultiTaskLassoCVRegressor",
"MultiTaskLassoRegressor",
]
)


# # LOG OUTSTANDING ISSUES TO STDOUT

const MODELS= models();
const MODELS = models();
const JULIA_MODELS = filter(m->m.is_pure_julia, MODELS);
const OTHER_MODELS = setdiff(MODELS, JULIA_MODELS);

@@ -127,7 +138,7 @@ for model in WITHOUT_DATASETS
end

# Additionally exclude some models for which the inferred datasets have a model-specific
# pathololgy that prevents a valid test:
# pathology that prevents a valid generic test.

PATHOLOGIES = filter(MODELS) do model
# in the subsampling occurring in stacking, we get a Cholesky
@@ -138,7 +149,11 @@ PATHOLOGIES = filter(MODELS) do model
# in tuned_pipe_evaluation C library gives "Incorrect parameter: specified nu is
# infeasible":
(model.name in ["NuSVC", "ProbabilisticNuSVC"] &&
model.package_name == "LIBSVM")
model.package_name == "LIBSVM") ||
# too slow to train!
(model.name == "LOCIDetector" && model.package_name == "OutlierDetectionPython") ||
# TO REDUCE TESTING TIME
model.package_name == "MLJScikitLearnInterface"
end

WITHOUT_DATASETS = vcat(WITHOUT_DATASETS, PATHOLOGIES)
@@ -178,43 +193,54 @@ MLJTestIntegration.test(MODELS, (nothing, ), level=1, throw=true, verbosity=0);

# # JULIA TESTS

options = (
level = JULIA_TEST_LEVEL,
verbosity = 0, # bump to 2 to debug
throw = false,
)
@testset "level 4 tests" begin
println()
for model in JULIA_MODELS

# exclusions:
model in WITHOUT_DATASETS && continue
model in EXCLUDED_BY_ISSUE && continue

print("\rTesting $(model.name) ($(model.package_name)) ")
@test isempty(MLJTestIntegration.test(model; mod=@__MODULE__, options...))
const INFO_TEST_NOW_PASSING =
"The model above now passes tests.\nConsider removing from "*
"`FILTER_GIVEN_ISSUE` in test/integration.jl."

problems = []

const nmodels = length(JULIA_MODELS) + length(OTHER_MODELS)
i = 0
println()
for (model_set, level) in [
(:JULIA_MODELS, JULIA_TEST_LEVEL),
(:OTHER_MODELS, OTHER_TEST_LEVEL),
]
set = eval(model_set)
options = (
; level,
verbosity = 0, # bump to 2 to debug
throw = false,
)
@testset "$model_set tests" begin
for model in set
global i += 1
progress = string("(", round(i/nmodels*100, digits=1), "%) Testing: ")

# exclusions:
model in WITHOUT_DATASETS && continue

notice = "$(model.name) ($(model.package_name))"
print("\r", progress, notice, " ")

okay = @suppress isempty(MLJTestIntegration.test(
model;
mod=@__MODULE__,
options...,
))
if model in EXCLUDED_BY_ISSUE
okay && (println(); @info INFO_TEST_NOW_PASSING)
else
okay || push!(problems, notice)
end
end
end
end

okay = isempty(problems)
okay || print("Integration tests failed for these models: \n $problems")
println()

# # NON-JULIA TESTS

options = (
level = OTHER_TEST_LEVEL,
verbosity = 0, # bump to 2 to debug
throw = false,
)
@testset "level 3 tests" begin
println()
for model in OTHER_MODELS

# exclusions:
model in WITHOUT_DATASETS && continue
model in EXCLUDED_BY_ISSUE && continue

print("\rTesting $(model.name) ($(model.package_name)) ")
@test isempty(MLJTestIntegration.test(model; mod=@__MODULE__, options...))
end
end
@test okay

true
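
For completeness, a sketch of how one might exercise this file locally, assuming a clone of the MLJ repository and the standard Pkg workflow (any extra gating of the integration tests is not shown in this diff):

    using Pkg
    Pkg.activate(".")  # assumes the current directory is the MLJ.jl clone
    Pkg.test()         # resolves the [targets] test dependencies, now including
                       # Suppressor, and runs the suite, test/integration.jl included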