
Suppress model-generated warnings in integration tests #1115

Merged · 9 commits · May 19, 2024
3 changes: 2 additions & 1 deletion Project.toml
@@ -84,8 +84,9 @@ PartitionedLS = "19f41c5e-8610-11e9-2f2a-0d67e7c5027f"
SIRUS = "cdeec39e-fb35-4959-aadb-a1dd5dede958"
SelfOrganizingMaps = "ba4b7379-301a-4be0-bee6-171e4e152787"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["BetaML", "CatBoost", "EvoLinear", "EvoTrees", "Imbalance", "InteractiveUtils", "LightGBM", "MLJClusteringInterface", "MLJDecisionTreeInterface", "MLJFlux", "MLJGLMInterface", "MLJLIBSVMInterface", "MLJLinearModels", "MLJMultivariateStatsInterface", "MLJNaiveBayesInterface", "MLJScikitLearnInterface", "MLJTSVDInterface", "MLJTestInterface", "MLJTestIntegration", "MLJText", "MLJXGBoostInterface", "Markdown", "NearestNeighborModels", "OneRule", "OutlierDetectionNeighbors", "OutlierDetectionPython", "ParallelKMeans", "PartialLeastSquaresRegressor", "PartitionedLS", "SelfOrganizingMaps", "SIRUS", "SymbolicRegression", "StableRNGs", "Test"]
test = ["BetaML", "CatBoost", "EvoLinear", "EvoTrees", "Imbalance", "InteractiveUtils", "LightGBM", "MLJClusteringInterface", "MLJDecisionTreeInterface", "MLJFlux", "MLJGLMInterface", "MLJLIBSVMInterface", "MLJLinearModels", "MLJMultivariateStatsInterface", "MLJNaiveBayesInterface", "MLJScikitLearnInterface", "MLJTSVDInterface", "MLJTestInterface", "MLJTestIntegration", "MLJText", "MLJXGBoostInterface", "Markdown", "NearestNeighborModels", "OneRule", "OutlierDetectionNeighbors", "OutlierDetectionPython", "ParallelKMeans", "PartialLeastSquaresRegressor", "PartitionedLS", "SelfOrganizingMaps", "SIRUS", "SymbolicRegression", "StableRNGs", "Suppressor","Test"]
120 changes: 73 additions & 47 deletions test/integration.jl
@@ -1,10 +1,22 @@
using MLJTestIntegration, MLJModels, MLJ, Test, Markdown
import MLJTestIntegration as MTI
import Pkg.TOML as TOML
using Suppressor

const JULIA_TEST_LEVEL = 4
const OTHER_TEST_LEVEL = 3

# # IMPORTANT

# There are two main ways to flag a problem model for integration test purposes.

# - Adding to `FILTER_GIVEN_ISSUE` means the model is allowed to fail silently; if its
#   tests nevertheless pass, that fact is reported in the log (illustrated below).

# - Adding to `PATHOLOGIES` completely excludes the model from testing.

# Obviously the first method is strongly preferred.
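
# For illustration only (hypothetical package, model, and issue URL): a
# `FILTER_GIVEN_ISSUE` entry pairs a tracking-issue key with a predicate
# selecting the affected model(s), e.g.
#
#     "https://github.com/Example/FooPkg.jl/issues/1" =>
#         model -> model.name == "FooRegressor" &&
#             model.package_name == "FooPkg",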


# # RECORD OF OUTSTANDING ISSUES

@@ -15,10 +27,6 @@ FILTER_GIVEN_ISSUE = Dict(
model.package_name == "DecisionTree") ||
(model.name == "COFDetector" &&
model.package_name == "OutlierDetectionNeighbors"),
"https://github.com/JuliaAI/CatBoost.jl/pull/28 (waiting for 0.3.3 release)" =>
model -> model.name == "CatBoostRegressor",
"LOCIDetector too slow to train!" =>
model -> model.name == "LOCIDetector",
"https://github.com/JuliaML/LIBSVM.jl/issues/98" =>
model -> model.name == "LinearSVC" &&
model.package_name == "LIBSVM",
@@ -30,24 +38,27 @@ FILTER_GIVEN_ISSUE = Dict(
"https://github.com/sylvaticus/BetaML.jl/issues/65" =>
model -> model.name in ["KMeans", "KMedoids"] &&
model.package_name == "BetaML",
"https://github.com/JuliaAI/MLJTSVDInterface.jl/pull/17" =>
model -> model.name == "TSVDTransformer",
"https://github.com/JuliaAI/MLJ.jl/issues/1074" =>
model -> model.name == "AutoEncoderMLJ",
"https://github.com/sylvaticus/BetaML.jl/issues/64" =>
model -> model.name =="GaussianMixtureClusterer" && model.package_name=="BetaML",
"https://github.com/rikhuijzer/SIRUS.jl/issues/78" =>
model -> model.package_name == "SIRUS",
"https://github.com/lalvim/PartialLeastSquaresRegressor.jl/issues/29 "*
"(still need release > 2.2.0)" =>
model -> model.package_name == "PartialLeastSquaresRegressor",
"MLJScikitLearnInterface - multiple issues, hangs tests, WIP" =>
model -> model.package_name == "MLJScikitLearnInterface",
"MLJScikitLearnInterface - multiple issues, WIP" =>
[Inline review comments on this line]

Member: Do we currently have issues with Scikitlearn?

Member (Author): Actually only the ones flagged under "PATHOLOGIES". But running the tests for all the sk models doubles the test time (something like 4 hours).

Member (Author): As noted above, I'll open an issue flagging these as disabled.

model -> model.package_name == "MLJScikitLearnInterface" &&
model.name in [
"MultiTaskElasticNetCVRegressor",
"MultiTaskElasticNetRegressor",
"MultiTaskLassoCVRegressor",
"MultiTaskLassoRegressor",
]
)


# # LOG OUTSTANDING ISSUES TO STDOUT

const MODELS= models();
const MODELS = models();
const JULIA_MODELS = filter(m->m.is_pure_julia, MODELS);
const OTHER_MODELS = setdiff(MODELS, JULIA_MODELS);

@@ -127,7 +138,7 @@ for model in WITHOUT_DATASETS
end

# Additionally exclude some models for which the inferred datasets have a model-specific
# pathololgy that prevents a valid test:
# pathology that prevents a valid generic test.

PATHOLOGIES = filter(MODELS) do model
# in the subsampling occurring in stacking, we get a Cholesky
@@ -138,7 +149,11 @@ PATHOLOGIES = filter(MODELS) do model
# in tuned_pipe_evaluation C library gives "Incorrect parameter: specified nu is
# infeasible":
(model.name in ["NuSVC", "ProbabilisticNuSVC"] &&
model.package_name == "LIBSVM")
model.package_name == "LIBSVM") ||
# too slow to train!
(model.name == "LOCIDetector" && model.package_name == "OutlierDetectionPython") ||
# TO REDUCE TESTING TIME
model.package_name == "MLJScikitLearnInterface"
end

WITHOUT_DATASETS = vcat(WITHOUT_DATASETS, PATHOLOGIES)
@@ -178,43 +193,54 @@ MLJTestIntegration.test(MODELS, (nothing, ), level=1, throw=true, verbosity=0);

# # JULIA TESTS

options = (
level = JULIA_TEST_LEVEL,
verbosity = 0, # bump to 2 to debug
throw = false,
)
@testset "level 4 tests" begin
println()
for model in JULIA_MODELS

# exclusions:
model in WITHOUT_DATASETS && continue
model in EXCLUDED_BY_ISSUE && continue

print("\rTesting $(model.name) ($(model.package_name)) ")
@test isempty(MLJTestIntegration.test(model; mod=@__MODULE__, options...))
const INFO_TEST_NOW_PASSING =
"The model above now passes tests.\nConsider removing from "*
"`FILTER_GIVEN_ISSUE` in test/integration.jl."

problems = []

const nmodels = length(JULIA_MODELS) + length(OTHER_MODELS)
i = 0
println()
for (model_set, level) in [
(:JULIA_MODELS, JULIA_TEST_LEVEL),
(:OTHER_MODELS, OTHER_TEST_LEVEL),
]
set = eval(model_set)
options = (
; level,
verbosity = 0, # bump to 2 to debug
throw = false,
)
@testset "$model_set tests" begin
for model in set
global i += 1
progress = string("(", round(i/nmodels*100, digits=1), "%) Testing: ")

# exclusions:
model in WITHOUT_DATASETS && continue

notice = "$(model.name) ($(model.package_name))"
print("\r", progress, notice, " ")

okay = @suppress isempty(MLJTestIntegration.test(
model;
mod=@__MODULE__,
options...,
))
if model in EXCLUDED_BY_ISSUE
okay && (println(); @info INFO_TEST_NOW_PASSING)
else
okay || push!(problems, notice)
end
end
end
end

okay = isempty(problems)
okay || print("Integration tests failed for these models: \n $problems")
println()

# # NON-JULIA TESTS

options = (
level = OTHER_TEST_LEVEL,
verbosity = 0, # bump to 2 to debug
throw = false,
)
@testset "level 3 tests" begin
println()
for model in OTHER_MODELS

# exclusions:
model in WITHOUT_DATASETS && continue
model in EXCLUDED_BY_ISSUE && continue

print("\rTesting $(model.name) ($(model.package_name)) ")
@test isempty(MLJTestIntegration.test(model; mod=@__MODULE__, options...))
end
end
@test okay

true
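
For completeness, a sketch of how one might exercise this file locally, assuming a clone of the MLJ repository and the standard Pkg workflow (any extra gating of the integration tests is not shown in this diff):

    using Pkg
    Pkg.activate(".")  # assumes the current directory is the MLJ.jl clone
    Pkg.test()         # resolves the [targets] test dependencies, now including
                       # Suppressor, and runs the suite, test/integration.jl included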