Merge pull request #135 from giordano/test-logs
Improve test coverage of tasks.jl
ablaom committed May 21, 2019
2 parents 52d0443 + 936b183 commit d636720
Showing 8 changed files with 121 additions and 57 deletions.
14 changes: 8 additions & 6 deletions src/tasks.jl
@@ -68,11 +68,12 @@ See also scitype, scitype_union, scitypes
"""
coerce(T::Type{Continuous}, y::AbstractVector{<:Number}) = float(y)
-function coerce(T::Type{Continuous}, y::AbstractVector{Union{<:Number,Missing}})
+function coerce(T::Type{Continuous}, y::V) where {N<:Number,
+                                                  V<:AbstractVector{Union{N,Missing}}}
_coerce_missing_warn(T)
return float(y)
end
-function coerce(T::Type{Continuous}, y::AbstractVector)
+function coerce(T::Type{Continuous}, y::AbstractVector{S}) where S
for el in y
if ismissing(el)
_coerce_missing_warn(T)
@@ -87,11 +88,12 @@ _int(::Missing) = missing
_int(x) = Int(x)

coerce(T::Type{Count}, y::AbstractVector{<:Integer}) = y
-function coerce(T::Type{Count}, y::AbstractVector{Union{<:Real,Missing}})
+function coerce(T::Type{Count}, y::V) where {R<:Real,
+                                             V<:AbstractVector{Union{R,Missing}}}
_coerce_missing_warn(T)
-    return convert(Vector{Missing,Int}, y)
+    return convert(Vector{Union{Missing,Int}}, y)
end
-function coerce(T::Type{Count}, y::AbstractVector)
+function coerce(T::Type{Count}, y::V) where {S,V<:AbstractVector{S}}
for el in y
if ismissing(el)
_coerce_missing_warn(T)
@@ -217,7 +219,7 @@ ignored.
Return the input data in form to be used in models.
-See also scitype, scitype_union, scitypes
+See also [`scitype`](@ref), [`scitype_union`](@ref), [`scitypes`](@ref).
"""
unsupervised(; data=nothing, types=Dict(), kwargs...) =
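Why the signature change: a minimal sketch (not MLJ code) of the dispatch gap the new `where` form presumably closes. Type parameters are invariant in Julia, so a concrete Vector{Union{Float64,Missing}} never matches the old AbstractVector{Union{<:Number,Missing}} annotation (whose element type is the unbound union itself); binding the number type in a `where` clause does match. The names old_style/new_style are illustrative only:

    # old annotation: element type must *equal* Union{N,Missing} where N<:Number
    old_style(y::AbstractVector{Union{<:Number,Missing}}) = :matched
    # new annotation: N is bound to the concrete number type, so dispatch fires
    new_style(y::V) where {N<:Number,V<:AbstractVector{Union{N,Missing}}} = :matched

    y = Union{Float64,Missing}[1.0, missing, 2.5]
    @assert !applicable(old_style, y)  # old signature never fires for this input
    @assert new_style(y) == :matched   # the where-form dispatches as intended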
5 changes: 3 additions & 2 deletions test/Constant.jl
@@ -14,7 +14,8 @@ y = [1.0, 1.0, 2.0, 2.0]

model = ConstantRegressor(distribution_type=
Distributions.Normal{Float64})
-fitresult, cache, report = MLJ.fit(model, 1, X, y)
+fitresult, cache, report =
+    @test_logs (:info, r"Fitted a constant probability distribution") MLJ.fit(model, 1, X, y)

d=Distributions.Normal(1.5, 0.5)
@test fitresult == d
@@ -32,7 +33,7 @@ yraw = ["Perry", "Antonia", "Perry", "Skater"]
y = categorical(yraw)

model = ConstantClassifier()
-fitresult, cache, report = MLJ.fit(model, 1, X, y)
+fitresult, cache, report = @test_logs (:info, r"probabilities") MLJ.fit(model, 1, X, y)
d = MLJ.UnivariateFinite([y[1], y[2], y[4]], [0.5, 0.25, 0.25])
@test fitresult == d

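The pattern used throughout this PR: `@test_logs (level, pattern) expr` from the Test stdlib asserts that `expr` emits matching log records and, on success, evaluates to `expr`'s value, which is why the fit results can still be destructured on the left. A self-contained sketch with an illustrative logging function:

    using Test

    # stand-in for MLJ.fit; the message text is invented for the sketch
    noisy_fit() = (@info "Fitted a constant probability distribution."; 42)

    result = @test_logs (:info, r"Fitted a constant") noisy_fit()
    @test result == 42   # the wrapped call's value passes through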
2 changes: 1 addition & 1 deletion test/KNN.jl
@@ -30,7 +30,7 @@ info(knn)

X, y = X_and_y(load_boston())
knnM = machine(knn, X, y)
-fit!(knnM)
+@test_logs (:info, r"Training") fit!(knnM)
predict(knnM, MLJ.selectrows(X, 1:10))

end
12 changes: 9 additions & 3 deletions test/Transformers.jl
@@ -108,7 +108,10 @@ X = DataFrame(name=identity.(categorical(["Ben", "John", "Mary", "John"], ordere

t = OneHotEncoder()
info(t)
-fitresult, cache, nothing = MLJBase.fit(t, 1, X)
+fitresult, cache, _ =
+    @test_logs((:info, r"Spawned 4 sub-features"),
+               (:info, r"Spawned 4 sub-features"),
+               MLJBase.fit(t, 1, X))
Xt = transform(t, fitresult, X)
@test Xt.name__John == float.([false, true, false, true])
@test Xt.height == X.height
@@ -119,7 +122,10 @@ Xt = transform(t, fitresult, X)
:favourite_number__7, :favourite_number__10, :age)

# test that *entire* pool of categoricals is used in fit, including unseen levels:
-fitresult_small, cache, nothing = MLJBase.fit(t, 1, MLJBase.selectrows(X,1:2))
+fitresult_small, cache, _ =
+    @test_logs((:info, r"Spawned 2 sub-features"),
+               (:info, r"Spawned 2 sub-features"),
+               MLJBase.fit(t, 1, MLJBase.selectrows(X,1:2)))
Xtsmall = transform(t, fitresult_small, X)
@test Xt == Xtsmall

@@ -130,7 +136,7 @@ Xtsmall = transform(t, fitresult_small, X)

# test exclusion of ordered factors:
t = OneHotEncoder(ordered_factor=false)
-fitresult, cache, nothing = MLJBase.fit(t, 1, X)
+fitresult, cache, _ = MLJBase.fit(t, 1, X)
Xt = transform(t, fitresult, X)
@test :name in MLJ.schema(Xt).names
@test :favourite_number__5 in MLJ.schema(Xt).names
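Two details worth noting in the Transformers changes: `@test_logs` takes one `(level, pattern)` tuple per expected record, matched in order (the encoder logs "Spawned ..." once per one-hot-encoded feature, hence the duplicated patterns), and the destructuring target `nothing` is replaced by the conventional discard placeholder `_`. A sketch with invented log messages:

    using Test

    # stand-in for MLJBase.fit on a two-feature table
    function toy_fit()
        @info "Spawned 4 sub-features to encode feature :name."
        @info "Spawned 4 sub-features to encode feature :favourite_number."
        return (:fitresult, :cache, :report)
    end

    # one tuple per expected record, in order; `_` discards the report slot
    fitresult, cache, _ = @test_logs((:info, r"Spawned 4 sub-features"),
                                     (:info, r"Spawned 4 sub-features"),
                                     toy_fit())
    @test fitresult == :fitresult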
8 changes: 4 additions & 4 deletions test/loading.jl
@@ -11,10 +11,10 @@ pkgs = keys(MLJ.metadata())
@test "DecisionTreeClassifier" in models()["DecisionTree"]
@test "ConstantClassifier" in models()["MLJ"]

-## if you put these back, need to add DecisionTree and MLJModels to
-## [extras] and [targets]:
-# @load DecisionTreeClassifier
-# @test @isdefined DecisionTreeClassifier
+@load DecisionTreeClassifier
+@test @isdefined DecisionTreeClassifier
+@load DecisionTreeRegressor pkg=DecisionTree
+@test @isdefined DecisionTreeRegressor

end # module
true
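The re-enabled lines exercise MLJ's `@load` macro, which brings a registered model's code and constructor into scope; `pkg=...` selects the providing package when more than one registry entry shares the model name. A usage sketch (assuming DecisionTree.jl is available as a test dependency, per the removed comment):

    using MLJ
    @load DecisionTreeClassifier                  # name resolves to one provider here
    @load DecisionTreeRegressor pkg=DecisionTree  # pkg=... disambiguates providers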
8 changes: 4 additions & 4 deletions test/machines.jl
@@ -11,22 +11,22 @@ X, y = task();
train, test = partition(eachindex(y), 0.7);

t = Machine(KNNRegressor(K=4), X, y)
-fit!(t, rows=train)
-fit!(t)
+@test_logs (:info, r"Training") fit!(t, rows=train)
+@test_logs (:info, r"Training") fit!(t)

predict(t, X[test,:])
@test rms(predict(t, X[test,:]), y[test]) < std(y)

mach = machine(ConstantRegressor(), task)
-fit!(mach)
+@test_logs (:info, r"Training") (:info, r"Fitted") fit!(mach)
yhat = predict_mean(mach, X)

n = nrows(X)
rms(yhat, y) ≈ std(y)*sqrt(1 - 1/n)

# test an unsupervised univariate case:
mach = machine(UnivariateStandardizer(), float.(1:5))
-fit!(mach)
+@test_logs (:info, r"Training") fit!(mach)


end # module
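The `≈` comparison above has a one-line derivation: the constant regressor predicts the training mean ȳ, so rms(yhat, y)² = Σᵢ(yᵢ − ȳ)²/n, while std(y)² = Σᵢ(yᵢ − ȳ)²/(n − 1); hence rms(yhat, y) = std(y)·sqrt((n − 1)/n) = std(y)·sqrt(1 − 1/n). A quick self-contained check (rms defined locally for the sketch):

    using Statistics

    y = randn(100)
    n = length(y)
    yhat = fill(mean(y), n)             # a constant (mean) predictor
    rms(a, b) = sqrt(mean((a .- b).^2))
    @assert rms(yhat, y) ≈ std(y) * sqrt(1 - 1/n)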
42 changes: 35 additions & 7 deletions test/networks.jl
@@ -26,10 +26,10 @@ Xs = source(Xtrain)
ys = source(ytrain)

knn1 = machine(knn_, Xs, ys)
-fit!(knn1, verbosity=3)
+@test_logs (:info, r"Training") fit!(knn1, verbosity=3)
knn_.K = 5
-fit!(knn1, rows=train[1:end-10], verbosity=2)
-fit!(knn1, verbosity=2)
+@test_logs (:info, r"Training") fit!(knn1, rows=train[1:end-10], verbosity=2)
+@test_logs (:info, r"Training") fit!(knn1, verbosity=2)
yhat = predict(knn1, Xs)
yhat(X_frame[test,:])
rms(yhat(X_frame[test,:]), y[test])
@@ -77,11 +77,39 @@ zhat = predict(knn, Xa)
yhat = inverse_transform(uscale, zhat)

# fit-through training:
-fit!(yhat, rows=1:50, verbosity=2)
-fit!(yhat, rows=:, verbosity=2) # will retrain
-fit!(yhat, verbosity=2) # will not retrain; nothing changed
+@test_logs((:info, r"Training"),
+           (:info, r"Training"),
+           (:info, r"Features standarized: "),
+           (:info, r" *:Crim"),
+           (:info, r" *:Zn"),
+           (:info, r" *:Indus"),
+           (:info, r" *:NOx"),
+           (:info, r" *:Rm"),
+           (:info, r" *:Age"),
+           (:info, r" *:Dis"),
+           (:info, r" *:Rad"),
+           (:info, r" *:Tax"),
+           (:info, r" *:PTRatio"),
+           (:info, r" *:Black"),
+           (:info, r" *:LStat"),
+           (:info, r"Training"),
+           fit!(yhat, rows=1:50, verbosity=2))
+@test_logs(# will retrain
+           (:info, r"Not retraining"),
+           (:info, r"Not retraining"),
+           (:info, r"Not retraining"),
+           fit!(yhat, rows=:, verbosity=2))
+@test_logs(# will not retrain; nothing changed
+           (:info, r"Not retraining"),
+           (:info, r"Not retraining"),
+           (:info, r"Not retraining"),
+           fit!(yhat, verbosity=2))
knn_.K =4
-fit!(yhat, verbosity=2) # will retrain; new hyperparameter
+@test_logs(# will retrain; new hyperparameter
+           (:info, r"Not retraining"),
+           (:info, r"Not retraining"),
+           (:info, r"Training"),
+           fit!(yhat, verbosity=2))
@test !MLJ.is_stale(XX) # sources always fresh

rms(yhat(X_frame[test,:]), y[test])
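The expected log sequences above pin down the learning network's retraining semantics: a repeated `fit!` with nothing changed leaves every machine fresh, so each reports "Not retraining"; mutating a hyperparameter (knn_.K = 4) leaves the upstream machines fresh but makes the final machine stale. A toy sketch of that staleness rule (not MLJ internals; the struct, function, and messages are illustrative):

    mutable struct ToyMachine
        K::Int                        # hyperparameter
        fitted_K::Union{Int,Nothing}  # hyperparameter value at last fit
    end

    function toy_fit!(m::ToyMachine)
        if m.fitted_K == m.K
            @info "Not retraining machine."
        else
            @info "Training machine."
            m.fitted_K = m.K           # record the state we trained against
        end
        return m
    end

    m = ToyMachine(4, nothing)
    toy_fit!(m)  # logs "Training"
    toy_fit!(m)  # logs "Not retraining"; nothing changed
    m.K = 5      # stale: hyperparameter differs from fitted value
    toy_fit!(m)  # logs "Training" again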
87 changes: 57 additions & 30 deletions test/tasks.jl
@@ -8,30 +8,48 @@ using CategoricalArrays

# shuffle!(::SupervisedTask):
X=(x=10:10:44, y=1:4, z=collect("abcd"))
-task = SupervisedTask(data=X, target=:y, is_probabilistic=true)
-task0=deepcopy(task)
-rng = MersenneTwister(1234)
-shuffle!(rng, task0)
-@test task.X != task0.X
-@test task.y != task0.y
-@test MLJ.selectrows(task.X, task0.y) == task0.X
-task1=deepcopy(task)
-Random.seed!(1234)
-rng = MersenneTwister(1234)
-task1=shuffle(task)
-shuffle!(rng, task1)
-@test task.X != task1.X
-@test task.y != task1.y
-@test MLJ.selectrows(task.X, task1.y) == task1.X
+task = @test_logs((:warn, r"An Unknown"), (:info, r"is_probabilistic = true"),
+                  SupervisedTask(data=X, target=:y, is_probabilistic=true))
+
+@testset "Shuffling" begin
+    task0=deepcopy(task)
+    rng = MersenneTwister(1234)
+    shuffle!(rng, task0)
+    @test task.X != task0.X
+    @test task.y != task0.y
+    @test MLJ.selectrows(task.X, task0.y) == task0.X

-# task indexing:
-task2 = task[:]
-@test count(fieldnames(typeof(task))) do fld
-    getproperty(task2, fld) != getproperty(task, fld)
-end == 0
-@test task[2:3].X.z == ['b', 'c']
-@test task[2:3].y == [2, 3]
+    Random.seed!(1234)
+    rng = MersenneTwister(1234)
+    task1=shuffle(task)
+    shuffle!(rng, task1)
+    @test task.X != task1.X
+    @test task.y != task1.y
+    @test MLJ.selectrows(task.X, task1.y) == task1.X
+
+    Random.seed!(1234)
+    task0_a = deepcopy(task)
+    shuffle!(task0_a)
+    @test task0.X == task0_a.X
+    @test task0.y == task0_a.y
+    @test MLJ.selectrows(task.X, task0_a.y) == task0_a.X
+
+    Random.seed!(1234)
+    rng = MersenneTwister(1234)
+    task1_a = shuffle(rng, shuffle(task))
+    @test task1.X == task1_a.X
+    @test task1.y == task1_a.y
+    @test MLJ.selectrows(task.X, task1_a.y) == task1_a.X
+end
+
+@testset "Indexing" begin
+    task2 = task[:]
+    @test count(fieldnames(typeof(task))) do fld
+        getproperty(task2, fld) != getproperty(task, fld)
+    end == 0
+    @test task[2:3].X.z == ['b', 'c']
+    @test task[2:3].y == [2, 3]
+end

@testset "Type coercion" begin
types = Dict(:x => Continuous, :z => Multiclass)
@@ -68,6 +86,12 @@ end == 0
X_coerced = coerce(Dict(), task.X)
@test X_coerced.x === task.X.x
@test X_coerced.z === task.X.z
+    z = categorical(task.X.z)
+    @test coerce(Multiclass, z) === z
+    z = categorical(task.X.z, true, ordered = false)
+    @test coerce(Multiclass, z) === z
+    z = categorical(task.X.z, true, ordered = true)
+    @test coerce(OrderedFactor, z) === z
# missing values
y_coerced = @test_logs((:warn, r"Missing values encountered"),
coerce(Continuous, [4, 7, missing]))
@@ -100,14 +124,17 @@ end == 0

end

-# task constructors:
-df = (x=10:10:44, y=1:4, z=collect("abcd"), w=[1.0, 3.0, missing])
-types = Dict(:x => Continuous, :z => Multiclass, :w => Count)
-task = @test_logs((:warn, r"Missing values encountered"), (:info, r"\n"),
-                  supervised(data=df, types=types, target=:y, ignore=:y, is_probabilistic=false))
-@test scitype_union(task.X.x) <: Continuous
-@test scitype_union(task.X.w) === Union{Count, Missing}
-@test scitype_union(task.y) <: Count
+@testset "Constructors" begin
+    df = (x=10:10:44, y=1:4, z=collect("abcd"), w=[1.0, 3.0, missing])
+    types = Dict(:x => Continuous, :z => Multiclass, :w => Count)
+    task = @test_logs((:warn, r"Missing values encountered"), (:info, r"\n"),
+                      supervised(data=df, types=types, target=:y, ignore=:y, is_probabilistic=false))
+    @test scitype_union(task.X.x) <: Continuous
+    @test scitype_union(task.X.w) === Union{Count, Missing}
+    @test scitype_union(task.y) <: Count
+    @test_logs((:info, r"\nis_probabilistic = true"),
+               supervised(task.X, task.y, is_probabilistic=true))
+end

end # module
true
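The "Shuffling" testset leans on RNG determinism: identically seeded generators yield identical permutations, whether passed explicitly as a MersenneTwister or set globally via Random.seed!. A minimal demonstration of that property on a plain vector:

    using Random

    @assert shuffle(MersenneTwister(1234), 1:10) == shuffle(MersenneTwister(1234), 1:10)

    Random.seed!(1234); a = shuffle(1:10)   # global RNG, seeded
    Random.seed!(1234); b = shuffle(1:10)   # reseed: same permutation
    @assert a == b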
