From 3a401b9adb9b04c8645cb5e893ddd5f5702cece5 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Wed, 4 Oct 2017 10:08:57 +0200 Subject: [PATCH] Add specialized == and isequal() methods These are equivalent to the AbstractArray{>:Null} fallback provided by Nulls, but more efficient when comparing two CategoricalArrays. --- src/array.jl | 34 +++++++++++++++++++++--- test/13_arraycommon.jl | 60 +++++++++++++++++++++++++++++++++--------- 2 files changed, 77 insertions(+), 17 deletions(-) diff --git a/src/array.jl b/src/array.jl index d38bdc67..5b1cfc8c 100644 --- a/src/array.jl +++ b/src/array.jl @@ -304,22 +304,48 @@ convert(::Type{CategoricalArray{T, N}}, A::CategoricalArray{T, N}) where {T, N} convert(::Type{CategoricalArray{T}}, A::CategoricalArray{T}) where {T} = A convert(::Type{CategoricalArray}, A::CategoricalArray) = A -function Base.:(==)(A::CategoricalArray, B::CategoricalArray) +function Base.:(==)(A::CategoricalArray{S}, B::CategoricalArray{T}) where {S, T} if size(A) != size(B) return false end + anynull = false if A.pool === B.pool - for (a, b) in zip(A.refs, B.refs) - if a != b + @inbounds for (a, b) in zip(A.refs, B.refs) + if a == 0 || b == 0 + (S >: Null || T >: Null) && (anynull = true) + elseif a != b return false end end else - for (a, b) in zip(A, B) + @inbounds for (a, b) in zip(A, B) + eq = (a == b) + if eq === false + return false + elseif S >: Null || T >: Null + anynull |= isnull(eq) + end + end + end + return anynull ? null : true +end + +function Base.isequal(A::CategoricalArray, B::CategoricalArray) + if size(A) != size(B) + return false + end + if A.pool === B.pool + @inbounds for (a, b) in zip(A.refs, B.refs) if a != b return false end end + else + @inbounds for (a, b) in zip(A, B) + if !isequal(a, b) + return false + end + end end return true end diff --git a/test/13_arraycommon.jl b/test/13_arraycommon.jl index 68273133..4c9e268b 100644 --- a/test/13_arraycommon.jl +++ b/test/13_arraycommon.jl @@ -455,21 +455,55 @@ for T in (Int, Union{Int, Null}) end # Test == -ca1 = CategoricalArray([1, 2, 3]) -ca2 = CategoricalArray{Union{Int, Null}}([1, 2, 3]) -ca3 = CategoricalArray([1, 2, null]) -ca4 = CategoricalArray([4, 3, 2]) -ca5 = CategoricalArray([1 2; 3 4]) - -@test ca1 == copy(ca1) -@test ca2 == copy(ca2) -@test ca3 ≅ copy(ca3) -@test ca4 == copy(ca4) -@test ca5 == copy(ca5) -@test ca1 == ca2 -@test ca1 ≇ ca3 +a1 = [1, 2, 3] +a2 = Union{Int, Null}[1, 2, 3] +a3 = [1, 2, null] +a4 = [4, 3, 2] +a5 = [1 2; 3 4] +ca1 = CategoricalArray(a1) +ca2 = CategoricalArray{Union{Int, Null}}(a2) +ca2b = CategoricalArray{Union{Int, Null}, 1, UInt32}(ca2.refs, ca2.pool) +ca3 = CategoricalArray(a3) +ca3b = CategoricalArray{Union{Int, Null}, 1, UInt32}(ca3.refs, ca2.pool) +ca4 = CategoricalArray(a4) +ca5 = CategoricalArray(a5) + +@test ca1 == copy(ca1) == a1 +@test ca2 == copy(ca2) == a2 +@test isnull(ca3 == copy(ca3)) && isnull(ca3 == ca3b) && isnull(ca3 == a3) +@test ca4 == copy(ca4) == a4 +@test ca5 == copy(ca5) == a5 +@test ca1 == ca2 == a2 +@test isnull(ca1 != ca3) && isnull(ca1 != a3) @test ca1 != ca4 +@test ca1 != a4 +@test a1 != ca4 @test ca1 != ca5 +@test ca1 != a5 +@test a1 != ca5 +@test ca3 != ca4 +@test ca3 != a4 +@test a3 != ca4 +@test isnull(ca2b != ca3b) + +# Test isequal +@test ca1 ≅ copy(ca1) ≅ a1 +@test ca2 ≅ copy(ca2) ≅ a2 +@test ca3 ≅ copy(ca3) ≅ ca3b ≅ a3 +@test ca4 ≅ copy(ca4) ≅ a4 +@test ca5 ≅ copy(ca5) ≅ a5 +@test ca1 ≅ ca2 ≅ a2 +@test ca1 ≇ ca3 && ca1 ≇ a3 +@test ca1 ≇ ca4 +@test ca1 ≇ a4 +@test a1 ≇ ca4 +@test ca1 ≇ ca5 +@test ca1 ≇ a5 +@test a1 ≇ ca5 +@test ca3 ≇ ca4 +@test ca3 ≇ a4 +@test a3 ≇ ca4 +@test ca2b ≇ ca3b # Test summary() @test summary(CategoricalArray([1, 2, 3])) ==