Skip to content

Commit

Permalink
Add specialized == and isequal() methods
Browse files Browse the repository at this point in the history
These are equivalent to the AbstractArray{>:Null} fallback provided by Nulls,
but more efficient when comparing two CategoricalArrays.
  • Loading branch information
nalimilan committed Oct 5, 2017
1 parent d67da23 commit 3a401b9
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 17 deletions.
34 changes: 30 additions & 4 deletions src/array.jl
Expand Up @@ -304,22 +304,48 @@ convert(::Type{CategoricalArray{T, N}}, A::CategoricalArray{T, N}) where {T, N}
convert(::Type{CategoricalArray{T}}, A::CategoricalArray{T}) where {T} = A
convert(::Type{CategoricalArray}, A::CategoricalArray) = A

"""
    ==(A::CategoricalArray, B::CategoricalArray)

Compare two categorical arrays using three-valued logic.

Return `false` if the sizes differ or if any pair of corresponding entries
compares as `false`; return `null` if no pair is unequal but at least one
comparison involves a null value; return `true` otherwise.

When both arrays share the very same pool object, the integer `refs` are
compared directly instead of the values they stand for.
"""
function Base.:(==)(A::CategoricalArray{S}, B::CategoricalArray{T}) where {S, T}
    if size(A) != size(B)
        return false
    end
    # Whether either element type admits `Null`; invariant across the loops.
    nullable = S >: Null || T >: Null
    anynull = false
    if A.pool === B.pool
        # Same pool object: equal refs imply equal values, so compare refs.
        @inbounds for (a, b) in zip(A.refs, B.refs)
            if a == 0 || b == 0
                # A ref of 0 is handled as a null entry: the comparison is
                # neither true nor false, so only record that it happened.
                nullable && (anynull = true)
            elseif a != b
                return false
            end
        end
    else
        # Different pools: fall back to comparing the values themselves.
        @inbounds for (a, b) in zip(A, B)
            eq = (a == b)
            if eq === false
                return false
            elseif nullable
                anynull |= isnull(eq)
            end
        end
    end
    return anynull ? null : true
end

"""
    isequal(A::CategoricalArray, B::CategoricalArray)

Return `true` if `A` and `B` have the same size and all corresponding entries
are `isequal`, and `false` otherwise.

When both arrays share the very same pool object, the integer `refs` are
compared directly instead of the values they stand for.
"""
function Base.isequal(A::CategoricalArray, B::CategoricalArray)
    size(A) == size(B) || return false
    if A.pool === B.pool
        # Same pool object: comparing the refs is enough.
        @inbounds for (refa, refb) in zip(A.refs, B.refs)
            refa == refb || return false
        end
    else
        # Different pools: compare the values themselves.
        @inbounds for (x, y) in zip(A, B)
            isequal(x, y) || return false
        end
    end
    return true
end
Expand Down
60 changes: 47 additions & 13 deletions test/13_arraycommon.jl
Expand Up @@ -455,21 +455,55 @@ for T in (Int, Union{Int, Null})
end

# Test ==
# Plain arrays and their categorical counterparts.
a1 = [1, 2, 3]
a2 = Union{Int, Null}[1, 2, 3]
a3 = [1, 2, null]
a4 = [4, 3, 2]
a5 = [1 2; 3 4]
ca1 = CategoricalArray(a1)
ca2 = CategoricalArray{Union{Int, Null}}(a2)
# ca2b and ca3b are both built on the very same pool object (ca2.pool), so
# comparing them exercises the refs-based path taken when pools are identical.
ca2b = CategoricalArray{Union{Int, Null}, 1, UInt32}(ca2.refs, ca2.pool)
ca3 = CategoricalArray(a3)
ca3b = CategoricalArray{Union{Int, Null}, 1, UInt32}(ca3.refs, ca2.pool)
ca4 = CategoricalArray(a4)
ca5 = CategoricalArray(a5)

# Arrays without nulls compare as plain `true`/`false`.
@test ca1 == copy(ca1) == a1
@test ca2 == copy(ca2) == a2
# A null entry with no definite mismatch makes the comparison itself null.
@test isnull(ca3 == copy(ca3)) && isnull(ca3 == ca3b) && isnull(ca3 == a3)
@test ca4 == copy(ca4) == a4
@test ca5 == copy(ca5) == a5
@test ca1 == ca2 == a2
@test isnull(ca1 != ca3) && isnull(ca1 != a3)
@test ca1 != ca4
@test ca1 != a4
@test a1 != ca4
# Different sizes are unequal.
@test ca1 != ca5
@test ca1 != a5
@test a1 != ca5
# A definite mismatch wins over a null comparison.
@test ca3 != ca4
@test ca3 != a4
@test a3 != ca4
@test isnull(ca2b != ca3b)

# Test isequal
# Unlike ==, isequal always yields a Bool, even when nulls are present.
@test isequal(ca1, copy(ca1)) && isequal(copy(ca1), a1)
@test isequal(ca2, copy(ca2)) && isequal(copy(ca2), a2)
@test isequal(ca3, copy(ca3)) && isequal(copy(ca3), ca3b) && isequal(ca3b, a3)
@test isequal(ca4, copy(ca4)) && isequal(copy(ca4), a4)
@test isequal(ca5, copy(ca5)) && isequal(copy(ca5), a5)
@test isequal(ca1, ca2) && isequal(ca2, a2)
@test !isequal(ca1, ca3) && !isequal(ca1, a3)
@test !isequal(ca1, ca4)
@test !isequal(ca1, a4)
@test !isequal(a1, ca4)
@test !isequal(ca1, ca5)
@test !isequal(ca1, a5)
@test !isequal(a1, ca5)
@test !isequal(ca3, ca4)
@test !isequal(ca3, a4)
@test !isequal(a3, ca4)
@test !isequal(ca2b, ca3b)

# Test summary()
@test summary(CategoricalArray([1, 2, 3])) ==
Expand Down

0 comments on commit 3a401b9

Please sign in to comment.