Skip to content

Commit

Permalink
Fix join when mixing NullableArray and Array{Nullable} (#1089)
Browse files Browse the repository at this point in the history
Also return a NullableCategoricalArray from sharepools() since
the code currently doesn't check that no null values are present.
Anyway, this function is internal and the change imposes no overhead.
  • Loading branch information
nalimilan committed Oct 4, 2016
1 parent 400da84 commit e1c5014
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 11 deletions.
14 changes: 3 additions & 11 deletions src/abstractdataframe/join.jl
Expand Up @@ -130,6 +130,7 @@ sharepools{S,N}(v1::AbstractArray{S,N},
v2::Union{CategoricalArray{S,N}, NullableCategoricalArray{S,N}}) =
sharepools(oftype(v2, v1), v2)

# TODO: write an optimized version for (Nullable)CategoricalArray
function sharepools(v1::AbstractArray,
v2::AbstractArray)
## Return two categorical arrays that share the same pool.
Expand Down Expand Up @@ -179,19 +180,10 @@ function sharepools(v1::AbstractArray,
end

pool = CategoricalPool(pool)
return (CategoricalArray(refs1, pool),
CategoricalArray(refs2, pool))
return (NullableCategoricalArray(refs1, pool),
NullableCategoricalArray(refs2, pool))
end

# Both inputs are NullableArrays: wrap each one in a NullableCategoricalArray
# and re-dispatch sharepools on the wrapped arrays.
sharepools(v1::NullableArray, v2::NullableArray) =
sharepools(NullableCategoricalArray(v1), NullableCategoricalArray(v2))

# Only the second input is a NullableArray: wrap it and re-dispatch,
# passing v1 through unchanged and keeping the (v1, v2) argument order.
sharepools(v1::AbstractArray, v2::NullableArray) =
sharepools(v1, NullableCategoricalArray(v2))

# Only the first input is a NullableArray.
# NOTE(review): unlike the mirrored method above, this wraps v2 (the plain
# AbstractArray) rather than v1 and passes the arguments in swapped order, so
# the result presumably comes back ordered (v2, v1) — confirm whether intended.
sharepools(v1::NullableArray, v2::AbstractArray) =
sharepools(NullableCategoricalArray(v2), v1)

function sharepools(df1::AbstractDataFrame, df2::AbstractDataFrame)
# This method exists to allow merge to work with multiple columns.
# It takes the columns of each DataFrame and returns a categorical array
Expand Down
9 changes: 9 additions & 0 deletions test/join.jl
Expand Up @@ -98,4 +98,13 @@ module TestJoin
categorical!(df1, :A)
categorical!(df1, :B)
join(df1, df1, on = [:A, :B], kind = :inner)

# Test that Array{Nullable} works when combined with NullableArray (#1088)
df = DataFrame(Name = Nullable{String}["A", "B", "C"],
Mass = [1.5, 2.2, 1.1])
df2 = DataFrame(Name = ["A", "B", "C", "A"],
Quantity = [3, 3, 2, 4])
@test join(df2, df, on=:Name, kind=:left) == DataFrame(Name = ["A", "A", "B", "C"],
Quantity = [3, 4, 3, 2],
Mass = [1.5, 1.5, 2.2, 1.1])
end

0 comments on commit e1c5014

Please sign in to comment.