Fix join when mixing NullableArray and Array{Nullable} (#1089)

Also return a NullableCategoricalArray from sharepools() since the code currently doesn't check that no null values are present. Anyway, this function is internal and the change imposes no overhead.
JuliaData · Oct 4, 2016 · e1c5014 · e1c5014
1 parent 400da84
commit e1c5014
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 11 deletions.
diff --git a/src/abstractdataframe/join.jl b/src/abstractdataframe/join.jl
@@ -130,6 +130,7 @@ sharepools{S,N}(v1::AbstractArray{S,N},
                 v2::Union{CategoricalArray{S,N}, NullableCategoricalArray{S,N}}) =
     sharepools(oftype(v2, v1), v2)
 
+# TODO: write an optimized version for (Nullable)CategoricalArray
 function sharepools(v1::AbstractArray,
                     v2::AbstractArray)
     ## Return two categorical arrays that share the same pool.
@@ -179,19 +180,10 @@ function sharepools(v1::AbstractArray,
     end
 
     pool = CategoricalPool(pool)
-    return (CategoricalArray(refs1, pool),
-            CategoricalArray(refs2, pool))
+    return (NullableCategoricalArray(refs1, pool),
+            NullableCategoricalArray(refs2, pool))
 end
 
-sharepools(v1::NullableArray, v2::NullableArray) =
-    sharepools(NullableCategoricalArray(v1), NullableCategoricalArray(v2))
-
-sharepools(v1::AbstractArray, v2::NullableArray) =
-    sharepools(v1, NullableCategoricalArray(v2))
-
-sharepools(v1::NullableArray, v2::AbstractArray) =
-    sharepools(NullableCategoricalArray(v2), v1)
-
 function sharepools(df1::AbstractDataFrame, df2::AbstractDataFrame)
     # This method exists to allow merge to work with multiple columns.
     # It takes the columns of each DataFrame and returns a categorical array

diff --git a/test/join.jl b/test/join.jl
@@ -98,4 +98,13 @@ module TestJoin
     categorical!(df1, :A)
     categorical!(df1, :B)
     join(df1, df1, on = [:A, :B], kind = :inner)
+
+    # Test that Array{Nullable} works when combined with NullableArray (#1088)
+    df = DataFrame(Name = Nullable{String}["A", "B", "C"],
+                   Mass = [1.5, 2.2, 1.1])
+    df2 = DataFrame(Name = ["A", "B", "C", "A"],
+                    Quantity = [3, 3, 2, 4])
+    @test join(df2, df, on=:Name, kind=:left) == DataFrame(Name = ["A", "A", "B", "C"],
+                                                           Quantity = [3, 4, 3, 2],
+                                                           Mass = [1.5, 1.5, 2.2, 1.1])
 end