From ccd30da1abb04ba07b5a6a6d7fd7616ab55a55f4 Mon Sep 17 00:00:00 2001
From: Cameron Prybol
Date: Tue, 17 Oct 2017 12:51:01 -0700
Subject: [PATCH] Ensure levels are maintained when joining df's with Categorical cols

---
 src/abstractdataframe/join.jl |  2 ++
 test/join.jl                  | 12 ++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/src/abstractdataframe/join.jl b/src/abstractdataframe/join.jl
index 3df256e601..880f559e94 100644
--- a/src/abstractdataframe/join.jl
+++ b/src/abstractdataframe/join.jl
@@ -76,11 +76,13 @@ function compose_joined_table(joiner::DataFrameJoiner, kind::Symbol,
     for (i, col) in enumerate(columns(joiner.dfl))
         cols[i] = _similar(col, nrow)
         fillcolumn!(cols[i], col, all_orig_left_ixs)
+        isa(col, CategoricalArray) && levels!(cols[i], levels(col))
     end
     for (i, col) in enumerate(columns(dfr_noon))
         cols[i+ncleft] = _similar(col, nrow)
         fillcolumn!(cols[i+ncleft], col, all_orig_right_ixs)
         permute!(cols[i+ncleft], right_perm)
+        isa(col, CategoricalArray) && levels!(cols[i+ncleft], levels(col))
     end
     res = DataFrame(cols, vcat(names(joiner.dfl), names(dfr_noon)))

diff --git a/test/join.jl b/test/join.jl
index 62b7e53850..8ee360b45c 100644
--- a/test/join.jl
+++ b/test/join.jl
@@ -325,4 +325,16 @@ module TestJoin
         @test all(isa.(o(on).columns,
                        [CategoricalVector{Union{T, Null}} for T in (Int, Float64)]))
     end
+
+    @testset "maintain Categorical levels ordering on join" begin
+        A = DataFrame(a = [1,2,3], b = ["a", "b", "c"]);
+        B = DataFrame(b = ["a", "b", "c"], c = levels!(categorical(["a", "b", "b"]), ["b", "a"]));
+        @test levels(join(A, B, on=:b)[:c]) == ["b", "a"]
+        @test levels(join(B, A, on=:b)[:c]) == ["b", "a"]
+        @test levels(join(A, B, on=:b, kind=:inner)[:c]) == ["b", "a"]
+        @test levels(join(A, B, on=:b, kind=:left)[:c]) == ["b", "a"]
+        @test levels(join(A, B, on=:b, kind=:right)[:c]) == ["b", "a"]
+        @test levels(join(A, B, on=:b, kind=:outer)[:c]) == ["b", "a"]
+        @test levels(join(B, A, on=:b, kind = :semi)[:c]) == ["b", "a"]
+    end
 end