Merge 872bc98 into a2e8ea9

JuliaData · Mar 6, 2018 · 89cabae · 89cabae
2 parents a2e8ea9 + 872bc98
commit 89cabae
Show file tree

Hide file tree

Showing 2 changed files with 84 additions and 57 deletions.
diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl
@@ -16,7 +16,7 @@ DataFrame(t::Type, nrows::Integer, ncols::Integer) # an empty DataFrame of arbit
 DataFrame(column_eltypes::Vector, names::Vector, nrows::Integer; makeunique::Bool=false)
 DataFrame(column_eltypes::Vector, cnames::Vector, categorical::Vector, nrows::Integer;
           makeunique::Bool=false)
-DataFrame(ds::Vector{AbstractDict})
+DataFrame(ds::AbstractDict)
 ```
 
 **Arguments**
@@ -33,7 +33,7 @@ DataFrame(ds::Vector{AbstractDict})
 * `column_eltypes` : elemental type of each column
 * `categorical` : `Vector{Bool}` indicating which columns should be converted to
                   `CategoricalVector`
-* `ds` : a vector of Associatives
+* `ds` : `AbstractDict` of columns
 
 Each column in `columns` should be the same length.
 
@@ -620,7 +620,7 @@ Base.setindex!(df::DataFrame, x::Nothing, col_ind::Int) = delete!(df, col_ind)
 
 ##############################################################################
 ##
-## Mutating Associative methods
+## Mutating AbstractDict methods
 ##
 ##############################################################################
 
@@ -721,7 +721,7 @@ merge!(df::DataFrame, others::AbstractDataFrame...)
 For every column `c` with name `n` in `others` sequentially perform `df[n] = c`.
 In particular, if there are duplicate column names present in `df` and `others`
 the last encountered column will be retained.
-This behavior is identical with how `merge!` works for any `Associative` type.
+This behavior is identical to how `merge!` works for any `AbstractDict` type.
 Use `join` if you want to join two `DataFrame`s.
 
 **Arguments**
@@ -952,28 +952,19 @@ Base.convert(::Type{DataFrame}, d::AbstractDict) = DataFrame(d)
 ##
 ##############################################################################
 
-function Base.push!(df::DataFrame, associative::AbstractDict{Symbol,Any})
+function Base.push!(df::DataFrame, dict::AbstractDict)
     i = 1
     for nm in _names(df)
         try
-            push!(df[nm], associative[nm])
-        catch
-            #clean up partial row
-            for j in 1:(i - 1)
-                pop!(df[_names(df)[j]])
+            val = get(dict, nm) do
+                v = dict[string(nm)]
+                Base.depwarn("push!(::DataFrame, ::AbstractDict) with " *
+                             "AbstractDict keys other than Symbol is deprecated",
+                             :push!)
+                v
             end
-            msg = "Error adding value to column :$nm."
-            throw(ArgumentError(msg))
-        end
-        i += 1
-    end
-end
-
-function Base.push!(df::DataFrame, associative::AbstractDict)
-    i = 1
-    for nm in _names(df)
-        try
-            val = get(() -> associative[string(nm)], associative, nm)
+            # after deprecation replace above call by
+            # val = dict[nm]
             push!(df[nm], val)
         catch
             #clean up partial row

diff --git a/test/dataframe.jl b/test/dataframe.jl
@@ -205,41 +205,63 @@ module TestDataFrame
     @test hash(convert(DataFrame, [1 2; 3 4])) != hash(convert(DataFrame, [1 3; 2 4]))
     @test hash(convert(DataFrame, [1 2; 3 4])) == hash(convert(DataFrame, [1 2; 3 4]), zero(UInt))
 
-    # push!(df, row)
-    df=DataFrame( first=[1,2,3], second=["apple","orange","pear"] )
-
-    dfb= DataFrame( first=[1,2], second=["apple","orange"] )
-    push!(dfb, Any[3,"pear"])
-    @test df == dfb
-
-    dfb= DataFrame( first=[1,2], second=["apple","orange"] )
-    push!(dfb, (3,"pear"))
-    @test df == dfb
-
-    dfb= DataFrame( first=[1,2], second=["apple","orange"] )
-    @test_throws ArgumentError push!(dfb, (33.33,"pear"))
-
-    dfb= DataFrame( first=[1,2], second=["apple","orange"] )
-    @test_throws ArgumentError push!(dfb, ("coconut",22))
-
-    dfb= DataFrame( first=[1,2], second=["apple","orange"] )
-    push!(dfb, Dict(:first=>3, :second=>"pear"))
-    @test df == dfb
-
-    df=DataFrame( first=[1,2,3], second=["apple","orange","banana"] )
-    dfb= DataFrame( first=[1,2], second=["apple","orange"] )
-    push!(dfb, Dict("first"=>3, "second"=>"banana"))
-    @test df == dfb
-
-    df0= DataFrame( first=[1,2], second=["apple","orange"] )
-    dfb= DataFrame( first=[1,2], second=["apple","orange"] )
-    @test_throws ArgumentError push!(dfb, Dict(:first=>true, :second=>false))
-    @test df0 == dfb
-
-    df0= DataFrame( first=[1,2], second=["apple","orange"] )
-    dfb= DataFrame( first=[1,2], second=["apple","orange"] )
-    @test_throws ArgumentError push!(dfb, Dict("first"=>"chicken", "second"=>"stuff"))
-    @test df0 == dfb
+    @testset "push!(df, row)" begin  # push! of one row given as a vector, a tuple or a Dict
+        df=DataFrame( first=[1,2,3], second=["apple","orange","pear"] )  # expected frame after a successful push!
+
+        dfb= DataFrame( first=[1,2], second=["apple","orange"] )
+        dfc= DataFrame( first=[1,2], second=["apple","orange"] )  # reference copy, never pushed to
+        push!(dfb, Any[3,"pear"])  # row supplied as a Vector{Any}
+        @test df == dfb
+
+        dfb= DataFrame( first=[1,2], second=["apple","orange"] )
+        push!(dfb, (3,"pear"))  # row supplied as a tuple
+        @test df == dfb
+
+        dfb= DataFrame( first=[1,2], second=["apple","orange"] )
+        @test_throws ArgumentError push!(dfb, (33.33,"pear"))  # non-integer Float64 for the integer column `first`
+        @test dfc == dfb  # the failed push! must leave dfb equal to the reference copy
+
+        dfb= DataFrame( first=[1,2], second=["apple","orange"] )
+        @test_throws ArgumentError push!(dfb, (1,"2",3))  # three values for a two-column frame
+        @test dfc == dfb
+
+        dfb= DataFrame( first=[1,2], second=["apple","orange"] )
+        @test_throws ArgumentError push!(dfb, ("coconut",22))  # value types swapped relative to the columns
+        @test dfc == dfb
+
+        dfb= DataFrame( first=[1,2], second=["apple","orange"] )
+        @test_throws ArgumentError push!(dfb, (11,22))  # integer supplied for the string column `second`
+        @test dfc == dfb
+
+        dfb= DataFrame( first=[1,2], second=["apple","orange"] )
+        push!(dfb, Dict(:first=>3, :second=>"pear"))  # row supplied as a Dict keyed by column Symbols
+        @test df == dfb
+
+        df=DataFrame( first=[1,2,3], second=["apple","orange","banana"] )  # new expected frame for the next case
+        dfb= DataFrame( first=[1,2], second=["apple","orange"] )
+        push!(dfb, Dict(:first=>3, :second=>"banana"))
+        @test df == dfb
+
+        df0= DataFrame( first=[1,2], second=["apple","orange"] )  # reference copy for the Dict failure case
+        dfb= DataFrame( first=[1,2], second=["apple","orange"] )
+        @test_throws ArgumentError push!(dfb, Dict(:first=>true, :second=>false))  # Bool values for both columns
+        @test df0 == dfb  # dfb must be unchanged after the failed push!
+
+        df0= DataFrame( first=[1,2], second=["apple","orange"] )
+        dfb= DataFrame( first=[1,2], second=["apple","orange"] )
+        @test_throws ArgumentError push!(dfb, Dict(:first=>"chicken", :second=>"stuff"))  # string for the integer column `first`
+        @test df0 == dfb
+
+        df0=DataFrame( first=[1,2,3], second=["apple","orange","pear"] )
+        dfb=DataFrame( first=[1,2,3], second=["apple","orange","pear"] )
+        @test_throws ArgumentError push!(dfb, Dict(:first=>"chicken", :second=>1))  # both values have the wrong type for their column
+        @test df0 == dfb
+
+        df0=DataFrame( first=["1","2","3"], second=["apple","orange","pear"] )  # here `first` is a string column
+        dfb=DataFrame( first=["1","2","3"], second=["apple","orange","pear"] )
+        @test_throws ArgumentError push!(dfb, Dict(:first=>"chicken", :second=>1))  # presumably fails only at `second`, after `first` accepted its value - TODO confirm
+        @test df0 == dfb  # no partially added row may remain in dfb
+    end
 
     # delete!
     df = DataFrame(a=1, b=2, c=3, d=4, e=5)
@@ -405,6 +427,20 @@ module TestDataFrame
     @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}},
                     categorical!(deepcopy(df), 1).columns) == 1
 
+    @testset "categorical!" begin  # element types after categorical! with and without a column list
+        df = DataFrame([["a", "b"], ['a', 'b'], [true, false], 1:2, ["x", "y"]])  # columns: String, Char, Bool, Int range, String
+        @test all(map(<:, eltypes(categorical!(df)),  # no column list: only the two string columns are expected to change
+                      [CategoricalArrays.CategoricalString,
+                       Char, Bool, Int,
+                       CategoricalArrays.CategoricalString]))
+        @test all(map(<:, eltypes(categorical!(df, names(df))),  # all names passed: every column is expected to be categorical
+                      [CategoricalArrays.CategoricalString,
+                       CategoricalArrays.CategoricalValue{Char},
+                       CategoricalArrays.CategoricalValue{Bool},
+                       CategoricalArrays.CategoricalValue{Int},
+                       CategoricalArrays.CategoricalString]))
+    end
+
     @testset "unstack promotion to support missing values" begin
         df = DataFrame(Any[repeat(1:2, inner=4), repeat('a':'d', outer=2), collect(1:8)],
                        [:id, :variable, :value])