Make the `names` argument of `eachcol` default to `false`

JuliaData · Dec 4, 2018 · 3f89718 · 3f89718
1 parent f0efb17
commit 3f89718
Show file tree

Hide file tree

Showing 12 changed files with 91 additions and 121 deletions.
diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
@@ -202,7 +202,7 @@ eltypes(df)
 ```
 
 """
-eltypes(df::AbstractDataFrame) = eltype.(columns(df))
+eltypes(df::AbstractDataFrame) = eltype.(eachcol(df))
 
 Base.size(df::AbstractDataFrame) = (nrow(df), ncol(df))
 function Base.size(df::AbstractDataFrame, i::Integer)
@@ -241,7 +241,7 @@ that is different than the number of rows present in `df`.
 """
 function Base.similar(df::AbstractDataFrame, rows::Integer = size(df, 1))
     rows < 0 && throw(ArgumentError("the number of rows must be positive"))
-    DataFrame(Any[similar(x, rows) for x in columns(df)], copy(index(df)))
+    DataFrame(Any[similar(x, rows) for x in eachcol(df)], copy(index(df)))
 end
 
 ##############################################################################
@@ -432,7 +432,7 @@ function StatsBase.describe(df::AbstractDataFrame; stats::Union{Symbol,AbstractV
     data[:variable] = names(df)
 
     # An array of Dicts for summary statistics
-    column_stats_dicts = [get_stats(col) for col in columns(df)]
+    column_stats_dicts = [get_stats(col) for col in eachcol(df)]
     for stat in stats
         # for each statistic, loop through the columns array to find values
         # letting the comprehension choose the appropriate type
@@ -794,7 +794,7 @@ function Base.convert(::Type{Matrix{T}}, df::AbstractDataFrame) where T
     n, p = size(df)
     res = Matrix{T}(undef, n, p)
     idx = 1
-    for (name, col) in zip(names(df), columns(df))
+    for (name, col) in eachcol(df, true)
         try
             copyto!(res, idx, col)
         catch err

diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl
@@ -49,13 +49,12 @@ struct DataFrameColumns{T<:AbstractDataFrame, V} <: AbstractVector{V}
 end
 
 """
-    eachcol(df::AbstractDataFrame, names::Bool=true)
+    eachcol(df::AbstractDataFrame, names::Bool=false)
 
 Return a `DataFrameColumns` that iterates an `AbstractDataFrame` column by column.
-If `names` is equal to `true` (currently the default, in the future the default
-will be set to `false`) iteration returns a pair consisting of column name
+If `names` is equal to `true`, iteration returns a pair consisting of column name
 and column vector.
-If `names` is equal to `false` then column vectors are yielded.
+If `names` is equal to `false` (the default), then column vectors are yielded.
 
 **Examples**
 
@@ -70,17 +69,17 @@ julia> df = DataFrame(x=1:4, y=11:14)
 │ 3   │ 3     │ 13    │
 │ 4   │ 4     │ 14    │
 
+julia> collect(eachcol(df))
+2-element Array{AbstractArray{T,1} where T,1}:
+ [1, 2, 3, 4]
+ [11, 12, 13, 14]
+
 julia> collect(eachcol(df, true))
 2-element Array{Pair{Symbol,AbstractArray{T,1} where T},1}:
  :x => [1, 2, 3, 4]
  :y => [11, 12, 13, 14]
 
-julia> collect(eachcol(df, false))
-2-element Array{AbstractArray{T,1} where T,1}:
- [1, 2, 3, 4]
- [11, 12, 13, 14]
-
-julia> sum.(eachcol(df, false))
+julia> sum.(eachcol(df))
 2-element Array{Int64,1}:
  10
  50
@@ -93,25 +92,14 @@ julia> map(eachcol(df, false)) do col
  3
 ```
 """
-@inline function eachcol(df::T, names::Bool) where T<: AbstractDataFrame
+@inline function eachcol(df::T, names::Bool=false) where T<: AbstractDataFrame
     if names
         DataFrameColumns{T, Pair{Symbol, AbstractVector}}(df)
     else
         DataFrameColumns{T, AbstractVector}(df)
     end
 end
 
-# TODO: remove this method after deprecation
-# and add default argument value above
-function eachcol(df::AbstractDataFrame)
-    Base.depwarn("In the future eachcol will have names argument set to false by default", :eachcol)
-    eachcol(df, true)
-end
-
-# TODO: remove this method after deprecation
-# this is left to make sure we do not forget to properly fix columns calls
-columns(df::AbstractDataFrame) = eachcol(df, false)
-
 Base.size(itr::DataFrameColumns) = (size(itr.df, 2),)
 Base.IndexStyle(::Type{<:DataFrameColumns}) = Base.IndexLinear()
 
@@ -178,7 +166,7 @@ julia> mapcols(x -> x.^2, df)
 function mapcols(f::Union{Function,Type}, df::AbstractDataFrame)
     # note: `f` must return a consistent length
     res = DataFrame()
-    for (n, v) in eachcol(df)
+    for (n, v) in eachcol(df, true)
         res[n] = f(v)
     end
     res

diff --git a/src/abstractdataframe/join.jl b/src/abstractdataframe/join.jl
@@ -89,13 +89,13 @@ function compose_joined_table(joiner::DataFrameJoiner, kind::Symbol,
     cols = Vector{AbstractVector}(undef, ncleft + ncol(dfr_noon))
     # inner and left joins preserve non-missingness of the left frame
     _similar_left = kind == :inner || kind == :left ? similar : similar_missing
-    for (i, col) in enumerate(columns(joiner.dfl))
+    for (i, col) in enumerate(eachcol(joiner.dfl))
         cols[i] = _similar_left(col, nrow)
         copyto!(cols[i], view(col, all_orig_left_ixs))
     end
     # inner and right joins preserve non-missingness of the right frame
     _similar_right = kind == :inner || kind == :right ? similar : similar_missing
-    for (i, col) in enumerate(columns(dfr_noon))
+    for (i, col) in enumerate(eachcol(dfr_noon))
         cols[i+ncleft] = _similar_right(col, nrow)
         copyto!(cols[i+ncleft], view(col, all_orig_right_ixs))
         permute!(cols[i+ncleft], right_perm)
@@ -407,7 +407,7 @@ end
 function crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame; makeunique::Bool=false)
     r1, r2 = size(df1, 1), size(df2, 1)
     colindex = merge(index(df1), index(df2), makeunique=makeunique)
-    cols = Any[[repeat(c, inner=r2) for c in columns(df1)];
-               [repeat(c, outer=r1) for c in columns(df2)]]
+    cols = Any[[repeat(c, inner=r2) for c in eachcol(df1)];
+               [repeat(c, outer=r1) for c in eachcol(df2)]]
     DataFrame(cols, colindex)
 end
diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl
@@ -148,7 +148,7 @@ function isequal_row(df1::AbstractDataFrame, r1::Int, df2::AbstractDataFrame, r2
     elseif !(ncol(df1) == ncol(df2))
         throw(ArgumentError("Rows of the tables that have different number of columns cannot be compared. Got $(ncol(df1)) and $(ncol(df2)) columns"))
     end
-    @inbounds for (col1, col2) in zip(columns(df1), columns(df2))
+    @inbounds for (col1, col2) in zip(eachcol(df1), eachcol(df2))
         isequal(col1[r1], col2[r2]) || return false
     end
     return true

diff --git a/src/deprecated.jl b/src/deprecated.jl
@@ -10,7 +10,7 @@ import Base: @deprecate
 
 import Base: keys, values, insert!
 @deprecate keys(df::AbstractDataFrame) names(df)
-@deprecate values(df::AbstractDataFrame) columns(df)
+@deprecate values(df::AbstractDataFrame) eachcol(df)
 @deprecate insert!(df::DataFrame, df2::AbstractDataFrame) (foreach(col -> df[col] = df2[col], names(df2)); df)
 
 @deprecate pool categorical
@@ -1741,7 +1741,7 @@ end
 function hashrows(df::SubDataFrame, skipmissing::Bool)
     rhashes = zeros(UInt, nrow(df))
     missings = fill(false, skipmissing ? nrow(df) : 0)
-    cols = columns(df)
+    cols = eachcol(df)
     for i in 1:ncol(df)
         hashrows_col!(rhashes, missings, view(parent(df)[i], rows(df)), i == 1)
     end

diff --git a/src/groupeddataframe/grouping.jl b/src/groupeddataframe/grouping.jl
@@ -424,7 +424,7 @@ function _combine(f::Any, gd::GroupedDataFrame)
         fun = last(f)
     elseif f isa Pair
         df = gd.parent[collect(first(f))]
-        incols = NamedTuple{Tuple(names(df))}(columns(df))
+        incols = NamedTuple{Tuple(names(df))}(eachcol(df))
         fun = last(f)
     else
         incols = nothing

diff --git a/test/cat.jl b/test/cat.jl
@@ -1,6 +1,5 @@
 module TestCat
     using Test, Random, DataFrames
-    using DataFrames: columns
     const ≅ = isequal
 
     #
@@ -165,13 +164,13 @@ module TestCat
     @testset "vcat mixed coltypes" begin
         df = vcat(DataFrame([[1]], [:x]), DataFrame([[1.0]], [:x]))
         @test df == DataFrame([[1.0, 1.0]], [:x])
-        @test typeof.(columns(df)) == [Vector{Float64}]
+        @test typeof.(eachcol(df)) == [Vector{Float64}]
         df = vcat(DataFrame([[1]], [:x]), DataFrame([["1"]], [:x]))
         @test df == DataFrame([[1, "1"]], [:x])
-        @test typeof.(columns(df)) == [Vector{Any}]
+        @test typeof.(eachcol(df)) == [Vector{Any}]
         df = vcat(DataFrame([Union{Missing, Int}[1]], [:x]), DataFrame([[1]], [:x]))
         @test df == DataFrame([[1, 1]], [:x])
-        @test typeof.(columns(df)) == [Vector{Union{Missing, Int}}]
+        @test typeof.(eachcol(df)) == [Vector{Union{Missing, Int}}]
         df = vcat(DataFrame([CategoricalArray([1])], [:x]), DataFrame([[1]], [:x]))
         @test df == DataFrame([[1, 1]], [:x])
         @test df[:x] isa Vector{Int}
@@ -186,14 +185,14 @@ module TestCat
         df = vcat(DataFrame([Union{Int, Missing}[1]], [:x]),
                   DataFrame([["1"]], [:x]))
         @test df == DataFrame([[1, "1"]], [:x])
-        @test typeof.(columns(df)) == [Vector{Any}]
+        @test typeof.(eachcol(df)) == [Vector{Any}]
         df = vcat(DataFrame([CategoricalArray([1])], [:x]),
                   DataFrame([CategoricalArray(["1"])], [:x]))
         @test df == DataFrame([[1, "1"]], [:x])
         @test df[:x] isa CategoricalVector{Any}
         df = vcat(DataFrame([trues(1)], [:x]), DataFrame([[false]], [:x]))
         @test df == DataFrame([[true, false]], [:x])
-        @test typeof.(columns(df)) == [Vector{Bool}]
+        @test typeof.(eachcol(df)) == [Vector{Bool}]
     end
 
     @testset "vcat out of order" begin

diff --git a/test/constructors.jl b/test/constructors.jl
@@ -1,7 +1,6 @@
 module TestConstructors
     using Test, DataFrames
     using DataFrames: Index, _columns, index
-    using DataFrames: columns
     const ≅ = isequal
 
     #
@@ -116,13 +115,13 @@ module TestConstructors
     @testset "column types" begin
         df = DataFrame(A = 1:3, B = 2:4, C = 3:5)
         answer = [Array{Int,1}, Array{Int,1}, Array{Int,1}]
-        @test map(typeof, columns(df)) == answer
+        @test map(typeof, eachcol(df)) == answer
         df[:D] = [4, 5, missing]
         push!(answer, Vector{Union{Int, Missing}})
-        @test map(typeof, columns(df)) == answer
+        @test map(typeof, eachcol(df)) == answer
         df[:E] = 'c'
         push!(answer, Vector{Char})
-        @test map(typeof, columns(df)) == answer
+        @test map(typeof, eachcol(df)) == answer
     end
 
     @testset "categorical constructor" begin

diff --git a/test/dataframe.jl b/test/dataframe.jl
@@ -1,7 +1,6 @@
 module TestDataFrame
     using Dates, DataFrames, LinearAlgebra, Statistics, Random, Test
     using DataFrames: _columns
-    using DataFrames: columns
     const ≅ = isequal
     const ≇ = !isequal
 
@@ -73,20 +72,20 @@ module TestDataFrame
                             c = CategoricalArray{Union{Float64, Missing}}(undef, 2))
         # https://github.com/JuliaData/Missings.jl/issues/66
         # @test missingdf ≅ similar(df, 2)
-        @test typeof.(columns(similar(df, 2))) == typeof.(columns(missingdf))
+        @test typeof.(eachcol(similar(df, 2))) == typeof.(eachcol(missingdf))
         @test size(similar(df, 2)) == size(missingdf)
     end
 
     @testset "Associative methods" begin
         df = DataFrame(a=[1, 2], b=[3.0, 4.0])
         @test haskey(df, :a)
         @test !haskey(df, :c)
-        @test get(df, :a, -1) === columns(df)[1]
+        @test get(df, :a, -1) === eachcol(df)[1]
         @test get(df, :c, -1) == -1
         @test !isempty(df)
 
         @test empty!(df) === df
-        @test isempty(columns(df))
+        @test isempty(eachcol(df))
         @test isempty(df)
         @test isempty(DataFrame(a=[], b=[]))
 
@@ -524,11 +523,11 @@ module TestDataFrame
 
     df = DataFrame(A = Vector{Union{Int, Missing}}(1:3), B = Vector{Union{Int, Missing}}(4:6))
     DRT = CategoricalArrays.DefaultRefType
-    @test all(c -> isa(c, Vector{Union{Int, Missing}}), columns(categorical!(deepcopy(df))))
+    @test all(c -> isa(c, Vector{Union{Int, Missing}}), eachcol(categorical!(deepcopy(df))))
     @test all(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}},
-              columns(categorical!(deepcopy(df), [1,2])))
+              eachcol(categorical!(deepcopy(df), [1,2])))
     @test all(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}},
-              columns(categorical!(deepcopy(df), [:A,:B])))
+              eachcol(categorical!(deepcopy(df), [:A,:B])))
     @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}},
                     _columns(categorical!(deepcopy(df), [:A]))) == 1
     @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}},
@@ -561,7 +560,7 @@ module TestDataFrame
                                 Union{Int, Missing}[2, 6], Union{Int, Missing}[3, 7],
                                 Union{Int, Missing}[4, 8]], [:id, :a, :b, :c, :d])
         @test isa(udf[1], Vector{Int})
-        @test all(isa.(columns(udf)[2:end], Vector{Union{Int, Missing}}))
+        @test all(isa.(eachcol(udf)[2:end], Vector{Union{Int, Missing}}))
         df = DataFrame([categorical(repeat(1:2, inner=4)),
                            categorical(repeat('a':'d', outer=2)), categorical(1:8)],
                        [:id, :variable, :value])
@@ -571,7 +570,7 @@ module TestDataFrame
                                 Union{Int, Missing}[2, 6], Union{Int, Missing}[3, 7],
                                 Union{Int, Missing}[4, 8]], [:id, :a, :b, :c, :d])
         @test isa(udf[1], CategoricalVector{Int})
-        @test all(isa.(columns(udf)[2:end], CategoricalVector{Union{Int, Missing}}))
+        @test all(isa.(eachcol(udf)[2:end], CategoricalVector{Union{Int, Missing}}))
     end
 
     @testset "duplicate entries in unstack warnings" begin
@@ -718,14 +717,14 @@ module TestDataFrame
         df = DataFrame([CategoricalArray(1:10),
                         CategoricalArray(string.('a':'j'))])
         allowmissing!(df)
-        @test all(x->x <: CategoricalVector, typeof.(columns(df)))
+        @test all(x->x <: CategoricalVector, typeof.(eachcol(df)))
         @test eltypes(df)[1] <: Union{CategoricalValue{Int}, Missing}
         @test eltypes(df)[2] <: Union{CategoricalString, Missing}
         df[1,2] = missing
         @test_throws MissingException disallowmissing!(df)
         df[1,2] = "a"
         disallowmissing!(df)
-        @test all(x->x <: CategoricalVector, typeof.(columns(df)))
+        @test all(x->x <: CategoricalVector, typeof.(eachcol(df)))
         @test eltypes(df)[1] <: CategoricalValue{Int}
         @test eltypes(df)[2] <: CategoricalString
     end
@@ -735,12 +734,12 @@ module TestDataFrame
                        b = CategoricalArray(["foo"]),
                        c = [0.0],
                        d = CategoricalArray([0.0]))
-        @test typeof.(columns(similar(df))) == typeof.(columns(df))
+        @test typeof.(eachcol(similar(df))) == typeof.(eachcol(df))
         @test size(similar(df)) == size(df)
 
         rows = size(df, 1) + 5
         @test size(similar(df, rows)) == (rows, size(df, 2))
-        @test typeof.(columns(similar(df, rows))) == typeof.(columns(df))
+        @test typeof.(eachcol(similar(df, rows))) == typeof.(eachcol(df))
 
         e = @test_throws ArgumentError similar(df, -1)
         @test e.value.msg == "the number of rows must be positive"

diff --git a/test/dataframerow.jl b/test/dataframerow.jl
@@ -1,6 +1,5 @@
 module TestDataFrameRow
     using Test, DataFrames
-    using DataFrames: columns
 
     df = DataFrame(a=Union{Int, Missing}[1, 2, 3, 1, 2, 2],
                    b=[2.0, missing, 1.2, 2.0, missing, missing],
@@ -46,7 +45,7 @@ module TestDataFrameRow
     @test hash(DataFrameRow(df, 2)) != hash(DataFrameRow(df, 6))
 
     # check that hashrows() function generates the same hashes as DataFrameRow
-    df_rowhashes, _ = DataFrames.hashrows(Tuple(columns(df)), false)
+    df_rowhashes, _ = DataFrames.hashrows(Tuple(eachcol(df)), false)
     @test df_rowhashes == [hash(dr) for dr in eachrow(df)]
 
     # test incompatible frames