Skip to content

Commit

Permalink
make eachcol default to false
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins committed Dec 4, 2018
1 parent f0efb17 commit 3f89718
Show file tree
Hide file tree
Showing 12 changed files with 91 additions and 121 deletions.
8 changes: 4 additions & 4 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ eltypes(df)
```
"""
eltypes(df::AbstractDataFrame) = eltype.(columns(df))
# NOTE(review): the line above and the line below appear to be the old and new sides
# of this diff hunk (shown without +/- markers). Both broadcast `eltype` over the
# columns of `df`; only the column accessor changes from `columns` to `eachcol`.
eltypes(df::AbstractDataFrame) = eltype.(eachcol(df))

# Size of a data frame as a `(number of rows, number of columns)` tuple.
Base.size(df::AbstractDataFrame) = (nrow(df), ncol(df))
function Base.size(df::AbstractDataFrame, i::Integer)
Expand Down Expand Up @@ -241,7 +241,7 @@ that is different than the number of rows present in `df`.
"""
function Base.similar(df::AbstractDataFrame, rows::Integer = size(df, 1))
# `rows` defaults to the current number of rows, `size(df, 1)`.
# Only negative values are rejected: `rows == 0` passes this guard even though the
# message says "positive".
rows < 0 && throw(ArgumentError("the number of rows must be positive"))
# NOTE(review): the next two lines appear to be the old (`columns`) and new (`eachcol`)
# sides of this diff hunk, shown without +/- markers. Each builds a `DataFrame` from
# `similar(x, rows)` for every column `x` of `df`, together with a copy of `df`'s index.
DataFrame(Any[similar(x, rows) for x in columns(df)], copy(index(df)))
DataFrame(Any[similar(x, rows) for x in eachcol(df)], copy(index(df)))
end

##############################################################################
Expand Down Expand Up @@ -432,7 +432,7 @@ function StatsBase.describe(df::AbstractDataFrame; stats::Union{Symbol,AbstractV
data[:variable] = names(df)

# An array of Dicts for summary statistics
column_stats_dicts = [get_stats(col) for col in columns(df)]
column_stats_dicts = [get_stats(col) for col in eachcol(df)]
for stat in stats
# for each statistic, loop through the columns array to find values
# letting the comprehension choose the appropriate type
Expand Down Expand Up @@ -794,7 +794,7 @@ function Base.convert(::Type{Matrix{T}}, df::AbstractDataFrame) where T
n, p = size(df)
res = Matrix{T}(undef, n, p)
idx = 1
for (name, col) in zip(names(df), columns(df))
for (name, col) in eachcol(df, true)
try
copyto!(res, idx, col)
catch err
Expand Down
34 changes: 11 additions & 23 deletions src/abstractdataframe/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,12 @@ struct DataFrameColumns{T<:AbstractDataFrame, V} <: AbstractVector{V}
end

"""
eachcol(df::AbstractDataFrame, names::Bool=true)
eachcol(df::AbstractDataFrame, names::Bool=false)
Return a `DataFrameColumns` that iterates an `AbstractDataFrame` column by column.
If `names` is equal to `true` (currently the default, in the future the default
will be set to `false`) iteration returns a pair consisting of column name
If `names` is equal to `true` iteration returns a pair consisting of column name
and column vector.
If `names` is equal to `false` then column vectors are yielded.
If `names` is equal to `false` (the default) then column vectors are yielded.
**Examples**
Expand All @@ -70,17 +69,17 @@ julia> df = DataFrame(x=1:4, y=11:14)
│ 3 │ 3 │ 13 │
│ 4 │ 4 │ 14 │
julia> collect(eachcol(df))
2-element Array{AbstractArray{T,1} where T,1}:
[1, 2, 3, 4]
[11, 12, 13, 14]
julia> collect(eachcol(df, true))
2-element Array{Pair{Symbol,AbstractArray{T,1} where T},1}:
:x => [1, 2, 3, 4]
:y => [11, 12, 13, 14]
julia> collect(eachcol(df, false))
2-element Array{AbstractArray{T,1} where T,1}:
[1, 2, 3, 4]
[11, 12, 13, 14]
julia> sum.(eachcol(df, false))
julia> sum.(eachcol(df))
2-element Array{Int64,1}:
10
50
Expand All @@ -93,25 +92,14 @@ julia> map(eachcol(df, false)) do col
3
```
"""
@inline function eachcol(df::T, names::Bool) where T<: AbstractDataFrame
@inline function eachcol(df::T, names::Bool=false) where T<: AbstractDataFrame
# NOTE(review): the two header lines above appear to be the old and new signatures of
# the same method (diff shown without +/- markers); the second one gives `names` a
# default of `false`.
# The second type parameter of the returned `DataFrameColumns` depends on `names`:
# `Pair{Symbol, AbstractVector}` when `names` is `true`, plain `AbstractVector` otherwise.
if names
DataFrameColumns{T, Pair{Symbol, AbstractVector}}(df)
else
DataFrameColumns{T, AbstractVector}(df)
end
end

# TODO: remove this method after deprecation
# and add default argument value above
# Transitional one-argument method: it emits a deprecation warning attributed to
# `:eachcol` and then forwards to `eachcol(df, true)`, so callers that omit `names`
# still get the `names = true` behaviour.
function eachcol(df::AbstractDataFrame)
Base.depwarn("In the future eachcol will have names argument set to false by default", :eachcol)
eachcol(df, true)
end

# TODO: remove this method after deprecation
# this is left to make sure we do not forget to properly fix columns calls
# `columns(df)` simply forwards to `eachcol(df, false)`.
columns(df::AbstractDataFrame) = eachcol(df, false)

# A `DataFrameColumns` is one-dimensional; its length is the second dimension
# (`size(itr.df, 2)`) of the wrapped data frame.
Base.size(itr::DataFrameColumns) = (size(itr.df, 2),)
# `DataFrameColumns` declares linear indexing.
Base.IndexStyle(::Type{<:DataFrameColumns}) = Base.IndexLinear()

Expand Down Expand Up @@ -178,7 +166,7 @@ julia> mapcols(x -> x.^2, df)
function mapcols(f::Union{Function,Type}, df::AbstractDataFrame)
# note: `f` must return a consistent length
res = DataFrame()
for (n, v) in eachcol(df)
for (n, v) in eachcol(df, true)
res[n] = f(v)
end
res
Expand Down
8 changes: 4 additions & 4 deletions src/abstractdataframe/join.jl
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,13 @@ function compose_joined_table(joiner::DataFrameJoiner, kind::Symbol,
cols = Vector{AbstractVector}(undef, ncleft + ncol(dfr_noon))
# inner and left joins preserve non-missingness of the left frame
_similar_left = kind == :inner || kind == :left ? similar : similar_missing
for (i, col) in enumerate(columns(joiner.dfl))
for (i, col) in enumerate(eachcol(joiner.dfl))
cols[i] = _similar_left(col, nrow)
copyto!(cols[i], view(col, all_orig_left_ixs))
end
# inner and right joins preserve non-missingness of the right frame
_similar_right = kind == :inner || kind == :right ? similar : similar_missing
for (i, col) in enumerate(columns(dfr_noon))
for (i, col) in enumerate(eachcol(dfr_noon))
cols[i+ncleft] = _similar_right(col, nrow)
copyto!(cols[i+ncleft], view(col, all_orig_right_ixs))
permute!(cols[i+ncleft], right_perm)
Expand Down Expand Up @@ -407,7 +407,7 @@ end
function crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame; makeunique::Bool=false)
# Row counts of the two inputs.
r1, r2 = size(df1, 1), size(df2, 1)
# Combine both column indexes; `makeunique` is forwarded to `merge`.
colindex = merge(index(df1), index(df2), makeunique=makeunique)
# NOTE(review): the two `cols = ...` assignments below appear to be the old (`columns`)
# and new (`eachcol`) sides of this diff hunk, shown without +/- markers.
# Columns of `df1` are repeated with `inner=r2` and columns of `df2` with `outer=r1`,
# so each row of `df1` is paired with each row of `df2`.
cols = Any[[repeat(c, inner=r2) for c in columns(df1)];
[repeat(c, outer=r1) for c in columns(df2)]]
cols = Any[[repeat(c, inner=r2) for c in eachcol(df1)];
[repeat(c, outer=r1) for c in eachcol(df2)]]
DataFrame(cols, colindex)
end
2 changes: 1 addition & 1 deletion src/dataframerow/dataframerow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ function isequal_row(df1::AbstractDataFrame, r1::Int, df2::AbstractDataFrame, r2
elseif !(ncol(df1) == ncol(df2))
throw(ArgumentError("Rows of the tables that have different number of columns cannot be compared. Got $(ncol(df1)) and $(ncol(df2)) columns"))
end
@inbounds for (col1, col2) in zip(columns(df1), columns(df2))
@inbounds for (col1, col2) in zip(eachcol(df1), eachcol(df2))
isequal(col1[r1], col2[r2]) || return false
end
return true
Expand Down
4 changes: 2 additions & 2 deletions src/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import Base: @deprecate

import Base: keys, values, insert!
@deprecate keys(df::AbstractDataFrame) names(df)
@deprecate values(df::AbstractDataFrame) columns(df)
@deprecate values(df::AbstractDataFrame) eachcol(df)
@deprecate insert!(df::DataFrame, df2::AbstractDataFrame) (foreach(col -> df[col] = df2[col], names(df2)); df)

@deprecate pool categorical
Expand Down Expand Up @@ -1741,7 +1741,7 @@ end
function hashrows(df::SubDataFrame, skipmissing::Bool)
rhashes = zeros(UInt, nrow(df))
missings = fill(false, skipmissing ? nrow(df) : 0)
cols = columns(df)
cols = eachcol(df)
for i in 1:ncol(df)
hashrows_col!(rhashes, missings, view(parent(df)[i], rows(df)), i == 1)
end
Expand Down
2 changes: 1 addition & 1 deletion src/groupeddataframe/grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ function _combine(f::Any, gd::GroupedDataFrame)
fun = last(f)
elseif f isa Pair
df = gd.parent[collect(first(f))]
incols = NamedTuple{Tuple(names(df))}(columns(df))
incols = NamedTuple{Tuple(names(df))}(eachcol(df))
fun = last(f)
else
incols = nothing
Expand Down
11 changes: 5 additions & 6 deletions test/cat.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
module TestCat
using Test, Random, DataFrames
using DataFrames: columns
const ≅ isequal

#
Expand Down Expand Up @@ -165,13 +164,13 @@ module TestCat
@testset "vcat mixed coltypes" begin
df = vcat(DataFrame([[1]], [:x]), DataFrame([[1.0]], [:x]))
@test df == DataFrame([[1.0, 1.0]], [:x])
@test typeof.(columns(df)) == [Vector{Float64}]
@test typeof.(eachcol(df)) == [Vector{Float64}]
df = vcat(DataFrame([[1]], [:x]), DataFrame([["1"]], [:x]))
@test df == DataFrame([[1, "1"]], [:x])
@test typeof.(columns(df)) == [Vector{Any}]
@test typeof.(eachcol(df)) == [Vector{Any}]
df = vcat(DataFrame([Union{Missing, Int}[1]], [:x]), DataFrame([[1]], [:x]))
@test df == DataFrame([[1, 1]], [:x])
@test typeof.(columns(df)) == [Vector{Union{Missing, Int}}]
@test typeof.(eachcol(df)) == [Vector{Union{Missing, Int}}]
df = vcat(DataFrame([CategoricalArray([1])], [:x]), DataFrame([[1]], [:x]))
@test df == DataFrame([[1, 1]], [:x])
@test df[:x] isa Vector{Int}
Expand All @@ -186,14 +185,14 @@ module TestCat
df = vcat(DataFrame([Union{Int, Missing}[1]], [:x]),
DataFrame([["1"]], [:x]))
@test df == DataFrame([[1, "1"]], [:x])
@test typeof.(columns(df)) == [Vector{Any}]
@test typeof.(eachcol(df)) == [Vector{Any}]
df = vcat(DataFrame([CategoricalArray([1])], [:x]),
DataFrame([CategoricalArray(["1"])], [:x]))
@test df == DataFrame([[1, "1"]], [:x])
@test df[:x] isa CategoricalVector{Any}
df = vcat(DataFrame([trues(1)], [:x]), DataFrame([[false]], [:x]))
@test df == DataFrame([[true, false]], [:x])
@test typeof.(columns(df)) == [Vector{Bool}]
@test typeof.(eachcol(df)) == [Vector{Bool}]
end

@testset "vcat out of order" begin
Expand Down
7 changes: 3 additions & 4 deletions test/constructors.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
module TestConstructors
using Test, DataFrames
using DataFrames: Index, _columns, index
using DataFrames: columns
const ≅ isequal

#
Expand Down Expand Up @@ -116,13 +115,13 @@ module TestConstructors
@testset "column types" begin
# Checks the concrete vector type of each column as columns are added.
# NOTE(review): each adjacent `columns(df)` / `eachcol(df)` assertion pair below appears
# to be the old and new sides of this diff hunk, shown without +/- markers.
df = DataFrame(A = 1:3, B = 2:4, C = 3:5)
# Columns built from integer ranges are expected to be `Array{Int,1}`.
answer = [Array{Int,1}, Array{Int,1}, Array{Int,1}]
@test map(typeof, columns(df)) == answer
@test map(typeof, eachcol(df)) == answer
# A column containing `missing` is expected to be a `Vector{Union{Int, Missing}}`.
df[:D] = [4, 5, missing]
push!(answer, Vector{Union{Int, Missing}})
@test map(typeof, columns(df)) == answer
@test map(typeof, eachcol(df)) == answer
# Assigning the scalar `'c'` is expected to produce a `Vector{Char}` column.
df[:E] = 'c'
push!(answer, Vector{Char})
@test map(typeof, columns(df)) == answer
@test map(typeof, eachcol(df)) == answer
end

@testset "categorical constructor" begin
Expand Down
25 changes: 12 additions & 13 deletions test/dataframe.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
module TestDataFrame
using Dates, DataFrames, LinearAlgebra, Statistics, Random, Test
using DataFrames: _columns
using DataFrames: columns
const ≅ isequal
const ≇ !isequal

Expand Down Expand Up @@ -73,20 +72,20 @@ module TestDataFrame
c = CategoricalArray{Union{Float64, Missing}}(undef, 2))
# https://github.com/JuliaData/Missings.jl/issues/66
# @test missingdf ≅ similar(df, 2)
@test typeof.(columns(similar(df, 2))) == typeof.(columns(missingdf))
@test typeof.(eachcol(similar(df, 2))) == typeof.(eachcol(missingdf))
@test size(similar(df, 2)) == size(missingdf)
end

@testset "Associative methods" begin
df = DataFrame(a=[1, 2], b=[3.0, 4.0])
@test haskey(df, :a)
@test !haskey(df, :c)
@test get(df, :a, -1) === columns(df)[1]
@test get(df, :a, -1) === eachcol(df)[1]
@test get(df, :c, -1) == -1
@test !isempty(df)

@test empty!(df) === df
@test isempty(columns(df))
@test isempty(eachcol(df))
@test isempty(df)
@test isempty(DataFrame(a=[], b=[]))

Expand Down Expand Up @@ -524,11 +523,11 @@ module TestDataFrame

df = DataFrame(A = Vector{Union{Int, Missing}}(1:3), B = Vector{Union{Int, Missing}}(4:6))
DRT = CategoricalArrays.DefaultRefType
@test all(c -> isa(c, Vector{Union{Int, Missing}}), columns(categorical!(deepcopy(df))))
@test all(c -> isa(c, Vector{Union{Int, Missing}}), eachcol(categorical!(deepcopy(df))))
@test all(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}},
columns(categorical!(deepcopy(df), [1,2])))
eachcol(categorical!(deepcopy(df), [1,2])))
@test all(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}},
columns(categorical!(deepcopy(df), [:A,:B])))
eachcol(categorical!(deepcopy(df), [:A,:B])))
@test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}},
_columns(categorical!(deepcopy(df), [:A]))) == 1
@test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}},
Expand Down Expand Up @@ -561,7 +560,7 @@ module TestDataFrame
Union{Int, Missing}[2, 6], Union{Int, Missing}[3, 7],
Union{Int, Missing}[4, 8]], [:id, :a, :b, :c, :d])
@test isa(udf[1], Vector{Int})
@test all(isa.(columns(udf)[2:end], Vector{Union{Int, Missing}}))
@test all(isa.(eachcol(udf)[2:end], Vector{Union{Int, Missing}}))
df = DataFrame([categorical(repeat(1:2, inner=4)),
categorical(repeat('a':'d', outer=2)), categorical(1:8)],
[:id, :variable, :value])
Expand All @@ -571,7 +570,7 @@ module TestDataFrame
Union{Int, Missing}[2, 6], Union{Int, Missing}[3, 7],
Union{Int, Missing}[4, 8]], [:id, :a, :b, :c, :d])
@test isa(udf[1], CategoricalVector{Int})
@test all(isa.(columns(udf)[2:end], CategoricalVector{Union{Int, Missing}}))
@test all(isa.(eachcol(udf)[2:end], CategoricalVector{Union{Int, Missing}}))
end

@testset "duplicate entries in unstack warnings" begin
Expand Down Expand Up @@ -718,14 +717,14 @@ module TestDataFrame
df = DataFrame([CategoricalArray(1:10),
CategoricalArray(string.('a':'j'))])
allowmissing!(df)
@test all(x->x <: CategoricalVector, typeof.(columns(df)))
@test all(x->x <: CategoricalVector, typeof.(eachcol(df)))
@test eltypes(df)[1] <: Union{CategoricalValue{Int}, Missing}
@test eltypes(df)[2] <: Union{CategoricalString, Missing}
df[1,2] = missing
@test_throws MissingException disallowmissing!(df)
df[1,2] = "a"
disallowmissing!(df)
@test all(x->x <: CategoricalVector, typeof.(columns(df)))
@test all(x->x <: CategoricalVector, typeof.(eachcol(df)))
@test eltypes(df)[1] <: CategoricalValue{Int}
@test eltypes(df)[2] <: CategoricalString
end
Expand All @@ -735,12 +734,12 @@ module TestDataFrame
b = CategoricalArray(["foo"]),
c = [0.0],
d = CategoricalArray([0.0]))
@test typeof.(columns(similar(df))) == typeof.(columns(df))
@test typeof.(eachcol(similar(df))) == typeof.(eachcol(df))
@test size(similar(df)) == size(df)

rows = size(df, 1) + 5
@test size(similar(df, rows)) == (rows, size(df, 2))
@test typeof.(columns(similar(df, rows))) == typeof.(columns(df))
@test typeof.(eachcol(similar(df, rows))) == typeof.(eachcol(df))

e = @test_throws ArgumentError similar(df, -1)
@test e.value.msg == "the number of rows must be positive"
Expand Down
3 changes: 1 addition & 2 deletions test/dataframerow.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
module TestDataFrameRow
using Test, DataFrames
using DataFrames: columns

df = DataFrame(a=Union{Int, Missing}[1, 2, 3, 1, 2, 2],
b=[2.0, missing, 1.2, 2.0, missing, missing],
Expand Down Expand Up @@ -46,7 +45,7 @@ module TestDataFrameRow
@test hash(DataFrameRow(df, 2)) != hash(DataFrameRow(df, 6))

# check that hashrows() function generates the same hashes as DataFrameRow
df_rowhashes, _ = DataFrames.hashrows(Tuple(columns(df)), false)
df_rowhashes, _ = DataFrames.hashrows(Tuple(eachcol(df)), false)
@test df_rowhashes == [hash(dr) for dr in eachrow(df)]

# test incompatible frames
Expand Down
Loading

0 comments on commit 3f89718

Please sign in to comment.