Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make the `names` argument of `eachcol` default to `false` #1613

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ eltypes(df)
```

"""
eltypes(df::AbstractDataFrame) = eltype.(columns(df))
eltypes(df::AbstractDataFrame) = eltype.(eachcol(df))

Base.size(df::AbstractDataFrame) = (nrow(df), ncol(df))
function Base.size(df::AbstractDataFrame, i::Integer)
Expand Down Expand Up @@ -241,7 +241,7 @@ that is different than the number of rows present in `df`.
"""
function Base.similar(df::AbstractDataFrame, rows::Integer = size(df, 1))
rows < 0 && throw(ArgumentError("the number of rows must be positive"))
DataFrame(Any[similar(x, rows) for x in columns(df)], copy(index(df)))
DataFrame(Any[similar(x, rows) for x in eachcol(df)], copy(index(df)))
end

##############################################################################
Expand Down Expand Up @@ -432,7 +432,7 @@ function StatsBase.describe(df::AbstractDataFrame; stats::Union{Symbol,AbstractV
data[:variable] = names(df)

# An array of Dicts for summary statistics
column_stats_dicts = [get_stats(col) for col in columns(df)]
column_stats_dicts = [get_stats(col) for col in eachcol(df)]
for stat in stats
# for each statistic, loop through the columns array to find values
# letting the comprehension choose the appropriate type
Expand Down Expand Up @@ -794,7 +794,7 @@ function Base.convert(::Type{Matrix{T}}, df::AbstractDataFrame) where T
n, p = size(df)
res = Matrix{T}(undef, n, p)
idx = 1
for (name, col) in zip(names(df), columns(df))
for (name, col) in eachcol(df, true)
try
copyto!(res, idx, col)
catch err
Expand Down
34 changes: 11 additions & 23 deletions src/abstractdataframe/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,12 @@ struct DataFrameColumns{T<:AbstractDataFrame, V} <: AbstractVector{V}
end

"""
eachcol(df::AbstractDataFrame, names::Bool=true)
eachcol(df::AbstractDataFrame, names::Bool=false)

Return a `DataFrameColumns` that iterates an `AbstractDataFrame` column by column.
If `names` is equal to `true` (currently the default, in the future the default
will be set to `false`) iteration returns a pair consisting of column name
If `names` is equal to `true`, iteration yields pairs consisting of a column name
and the corresponding column vector.
If `names` is equal to `false` then column vectors are yielded.
If `names` is equal to `false` (the default) then column vectors are yielded.

**Examples**

Expand All @@ -70,17 +69,17 @@ julia> df = DataFrame(x=1:4, y=11:14)
│ 3 │ 3 │ 13 │
│ 4 │ 4 │ 14 │

julia> collect(eachcol(df))
2-element Array{AbstractArray{T,1} where T,1}:
[1, 2, 3, 4]
[11, 12, 13, 14]

julia> collect(eachcol(df, true))
2-element Array{Pair{Symbol,AbstractArray{T,1} where T},1}:
:x => [1, 2, 3, 4]
:y => [11, 12, 13, 14]

julia> collect(eachcol(df, false))
2-element Array{AbstractArray{T,1} where T,1}:
[1, 2, 3, 4]
[11, 12, 13, 14]

julia> sum.(eachcol(df, false))
julia> sum.(eachcol(df))
2-element Array{Int64,1}:
10
50
Expand All @@ -93,25 +92,14 @@ julia> map(eachcol(df, false)) do col
3
```
"""
@inline function eachcol(df::T, names::Bool) where T<: AbstractDataFrame
@inline function eachcol(df::T, names::Bool=false) where T<: AbstractDataFrame
if names
DataFrameColumns{T, Pair{Symbol, AbstractVector}}(df)
else
DataFrameColumns{T, AbstractVector}(df)
end
end

# TODO: remove this method after deprecation
# and add default argument value above
function eachcol(df::AbstractDataFrame)
Base.depwarn("In the future eachcol will have names argument set to false by default", :eachcol)
eachcol(df, true)
end

# TODO: remove this method after deprecation
# this is left to make sure we do not forget to properly fix columns calls
columns(df::AbstractDataFrame) = eachcol(df, false)

Base.size(itr::DataFrameColumns) = (size(itr.df, 2),)
Base.IndexStyle(::Type{<:DataFrameColumns}) = Base.IndexLinear()

Expand Down Expand Up @@ -178,7 +166,7 @@ julia> mapcols(x -> x.^2, df)
function mapcols(f::Union{Function,Type}, df::AbstractDataFrame)
# note: `f` must return a consistent length
res = DataFrame()
for (n, v) in eachcol(df)
for (n, v) in eachcol(df, true)
res[n] = f(v)
end
res
Expand Down
8 changes: 4 additions & 4 deletions src/abstractdataframe/join.jl
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,13 @@ function compose_joined_table(joiner::DataFrameJoiner, kind::Symbol,
cols = Vector{AbstractVector}(undef, ncleft + ncol(dfr_noon))
# inner and left joins preserve non-missingness of the left frame
_similar_left = kind == :inner || kind == :left ? similar : similar_missing
for (i, col) in enumerate(columns(joiner.dfl))
for (i, col) in enumerate(eachcol(joiner.dfl))
cols[i] = _similar_left(col, nrow)
copyto!(cols[i], view(col, all_orig_left_ixs))
end
# inner and right joins preserve non-missingness of the right frame
_similar_right = kind == :inner || kind == :right ? similar : similar_missing
for (i, col) in enumerate(columns(dfr_noon))
for (i, col) in enumerate(eachcol(dfr_noon))
cols[i+ncleft] = _similar_right(col, nrow)
copyto!(cols[i+ncleft], view(col, all_orig_right_ixs))
permute!(cols[i+ncleft], right_perm)
Expand Down Expand Up @@ -407,7 +407,7 @@ end
function crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame; makeunique::Bool=false)
r1, r2 = size(df1, 1), size(df2, 1)
colindex = merge(index(df1), index(df2), makeunique=makeunique)
cols = Any[[repeat(c, inner=r2) for c in columns(df1)];
[repeat(c, outer=r1) for c in columns(df2)]]
cols = Any[[repeat(c, inner=r2) for c in eachcol(df1)];
[repeat(c, outer=r1) for c in eachcol(df2)]]
DataFrame(cols, colindex)
end
2 changes: 1 addition & 1 deletion src/dataframerow/dataframerow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ function isequal_row(df1::AbstractDataFrame, r1::Int, df2::AbstractDataFrame, r2
elseif !(ncol(df1) == ncol(df2))
throw(ArgumentError("Rows of the tables that have different number of columns cannot be compared. Got $(ncol(df1)) and $(ncol(df2)) columns"))
end
@inbounds for (col1, col2) in zip(columns(df1), columns(df2))
@inbounds for (col1, col2) in zip(eachcol(df1), eachcol(df2))
isequal(col1[r1], col2[r2]) || return false
end
return true
Expand Down
4 changes: 2 additions & 2 deletions src/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import Base: @deprecate

import Base: keys, values, insert!
@deprecate keys(df::AbstractDataFrame) names(df)
@deprecate values(df::AbstractDataFrame) columns(df)
@deprecate values(df::AbstractDataFrame) eachcol(df)
@deprecate insert!(df::DataFrame, df2::AbstractDataFrame) (foreach(col -> df[col] = df2[col], names(df2)); df)

@deprecate pool categorical
Expand Down Expand Up @@ -1741,7 +1741,7 @@ end
function hashrows(df::SubDataFrame, skipmissing::Bool)
rhashes = zeros(UInt, nrow(df))
missings = fill(false, skipmissing ? nrow(df) : 0)
cols = columns(df)
cols = eachcol(df)
for i in 1:ncol(df)
hashrows_col!(rhashes, missings, view(parent(df)[i], rows(df)), i == 1)
end
Expand Down
2 changes: 1 addition & 1 deletion src/groupeddataframe/grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ function _combine(f::Any, gd::GroupedDataFrame)
fun = last(f)
elseif f isa Pair
df = gd.parent[collect(first(f))]
incols = NamedTuple{Tuple(names(df))}(columns(df))
incols = NamedTuple{Tuple(names(df))}(eachcol(df))
fun = last(f)
else
incols = nothing
Expand Down
11 changes: 5 additions & 6 deletions test/cat.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
module TestCat
using Test, Random, DataFrames
using DataFrames: columns
const ≅ = isequal

#
Expand Down Expand Up @@ -165,13 +164,13 @@ module TestCat
@testset "vcat mixed coltypes" begin
df = vcat(DataFrame([[1]], [:x]), DataFrame([[1.0]], [:x]))
@test df == DataFrame([[1.0, 1.0]], [:x])
@test typeof.(columns(df)) == [Vector{Float64}]
@test typeof.(eachcol(df)) == [Vector{Float64}]
df = vcat(DataFrame([[1]], [:x]), DataFrame([["1"]], [:x]))
@test df == DataFrame([[1, "1"]], [:x])
@test typeof.(columns(df)) == [Vector{Any}]
@test typeof.(eachcol(df)) == [Vector{Any}]
df = vcat(DataFrame([Union{Missing, Int}[1]], [:x]), DataFrame([[1]], [:x]))
@test df == DataFrame([[1, 1]], [:x])
@test typeof.(columns(df)) == [Vector{Union{Missing, Int}}]
@test typeof.(eachcol(df)) == [Vector{Union{Missing, Int}}]
df = vcat(DataFrame([CategoricalArray([1])], [:x]), DataFrame([[1]], [:x]))
@test df == DataFrame([[1, 1]], [:x])
@test df[:x] isa Vector{Int}
Expand All @@ -186,14 +185,14 @@ module TestCat
df = vcat(DataFrame([Union{Int, Missing}[1]], [:x]),
DataFrame([["1"]], [:x]))
@test df == DataFrame([[1, "1"]], [:x])
@test typeof.(columns(df)) == [Vector{Any}]
@test typeof.(eachcol(df)) == [Vector{Any}]
df = vcat(DataFrame([CategoricalArray([1])], [:x]),
DataFrame([CategoricalArray(["1"])], [:x]))
@test df == DataFrame([[1, "1"]], [:x])
@test df[:x] isa CategoricalVector{Any}
df = vcat(DataFrame([trues(1)], [:x]), DataFrame([[false]], [:x]))
@test df == DataFrame([[true, false]], [:x])
@test typeof.(columns(df)) == [Vector{Bool}]
@test typeof.(eachcol(df)) == [Vector{Bool}]
end

@testset "vcat out of order" begin
Expand Down
7 changes: 3 additions & 4 deletions test/constructors.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
module TestConstructors
using Test, DataFrames
using DataFrames: Index, _columns, index
using DataFrames: columns
const ≅ = isequal

#
Expand Down Expand Up @@ -116,13 +115,13 @@ module TestConstructors
@testset "column types" begin
df = DataFrame(A = 1:3, B = 2:4, C = 3:5)
answer = [Array{Int,1}, Array{Int,1}, Array{Int,1}]
@test map(typeof, columns(df)) == answer
@test map(typeof, eachcol(df)) == answer
df[:D] = [4, 5, missing]
push!(answer, Vector{Union{Int, Missing}})
@test map(typeof, columns(df)) == answer
@test map(typeof, eachcol(df)) == answer
df[:E] = 'c'
push!(answer, Vector{Char})
@test map(typeof, columns(df)) == answer
@test map(typeof, eachcol(df)) == answer
end

@testset "categorical constructor" begin
Expand Down
25 changes: 12 additions & 13 deletions test/dataframe.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
module TestDataFrame
using Dates, DataFrames, LinearAlgebra, Statistics, Random, Test
using DataFrames: _columns
using DataFrames: columns
const ≅ = isequal
const ≇ = !isequal

Expand Down Expand Up @@ -73,20 +72,20 @@ module TestDataFrame
c = CategoricalArray{Union{Float64, Missing}}(undef, 2))
# https://github.com/JuliaData/Missings.jl/issues/66
# @test missingdf ≅ similar(df, 2)
@test typeof.(columns(similar(df, 2))) == typeof.(columns(missingdf))
@test typeof.(eachcol(similar(df, 2))) == typeof.(eachcol(missingdf))
@test size(similar(df, 2)) == size(missingdf)
end

@testset "Associative methods" begin
df = DataFrame(a=[1, 2], b=[3.0, 4.0])
@test haskey(df, :a)
@test !haskey(df, :c)
@test get(df, :a, -1) === columns(df)[1]
@test get(df, :a, -1) === eachcol(df)[1]
@test get(df, :c, -1) == -1
@test !isempty(df)

@test empty!(df) === df
@test isempty(columns(df))
@test isempty(eachcol(df))
@test isempty(df)
@test isempty(DataFrame(a=[], b=[]))

Expand Down Expand Up @@ -524,11 +523,11 @@ module TestDataFrame

df = DataFrame(A = Vector{Union{Int, Missing}}(1:3), B = Vector{Union{Int, Missing}}(4:6))
DRT = CategoricalArrays.DefaultRefType
@test all(c -> isa(c, Vector{Union{Int, Missing}}), columns(categorical!(deepcopy(df))))
@test all(c -> isa(c, Vector{Union{Int, Missing}}), eachcol(categorical!(deepcopy(df))))
@test all(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}},
columns(categorical!(deepcopy(df), [1,2])))
eachcol(categorical!(deepcopy(df), [1,2])))
@test all(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}},
columns(categorical!(deepcopy(df), [:A,:B])))
eachcol(categorical!(deepcopy(df), [:A,:B])))
@test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}},
_columns(categorical!(deepcopy(df), [:A]))) == 1
@test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}},
Expand Down Expand Up @@ -561,7 +560,7 @@ module TestDataFrame
Union{Int, Missing}[2, 6], Union{Int, Missing}[3, 7],
Union{Int, Missing}[4, 8]], [:id, :a, :b, :c, :d])
@test isa(udf[1], Vector{Int})
@test all(isa.(columns(udf)[2:end], Vector{Union{Int, Missing}}))
@test all(isa.(eachcol(udf)[2:end], Vector{Union{Int, Missing}}))
df = DataFrame([categorical(repeat(1:2, inner=4)),
categorical(repeat('a':'d', outer=2)), categorical(1:8)],
[:id, :variable, :value])
Expand All @@ -571,7 +570,7 @@ module TestDataFrame
Union{Int, Missing}[2, 6], Union{Int, Missing}[3, 7],
Union{Int, Missing}[4, 8]], [:id, :a, :b, :c, :d])
@test isa(udf[1], CategoricalVector{Int})
@test all(isa.(columns(udf)[2:end], CategoricalVector{Union{Int, Missing}}))
@test all(isa.(eachcol(udf)[2:end], CategoricalVector{Union{Int, Missing}}))
end

@testset "duplicate entries in unstack warnings" begin
Expand Down Expand Up @@ -718,14 +717,14 @@ module TestDataFrame
df = DataFrame([CategoricalArray(1:10),
CategoricalArray(string.('a':'j'))])
allowmissing!(df)
@test all(x->x <: CategoricalVector, typeof.(columns(df)))
@test all(x->x <: CategoricalVector, typeof.(eachcol(df)))
@test eltypes(df)[1] <: Union{CategoricalValue{Int}, Missing}
@test eltypes(df)[2] <: Union{CategoricalString, Missing}
df[1,2] = missing
@test_throws MissingException disallowmissing!(df)
df[1,2] = "a"
disallowmissing!(df)
@test all(x->x <: CategoricalVector, typeof.(columns(df)))
@test all(x->x <: CategoricalVector, typeof.(eachcol(df)))
@test eltypes(df)[1] <: CategoricalValue{Int}
@test eltypes(df)[2] <: CategoricalString
end
Expand All @@ -735,12 +734,12 @@ module TestDataFrame
b = CategoricalArray(["foo"]),
c = [0.0],
d = CategoricalArray([0.0]))
@test typeof.(columns(similar(df))) == typeof.(columns(df))
@test typeof.(eachcol(similar(df))) == typeof.(eachcol(df))
@test size(similar(df)) == size(df)

rows = size(df, 1) + 5
@test size(similar(df, rows)) == (rows, size(df, 2))
@test typeof.(columns(similar(df, rows))) == typeof.(columns(df))
@test typeof.(eachcol(similar(df, rows))) == typeof.(eachcol(df))

e = @test_throws ArgumentError similar(df, -1)
@test e.value.msg == "the number of rows must be positive"
Expand Down
3 changes: 1 addition & 2 deletions test/dataframerow.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
module TestDataFrameRow
using Test, DataFrames
using DataFrames: columns

df = DataFrame(a=Union{Int, Missing}[1, 2, 3, 1, 2, 2],
b=[2.0, missing, 1.2, 2.0, missing, missing],
Expand Down Expand Up @@ -46,7 +45,7 @@ module TestDataFrameRow
@test hash(DataFrameRow(df, 2)) != hash(DataFrameRow(df, 6))

# check that hashrows() function generates the same hashes as DataFrameRow
df_rowhashes, _ = DataFrames.hashrows(Tuple(columns(df)), false)
df_rowhashes, _ = DataFrames.hashrows(Tuple(eachcol(df)), false)
@test df_rowhashes == [hash(dr) for dr in eachrow(df)]

# test incompatible frames
Expand Down
Loading