From 55533d1f0e13b6f2397180e9269ed3f7eaec2484 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 6 Nov 2020 22:55:42 +0100 Subject: [PATCH] [BREAKING] deprecate DataFrame constructors (#2464) --- NEWS.md | 2 + Project.toml | 2 +- src/dataframe/dataframe.jl | 319 ++++++++++++++++++++--------------- src/deprecated.jl | 40 +++++ src/other/tables.jl | 10 +- test/broadcasting.jl | 149 ++++++++-------- test/cat.jl | 14 +- test/constructors.jl | 243 ++++---------------------- test/data.jl | 14 +- test/dataframe.jl | 48 ++---- test/dataframerow.jl | 16 +- test/deprecated.jl | 160 +++++++++++++++++- test/grouping.jl | 15 +- test/indexing.jl | 76 ++++----- test/indexing_begin_tests.jl | 2 +- test/io.jl | 6 +- test/iteration.jl | 8 +- test/reshape.jl | 10 +- test/select.jl | 54 +++--- test/show.jl | 5 +- test/sort.jl | 6 +- test/subdataframe.jl | 6 +- test/tables.jl | 17 +- 23 files changed, 647 insertions(+), 575 deletions(-) diff --git a/NEWS.md b/NEWS.md index 9d0c5d3a29..c1a23764a9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -91,6 +91,8 @@ ## Deprecated * `DataFrame!` is now deprecated ([#2338](https://github.com/JuliaData/DataFrames.jl/pull/2338)) +* several non-standard `DataFrame` constructors are now deprecated + ([#2464](https://github.com/JuliaData/DataFrames.jl/pull/2464)) * all old deprecations now throw an error ([#2350](https://github.com/JuliaData/DataFrames.jl/pull/2350)) diff --git a/Project.toml b/Project.toml index c743385032..444581d0d7 100644 --- a/Project.toml +++ b/Project.toml @@ -44,5 +44,5 @@ Missings = "0.4.2" PooledArrays = "0.5" Reexport = "0.1, 0.2" SortingAlgorithms = "0.1, 0.2, 0.3" -Tables = "1" +Tables = "1.1" TableTraits = "0.4, 1" diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index a2676e487c..2ad9d8cba1 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -8,105 +8,150 @@ particularly a Vector or CategoricalVector.
# Constructors ```julia -DataFrame(columns::AbstractVector, names::AbstractVector{Symbol}; - makeunique::Bool=false, copycols::Bool=true) -DataFrame(columns::AbstractVector, names::AbstractVector{<:AbstractString}; - makeunique::Bool=false, copycols::Bool=true) -DataFrame(columns::NTuple{N,AbstractVector}, names::NTuple{N,Symbol}; - makeunique::Bool=false, copycols::Bool=true) -DataFrame(columns::NTuple{N,AbstractVector}, names::NTuple{N,<:AbstractString}; - makeunique::Bool=false, copycols::Bool=true) -DataFrame(columns::Matrix, names::AbstractVector{Symbol}; makeunique::Bool=false) -DataFrame(columns::Matrix, names::AbstractVector{<:AbstractString}; - makeunique::Bool=false) -DataFrame(kwargs...) -DataFrame(pairs::Pair{Symbol,<:Any}...; makeunique::Bool=false, copycols::Bool=true) -DataFrame(pairs::Pair{<:AbstractString,<:Any}...; makeunique::Bool=false, - copycols::Bool=true) +DataFrame(pairs::Pair...; makeunique::Bool=false, copycols::Bool=true) DataFrame(pairs::AbstractVector{<:Pair}; makeunique::Bool=false, copycols::Bool=true) -DataFrame(pairs::NTuple{N, Pair}; makeunique::Bool=false, copycols::Bool=true) where {N} -DataFrame() # an empty DataFrame -DataFrame(column_eltypes::AbstractVector, names::AbstractVector{Symbol}, - nrows::Integer=0; makeunique::Bool=false) -DataFrame(column_eltypes::AbstractVector, names::AbstractVector{<:AbstractString}, - nrows::Integer=0; makeunique::Bool=false) DataFrame(ds::AbstractDict; copycols::Bool=true) -DataFrame(table; makeunique::Bool=false, copycols::Bool=true) -DataFrame(::Union{DataFrame, SubDataFrame}; copycols::Bool=true) +DataFrame(kwargs..., copycols::Bool=true) + +DataFrame(columns::AbstractVecOrMat, names::Union{AbstractVector, Symbol}; + makeunique::Bool=false, copycols::Bool=true) + +DataFrame(table; copycols::Bool=true) +DataFrame(::DataFrameRow) DataFrame(::GroupedDataFrame; keepkeys::Bool=true) ``` -# Arguments -- `columns` : a Vector with each column as contents or a Matrix -- `names` : the column names +# 
Keyword arguments + +- `copycols` : whether vectors passed as columns should be copied; by default set + to `true` and the vectors are copied; if set to `false` then the constructor + will still copy the passed columns if it is not possible to construct a + `DataFrame` without materializing new columns. - `makeunique` : if `false` (the default), an error will be raised - if duplicates in `names` are found; if `true`, duplicate names will be suffixed - with `_i` (`i` starting at 1 for the first duplicate). -- `kwargs` : the key gives the column names, and the value is the - column contents; note that the `copycols` keyword argument indicates if - if vectors passed as columns should be copied so it is not possible to create - a column whose name is `:copycols` using this constructor -- `t` : elemental type of all columns -- `nrows`, `ncols` : number of rows and columns -- `column_eltypes` : element type of each column -- `categorical` : a vector of `Bool` indicating which columns should be converted - to `CategoricalVector` -- `ds` : `AbstractDict` of columns -- `table` : any type that implements the - [Tables.jl](https://github.com/JuliaData/Tables.jl) interface -- `copycols` : whether vectors passed as columns should be copied; if set - to `false` then the constructor will still copy the passed columns - if it is not possible to construct a `DataFrame` without materializing new columns. - -All columns in `columns` must be `AbstractVector`s and have the same length. An -exception are `DataFrame(kwargs...)`, `DataFrame(pairs::Pair...)`, -`DataFrame(pairs::AbstractVector{<:Pair})`, and `DataFrame(pairs::NTuple{N, Pair})` form -constructors which additionally allow a column to be of any other type that is -not an `AbstractArray`, in which case the passed value is automatically repeated -to fill a new vector of the appropriate length. As a particular rule values -stored in a `Ref` or a `0`-dimensional `AbstractArray` are unwrapped and treated -in the same way. 
- -Additionally `DataFrame` can be used to collect a [`GroupedDataFrame`](@ref) -into a `DataFrame`. In this case the order of rows in the result follows the order + +(note that not all constructors support these keyword arguments) + +# Details on behavior of different constructors + +It is allowed to pass a vector of `Pair`s, a list of `Pair`s as positional +arguments, or a list of keyword arguments. In this case each pair is considered +to represent a column name to column value mapping and column name must be a +`Symbol` or string. Alternatively a dictionary can be passed to the constructor +in which case its entries are considered to define the column name and column +value pairs. If the dictionary is a `Dict` then column names will be sorted in +the returned `DataFrame`. + +In all the constructors described above column value can be a vector which is +consumed as is or an object of any other type (except `AbstractArray`). In the +latter case the passed value is automatically repeated to fill a new vector of +the appropriate length. As a particular rule values stored in a `Ref` or a +`0`-dimensional `AbstractArray` are unwrapped and treated in the same way. + +It is also allowed to pass a vector of vectors or a matrix as as the first +argument. In this case the second argument must be +a vector of `Symbol`s or strings specifying column names, or the symbol `:auto` +to generate column names `x1`, `x2`, ... automatically. + +If a single positional argument is passed to a `DataFrame` constructor then it +is assumed to be of type that implements the +[Tables.jl](https://github.com/JuliaData/Tables.jl) interface using which the +returned `DataFrame` is materialized. + +Finally it is allowed to construct a `DataFrame` from a `DataFrameRow` or a +`GroupedDataFrame`. 
In the latter case the `keepkeys` keyword argument specifies +whether the resulting `DataFrame` should contain the grouping columns of the +passed `GroupedDataFrame` and the order of rows in the result follows the order of groups in the `GroupedDataFrame` passed. # Notes -The `DataFrame` constructor by default copies all columns vectors passed to it. -Pass `copycols=false` to reuse vectors without copying them - -If a column is passed to a `DataFrame` constructor or is assigned as a whole -using `setindex!` then its reference is stored in the `DataFrame`. An exception -to this rule is assignment of an `AbstractRange` as a column, in which case the -range is collected to a `Vector`. -Because column types can vary, a `DataFrame` is not type stable. For -performance-critical code, do not index into a `DataFrame` inside of loops. +The `DataFrame` constructor by default copies all column vectors passed to it. +Pass the `copycols=false` keyword argument (where supported) to reuse vectors without +copying them. + +By default an error will be raised if duplicates in column names are found. Pass +the `makeunique=true` keyword argument (where supported) to accept duplicate names, +in which case they will be suffixed with `_i` (`i` starting at 1 for the first +duplicate). + +If an `AbstractRange` is passed to a `DataFrame` constructor as a column it is +always collected to a `Vector` (even if `copycols=false`). As a general rule +`AbstractRange` values are always materialized to a `Vector` by all functions in +DataFrames.jl before being stored in a `DataFrame`. + +The `DataFrame` type is designed to allow column types to vary and to be +dynamically changed also after it is constructed. Therefore `DataFrame`s are not +type stable.
For performance-critical code that requires type-stability either +use the functionality provided by `select`/`transform`/`combine` functions, use +`Tables.columntable` and `Tables.namedtupleiterator` functions, use barrier +functions, or provide type assertions to the variables that hold columns +extracted from a `DataFrame`. # Examples ```julia -df = DataFrame() -v = ["x","y","z"][rand(1:3, 10)] -df1 = DataFrame(Any[collect(1:10), v, rand(10)], [:A, :B, :C]) -df2 = DataFrame(A = 1:10, B = v, C = rand(10)) -summary(df1) -describe(df2) -first(df1, 10) -df1.B -df2[!, :C] -df1[:, :A] -df1[1:4, 1:2] -df1[Not(1:4), Not(1:2)] -df1[1:2, [:A,:C]] -df1[1:2, r"[AC]"] -df1[:, [:A,:C]] -df1[:, [1,3]] -df1[1:4, :] -df1[1:4, :C] -df1[1:4, :C] = 40. * df1[1:4, :C] -[df1; df2] # vcat -[df1 df2] # hcat -size(df1) +julia> DataFrame((a=[1,2], b=[3,4])) # Tables.jl table constructor +2×2 DataFrame +│ Row │ a │ b │ +│ │ Int64 │ Int64 │ +├─────┼───────┼───────┤ +│ 1 │ 1 │ 3 │ +│ 2 │ 2 │ 4 │ + +julia> DataFrame([(a=1, b=0), (a=2, b=0)]) # Tables.jl table constructor +2×2 DataFrame +│ Row │ a │ b │ +│ │ Int64 │ Int64 │ +├─────┼───────┼───────┤ +│ 1 │ 1 │ 0 │ +│ 2 │ 2 │ 0 │ + +julia> DataFrame("a" => 1:2, "b" => 0) # Pair constructor +2×2 DataFrame +│ Row │ a │ b │ +│ │ Int64 │ Int64 │ +├─────┼───────┼───────┤ +│ 1 │ 1 │ 0 │ +│ 2 │ 2 │ 0 │ + +julia> DataFrame([:a => 1:2, :b => 0]) # vector of Pairs constructor +2×2 DataFrame +│ Row │ a │ b │ +│ │ Int64 │ Int64 │ +├─────┼───────┼───────┤ +│ 1 │ 1 │ 0 │ +│ 2 │ 2 │ 0 │ + +julia> DataFrame(Dict(:a => 1:2, :b => 0)) # dictionary constructor +2×2 DataFrame +│ Row │ a │ b │ +│ │ Int64 │ Int64 │ +├─────┼───────┼───────┤ +│ 1 │ 1 │ 0 │ +│ 2 │ 2 │ 0 │ + +julia> DataFrame(a=1:2, b=0) # keyword argument constructor +2×2 DataFrame +│ Row │ a │ b │ +│ │ Int64 │ Int64 │ +├─────┼───────┼───────┤ +│ 1 │ 1 │ 0 │ +│ 2 │ 2 │ 0 │ + +julia> DataFrame([[1, 2], [0, 0]], [:a, :b]) # vector of vectors constructor +2×2 DataFrame +│ Row │ a │ b │ +│ │ Int64 │ Int64 
│ +├─────┼───────┼───────┤ +│ 1 │ 1 │ 0 │ +│ 2 │ 2 │ 0 │ + +julia> DataFrame([1 0; 2 0], :auto) # matrix constructor +2×2 DataFrame +│ Row │ x1 │ x2 │ +│ │ Int64 │ Int64 │ +├─────┼───────┼───────┤ +│ 1 │ 1 │ 0 │ +│ 2 │ 2 │ 0 │ ``` """ struct DataFrame <: AbstractDataFrame @@ -182,24 +227,36 @@ function DataFrame(pairs::Pair{<:AbstractString,<:Any}...; makeunique::Bool=fals copycols=copycols) end -# these two are needed as a workaround Tables.jl dispatch -DataFrame(pairs::AbstractVector{<:Pair}; makeunique::Bool=false, - copycols::Bool=true) = - DataFrame(pairs..., makeunique=makeunique, copycols=copycols) - -DataFrame(pairs::NTuple{N, Pair}; makeunique::Bool=false, - copycols::Bool=true) where {N} = - DataFrame(pairs..., makeunique=makeunique, copycols=copycols) +# this is needed as a workaround for Tables.jl dispatch +function DataFrame(pairs::AbstractVector{<:Pair}; makeunique::Bool=false, + copycols::Bool=true) + if isempty(pairs) + return DataFrame() + else + if !(all(((k,v),) -> k isa Symbol, pairs) || all(((k,v),) -> k isa AbstractString, pairs)) + throw(ArgumentError("All column names must be either Symbols or strings (mixing is not allowed)")) + end + colnames = [Symbol(k) for (k,v) in pairs] + columns = Any[v for (k,v) in pairs] + return DataFrame(columns, Index(colnames, makeunique=makeunique), + copycols=copycols) + end +end function DataFrame(d::AbstractDict; copycols::Bool=true) - if isa(d, Dict) - colnames = sort!(collect(keys(d))) + if all(k -> k isa Symbol, keys(d)) + colnames = collect(Symbol, keys(d)) + elseif all(k -> k isa AbstractString, keys(d)) + colnames = [Symbol(k) for k in keys(d)] else - colnames = keys(d) + throw(ArgumentError("All column names must be either Symbols or strings (mixing is not allowed)")) end - colindex = Index([Symbol(k) for k in colnames]) - columns = Any[d[c] for c in colnames] - DataFrame(columns, colindex, copycols=copycols) + + colindex = Index(colnames) + columns = Any[v for v in values(d)] + df = 
DataFrame(columns, colindex, copycols=copycols) + d isa Dict && select!(df, sort!(propertynames(df))) + return df end function DataFrame(; kwargs...) @@ -210,12 +267,15 @@ function DataFrame(; kwargs...) columns = Any[] copycols = true for (kw, val) in kwargs - if kw == :copycols + if kw === :copycols if val isa Bool copycols = val else throw(ArgumentError("the `copycols` keyword argument must be Boolean")) end + elseif kw === :makeunique + throw(ArgumentError("the `makeunique` keyword argument is not allowed" * + " in DataFrame(; kwargs...) constructor")) else push!(cnames, kw) push!(columns, val) @@ -239,34 +299,27 @@ DataFrame(columns::AbstractVector, cnames::AbstractVector{<:AbstractString}; makeunique::Bool=false, copycols::Bool=true) = DataFrame(columns, Symbol.(cnames), makeunique=makeunique, copycols=copycols) -DataFrame(columns::AbstractVector{<:AbstractVector}, - cnames::AbstractVector{Symbol}=gennames(length(columns)); +DataFrame(columns::AbstractVector{<:AbstractVector}, cnames::AbstractVector{Symbol}; makeunique::Bool=false, copycols::Bool=true)::DataFrame = DataFrame(collect(AbstractVector, columns), Index(convert(Vector{Symbol}, cnames), makeunique=makeunique), copycols=copycols) -DataFrame(columns::AbstractVector{<:AbstractVector}, - cnames::AbstractVector{<:AbstractString}; +DataFrame(columns::AbstractVector{<:AbstractVector}, cnames::AbstractVector{<:AbstractString}; makeunique::Bool=false, copycols::Bool=true) = DataFrame(columns, Symbol.(cnames); makeunique=makeunique, copycols=copycols) -DataFrame(columns::NTuple{N, AbstractVector}, cnames::NTuple{N, Symbol}; - makeunique::Bool=false, copycols::Bool=true) where {N} = - DataFrame(collect(AbstractVector, columns), collect(Symbol, cnames), - makeunique=makeunique, copycols=copycols) - -DataFrame(columns::NTuple{N, AbstractVector}, cnames::NTuple{N, AbstractString}; - makeunique::Bool=false, copycols::Bool=true) where {N} = - DataFrame(columns, Symbol.(cnames); makeunique=makeunique, 
copycols=copycols) - -DataFrame(columns::NTuple{N, AbstractVector}; copycols::Bool=true) where {N} = - DataFrame(collect(AbstractVector, columns), gennames(length(columns)), - copycols=copycols) +function DataFrame(columns::AbstractVector, cnames::Symbol; copycols::Bool=true) + if cnames !== :auto + throw(ArgumentError("if the first positional argument to DataFrame " * + "constructor is a vector of vectors and the second " * + "positional argument is passed then the second " * + "argument must be a vector of column names or :auto")) + end + return DataFrame(columns, gennames(length(columns)), copycols=copycols) +end -DataFrame(columns::AbstractMatrix, - cnames::AbstractVector{Symbol} = gennames(size(columns, 2)); - makeunique::Bool=false) = +DataFrame(columns::AbstractMatrix, cnames::AbstractVector{Symbol}; makeunique::Bool=false) = DataFrame(AbstractVector[columns[:, i] for i in 1:size(columns, 2)], cnames, makeunique=makeunique, copycols=false) @@ -274,20 +327,16 @@ DataFrame(columns::AbstractMatrix, cnames::AbstractVector{<:AbstractString}; makeunique::Bool=false) = DataFrame(columns, Symbol.(cnames); makeunique=makeunique) -function DataFrame(column_eltypes::AbstractVector{T}, cnames::AbstractVector{Symbol}, - nrows::Integer=0; makeunique::Bool=false)::DataFrame where T<:Type - columns = AbstractVector[elty >: Missing ? 
- fill!(Tables.allocatecolumn(elty, nrows), missing) : - Tables.allocatecolumn(elty, nrows) - for elty in column_eltypes] - return DataFrame(columns, Index(convert(Vector{Symbol}, cnames), - makeunique=makeunique), copycols=false) +function DataFrame(columns::AbstractMatrix, cnames::Symbol) + if cnames !== :auto + throw(ArgumentError("if the first positional argument to DataFrame " * + "constructor is a matrix and a second " * + "positional argument is passed then the second " * + "argument must be a vector of column names or :auto")) + end + return DataFrame(columns, gennames(size(columns, 2)), makeunique=false) end -DataFrame(column_eltypes::AbstractVector{<:Type}, - cnames::AbstractVector{<:AbstractString}, - nrows::Integer=0; makeunique::Bool=false) = - DataFrame(column_eltypes, Symbol.(cnames), nrows; makeunique=makeunique) ############################################################################## ## @@ -881,10 +930,10 @@ hcat!(df1::DataFrame, df2::DataFrame; makeunique=makeunique, copycols=copycols)::DataFrame hcat!(df::DataFrame, x::AbstractVector; makeunique::Bool=false, copycols::Bool=true) = - hcat!(df, DataFrame(AbstractVector[x], copycols=copycols), + hcat!(df, DataFrame(AbstractVector[x], [:x1], copycols=copycols), makeunique=makeunique, copycols=copycols) hcat!(x::AbstractVector, df::DataFrame; makeunique::Bool=false, copycols::Bool=true) = - hcat!(DataFrame(AbstractVector[x], copycols=copycols), df, + hcat!(DataFrame(AbstractVector[x], [:x1], copycols=copycols), df, makeunique=makeunique, copycols=copycols) hcat!(x, df::DataFrame; makeunique::Bool=false, copycols::Bool=true) = throw(ArgumentError("x must be AbstractVector or AbstractDataFrame")) @@ -1177,8 +1226,6 @@ function Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:sete return df1 end -Base.convert(::Type{DataFrame}, A::AbstractMatrix) = DataFrame(A) - Base.convert(::Type{DataFrame}, d::AbstractDict) = DataFrame(d, copycols=false) function Base.push!(df::DataFrame, 
row::Union{AbstractDict, NamedTuple}; diff --git a/src/deprecated.jl b/src/deprecated.jl index e491bdcfdd..ea9a9c323e 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -105,3 +105,43 @@ function categorical!(df::DataFrame, cols::Union{Type, Nothing}=nothing; end return transform!(df, names(df, cols) .=> (x -> categorical(x, compress=compress)), renamecols=false) end + +@deprecate DataFrame(pairs::NTuple{N, Pair}; makeunique::Bool=false, + copycols::Bool=true) where {N} DataFrame(pairs..., makeunique=makeunique, copycols=copycols) +@deprecate DataFrame(columns::NTuple{N, AbstractVector}, cnames::NTuple{N, Symbol}; makeunique::Bool=false, + copycols::Bool=true) where {N} DataFrame(collect(columns), collect(cnames); + makeunique=makeunique, copycols=copycols) +@deprecate DataFrame(columns::NTuple{N, AbstractVector}, cnames::NTuple{N, AbstractString}; makeunique::Bool=false, + copycols::Bool=true) where {N} DataFrame(collect(columns), [Symbol(c) for c in cnames]; + makeunique=makeunique, copycols=copycols) +@deprecate DataFrame(columns::NTuple{N, AbstractVector}; + copycols::Bool=true) where {N} DataFrame(collect(columns), + Symbol.(:x, 1:length(columns)), copycols=copycols) + +# this deprecation is very important, because without it users will +# get strange results with old code as described in https://github.com/JuliaData/Tables.jl/issues/208 +@deprecate DataFrame(columns::AbstractVector{<:AbstractVector}; makeunique::Bool=false, + copycols::Bool=true) DataFrame(columns, :auto, copycols=copycols) + +@deprecate DataFrame(columns::AbstractMatrix) DataFrame(columns, :auto) + +function DataFrame(column_eltypes::AbstractVector{T}, cnames::AbstractVector{Symbol}, + nrows::Integer=0; makeunique::Bool=false)::DataFrame where T<:Type + Base.depwarn("`DataFrame` constructor with passed eltypes is deprecated. " * + "Pass explicitly created columns to a `DataFrame` constructor instead.", + :DataFrame) + columns = AbstractVector[elty >: Missing ?
+ fill!(Tables.allocatecolumn(elty, nrows), missing) : + Tables.allocatecolumn(elty, nrows) + for elty in column_eltypes] + return DataFrame(columns, Index(convert(Vector{Symbol}, cnames), + makeunique=makeunique), copycols=false) +end + +DataFrame(column_eltypes::AbstractVector{<:Type}, + cnames::AbstractVector{<:AbstractString}, + nrows::Integer=0; makeunique::Bool=false) = + DataFrame(column_eltypes, Symbol.(cnames), nrows; makeunique=makeunique) + +import Base: convert +@deprecate convert(::Type{DataFrame}, A::AbstractMatrix) DataFrame(Tables.table(A, header=Symbol.(:x, axes(A, 2)))) diff --git a/src/other/tables.jl b/src/other/tables.jl index c2c61f46ee..8a406b814e 100644 --- a/src/other/tables.jl +++ b/src/other/tables.jl @@ -43,11 +43,11 @@ fromcolumns(x, names; copycols::Bool=true) = copycols=copycols) function DataFrame(x::T; copycols::Bool=true) where {T} - if !Tables.istable(x) - if x isa AbstractVector && all(col -> isa(col, AbstractVector), x) - return DataFrame(Vector{AbstractVector}(x), copycols=copycols) - elseif (x isa AbstractVector || x isa Tuple) && - all(v -> v isa Pair{Symbol, <:AbstractVector}, x) + if !Tables.istable(x) && x isa AbstractVector && !isempty(x) + # here we handle eltypes not specific enough to be dispatched + # to other DataFrames constructors taking vector of `Pair`s + if all(v -> v isa Pair{Symbol, <:AbstractVector}, x) || + all(v -> v isa Pair{<:AbstractString, <:AbstractVector}, x) return DataFrame(AbstractVector[last(v) for v in x], [first(v) for v in x], copycols=copycols) end diff --git a/test/broadcasting.jl b/test/broadcasting.jl index 5204b86779..35c45b0047 100644 --- a/test/broadcasting.jl +++ b/test/broadcasting.jl @@ -4,10 +4,10 @@ using Test, DataFrames, PooledArrays, Random, CategoricalArrays const ≅ = isequal -refdf = DataFrame(reshape(1.5:15.5, (3,5))) +refdf = DataFrame(reshape(1.5:15.5, (3,5)), :auto) @testset "CartesianIndex" begin - df = DataFrame(rand(2, 3)) + df = DataFrame(rand(2, 3), :auto) for i in 
axes(df, 1), j in axes(df, 2) @test df[i,j] == df[CartesianIndex(i,j)] r = rand() @@ -48,9 +48,9 @@ end df1 = copy(refdf) df2 = view(copy(refdf), :, :) @test (df1 .+ df2) ./ 2 == refdf - @test (df1 .- df2) == DataFrame(zeros(size(refdf))) + @test (df1 .- df2) == DataFrame(zeros(size(refdf)), names(refdf)) @test (df1 .* df2) == refdf .^ 2 - @test (df1 ./ df2) == DataFrame(ones(size(refdf))) + @test (df1 ./ df2) == DataFrame(ones(size(refdf)), names(refdf)) end @testset "broadcasting of AbstractDataFrame objects thrown exceptions" begin @@ -995,32 +995,32 @@ end end @testset "tuple broadcasting" begin - X = DataFrame(zeros(2, 3)) + X = DataFrame(zeros(2, 3), :auto) X .= (1, 2) - @test X == DataFrame([1 1 1; 2 2 2]) + @test X == DataFrame([1 1 1; 2 2 2], :auto) - X = DataFrame(zeros(2, 3)) + X = DataFrame(zeros(2, 3), :auto) X .= (1, 2) .+ 10 .- X - @test X == DataFrame([11 11 11; 12 12 12]) + @test X == DataFrame([11 11 11; 12 12 12], :auto) - X = DataFrame(zeros(2, 3)) + X = DataFrame(zeros(2, 3), :auto) X .+= (1, 2) .+ 10 - @test X == DataFrame([11 11 11; 12 12 12]) + @test X == DataFrame([11 11 11; 12 12 12], :auto) - df = DataFrame(rand(2, 3)) - @test floor.(Int, df ./ (1,)) == DataFrame(zeros(Int, 2, 3)) + df = DataFrame(rand(2, 3), :auto) + @test floor.(Int, df ./ (1,)) == DataFrame(zeros(Int, 2, 3), :auto) df .= floor.(Int, df ./ (1,)) - @test df == DataFrame(zeros(2, 3)) + @test df == DataFrame(zeros(2, 3), :auto) - df = DataFrame(rand(2, 3)) + df = DataFrame(rand(2, 3), :auto) @test_throws InexactError convert.(Int, df) df2 = convert.(Int, floor.(df)) - @test df2 == DataFrame(zeros(Int, 2, 3)) + @test df2 == DataFrame(zeros(Int, 2, 3), :auto) @test eltype.(eachcol(df2)) == [Int, Int, Int] end @testset "scalar on assignment side" begin - df = DataFrame(rand(2, 3)) + df = DataFrame(rand(2, 3), :auto) @test_throws MethodError df[1, 1] .= df[1, 1] .- df[1, 1] df[1, 1:1] .= df[1, 1] .- df[1, 1] @test df[1, 1] == 0 @@ -1030,17 +1030,17 @@ end end @testset "nothing 
test" begin - X = DataFrame(Any[1 2; 3 4]) + X = DataFrame(Any[1 2; 3 4], :auto) X .= nothing - @test (X .== nothing) == DataFrame(trues(2, 2)) + @test (X .== nothing) == DataFrame(trues(2, 2), :auto) - X = DataFrame([1 2; 3 4]) + X = DataFrame([1 2; 3 4], :auto) @test_throws MethodError X .= nothing - @test X == DataFrame([1 2; 3 4]) + @test X == DataFrame([1 2; 3 4], :auto) - X = DataFrame([1 2; 3 4]) + X = DataFrame([1 2; 3 4], :auto) foreach(i -> X[!, i] .= nothing, axes(X, 2)) - @test (X .== nothing) == DataFrame(trues(2, 2)) + @test (X .== nothing) == DataFrame(trues(2, 2), :auto) end @testset "aliasing test" begin @@ -1071,7 +1071,7 @@ end Random.seed!(1234) for i in 1:10 - df1 = DataFrame(rand(100, 100)) + df1 = DataFrame(rand(100, 100), :auto) df2 = copy(df1) for i in 1:100 df2[!, rand(1:100)] = df1[!, i] @@ -1083,7 +1083,7 @@ end end for i in 1:10 - df1 = DataFrame(rand(100, 100)) + df1 = DataFrame(rand(100, 100), :auto) df2 = copy(df1) for i in 1:100 df2[!, rand(1:100)] = df1[!, i] @@ -1095,7 +1095,7 @@ end end for i in 1:10 - df1 = DataFrame(rand(100, 100)) + df1 = DataFrame(rand(100, 100), :auto) df2 = copy(df1) for i in 1:100 df2[!, rand(1:100)] = df1[!, i] @@ -1107,7 +1107,7 @@ end end for i in 1:10 - df1 = DataFrame(rand(100, 100)) + df1 = DataFrame(rand(100, 100), :auto) df2 = copy(df1) df3 = copy(df1) for i in 1:100 @@ -1116,7 +1116,7 @@ end end df6 = copy(df2) df7 = copy(df3) - df4 = DataFrame(sin.(df1[1,1] .+ copy(df1[!, 1]) .+ Matrix(df2) ./ Matrix(df3))) + df4 = DataFrame(sin.(df1[1,1] .+ copy(df1[!, 1]) .+ Matrix(df2) ./ Matrix(df3)), names(df3)) df5 = sin.(view(df1,1,1) .+ df1[!, 1] .+ df2 ./ df3) df1 .= sin.(view(df1,1,1) .+ df1[!, 1] .+ df2 ./ df3) @test df1 == df4 == df5 @@ -1125,7 +1125,7 @@ end end for i in 1:10 - df1 = DataFrame(rand(100, 100)) + df1 = DataFrame(rand(100, 100), :auto) df2 = copy(df1) df3 = copy(df1) for i in 1:100 @@ -1134,7 +1134,7 @@ end end df6 = copy(df2) df7 = copy(df3) - df4 = DataFrame(sin.(df1[1,1] .+ 
copy(df1[!, 1]) .+ Matrix(df2) ./ Matrix(df3))) + df4 = DataFrame(sin.(df1[1,1] .+ copy(df1[!, 1]) .+ Matrix(df2) ./ Matrix(df3)), names(df3)) df5 = sin.(view(df1,1,1) .+ df1[!, 1] .+ view(df2, :, :) ./ df3) df1 .= sin.(view(df1[!, 1],1) .+ view(df1[!, 1], :) .+ df2 ./ view(df3, :, :)) @test df1 == df4 == df5 @@ -1143,7 +1143,7 @@ end end for i in 1:10 - df1 = DataFrame(rand(100, 100)) + df1 = DataFrame(rand(100, 100), :auto) df2 = copy(df1) df3 = copy(df1) for i in 1:100 @@ -1152,7 +1152,7 @@ end end df6 = copy(df2) df7 = copy(df3) - df4 = DataFrame(sin.(df1[1,1] .+ copy(df1[!, 1]) .+ Matrix(df2) ./ Matrix(df3))) + df4 = DataFrame(sin.(df1[1,1] .+ copy(df1[!, 1]) .+ Matrix(df2) ./ Matrix(df3)), names(df3)) df5 = sin.(view(df1,1,1) .+ df1[!, 1] .+ view(df2, :, :) ./ df3) view(df1, :, :) .= sin.(view(df1[!, 1],1) .+ view(df1[!, 1], :) .+ df2 ./ view(df3, :, :)) @test df1 == df4 == df5 @@ -1162,47 +1162,46 @@ end end @testset "@. test" begin - df = DataFrame(rand(2, 3)) + df = DataFrame(rand(2, 3), :auto) sdf = view(df, 1:1, :) dfm = Matrix(df) sdfm = Matrix(sdf) r1 = @. (df + sdf + 5) / sdf - r2 = @. (df + sdf + 5) / sdf - @test r1 == DataFrame(r2) + @test r1 isa DataFrame @. df = sin(sdf / (df + 1)) @. 
dfm = sin(sdfm / (dfm + 1)) - @test df == DataFrame(dfm) + @test df == DataFrame(dfm, names(df)) end @testset "test common cases" begin m = rand(1000, 10) - df = DataFrame(m) - @test df .+ 1 == DataFrame(m .+ 1) - @test df .+ transpose(1:10) == DataFrame(m .+ transpose(1:10)) - @test df .+ (1:1000) == DataFrame(m .+ (1:1000)) - @test df .+ m == DataFrame(m .+ m) - @test m .+ df == DataFrame(m .+ m) - @test df .+ df == DataFrame(m .+ m) + df = DataFrame(m, :auto) + @test df .+ 1 == DataFrame(m .+ 1, names(df)) + @test df .+ transpose(1:10) == DataFrame(m .+ transpose(1:10), names(df)) + @test df .+ (1:1000) == DataFrame(m .+ (1:1000), names(df)) + @test df .+ m == DataFrame(m .+ m, names(df)) + @test m .+ df == DataFrame(m .+ m, names(df)) + @test df .+ df == DataFrame(m .+ m, names(df)) df .+= 1 m .+= 1 - @test df == DataFrame(m) + @test df == DataFrame(m, names(df)) df .+= transpose(1:10) m .+= transpose(1:10) - @test df == DataFrame(m) + @test df == DataFrame(m, names(df)) df .+= (1:1000) m .+= (1:1000) - @test df == DataFrame(m) + @test df == DataFrame(m, names(df)) df .+= df m .+= m - @test df == DataFrame(m) + @test df == DataFrame(m, names(df)) df2 = copy(df) m2 = copy(m) df .+= df .+ df2 .+ m2 .+ 1 m .+= m .+ df2 .+ m2 .+ 1 - @test df == DataFrame(m) + @test df == DataFrame(m, names(df)) end @testset "data frame only on left hand side broadcasting assignment" begin @@ -1258,7 +1257,7 @@ end @testset "broadcasting with 3-dimensional object" begin y = zeros(4,3,2) - df = DataFrame(ones(4,3)) + df = DataFrame(ones(4,3), :auto) @test_throws DimensionMismatch df .+ y @test_throws DimensionMismatch y .+ df @test_throws DimensionMismatch df .+= y @@ -1659,51 +1658,51 @@ end ["x1", "x2"], Between("x1", "x2")] df = DataFrame(x1=1:3, x2=4:6) df[!, selector] .= "a" - @test df == DataFrame(fill("a", 3, 2)) + @test df == DataFrame(fill("a", 3, 2), :auto) @test df.x1 !== df.x2 df = DataFrame(x1=1:3, x2=4:6) df[!, selector] .= Ref((a=1,b=2)) - @test df == 
DataFrame(fill((a=1, b=2), 3, 2)) + @test df == DataFrame(fill((a=1, b=2), 3, 2), :auto) @test df.x1 !== df.x2 df = DataFrame(x1=1:3, x2=4:6) df[!, selector] .= ["a" "b"] @test df == DataFrame(["a" "b" "a" "b" - "a" "b"]) + "a" "b"], :auto) @test df.x1 !== df.x2 df = DataFrame(x1=1:3, x2=4:6) df[!, selector] .= ["a", "b", "c"] @test df == DataFrame(["a" "a" "b" "b" - "c" "c"]) + "c" "c"], :auto) @test df.x1 !== df.x2 df = DataFrame(x1=1:3, x2=4:6) df[!, selector] .= categorical(["a"]) @test df == DataFrame(["a" "a" "a" "a" - "a" "a"]) + "a" "a"], :auto) @test df.x1 isa CategoricalVector @test df.x2 isa CategoricalVector @test df.x1 !== df.x2 df = DataFrame(x1=1:3, x2=4:6) - df[!, selector] .= DataFrame(["a" "b"]) + df[!, selector] .= DataFrame(["a" "b"], :auto) @test df == DataFrame(["a" "b" "a" "b" - "a" "b"]) + "a" "b"], :auto) @test df.x1 !== df.x2 df = DataFrame(x1=1:3, x2=4:6) df[!, selector] .= DataFrame(["a" "d" "b" "e" - "c" "f"]) + "c" "f"], :auto) @test df == DataFrame(["a" "d" "b" "e" - "c" "f"]) + "c" "f"], :auto) @test df.x1 !== df.x2 df = DataFrame(x1=1:3, x2=4:6) @@ -1712,19 +1711,19 @@ end "c" "f"] @test df == DataFrame(["a" "d" "b" "e" - "c" "f"]) + "c" "f"], :auto) @test df.x1 !== df.x2 df = DataFrame(x1=1:3, x2=4:6, x3=1) df[!, selector] .= "a" @test df == DataFrame(["a" "a" 1 "a" "a" 1 - "a" "a" 1]) + "a" "a" 1], :auto) @test df.x1 !== df.x2 df = DataFrame(x1=1:3, x2=4:6, x3=1) df[!, selector] .= Ref((a=1,b=2)) - @test df[:, 1:2] == DataFrame(fill((a=1, b=2), 3, 2)) + @test df[:, 1:2] == DataFrame(fill((a=1, b=2), 3, 2), :auto) @test df[:, 3] == [1, 1, 1] @test df.x1 !== df.x2 @@ -1732,39 +1731,39 @@ end df[!, selector] .= ["a" "b"] @test df == DataFrame(["a" "b" 1 "a" "b" 1 - "a" "b" 1]) + "a" "b" 1], :auto) @test df.x1 !== df.x2 df = DataFrame(x1=1:3, x2=4:6, x3=1) df[!, selector] .= ["a", "b", "c"] @test df == DataFrame(["a" "a" 1 "b" "b" 1 - "c" "c" 1]) + "c" "c" 1], :auto) @test df.x1 !== df.x2 df = DataFrame(x1=1:3, x2=4:6, x3=1) df[!, 
selector] .= categorical(["a"]) @test df == DataFrame(["a" "a" 1 "a" "a" 1 - "a" "a" 1]) + "a" "a" 1], :auto) @test df.x1 isa CategoricalVector @test df.x2 isa CategoricalVector @test df.x1 !== df.x2 df = DataFrame(x1=1:3, x2=4:6, x3=1) - df[!, selector] .= DataFrame(["a" "b"]) + df[!, selector] .= DataFrame(["a" "b"], :auto) @test df == DataFrame(["a" "b" 1 "a" "b" 1 - "a" "b" 1]) + "a" "b" 1], :auto) @test df.x1 !== df.x2 df = DataFrame(x1=1:3, x2=4:6, x3=1) df[!, selector] .= DataFrame(["a" "d" "b" "e" - "c" "f"]) + "c" "f"], :auto) @test df == DataFrame(["a" "d" 1 "b" "e" 1 - "c" "f" 1]) + "c" "f" 1], :auto) @test df.x1 !== df.x2 df = DataFrame(x1=1:3, x2=4:6, x3=1) @@ -1773,7 +1772,7 @@ end "c" "f"] @test df == DataFrame(["a" "d" 1 "b" "e" 1 - "c" "f" 1]) + "c" "f" 1], :auto) @test df.x1 !== df.x2 end @@ -1788,7 +1787,7 @@ end end @testset "@views on df[!, col]" begin - df = DataFrame(ones(3, 4)) + df = DataFrame(ones(3, 4), :auto) @views df[!, 1] .+= 1 @test df[!, 1] == [2.0, 2.0, 2.0] @views df[:, 2] .= df[!, 4] .+ df[!, 3] @@ -1799,7 +1798,7 @@ end end @testset "broadcasting of df[:, col] = value" begin - df = DataFrame(ones(3, 4)) + df = DataFrame(ones(3, 4), :auto) z = ["a", "b", "c"] df[:, :z] .= z @test df.z == z @@ -1807,29 +1806,29 @@ end @test_throws ArgumentError df[:, 6] .= z @test_throws MethodError df[:, 1] .= z - df = DataFrame(ones(3, 4)) + df = DataFrame(ones(3, 4), :auto) z = "abc" df[:, :z] .= z @test df.z == fill("abc", 3) @test_throws ArgumentError df[:, 6] .= z @test_throws MethodError df[:, 1] .= z - df = DataFrame(ones(3, 4)) + df = DataFrame(ones(3, 4), :auto) z = fill("abc", 1, 1, 1) @test_throws DimensionMismatch df[:, :z] .= z - df = DataFrame(ones(3, 4)) + df = DataFrame(ones(3, 4), :auto) z = ["a", "b", "c"] df[:, "z"] .= z @test df.z == z @test df.z !== z - df = DataFrame(ones(3, 4)) + df = DataFrame(ones(3, 4), :auto) z = "abc" df[:, "z"] .= z @test df.z == fill("abc", 3) - df = DataFrame(ones(3, 4)) + df = DataFrame(ones(3, 4), 
:auto) z = fill("abc", 1, 1, 1) @test_throws DimensionMismatch df[:, "z"] .= z end diff --git a/test/cat.jl b/test/cat.jl index af1c7325bc..8f51612f6a 100644 --- a/test/cat.jl +++ b/test/cat.jl @@ -10,10 +10,10 @@ const ≅ = isequal nvint = [1, 2, missing, 4] nvstr = ["one", "two", missing, "four"] - df2 = DataFrame([nvint, nvstr]) - df3 = DataFrame([nvint]) - df4 = convert(DataFrame, [1:4 1:4]) - df5 = DataFrame([Union{Int, Missing}[1,2,3,4], nvstr]) + df2 = DataFrame([nvint, nvstr], :auto) + df3 = DataFrame([nvint], :auto) + df4 = DataFrame([1:4 1:4], [:x1, :x2]) + df5 = DataFrame([Union{Int, Missing}[1,2,3,4], nvstr], :auto) ref_df = copy(df3) dfh = hcat(df3, df4, makeunique=true) @@ -227,7 +227,7 @@ end df = DataFrame([3.0 2.0 2.0 2.0 2.0 2.0 3.0 1.0 2.0 - 3.0 3.0 3.0]) + 3.0 3.0 3.0], :auto) df[!, 3] = Int.(df[!, 3]) @test vcat(missing_df) == DataFrame() @@ -253,13 +253,13 @@ end alt_df = deepcopy(df) @test vcat(df, alt_df) == DataFrame([[3.0,2.0,3.0,3.0,3.0,2.0,3.0,3.0], [2.0,2.0,1.0,3.0,2.0,2.0,1.0,3.0], - [2,2,2,3,2,2,2,3]]) + [2,2,2,3,2,2,2,3]], :auto) # Don't fail on non-matching types df[!, 1] = zeros(Int, nrow(df)) @test vcat(df, alt_df) == DataFrame([[0.0,0.0,0.0,0.0,3.0,2.0,3.0,3.0], [2.0,2.0,1.0,3.0,2.0,2.0,1.0,3.0], - [2,2,2,3,2,2,2,3]]) + [2,2,2,3,2,2,2,3]], :auto) df1 = DataFrame(A=Int[], B=Float64[]) df2 = DataFrame(B=1.0, A=1) diff --git a/test/constructors.jl b/test/constructors.jl index d856f1461c..a8809ca029 100644 --- a/test/constructors.jl +++ b/test/constructors.jl @@ -1,6 +1,6 @@ module TestConstructors -using Test, DataFrames, CategoricalArrays +using Test, DataFrames, CategoricalArrays, DataStructures using DataFrames: Index, _columns, index const ≅ = isequal @@ -30,106 +30,35 @@ const ≅ = isequal @test df2.x2 === vecvec[2] for copycolsarg in (true, false) - @test df == DataFrame(vecvec, copycols=copycolsarg) - @test df == DataFrame(collect(Any, vecvec), copycols=copycolsarg) - @test df == DataFrame(collect(AbstractVector, vecvec), 
copycols=copycolsarg) - @test df == DataFrame(Tuple(vecvec), copycols=copycolsarg) + @test df == DataFrame(vecvec, :auto, copycols=copycolsarg) + @test df == DataFrame(collect(Any, vecvec), :auto, copycols=copycolsarg) + @test df == DataFrame(collect(AbstractVector, vecvec), :auto, copycols=copycolsarg) @test df == DataFrame(x1 = vecvec[1], x2 = vecvec[2], copycols=copycolsarg) for cols in ([:x1, :x2], ["x1", "x2"]) @test df == DataFrame(vecvec, cols, copycols=copycolsarg) @test df == DataFrame(collect(Any, vecvec), cols, copycols=copycolsarg) @test df == DataFrame(collect(AbstractVector, vecvec), cols, copycols=copycolsarg) - @test df == DataFrame(Tuple(vecvec), Tuple(cols), copycols=copycolsarg) @test df == DataFrame([col=>vect for (col, vect) in zip(cols, vecvec)], copycols=copycolsarg) end end - @test DataFrame([1:3, 1:3]) == DataFrame(Any[1:3, 1:3]) == - DataFrame(UnitRange[1:3, 1:3]) == DataFrame(AbstractVector[1:3, 1:3]) == - DataFrame([[1,2,3], [1,2,3]]) == DataFrame(Any[[1,2,3], [1,2,3]]) == - DataFrame(([1,2,3], [1,2,3])) == DataFrame((1:3, 1:3)) == - DataFrame((1:3, [1,2,3])) == DataFrame([1:3, [1,2,3]]) - DataFrame((:x1=>1:3, :x2=>[1,2,3])) == DataFrame([:x1=>1:3, :x2=>[1,2,3]]) == - DataFrame(("x1"=>1:3, "x2"=>[1,2,3])) == DataFrame(["x1"=>1:3, "x2"=>[1,2,3]]) + @test DataFrame([1:3, 1:3], :auto) == DataFrame(Any[1:3, 1:3], :auto) == + DataFrame(UnitRange[1:3, 1:3], :auto) == DataFrame(AbstractVector[1:3, 1:3], :auto) == + DataFrame([[1,2,3], [1,2,3]], :auto) == DataFrame(Any[[1,2,3], [1,2,3]], :auto) == + DataFrame([1:3, [1,2,3]], :auto) + DataFrame([:x1=>1:3, :x2=>[1,2,3]]) == DataFrame(["x1"=>1:3, "x2"=>[1,2,3]]) - @inferred DataFrame([1:3, 1:3]) - @inferred DataFrame((1:3, 1:3)) + @inferred DataFrame([1:3, 1:3], :auto) @inferred DataFrame([1:3, 1:3], [:a, :b]) - @inferred DataFrame((1:3, 1:3), (:a, :b)) @inferred DataFrame([1:3, 1:3], ["a", "b"]) - @inferred DataFrame((1:3, 1:3), ("a", "b")) - @inferred DataFrame((:x1=>1:3, :x2=>[1,2,3])) @inferred 
DataFrame([:x1=>1:3, :x2=>[1,2,3]]) - @inferred DataFrame(("x1"=>1:3, "x2"=>[1,2,3])) @inferred DataFrame(["x1"=>1:3, "x2"=>[1,2,3]]) @test df !== DataFrame(df) @test df == DataFrame(df) - df2 = convert(DataFrame, Union{Float64, Missing}[0.0 1.0; - 0.0 1.0; - 0.0 1.0]) - rename!(df2, [:x1, :x2]) - @test df[!, :x1] == df2[!, :x1] - @test df[!, :x2] == df2[!, :x2] - - - df2 = convert(DataFrame, Union{Float64, Missing}[0.0 1.0; - 0.0 1.0; - 0.0 1.0]) - rename!(df2, ["x1", "x2"]) - @test df[!, "x1"] == df2[!, "x1"] - @test df[!, "x2"] == df2[!, "x2"] - - df2 = DataFrame([0.0 1.0; - 0.0 1.0; - 0.0 1.0]) - rename!(df2, [:x1, :x2]) - @test df[!, :x1] == df2[!, :x1] - @test df[!, :x2] == df2[!, :x2] - - df2 = DataFrame([0.0 1.0; - 0.0 1.0; - 0.0 1.0]) - rename!(df2, ["x1", "x2"]) - @test df[!, "x1"] == df2[!, "x1"] - @test df[!, "x2"] == df2[!, "x2"] - - @test_throws MethodError DataFrame([0.0 1.0; - 0.0 1.0; - 0.0 1.0], copycols=false) - - df2 = DataFrame([0.0 1.0; - 0.0 1.0; - 0.0 1.0], ["a", "b"]) - rename!(df2, ["a", "b"]) - @test df[!, "x1"] == df2[!, "a"] - @test df[!, "x2"] == df2[!, "b"] - - df2 = DataFrame([0.0 1.0; - 0.0 1.0; - 0.0 1.0], [:a, :b]) - rename!(df2, [:a, :b]) - @test df[!, :x1] == df2[!, :a] - @test df[!, :x2] == df2[!, :b] - - @test_throws MethodError DataFrame([0.0 1.0; - 0.0 1.0; - 0.0 1.0], [:a, :b], copycols=false) - - df2 = DataFrame([0.0 1.0; - 0.0 1.0; - 0.0 1.0], ["a", "b"]) - rename!(df2, ["a", "b"]) - @test df[!, "x1"] == df2[!, "a"] - @test df[!, "x2"] == df2[!, "b"] - - @test_throws MethodError DataFrame([0.0 1.0; - 0.0 1.0; - 0.0 1.0], ["a", "b"], copycols=false) - @test df == DataFrame(x1 = Union{Float64, Missing}[0.0, 0.0, 0.0], x2 = Union{Float64, Missing}[1.0, 1.0, 1.0]) @test df == DataFrame(x1 = Union{Float64, Missing}[0.0, 0.0, 0.0], @@ -149,6 +78,10 @@ const ≅ = isequal @test view(SubDataFrame(DataFrame(A=1:10), 1:4, :), [true, true, false, false], :) == DataFrame(A=1:2) @test DataFrame(a=1, b=1:2) == DataFrame(a=[1,1], b=[1,2]) 
+ + @test_throws ArgumentError DataFrame(makeunique=true) + @test_throws ArgumentError DataFrame(a=1, makeunique=true) + @test_throws ArgumentError DataFrame(a=1, makeunique=true, copycols=false) end @testset "DataFrame keyword argument constructor" begin @@ -218,7 +151,12 @@ end end @testset "pair constructor" begin - df = DataFrame(:x1 => zeros(3), :x2 => ones(3)) + @test DataFrame(:x1 => zeros(3), :x2 => ones(3)) == + DataFrame([:x1 => zeros(3), :x2 => ones(3)]) == + DataFrame("x1" => zeros(3), "x2" => ones(3)) == + DataFrame("x1" => zeros(3), "x2" => ones(3)) + + df = DataFrame([:x1 => zeros(3), :x2 => ones(3)]) @inferred DataFrame(:x1 => zeros(3), :x2 => ones(3)) @test size(df, 1) == 3 @test size(df, 2) == 2 @@ -257,6 +195,8 @@ end df = DataFrame("a"=>a, "b"=>1, "c"=>1:3, copycols=false) @test propertynames(df) == [:a, :b, :c] @test df."a" === a + + @test_throws ArgumentError DataFrame(["type" => 1, :begin => 2]) end @testset "associative" begin @@ -294,19 +234,19 @@ end x = [1,2,3] y = [1,2,3] - df = DataFrame([x, y]) + df = DataFrame([x, y], :auto) @test propertynames(df) == [:x1, :x2] @test df.x1 == x @test df.x2 == y @test df.x1 !== x @test df.x2 !== y - df = DataFrame([x, y], copycols=true) + df = DataFrame([x, y], :auto, copycols=true) @test propertynames(df) == [:x1, :x2] @test df.x1 == x @test df.x2 == y @test df.x1 !== x @test df.x2 !== y - df = DataFrame([x, y], copycols=false) + df = DataFrame([x, y], :auto, copycols=false) @test propertynames(df) == [:x1, :x2] @test df.x1 === x @test df.x2 === y @@ -345,57 +285,6 @@ end @test df."x1" === x @test df."x2" === y - df = DataFrame((x, y)) - @test propertynames(df) == [:x1, :x2] - @test df.x1 == x - @test df.x2 == y - @test df.x1 !== x - @test df.x2 !== y - df = DataFrame((x, y), copycols=true) - @test propertynames(df) == [:x1, :x2] - @test df.x1 == x - @test df.x2 == y - @test df.x1 !== x - @test df.x2 !== y - df = DataFrame((x, y), copycols=false) - @test propertynames(df) == [:x1, :x2] - @test df.x1 
=== x - @test df.x2 === y - - df = DataFrame((x, y), (:x1, :x2)) - @test propertynames(df) == [:x1, :x2] - @test df.x1 == x - @test df.x2 == y - @test df.x1 !== x - @test df.x2 !== y - df = DataFrame((x, y), (:x1, :x2), copycols=true) - @test propertynames(df) == [:x1, :x2] - @test df.x1 == x - @test df.x2 == y - @test df.x1 !== x - @test df.x2 !== y - df = DataFrame((x, y), (:x1, :x2), copycols=false) - @test propertynames(df) == [:x1, :x2] - @test df.x1 === x - @test df.x2 === y - - df = DataFrame((x, y), ("x1", "x2")) - @test names(df) == ["x1", "x2"] - @test df."x1" == x - @test df."x2" == y - @test df."x1" !== x - @test df."x2" !== y - df = DataFrame((x, y), ("x1", "x2"), copycols=true) - @test names(df) == ["x1", "x2"] - @test df."x1" == x - @test df."x2" == y - @test df."x1" !== x - @test df."x2" !== y - df = DataFrame((x, y), ("x1", "x2"), copycols=false) - @test names(df) == ["x1", "x2"] - @test df."x1" === x - @test df."x2" === y - n = [:x1, :x2] v = AbstractVector[1:3, [1,2,3]] @test DataFrame(v, n).x1 isa Vector{Int} @@ -419,14 +308,14 @@ end @test_throws DimensionMismatch DataFrame(Any[collect(1:10)], DataFrames.Index([:A, :B]), copycols=copycolsarg) @test_throws ArgumentError DataFrame(A = rand(2,2), copycols=copycolsarg) @test_throws ArgumentError DataFrame(A = rand(2,1), copycols=copycolsarg) - @test_throws ArgumentError DataFrame([1, 2, 3], copycols=copycolsarg) - @test_throws DimensionMismatch DataFrame(AbstractVector[1:3, [1,2]], copycols=copycolsarg) + @test_throws ArgumentError DataFrame([1, 2, 3], :auto, copycols=copycolsarg) + @test_throws DimensionMismatch DataFrame(AbstractVector[1:3, [1,2]], :auto, copycols=copycolsarg) @test_throws ArgumentError DataFrame([1:3, 1], [:x1, :x2], copycols=copycolsarg) @test_throws ArgumentError DataFrame([1:3, 1], ["x1", "x2"], copycols=copycolsarg) @test_throws ErrorException DataFrame([1:3, 1], copycols=copycolsarg) end - @test_throws MethodError DataFrame([1 2; 3 4], copycols=false) + @test_throws 
MethodError DataFrame([1 2; 3 4], :auto, copycols=false) end @testset "column types" begin @@ -441,66 +330,6 @@ end @test map(typeof, eachcol(df)) == answer end -@testset "Matrix constructor" begin - df = DataFrame([1 2; 3 4]) - @test size(df) == (2, 2) - @test df.x1 == [1, 3] - @test df.x2 == [2, 4] - @test_throws MethodError DataFrame([1 2; 3 4], copycols=false) - -end - -@testset "constructor with types" begin - df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], - [:A, :B, :C], 100) - @test size(df, 1) == 100 - @test size(df, 2) == 3 - @test typeof(df[!, 1]) == Vector{Union{Int, Missing}} - @test typeof(df[!, 2]) == Vector{Union{Float64, Missing}} - @test typeof(df[!, 3]) == Vector{Union{String, Missing}} - @test all(ismissing, df[!, 1]) - @test all(ismissing, df[!, 2]) - @test all(ismissing, df[!, 3]) - - df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], - ["A", "B", "C"], 100) - @test size(df, 1) == 100 - @test size(df, 2) == 3 - @test typeof(df[!, "A"]) == Vector{Union{Int, Missing}} - @test typeof(df[!, "B"]) == Vector{Union{Float64, Missing}} - @test typeof(df[!, "C"]) == Vector{Union{String, Missing}} - @test all(ismissing, df[!, "A"]) - @test all(ismissing, df[!, "B"]) - @test all(ismissing, df[!, "C"]) - - df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}], [:x1, :x2], 2) - @test size(df) == (2, 2) - @test eltype.(eachcol(df)) == [Union{Int, Missing}, Union{Float64, Missing}] - - @test_throws MethodError DataFrame([Union{Int, Missing}, Union{Float64, Missing}], - [:x1, :x2], 2, copycols=false) - @test size(df) == (2, 2) - @test eltype.(eachcol(df)) == [Union{Int, Missing}, Union{Float64, Missing}] - - df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], - [:A, :B, :C]) - @test size(df, 1) == 0 - @test size(df, 2) == 3 - @test typeof(df[!, 1]) == Vector{Union{Int, Missing}} - @test typeof(df[!, 2]) == Vector{Union{Float64, Missing}} - 
@test typeof(df[!, 3]) == Vector{Union{String, Missing}} - @test propertynames(df) == [:A, :B, :C] - - df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], - ["A", "B", "C"]) - @test size(df, 1) == 0 - @test size(df, 2) == 3 - @test typeof(df[!, "A"]) == Vector{Union{Int, Missing}} - @test typeof(df[!, "B"]) == Vector{Union{Float64, Missing}} - @test typeof(df[!, "C"]) == Vector{Union{String, Missing}} - @test names(df) == ["A", "B", "C"] -end - @testset "expansion of Ref and 0-dimensional arrays" begin @test DataFrame(a=Ref(1), b=fill(1)) == DataFrame(a=[1], b=[1]) @test DataFrame(a=Ref(1), b=fill(1), c=1:3) == @@ -515,13 +344,15 @@ end end end -@testset "removed constructors" begin - @test_throws MethodError DataFrame(Union{Int, Missing}, 10, 3) - @test_throws MethodError DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], 100) - @test_throws MethodError DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], - [:A, :B, :C], [false, false, true], 100) - @test_throws MethodError DataFrame([Int, String], [:a, :b], [false, true], 3) - @test_throws MethodError DataFrame([Union{Int, Missing}, Union{Float64, Missing}], 2) +@testset "Dict constructor corner case" begin + @test_throws ArgumentError DataFrame(Dict('a' => 1, true => 2)) + @test_throws ArgumentError DataFrame(Dict(:z => 1, "true" => 2)) + @test DataFrame(Dict("z" => 1, "true" => 2)) == DataFrame("true" => 2, "z" => 1) + @test DataFrame(Dict([Symbol(c) => i for (i, c) in enumerate('a':'z')])) == + DataFrame(Dict([string(c) => i for (i, c) in enumerate('a':'z')])) == + DataFrame([Symbol(c) => i for (i, c) in enumerate('a':'z')]) + @test DataFrame(OrderedDict(:z => 1, :a => 2)) == DataFrame(z=1, a=2) + end end # module diff --git a/test/data.jl b/test/data.jl index 313fc60480..903fb7a735 100644 --- a/test/data.jl +++ b/test/data.jl @@ -5,8 +5,8 @@ const ≅ = isequal @testset "constructors" begin df1 = DataFrame([[1, 2, 
missing, 4], ["one", "two", missing, "four"]], [:Ints, :Strs]) - df2 = DataFrame([[1, 2, missing, 4], ["one", "two", missing, "four"]]) - df3 = DataFrame([[1, 2, missing, 4]]) + df2 = DataFrame([[1, 2, missing, 4], ["one", "two", missing, "four"]], :auto) + df3 = DataFrame([[1, 2, missing, 4]], :auto) df6 = DataFrame([[1, 2, missing, 4], [1, 2, missing, 4], ["one", "two", missing, "four"]], [:A, :B, :C]) df7 = DataFrame(x = [1, 2, missing, 4], y = ["one", "two", missing, "four"]) @@ -111,8 +111,10 @@ const ≅ = isequal end @testset "completecases and dropmissing" begin - df1 = DataFrame([Vector{Union{Int, Missing}}(1:4), Vector{Union{Int, Missing}}(1:4)]) - df2 = DataFrame([Union{Int, Missing}[1, 2, 3, 4], ["one", "two", missing, "four"]]) + df1 = DataFrame([Vector{Union{Int, Missing}}(1:4), Vector{Union{Int, Missing}}(1:4)], + :auto) + df2 = DataFrame([Union{Int, Missing}[1, 2, 3, 4], ["one", "two", missing, "four"]], + :auto) @test df2[completecases(df2), :] == df2[[1, 2, 4], :] @test dropmissing(df2) == df2[[1, 2, 4], :] @@ -181,7 +183,7 @@ end end @testset "dropmissing and unique view kwarg test" begin - df = DataFrame(rand(3,4)) + df = DataFrame(rand(3,4), :auto) for fun in (dropmissing, unique) @test fun(df) isa DataFrame @inferred fun(df) @@ -411,7 +413,7 @@ end end @testset "filter view kwarg test" begin - df = DataFrame(rand(3,4)) + df = DataFrame(rand(3,4), :auto) for fun in (row -> row.x1 > 0, :x1 => x -> x > 0, "x1" => x -> x > 0, [:x1] => x -> x > 0, ["x1"] => x -> x > 0, r"1" => x -> x > 0, AsTable(:) => x -> x.x1 > 0) diff --git a/test/dataframe.jl b/test/dataframe.jl index 4d5f6cb38b..6d6eff87c0 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -347,24 +347,6 @@ end end @testset "DataFrame constructors" begin - df = convert(DataFrame, zeros(10, 5)) - @test size(df, 1) == 10 - @test size(df, 2) == 5 - @test typeof(df[!, 1]) == Vector{Float64} - @test typeof(df[:, 1]) == Vector{Float64} - - df = convert(DataFrame, ones(10, 5)) - @test size(df, 
1) == 10 - @test size(df, 2) == 5 - @test typeof(df[!, 1]) == Vector{Float64} - @test typeof(df[:, 1]) == Vector{Float64} - - df = convert(DataFrame, Matrix{Float64}(undef, 10, 5)) - @test size(df, 1) == 10 - @test size(df, 2) == 5 - @test typeof(df[!, 1]) == Vector{Float64} - @test typeof(df[:, 1]) == Vector{Float64} - @test DataFrame([Union{Int, Missing}[1, 2, 3], Union{Float64, Missing}[2.5, 4.5, 6.5]], [:A, :B]) == DataFrame(A = Union{Int, Missing}[1, 2, 3], B = Union{Float64, Missing}[2.5, 4.5, 6.5]) @@ -384,9 +366,9 @@ end df = DataFrame(x=categorical(["a"])[1]) @test df.x isa CategoricalVector{String} - @test hash(convert(DataFrame, [1 2; 3 4])) == hash(convert(DataFrame, [1 2; 3 4])) - @test hash(convert(DataFrame, [1 2; 3 4])) != hash(convert(DataFrame, [1 3; 2 4])) - @test hash(convert(DataFrame, [1 2; 3 4])) == hash(convert(DataFrame, [1 2; 3 4]), zero(UInt)) + @test hash(DataFrame([1 2; 3 4], :auto)) == hash(DataFrame([1 2; 3 4], :auto)) + @test hash(DataFrame([1 2; 3 4], :auto)) != hash(DataFrame([1 3; 2 4], :auto)) + @test hash(DataFrame([1 2; 3 4], :auto)) == hash(DataFrame([1 2; 3 4], :auto), zero(UInt)) end @testset "push!(df, row)" begin @@ -1006,7 +988,7 @@ end @test rename(x -> 1, df) == DataFrame(Symbol("1") => 1) end - sdf = view(DataFrame(ones(2,3)), 1:2, 1:3) + sdf = view(DataFrame(ones(2,3), :auto), 1:2, 1:3) @test_throws ArgumentError rename!(uppercase, sdf) @test_throws ArgumentError rename!(sdf, :x1 => :y1) @test_throws ArgumentError rename!(sdf, [:a, :b, :c]) @@ -1071,7 +1053,7 @@ end end @testset "column conversions" begin - df = DataFrame([collect(1:10), collect(1:10)]) + df = DataFrame([collect(1:10), collect(1:10)], :auto) @test !isa(df[!, 1], Vector{Union{Int, Missing}}) @test allowmissing!(df, 1) === df @test isa(df[!, 1], Vector{Union{Int, Missing}}) @@ -1085,7 +1067,7 @@ end @test disallowmissing!(df, 1) === df @test isa(df[!, 1], Vector{Int}) - df = DataFrame([collect(1:10), collect(1:10)]) + df = DataFrame([collect(1:10), 
collect(1:10)], :auto) @test !isa(df[!, 1], Vector{Union{Int, Missing}}) @test allowmissing!(df, Not(Not(1))) === df @test isa(df[!, 1], Vector{Union{Int, Missing}}) @@ -1100,19 +1082,19 @@ end @test isa(df[!, 1], Vector{Int}) for em in [true, false] - df = DataFrame([collect(1:10), collect(1:10)]) + df = DataFrame([collect(1:10), collect(1:10)], :auto) @test allowmissing!(df, [1,2]) === df @test isa(df[!, 1], Vector{Union{Int, Missing}}) && isa(df[!, 2], Vector{Union{Int, Missing}}) @test disallowmissing!(df, [1,2], error=em) === df @test isa(df[!, 1], Vector{Int}) && isa(df[!, 2], Vector{Int}) - df = DataFrame([collect(1:10), collect(1:10)]) + df = DataFrame([collect(1:10), collect(1:10)], :auto) @test allowmissing!(df, Not(Not([1,2]))) === df @test isa(df[!, 1], Vector{Union{Int, Missing}}) && isa(df[!, 2], Vector{Union{Int, Missing}}) @test disallowmissing!(df, Not(Not([1,2])), error=em) === df @test isa(df[!, 1], Vector{Int}) && isa(df[!, 2], Vector{Int}) - df = DataFrame([collect(1:10), collect(1:10)]) + df = DataFrame([collect(1:10), collect(1:10)], :auto) @test_throws BoundsError allowmissing!(df, [true]) @test allowmissing!(df, [true, true]) === df @test isa(df[!, 1], Vector{Union{Int, Missing}}) && isa(df[!, 2], Vector{Union{Int, Missing}}) @@ -1120,25 +1102,25 @@ end @test disallowmissing!(df, [true,true], error=em) === df @test isa(df[!, 1], Vector{Int}) && isa(df[!, 2], Vector{Int}) - df = DataFrame([collect(1:10), collect(1:10)]) + df = DataFrame([collect(1:10), collect(1:10)], :auto) @test allowmissing!(df) === df @test isa(df[!, 1], Vector{Union{Int, Missing}}) && isa(df[!, 2], Vector{Union{Int, Missing}}) @test disallowmissing!(df, error=em) === df @test isa(df[!, 1], Vector{Int}) && isa(df[!, 2], Vector{Int}) - df = DataFrame([collect(1:10), collect(1:10)]) + df = DataFrame([collect(1:10), collect(1:10)], :auto) @test allowmissing!(df, :) === df @test isa(df[!, 1], Vector{Union{Int, Missing}}) && isa(df[!, 2], Vector{Union{Int, Missing}}) @test 
disallowmissing!(df, :, error=em) === df @test isa(df[!, 1], Vector{Int}) && isa(df[!, 2], Vector{Int}) - df = DataFrame([collect(1:10), collect(1:10)]) + df = DataFrame([collect(1:10), collect(1:10)], :auto) @test allowmissing!(df, r"") === df @test isa(df[!, 1], Vector{Union{Int, Missing}}) && isa(df[!, 2], Vector{Union{Int, Missing}}) @test disallowmissing!(df, r"", error=em) === df @test isa(df[!, 1], Vector{Int}) && isa(df[!, 2], Vector{Int}) - df = DataFrame([collect(1:10), collect(1:10)]) + df = DataFrame([collect(1:10), collect(1:10)], :auto) @test allowmissing!(df, Not(1:0)) === df @test isa(df[!, 1], Vector{Union{Int, Missing}}) && isa(df[!, 2], Vector{Union{Int, Missing}}) @test disallowmissing!(df, Not(1:0), error=em) === df @@ -1146,7 +1128,7 @@ end end df = DataFrame([CategoricalArray(1:10), - CategoricalArray(string.('a':'j'))]) + CategoricalArray(string.('a':'j'))], :auto) @test allowmissing!(df) === df @test all(x->x <: CategoricalVector, typeof.(eachcol(df))) @test eltype(df[!, 1]) <: Union{CategoricalValue{Int}, Missing} @@ -1416,7 +1398,7 @@ end end @testset "handling of end in indexing" begin - z = DataFrame(rand(4,5)) + z = DataFrame(rand(4,5), :auto) x = z y = deepcopy(x) @test x[:, end] == x[:, 5] diff --git a/test/dataframerow.jl b/test/dataframerow.jl index 68058de3b6..950f2aff7b 100644 --- a/test/dataframerow.jl +++ b/test/dataframerow.jl @@ -106,15 +106,15 @@ end @test df.c[3] == "C" df = DataFrame([1 2 3 4 - 5 6 7 8]) + 5 6 7 8], :auto) r = df[1, r"[1-3]"] @test names(r) == ["x1", "x2", "x3"] r[:] .= 10 @test df == DataFrame([10 10 10 4 - 5 6 7 8]) + 5 6 7 8], :auto) r[r"[2-3]"] .= 20 @test df == DataFrame([10 20 20 4 - 5 6 7 8]) + 5 6 7 8], :auto) end @testset "equality" begin @@ -235,13 +235,13 @@ end @test !haskey(r, 1000) @test_throws ArgumentError haskey(r, true) - x = DataFrame(ones(5,4)) + x = DataFrame(ones(5,4), :auto) dfr = view(x, 2, 2:3) @test names(dfr) == names(x)[2:3] dfr = view(x, 2, [4,2]) @test names(dfr) == 
names(x)[[4,2]] - x = DataFrame(ones(10,10)) + x = DataFrame(ones(10,10), :auto) r = x[3, [8, 5, 1, 3]] @test length(r) == 4 @test lastindex(r) == 4 @@ -350,7 +350,7 @@ end @testset "iteration and collect" begin ref = ["a", "b", "c"] - df = DataFrame(permutedims(ref)) + df = DataFrame(permutedims(ref), :auto) dfr = df[1, :] @test Base.IteratorEltype(dfr) == Base.EltypeUnknown() @test collect(dfr) == ref @@ -366,7 +366,7 @@ end end @testset "duplicate column" begin - df = DataFrame([11:16 21:26 31:36 41:46]) + df = DataFrame([11:16 21:26 31:36 41:46], :auto) sdf = view(df, [3,1,4], [3,1,4]) @test_throws ArgumentError df[2, [2,2,2]] @test_throws ArgumentError sdf[2, [2,2,2]] @@ -522,7 +522,7 @@ end end @testset "rownumber" begin - df = DataFrame(reshape(1:12, 3, 4)) + df = DataFrame(reshape(1:12, 3, 4), :auto) dfr = df[2, :] @test rownumber(dfr) == 2 @test parentindices(dfr) == (2, 1:4) diff --git a/test/deprecated.jl b/test/deprecated.jl index b21470d668..2e05d9f9df 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -46,8 +46,6 @@ const ≅ = isequal @test df."x1" === x @test df."x2" === y - @test_throws MethodError DataFrame!([1 2; 3 4], copycols=false) - @test_throws MethodError DataFrame!([1 2; 3 4]) @test_throws MethodError DataFrame!([Union{Int, Missing}, Union{Float64, Missing}], [:x1, :x2], 2) end @@ -161,7 +159,7 @@ end end @testset "categorical!" 
begin - df = DataFrame([["a", "b"], ['a', 'b'], [true, false], 1:2, ["x", "y"]]) + df = DataFrame([["a", "b"], ['a', 'b'], [true, false], 1:2, ["x", "y"]], :auto) @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df)))), [CategoricalArrays.CategoricalValue{String,UInt32}, Char, Bool, Int, @@ -207,7 +205,7 @@ end CategoricalArrays.CategoricalValue{Int,UInt32}, String])) - df = DataFrame([["a", missing]]) + df = DataFrame([["a", missing]], :auto) categorical!(df) @test df.x1 isa CategoricalVector{Union{Missing, String}} @@ -233,4 +231,158 @@ end DataFrame(variable=:a, min=1, min2=1, max2=2, max=2) end +@testset "deprecated DataFrame constructors" begin + @test DataFrame(([1,2], [3,4])) == DataFrame([[1,2], [3,4]], :auto) + @test DataFrame((categorical([1,2]), categorical([3,4]))) == + DataFrame([categorical([1,2]), categorical([3,4])], :auto) + @test DataFrame(([1,2], [3,4]), ("a", "b")) == DataFrame([[1,2], [3,4]], ["a", "b"]) + @test DataFrame(([1,2], [3,4]), (:a, :b)) == DataFrame([[1,2], [3,4]], [:a, :b]) + @test DataFrame(([1,2,3], [1,2,3])) == DataFrame((1:3, 1:3)) == DataFrame((1:3, [1,2,3])) + @test DataFrame(("x1"=>1:3, "x2"=>[1,2,3])) == DataFrame(["x1"=>1:3, "x2"=>[1,2,3]]) + @test DataFrame((:x1=>1:3, :x2=>[1,2,3])) == DataFrame([:x1=>1:3, :x2=>[1,2,3]]) + @inferred DataFrame((1:3, 1:3)) + @inferred DataFrame((1:3, 1:3), (:a, :b)) + @inferred DataFrame((1:3, 1:3), ("a", "b")) + @inferred DataFrame((:x1=>1:3, :x2=>[1,2,3])) + @inferred DataFrame(("x1"=>1:3, "x2"=>[1,2,3])) + @test DataFrame(Union{Float64, Missing}[0.0 1.0; + 0.0 1.0; + 0.0 1.0]) == + convert(DataFrame, Union{Float64, Missing}[0.0 1.0; + 0.0 1.0; + 0.0 1.0]) + @test names(DataFrame([0.0 1.0; + 0.0 1.0; + 0.0 1.0], ["a", "b"])) == ["a", "b"] + @test names(DataFrame([0.0 1.0; + 0.0 1.0; + 0.0 1.0], [:a, :b])) == ["a", "b"] + + x = [1,2,3] + y = [1,2,3] + + df = DataFrame((x, y)) + @test propertynames(df) == [:x1, :x2] + @test df.x1 == x + @test df.x2 == y + @test df.x1 !== x + @test 
df.x2 !== y + df = DataFrame((x, y), copycols=true) + @test propertynames(df) == [:x1, :x2] + @test df.x1 == x + @test df.x2 == y + @test df.x1 !== x + @test df.x2 !== y + df = DataFrame((x, y), copycols=false) + @test propertynames(df) == [:x1, :x2] + @test df.x1 === x + @test df.x2 === y + + df = DataFrame((x, y), (:x1, :x2)) + @test propertynames(df) == [:x1, :x2] + @test df.x1 == x + @test df.x2 == y + @test df.x1 !== x + @test df.x2 !== y + df = DataFrame((x, y), (:x1, :x2), copycols=true) + @test propertynames(df) == [:x1, :x2] + @test df.x1 == x + @test df.x2 == y + @test df.x1 !== x + @test df.x2 !== y + df = DataFrame((x, y), (:x1, :x2), copycols=false) + @test propertynames(df) == [:x1, :x2] + @test df.x1 === x + @test df.x2 === y + + df = DataFrame((x, y), ("x1", "x2")) + @test names(df) == ["x1", "x2"] + @test df."x1" == x + @test df."x2" == y + @test df."x1" !== x + @test df."x2" !== y + df = DataFrame((x, y), ("x1", "x2"), copycols=true) + @test names(df) == ["x1", "x2"] + @test df."x1" == x + @test df."x2" == y + @test df."x1" !== x + @test df."x2" !== y + df = DataFrame((x, y), ("x1", "x2"), copycols=false) + @test names(df) == ["x1", "x2"] + @test df."x1" === x + @test df."x2" === y + + df = DataFrame([1 2; 3 4], :auto) + @test size(df) == (2, 2) + @test df.x1 == [1, 3] + @test df.x2 == [2, 4] + + df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], + [:A, :B, :C], 100) + @test size(df, 1) == 100 + @test size(df, 2) == 3 + @test typeof(df[!, 1]) == Vector{Union{Int, Missing}} + @test typeof(df[!, 2]) == Vector{Union{Float64, Missing}} + @test typeof(df[!, 3]) == Vector{Union{String, Missing}} + @test all(ismissing, df[!, 1]) + @test all(ismissing, df[!, 2]) + @test all(ismissing, df[!, 3]) + + df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], + ["A", "B", "C"], 100) + @test size(df, 1) == 100 + @test size(df, 2) == 3 + @test typeof(df[!, "A"]) == Vector{Union{Int, Missing}} + 
@test typeof(df[!, "B"]) == Vector{Union{Float64, Missing}} + @test typeof(df[!, "C"]) == Vector{Union{String, Missing}} + @test all(ismissing, df[!, "A"]) + @test all(ismissing, df[!, "B"]) + @test all(ismissing, df[!, "C"]) + + df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}], [:x1, :x2], 2) + @test size(df) == (2, 2) + @test eltype.(eachcol(df)) == [Union{Int, Missing}, Union{Float64, Missing}] + + @test_throws MethodError DataFrame([Union{Int, Missing}, Union{Float64, Missing}], + [:x1, :x2], 2, copycols=false) + @test size(df) == (2, 2) + @test eltype.(eachcol(df)) == [Union{Int, Missing}, Union{Float64, Missing}] + + df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], + [:A, :B, :C]) + @test size(df, 1) == 0 + @test size(df, 2) == 3 + @test typeof(df[!, 1]) == Vector{Union{Int, Missing}} + @test typeof(df[!, 2]) == Vector{Union{Float64, Missing}} + @test typeof(df[!, 3]) == Vector{Union{String, Missing}} + @test propertynames(df) == [:A, :B, :C] + + df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], + ["A", "B", "C"]) + @test size(df, 1) == 0 + @test size(df, 2) == 3 + @test typeof(df[!, "A"]) == Vector{Union{Int, Missing}} + @test typeof(df[!, "B"]) == Vector{Union{Float64, Missing}} + @test typeof(df[!, "C"]) == Vector{Union{String, Missing}} + @test names(df) == ["A", "B", "C"] + + df = convert(DataFrame, zeros(10, 5)) + @test size(df, 1) == 10 + @test size(df, 2) == 5 + @test typeof(df[!, 1]) == Vector{Float64} + @test typeof(df[:, 1]) == Vector{Float64} + + df = convert(DataFrame, ones(10, 5)) + @test size(df, 1) == 10 + @test size(df, 2) == 5 + @test typeof(df[!, 1]) == Vector{Float64} + @test typeof(df[:, 1]) == Vector{Float64} + + df = convert(DataFrame, Matrix{Float64}(undef, 10, 5)) + @test size(df, 1) == 10 + @test size(df, 2) == 5 + @test typeof(df[!, 1]) == Vector{Float64} + @test typeof(df[:, 1]) == Vector{Float64} +end + end # module diff --git 
a/test/grouping.jl b/test/grouping.jl index ef7de6fb7a..cafb5d7e08 100644 --- a/test/grouping.jl +++ b/test/grouping.jl @@ -1826,8 +1826,8 @@ end @testset "Allow returning DataFrame() or NamedTuple() to drop group" begin N = 4 for (i, x1) in enumerate(collect.(Iterators.product(repeat([[true, false]], N)...))), - er in (DataFrame(), view(DataFrame(ones(2,2)), 2:1, 2:1), - view(DataFrame(ones(2,2)), 1:2, 2:1), + er in (DataFrame(), view(DataFrame(ones(2,2), :auto), 2:1, 2:1), + view(DataFrame(ones(2,2), :auto), 1:2, 2:1), NamedTuple(), rand(0,0), rand(5,0), DataFrame(x1=Int[]), DataFrame(x1=Any[]), (x1=Int[],), (x1=Any[],), rand(0,1)), @@ -1992,7 +1992,7 @@ end DataFrame(g=1:100, g_function=1:100) end - df_ref = DataFrame(rand(10, 4)) + df_ref = DataFrame(rand(10, 4), :auto) df_ref.g = shuffle!([1,2,2,3,3,3,4,4,4,4]) for i in 0:nrow(df_ref), dosort in [true, false], dokeepkeys in [true, false] @@ -2009,7 +2009,7 @@ end end @testset "passing columns" begin - df = DataFrame(rand(10, 4)) + df = DataFrame(rand(10, 4), :auto) df.g = shuffle!([1,2,2,3,3,3,4,4,4,4]) gdf = groupby_checked(df, :g) @@ -2932,9 +2932,10 @@ end maximum, minimum, maximum∘skipmissing, minimum∘skipmissing, first, last, length, first∘skipmissing, last∘skipmissing), col in ([ones(2,2), zeros(2,2), ones(2,2)], [ones(2,2), zeros(2,2), missing], - [DataFrame(ones(2,2)), DataFrame(zeros(2,2)), DataFrame(ones(2,2))], - [DataFrame(ones(2,2)), DataFrame(zeros(2,2)), ones(2,2)], - [DataFrame(ones(2,2)), DataFrame(zeros(2,2)), missing], + [DataFrame(ones(2,2), :auto), DataFrame(zeros(2,2), :auto), + DataFrame(ones(2,2), :auto)], [DataFrame(ones(2,2), :auto), + DataFrame(zeros(2,2), :auto), ones(2,2)], + [DataFrame(ones(2,2), :auto), DataFrame(zeros(2,2), :auto), missing], [(a=1, b=2), (a=3, b=4), (a=5, b=6)], [(a=1, b=2), (a=3, b=4), missing]) gdf = groupby_checked(DataFrame(g=[1, 1, 1], x=col), :g) if fun === length diff --git a/test/indexing.jl b/test/indexing.jl index 845a4c9a0e..feaefe27fe 100644 --- 
a/test/indexing.jl +++ b/test/indexing.jl @@ -562,7 +562,7 @@ end end @testset "additional tests of post-! getindex rules" begin - df = DataFrame(reshape(1.5:16.5, (4,4))) + df = DataFrame(reshape(1.5:16.5, (4,4)), :auto) @test df[2,2] == df[!, 2][2] == 6.5 @test_throws BoundsError df[0,2] @@ -1089,7 +1089,7 @@ end df = DataFrame(a=1:3, b=4:6, c=7:9) df2 = df[!, :] @test_throws MethodError df[1:2, 1:2] = 1 - @test_throws ArgumentError df[1:2, 1:2] = DataFrame(ones(2,2)) + @test_throws ArgumentError df[1:2, 1:2] = DataFrame(ones(2,2), :auto) @test df == DataFrame(a=1:3, b=4:6, c=7:9) df[:, :] = DataFrame(a=11:13, b=14:16, c=17:19) @test df2 == DataFrame(a=11:13, b=14:16, c=17:19) @@ -1340,7 +1340,7 @@ end @test_throws DimensionMismatch df[1:2, 1:2] = m @test_throws MethodError sdf[row_sel, col_sel] = 1 - @test_throws ArgumentError sdf[row_sel, col_sel] = DataFrame(ones(3, 3)) + @test_throws ArgumentError sdf[row_sel, col_sel] = DataFrame(ones(3, 3), :auto) @test (sdf[row_sel, col_sel] = df2) == df2 @test df == df2 end @@ -1561,68 +1561,68 @@ end end @testset "setindex! with ! 
or : and multiple cols" begin - df = DataFrame(fill("x", 3, 4)) - df[!, :] = DataFrame(reshape(1:12, 3, :)) - @test df == DataFrame(reshape(1:12, 3, :)) - @test_throws ArgumentError df[!, :] = DataFrame(fill(1, 3, 4))[:, [3,2,1]] - @test_throws ArgumentError df[!, :] = DataFrame(fill(1, 3, 4))[1:2, :] + df = DataFrame(fill("x", 3, 4), :auto) + df[!, :] = DataFrame(reshape(1:12, 3, :), :auto) + @test df == DataFrame(reshape(1:12, 3, :), :auto) + @test_throws ArgumentError df[!, :] = DataFrame(fill(1, 3, 4), :auto)[:, [3,2,1]] + @test_throws ArgumentError df[!, :] = DataFrame(fill(1, 3, 4), :auto)[1:2, :] - df = DataFrame(fill("x", 3, 4)) - df[!, Not(4)] = DataFrame(reshape(1:12, 3, :))[:, 1:3] - @test df[:, 1:3] == DataFrame(reshape(1:12, 3, :))[:, 1:3] + df = DataFrame(fill("x", 3, 4), :auto) + df[!, Not(4)] = DataFrame(reshape(1:12, 3, :), :auto)[:, 1:3] + @test df[:, 1:3] == DataFrame(reshape(1:12, 3, :), :auto)[:, 1:3] - df = DataFrame(fill("x", 3, 4)) + df = DataFrame(fill("x", 3, 4), :auto) df[!, :] = reshape(1:12, 3, :) - @test df == DataFrame(reshape(1:12, 3, :)) + @test df == DataFrame(reshape(1:12, 3, :), :auto) - df = DataFrame(fill("x", 3, 4)) + df = DataFrame(fill("x", 3, 4), :auto) df[!, Not(4)] = reshape(1:12, 3, :)[:, 1:3] - @test df[:, 1:3] == DataFrame(reshape(1:12, 3, :))[:, 1:3] + @test df[:, 1:3] == DataFrame(reshape(1:12, 3, :), :auto)[:, 1:3] dfv = view(df, :, :) - @test_throws ArgumentError dfv[!, :] = DataFrame(reshape(1:12, 3, :)) + @test_throws ArgumentError dfv[!, :] = DataFrame(reshape(1:12, 3, :), :auto) @test_throws ArgumentError dfv[!, :] = reshape(1:12, 3, :) for rows in [:, 1:3], cols in [:, r"", Not(r"xx"), 1:4] - df = DataFrame(ones(3,4)) - df[rows, cols] = DataFrame(reshape(1:12, 3, :)) - @test df == DataFrame(reshape(1:12, 3, :)) + df = DataFrame(ones(3,4), :auto) + df[rows, cols] = DataFrame(reshape(1:12, 3, :), :auto) + @test df == DataFrame(reshape(1:12, 3, :), :auto) end for rows in [:, 1:3], cols in [:, r"", Not(r"xx"), 
1:4] - df = DataFrame(ones(3,4)) + df = DataFrame(ones(3,4), :auto) df[rows, cols] = reshape(1:12, 3, :) - @test df == DataFrame(reshape(1:12, 3, :)) + @test df == DataFrame(reshape(1:12, 3, :), :auto) end end @testset "additional setindex! tests" begin - df = DataFrame(reshape(1:12, 4, :)) + df = DataFrame(reshape(1:12, 4, :), :auto) df[1:2, :] = df[3:4, :] @test df == DataFrame([3 7 11 - 4 8 12 - 3 7 11 - 4 8 12]) + 4 8 12 + 3 7 11 + 4 8 12], :auto) df[[true,false,true,false], :] = df[[2,4], :] @test df == DataFrame([4 8 12 - 4 8 12 - 4 8 12 - 4 8 12]) + 4 8 12 + 4 8 12 + 4 8 12], :auto) @test_throws MethodError df[1, :] = 1 df[:, 2] = ones(4) @test df == DataFrame([4 1 12 - 4 1 12 - 4 1 12 - 4 1 12]) + 4 1 12 + 4 1 12 + 4 1 12], :auto) @test_throws InexactError df[:, 2] = fill(1.5, 4) end @testset "invalid view tests" begin - dfr = DataFrame(ones(2,3)) + dfr = DataFrame(ones(2,3), :auto) for df in (dfr, view(dfr, 1:2, 1:3)) for r in (1, 1:1) @test_throws BoundsError view(df, r, 0:1) @@ -1785,10 +1785,10 @@ end end @testset "old setindex! 
tests" begin - df = DataFrame(reshape(1:12, 4, :)) + df = DataFrame(reshape(1:12, 4, :), :auto) @test_throws MethodError df[1, :] = df[1:1, :] - df = DataFrame(reshape(1:12, 4, :)) + df = DataFrame(reshape(1:12, 4, :), :auto) # Scalar broadcasting assignment of rows @test_throws MethodError df[1:2, :] = 1 @@ -1818,13 +1818,13 @@ end @test_throws MethodError df[[true,false,false,true], 2:3] = [2,3] # test of 1-row DataFrame assignment - df = DataFrame([1 2 3]) - @test_throws MethodError df[1, 2:3] = DataFrame([11 12]) - @test_throws MethodError df[1, [false, true, true]] = DataFrame([11 12]) + df = DataFrame([1 2 3], :auto) + @test_throws MethodError df[1, 2:3] = DataFrame([11 12], :auto) + @test_throws MethodError df[1, [false, true, true]] = DataFrame([11 12], :auto) end @testset "cornercase of view indexing" begin - df = DataFrame(reshape(1:12, 4, :)) + df = DataFrame(reshape(1:12, 4, :), :auto) dfr = df[1, 3:2] for idx in [:x1, :x2, :x3, :x4] @test_throws ArgumentError dfr[idx] diff --git a/test/indexing_begin_tests.jl b/test/indexing_begin_tests.jl index 772e0afe68..75a7a8c64a 100644 --- a/test/indexing_begin_tests.jl +++ b/test/indexing_begin_tests.jl @@ -1,5 +1,5 @@ @testset "begin and end tests" begin - df = DataFrame([(i, j) for i in 1:3, j in 1:4]) + df = DataFrame([(i, j) for i in 1:3, j in 1:4], :auto) @test df[begin, begin] == df[1, 1] @test df[begin, end] == df[1, 4] @test df[end, begin] == df[3, 1] diff --git a/test/io.jl b/test/io.jl index 99bf28dc88..aaeaa57259 100644 --- a/test/io.jl +++ b/test/io.jl @@ -30,7 +30,7 @@ using Test, DataFrames, CategoricalArrays, Dates, Markdown @test repr(MIME("text/latex"), eachrow(df)) == str @test_throws ArgumentError DataFrames._show(stdout, MIME("text/latex"), - DataFrame(ones(2,2)), rowid=10) + DataFrame(ones(2,2), :auto), rowid=10) end @testset "Huge LaTeX export" begin @@ -131,7 +131,7 @@ end "1#undef1.5" @test_throws ArgumentError DataFrames._show(stdout, MIME("text/html"), - DataFrame(ones(2,2)), rowid=10) 
+ DataFrame(ones(2,2), :auto), rowid=10) df = DataFrame( A=Int64[1,4,9,16], @@ -343,7 +343,7 @@ end end @testset "summary tests" begin - df = DataFrame(ones(2,3)) + df = DataFrame(ones(2,3), :auto) for (v, s) in [(df, "2×3 DataFrame"), (view(df, :, :), "2×3 SubDataFrame"), diff --git a/test/iteration.jl b/test/iteration.jl index 7ef71e643e..3d70c3334e 100644 --- a/test/iteration.jl +++ b/test/iteration.jl @@ -98,7 +98,7 @@ end end @testset "SubDataFrame" begin - df = DataFrame([11:16 21:26 31:36 41:46]) + df = DataFrame([11:16 21:26 31:36 41:46], :auto) sdf = view(df, [3,1,4], [3,1,4]) @test sdf == df[[3,1,4], [3,1,4]] @test eachrow(sdf) == eachrow(df[[3,1,4], [3,1,4]]) @@ -108,7 +108,7 @@ end end @testset "parent mutation" begin - df = DataFrame([11:16 21:26 31:36 41:46]) + df = DataFrame([11:16 21:26 31:36 41:46], :auto) sdf = view(df, [3,1,4], [3,1,4]) erd = eachrow(df) erv = eachrow(sdf) @@ -123,7 +123,7 @@ end end @testset "getproperty and propertynames" begin - df_base = DataFrame([11:16 21:26 31:36 41:46]) + df_base = DataFrame([11:16 21:26 31:36 41:46], :auto) for df in (df_base, view(df_base, 1:3, 1:3)) for x in (eachcol(df), eachrow(df)) @test propertynames(x) == propertynames(df) @@ -142,7 +142,7 @@ end end @testset "keys, values and pairs for eachcol" begin - df = DataFrame([11:16 21:26 31:36 41:46]) + df = DataFrame([11:16 21:26 31:36 41:46], :auto) cols = eachcol(df) diff --git a/test/reshape.jl b/test/reshape.jl index ff6e7e6b1e..0c1cb27f79 100644 --- a/test/reshape.jl +++ b/test/reshape.jl @@ -83,7 +83,7 @@ const ≅ = isequal @test_throws ArgumentError unstack(df, Int[], :Key, :Value) @test_throws ArgumentError unstack(df, r"xxxxx", :Key, :Value) @test_throws ArgumentError unstack(df, Symbol[], :Key, :Value) - @test_throws ArgumentError unstack(stack(DataFrame(rand(10, 10))), + @test_throws ArgumentError unstack(stack(DataFrame(rand(10, 10), :auto)), :id, :variable, :value) @test_throws TypeError unstack(df, :Key, :Value, renamecols=Symbol) @@ -187,7 
+187,7 @@ end @testset "stack-unstack correctness" begin Random.seed!(1234) - x = DataFrame(rand(100, 50)) + x = DataFrame(rand(100, 50), :auto) x[!, :id] = [1:99; missing] x[!, :id2] = string.("a", x[!, :id]) x[!, :s] = [i % 2 == 0 ? randstring() : missing for i in 1:100] @@ -329,13 +329,13 @@ end @test d1us3 == unstack(d1s2) # test unstack with exactly one key column that is not passed - df1 = stack(DataFrame(rand(10, 10))) + df1 = stack(DataFrame(rand(10,10), :auto)) df1[!, :id] = 1:100 @test size(unstack(df1, :variable, :value)) == (100, 11) @test unstack(df1, :variable, :value) ≅ unstack(df1) # test empty keycol - @test_throws ArgumentError unstack(stack(DataFrame(rand(3, 2))), :variable, :value) + @test_throws ArgumentError unstack(stack(DataFrame(rand(3,2), :auto)), :variable, :value) end @testset "column names duplicates" begin @@ -477,7 +477,7 @@ end end @testset "test stack eltype" begin - df = DataFrame(rand(4, 5)) + df = DataFrame(rand(4,5), :auto) sdf = stack(df) @test eltype(sdf.variable) === String @test eltype(typeof(sdf.variable)) === String diff --git a/test/select.jl b/test/select.jl index fe612794de..169bdee722 100644 --- a/test/select.jl +++ b/test/select.jl @@ -568,7 +568,7 @@ end end @testset "select and select! with multiple selectors passed" begin - df = DataFrame(rand(10, 4)) + df = DataFrame(rand(10, 4), :auto) @test select(df, :x2, :x4, All()) == select(df, :x2, :x4, :x1, :x3) @test select(df, :x2, :x4, Cols(:)) == select(df, :x2, :x4, :x1, :x3) @test select(df, :x2, :x4, Cols()) == select(df, :x2, :x4) @@ -600,7 +600,7 @@ end end @testset "select and select! 
renaming" begin - df = DataFrame(rand(10, 4)) + df = DataFrame(rand(10, 4), :auto) @test select(df, :x1 => :x2, :x2 => :x1) == rename(df[:, 1:2], [:x2, :x1]) @test select(df, :x2 => :x1, :x1 => :x2) == DataFrame(x1=df.x2, x2=df.x1) @test_throws ArgumentError select(df, [:x1, :x2] => :x3) @@ -628,16 +628,16 @@ end @test x2 === df.x1 @test names(df) == ["x2", "x1"] - df = DataFrame(rand(10, 4)) + df = DataFrame(rand(10, 4), :auto) select!(df, :x1, :x1 => :x2) @test df2.x1 === df2.x2 - df = DataFrame(rand(10, 4)) + df = DataFrame(rand(10, 4), :auto) df2 = select(df, :, :x1 => :x3) - @test df2 == DataFrame(collect(eachcol(df))[[1,2,1,4]]) + @test df2 == DataFrame(collect(eachcol(df))[[1,2,1,4]], :auto) @test df2.x1 !== df2.x3 df2 = select(df, :, :x1 => :x3, copycols=false) - @test df2 == DataFrame(collect(eachcol(df))[[1,2,1,4]]) + @test df2 == DataFrame(collect(eachcol(df))[[1,2,1,4]], :auto) @test df2.x1 === df2.x3 @test select(df, :x1 => :x3, :) == DataFrame(collect(eachcol(df))[[1,1,2,4]], [:x3, :x1, :x2, :x4]) @@ -647,7 +647,7 @@ end end @testset "select and select! many columns naming" begin - df = DataFrame(rand(10, 4)) + df = DataFrame(rand(10, 4), :auto) for fun in (+, ByRow(+)), copycols in [true, false] @test select(df, 1 => fun, copycols=copycols) == DataFrame(Symbol("x1_+") => df.x1) @@ -675,7 +675,7 @@ end end @testset "select and select! many different transforms" begin - df = DataFrame(rand(10, 4)) + df = DataFrame(rand(10, 4), :auto) df2 = select(df, :x2, :, :x1 => ByRow(x -> x^2) => :r1, :x1 => (x -> x .^ 2) => :r2, [:x1, :x2] => (+) => :x1, 1:2 => ByRow(/) => :x3, :x1 => :x4) @@ -715,7 +715,7 @@ end end @testset "nrow in select" begin - df_ref = DataFrame(ones(3,4)) + df_ref = DataFrame(ones(3,4), :auto) for df in [df_ref, view(df_ref, 1:2, 1:2), df_ref[1:2, []], view(df_ref, 1:2, []), df_ref[[], 1:2], view(df_ref, [], 1:2)] @@ -769,7 +769,7 @@ end end @testset "select and select! 
empty selection" begin - df = DataFrame(rand(10, 4)) + df = DataFrame(rand(10, 4), :auto) x = [1:10;] y = [1,2,3] @@ -796,7 +796,7 @@ end end @testset "wrong selection patterns" begin - df = DataFrame(rand(10, 4)) + df = DataFrame(rand(10, 4), :auto) @test_throws ArgumentError select(df, "z") @test_throws ArgumentError select(df, "z" => :x1) @@ -805,7 +805,7 @@ end end @testset "select and select! duplicates" begin - df = DataFrame(rand(10, 4)) + df = DataFrame(rand(10, 4), :auto) df_ref = copy(df) @test_throws ArgumentError select(df, :x1, :x1) @@ -826,14 +826,14 @@ end select!(df, :x2, r"x", :x1, :) @test df == df_ref[:, [:x2, :x1, :x3, :x4]] - df = DataFrame(rand(10, 2)) + df = DataFrame(rand(10, 2), [:x1, :x2]) @test select(df, [:x1, :x1] => -) == DataFrame(Symbol("x1_x1_-") => zeros(10)) select!(df, [:x1, :x1] => -) @test df == DataFrame(Symbol("x1_x1_-") => zeros(10)) end @testset "SubDataFrame selection" begin - df = DataFrame(rand(12, 5)) + df = DataFrame(rand(12, 5), :auto) sdf = view(df, 1:10, 1:4) df_ref = copy(sdf) @@ -865,8 +865,8 @@ end @testset "pseudo-broadcasting" begin df = DataFrame([1 2 3 - 4 5 6]) - df2 = DataFrame([1 2 3]) + 4 5 6], :auto) + df2 = DataFrame([1 2 3], :auto) df3 = DataFrame(x1=Char[], x2=Int[], x3=Int[]) for v in [9, Ref(9), view([9], 1)] @test select(df, [] => (() -> v) => :a, :, (:) => (+) => :d) == @@ -890,15 +890,15 @@ end @test select(df2, [] => (() -> v) => :a, :, (:) => (+) => :d) == DataFrame([9 1 2 3 6], [:a, :x1, :x2, :x3, :d]) @test select(df2, (:) => (+) => :d, :, r"z" => (() -> v) => :a) == - DataFrame([6 1 2 3 9], [:d, :x1, :x2, :x3, :a]) + DataFrame([6 1 2 3 9], [:d, :x1, :x2, :x3, :a]) @test select(df2, [] => (() -> v) => :a, :x1 => :b, (:) => (+) => :d) == DataFrame([9 1 6], [:a, :b, :d]) @test select(df2, (:) => (+) => :d, :x1 => :b, [] => (() -> v) => :a) == - DataFrame([6 1 9], [:d, :b, :a]) + DataFrame([6 1 9], [:d, :b, :a]) @test select(df2, [] => (() -> v) => :a, :x1 => (x -> x) => :b, (:) => (+) => :d) == 
DataFrame([9 1 6], [:a, :b, :d]) @test select(df2, (:) => (+) => :d, :x1 => (x -> x) => :b, [] => (() -> v) => :a) == - DataFrame([6 1 9], [:d, :b, :a]) + DataFrame([6 1 9], [:d, :b, :a]) @test isequal_coltyped(select(df3, [] => (() -> v) => :a, :x1 => x -> []), DataFrame(a=Int[], x1_function=Any[])) @@ -912,17 +912,17 @@ end @test_throws ArgumentError select(df, [] => (() -> [9]) => :a, :) @test_throws ArgumentError select(df, :, [] => (() -> [9]) => :a) @test transform(df, names(df) .=> (x -> 9) .=> names(df)) == - repeat(DataFrame([9 9 9]), nrow(df)) + repeat(DataFrame([9 9 9], :auto), nrow(df)) @test combine(df, names(df) .=> (x -> 9) .=> names(df)) == - DataFrame([9 9 9]) + DataFrame([9 9 9], :auto) @test transform(df, names(df) .=> (x -> 9) .=> names(df), :x1 => :x4) == - DataFrame([9 9 9 1; 9 9 9 4]) + DataFrame([9 9 9 1; 9 9 9 4], :auto) @test transform(df3, names(df3) .=> (x -> 9) .=> names(df3)) == - repeat(DataFrame([9 9 9]), nrow(df3)) + repeat(DataFrame([9 9 9], :auto), nrow(df3)) @test combine(df3, names(df3) .=> (x -> 9) .=> names(df3)) == - DataFrame([9 9 9]) + DataFrame([9 9 9], :auto) @test transform(df3, names(df3) .=> (x -> 9) .=> names(df3), :x1 => :x4) == - DataFrame(ones(0, 4)) + DataFrame(ones(0, 4), :auto) df = DataFrame(x1=1:2, x2=categorical(1:2), x3=[missing,2], x4=categorical([missing, 2])) @@ -1055,7 +1055,7 @@ end end @testset "empty select" begin - df_ref = DataFrame(rand(10, 4)) + df_ref = DataFrame(rand(10, 4), :auto) for df in (df_ref, view(df_ref, 1:9, 1:3)) @test ncol(select(df)) == 0 @@ -1066,7 +1066,7 @@ end end @testset "transform and transform!" 
begin - df = DataFrame(rand(10,4)) + df = DataFrame(rand(10, 4), :auto) for dfx in (df, view(df, :, :)) df2 = transform(dfx, [:x1, :x2] => +, :x2 => :x3) diff --git a/test/show.jl b/test/show.jl index 94eb6b47bf..2fe08caf3b 100644 --- a/test/show.jl +++ b/test/show.jl @@ -59,7 +59,8 @@ end end @testset "displaysize test" begin - df_big = DataFrame(reshape(Int64(10000001):Int64(10000000+25*5), 25, 5)) + df_big = DataFrame(reshape(Int64(10000001):Int64(10000000+25*5), 25, 5), + :auto) io = IOContext(IOBuffer(), :displaysize=>(11,40), :limit=>true) show(io, df_big) @@ -205,7 +206,7 @@ end @test str1 == str2 Random.seed!(1) - df_big = DataFrame(rand(25,5)) + df_big = DataFrame(rand(25,5), :auto) str1, size = capture_stdout() do show(df_big) end diff --git a/test/sort.jl b/test/sort.jl index 33f89a70f8..f552279b77 100644 --- a/test/sort.jl +++ b/test/sort.jl @@ -98,7 +98,7 @@ using DataFrames, Random, Test, CategoricalArrays Random.seed!(1) # here there will be probably no ties - df_rand1 = DataFrame(rand(100, 4)) + df_rand1 = DataFrame(rand(100, 4), :auto) # but here we know we will have ties df_rand2 = copy(df_rand1) df_rand2.x1 = shuffle([fill(1, 50); fill(2, 50)]) @@ -140,7 +140,7 @@ end @testset "non standard selectors" begin Random.seed!(1234) - df = DataFrame(rand(1:2, 1000, 4)) + df = DataFrame(rand(1:2, 1000, 4), :auto) for f in [sort, sort!, sortperm, issorted] @test f(df) == f(df, :) == f(df, All()) == f(df, Cols(:)) == f(df, r"x") == f(df, Between(1, 4)) == f(df, Not([])) @@ -148,7 +148,7 @@ end end @testset "view kwarg test" begin - df = DataFrame(rand(3,4)) + df = DataFrame(rand(3,4), :auto) @test sort(df) isa DataFrame @inferred sort(df) @test sort(view(df, 1:2, 1:2)) isa DataFrame diff --git a/test/subdataframe.jl b/test/subdataframe.jl index a02be46fcf..85a1b8d3c6 100644 --- a/test/subdataframe.jl +++ b/test/subdataframe.jl @@ -215,7 +215,7 @@ end @test names(DataFrames.index(df2)) == ["y"] @test DataFrames._names(DataFrames.index(df2)) == [:y] - x = 
DataFrame(ones(5,4)) + x = DataFrame(ones(5,4), :auto) df = view(x, 2:3, 2:3) @test names(df) == names(x)[2:3] df = view(x, 2:3, [4,2]) @@ -241,12 +241,12 @@ end end @testset "duplicate column" begin - df = DataFrame([11:16 21:26 31:36 41:46]) + df = DataFrame([11:16 21:26 31:36 41:46], :auto) @test_throws ArgumentError view(df, [3,1,4], [3,3,3]) end @testset "conversion to DataFrame" begin - df = DataFrame([11:16 21:26 31:36 41:46]) + df = DataFrame([11:16 21:26 31:36 41:46], :auto) sdf = view(df, [3,1,4], [3,2,1]) df2 = DataFrame(sdf) @test df2 isa DataFrame diff --git a/test/tables.jl b/test/tables.jl index 7b3673e189..e03c008fa4 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -196,7 +196,7 @@ end end @testset "columnindex" begin - df = DataFrame(rand(3,4)) + df = DataFrame(rand(3,4), :auto) for x in (df, view(df, 1, :), view(df, 1:1, :)) @test columnindex.(Ref(x), names(df)) == 1:4 @@ -264,4 +264,19 @@ end @test Tables.columnnames(eachrow(df)) == Tables.columnnames(df) end +@testset "test constructor with vectors" begin + @test DataFrame(Any[]) == DataFrame() + @test DataFrame(Vector[], :auto) == DataFrame() + @test DataFrame(Pair{Symbol, Vector}[], :auto) == DataFrame() + @test DataFrame(Pair[]) == DataFrame() + @test DataFrame([[1]], :auto) == DataFrame(x1=1) + @test DataFrame(Any[[1]], :auto) == DataFrame(x1=1) + @test DataFrame([:a => [1]]) == DataFrame(a=1) + @test DataFrame(Any[:a => [1]]) == DataFrame(a=1) + @test DataFrame(["a" => [1]]) == DataFrame(a=1) + @test DataFrame(Any["a" => [1]]) == DataFrame(a=1) + @test DataFrame([SubString("a", 1) => [1]]) == DataFrame(a=1) + @test DataFrame(Any[SubString("a", 1) => [1]]) == DataFrame(a=1) +end + end # module