Merge b52dadb into b11fe97

JuliaData · Oct 13, 2020 · a90e7fc · a90e7fc
2 parents b11fe97 + b52dadb
commit a90e7fc
Show file tree

Hide file tree

Showing 23 changed files with 531 additions and 556 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -80,6 +80,8 @@
 ## Deprecated
 
 * `DataFrame!` is now deprecated ([#2338](https://github.com/JuliaData/DataFrames.jl/pull/2338))
+* several non-standard `DataFrame` constructors are now deprecated
+  ([#2464](https://github.com/JuliaData/DataFrames.jl/pull/2464))
 * all old deprecations now throw an error
   ([#2350](https://github.com/JuliaData/DataFrames.jl/pull/2350))
 

diff --git a/Project.toml b/Project.toml
@@ -44,5 +44,5 @@ Missings = "0.4.2"
 PooledArrays = "0.5"
 Reexport = "0.1, 0.2"
 SortingAlgorithms = "0.1, 0.2, 0.3"
-Tables = "1"
+Tables = "1.1"
 TableTraits = "0.4, 1"
diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl
@@ -12,32 +12,20 @@ DataFrame(columns::AbstractVector, names::AbstractVector{Symbol};
           makeunique::Bool=false, copycols::Bool=true)
 DataFrame(columns::AbstractVector, names::AbstractVector{<:AbstractString};
           makeunique::Bool=false, copycols::Bool=true)
-DataFrame(columns::NTuple{N,AbstractVector}, names::NTuple{N,Symbol};
-          makeunique::Bool=false, copycols::Bool=true)
-DataFrame(columns::NTuple{N,AbstractVector}, names::NTuple{N,<:AbstractString};
-          makeunique::Bool=false, copycols::Bool=true)
-DataFrame(columns::Matrix, names::AbstractVector{Symbol}; makeunique::Bool=false)
-DataFrame(columns::Matrix, names::AbstractVector{<:AbstractString};
-          makeunique::Bool=false)
 DataFrame(kwargs...)
 DataFrame(pairs::Pair{Symbol,<:Any}...; makeunique::Bool=false, copycols::Bool=true)
 DataFrame(pairs::Pair{<:AbstractString,<:Any}...; makeunique::Bool=false,
           copycols::Bool=true)
 DataFrame(pairs::AbstractVector{<:Pair}; makeunique::Bool=false, copycols::Bool=true)
-DataFrame(pairs::NTuple{N, Pair}; makeunique::Bool=false, copycols::Bool=true) where {N}
-DataFrame() # an empty DataFrame
-DataFrame(column_eltypes::AbstractVector, names::AbstractVector{Symbol},
-          nrows::Integer=0; makeunique::Bool=false)
-DataFrame(column_eltypes::AbstractVector, names::AbstractVector{<:AbstractString},
-          nrows::Integer=0; makeunique::Bool=false)
 DataFrame(ds::AbstractDict; copycols::Bool=true)
 DataFrame(table; makeunique::Bool=false, copycols::Bool=true)
 DataFrame(::Union{DataFrame, SubDataFrame}; copycols::Bool=true)
 DataFrame(::GroupedDataFrame; keepkeys::Bool=true)
+DataFrame() # an empty DataFrame
 ```
 
 # Arguments
-- `columns` : a Vector with each column as contents or a Matrix
+- `columns` : a vector with each column as contents
 - `names` : the column names
 - `makeunique` : if `false` (the default), an error will be raised
   if duplicates in `names` are found; if `true`, duplicate names will be suffixed
@@ -46,26 +34,24 @@ DataFrame(::GroupedDataFrame; keepkeys::Bool=true)
   column contents; note that the `copycols` keyword argument indicates if
   vectors passed as columns should be copied, so it is not possible to create
   a column whose name is `:copycols` using this constructor
-- `t` : elemental type of all columns
-- `nrows`, `ncols` : number of rows and columns
+- `nrows` : number of rows
 - `column_eltypes` : element type of each column
-- `categorical` : a vector of `Bool` indicating which columns should be converted
-  to `CategoricalVector`
 - `ds` : `AbstractDict` of columns
 - `table` : any type that implements the
   [Tables.jl](https://github.com/JuliaData/Tables.jl) interface
 - `copycols` : whether vectors passed as columns should be copied; if set
   to `false` then the constructor will still copy the passed columns
   if it is not possible to construct a `DataFrame` without materializing new columns.
+- `keepkeys` : whether the resulting `DataFrame` should contain the grouping columns
+  of a `GroupedDataFrame`
 
 All columns in `columns` must be `AbstractVector`s and have the same length. An
-exception are `DataFrame(kwargs...)`, `DataFrame(pairs::Pair...)`,
-`DataFrame(pairs::AbstractVector{<:Pair})`, and `DataFrame(pairs::NTuple{N, Pair})` form
-constructors which additionally allow a column to be of any other type that is
-not an `AbstractArray`, in which case the passed value is automatically repeated
-to fill a new vector of the appropriate length. As a particular rule values
-stored in a `Ref` or a `0`-dimensional `AbstractArray` are unwrapped and treated
-in the same way.
+exception is made for the `DataFrame(kwargs...)`, `DataFrame(pairs::Pair...)`, and
+`DataFrame(pairs::AbstractVector{<:Pair})` form constructors, which additionally
+allow a column to be of any other type that is not an `AbstractArray`, in which
+case the passed value is automatically repeated to fill a new vector of the
+appropriate length. As a special rule, values stored in a `Ref` or a
+`0`-dimensional `AbstractArray` are unwrapped and treated in the same way.
 
 Additionally `DataFrame` can be used to collect a [`GroupedDataFrame`](@ref)
 into a `DataFrame`. In this case the order of rows in the result follows the order
@@ -85,28 +71,53 @@ performance-critical code, do not index into a `DataFrame` inside of loops.
 
 # Examples
 ```julia
-df = DataFrame()
-v = ["x","y","z"][rand(1:3, 10)]
-df1 = DataFrame(Any[collect(1:10), v, rand(10)], [:A, :B, :C])
-df2 = DataFrame(A = 1:10, B = v, C = rand(10))
-summary(df1)
-describe(df2)
-first(df1, 10)
-df1.B
-df2[!, :C]
-df1[:, :A]
-df1[1:4, 1:2]
-df1[Not(1:4), Not(1:2)]
-df1[1:2, [:A,:C]]
-df1[1:2, r"[AC]"]
-df1[:, [:A,:C]]
-df1[:, [1,3]]
-df1[1:4, :]
-df1[1:4, :C]
-df1[1:4, :C] = 40. * df1[1:4, :C]
-[df1; df2]  # vcat
-[df1 df2]  # hcat
-size(df1)
+julia> DataFrame(a=1:2, b=0)
+2×2 DataFrame
+│ Row │ a     │ b     │
+│     │ Int64 │ Int64 │
+├─────┼───────┼───────┤
+│ 1   │ 1     │ 0     │
+│ 2   │ 2     │ 0     │
+
+julia> DataFrame("a" => 1:2, "b" => 0)
+2×2 DataFrame
+│ Row │ a     │ b     │
+│     │ Int64 │ Int64 │
+├─────┼───────┼───────┤
+│ 1   │ 1     │ 0     │
+│ 2   │ 2     │ 0     │
+
+julia> DataFrame([[1, 2], [0, 0]], [:a, :b])
+2×2 DataFrame
+│ Row │ a     │ b     │
+│     │ Int64 │ Int64 │
+├─────┼───────┼───────┤
+│ 1   │ 1     │ 0     │
+│ 2   │ 2     │ 0     │
+
+julia> DataFrame((a=[1, 2], b=[0, 0]))
+2×2 DataFrame
+│ Row │ a     │ b     │
+│     │ Int64 │ Int64 │
+├─────┼───────┼───────┤
+│ 1   │ 1     │ 0     │
+│ 2   │ 2     │ 0     │
+
+julia> DataFrame([(a=1, b=0), (a=2, b=0)])
+2×2 DataFrame
+│ Row │ a     │ b     │
+│     │ Int64 │ Int64 │
+├─────┼───────┼───────┤
+│ 1   │ 1     │ 0     │
+│ 2   │ 2     │ 0     │
+
+julia> DataFrame(Tables.table([1 0; 2 0], header=[:a, :b]))
+2×2 DataFrame
+│ Row │ a     │ b     │
+│     │ Int64 │ Int64 │
+├─────┼───────┼───────┤
+│ 1   │ 1     │ 0     │
+│ 2   │ 2     │ 0     │
 ```
 """
 struct DataFrame <: AbstractDataFrame
@@ -182,14 +193,15 @@ function DataFrame(pairs::Pair{<:AbstractString,<:Any}...; makeunique::Bool=fals
                      copycols=copycols)
 end
 
-# these two are needed as a workaround Tables.jl dispatch
-DataFrame(pairs::AbstractVector{<:Pair}; makeunique::Bool=false,
-          copycols::Bool=true) =
-    DataFrame(pairs..., makeunique=makeunique, copycols=copycols)
-
-DataFrame(pairs::NTuple{N, Pair}; makeunique::Bool=false,
-          copycols::Bool=true) where {N} =
-    DataFrame(pairs..., makeunique=makeunique, copycols=copycols)
+# this is needed as a workaround for Tables.jl dispatch
+function DataFrame(pairs::AbstractVector{<:Pair}; makeunique::Bool=false,
+                   copycols::Bool=true)
+    if isempty(pairs)
+        return DataFrame()
+    else
+        return DataFrame(pairs..., makeunique=makeunique, copycols=copycols)
+    end
+end
 
 function DataFrame(d::AbstractDict; copycols::Bool=true)
     if isa(d, Dict)
@@ -251,44 +263,6 @@ DataFrame(columns::AbstractVector{<:AbstractVector},
           makeunique::Bool=false, copycols::Bool=true) =
     DataFrame(columns, Symbol.(cnames); makeunique=makeunique, copycols=copycols)
 
-DataFrame(columns::NTuple{N, AbstractVector}, cnames::NTuple{N, Symbol};
-          makeunique::Bool=false, copycols::Bool=true) where {N} =
-    DataFrame(collect(AbstractVector, columns), collect(Symbol, cnames),
-              makeunique=makeunique, copycols=copycols)
-
-DataFrame(columns::NTuple{N, AbstractVector}, cnames::NTuple{N, AbstractString};
-          makeunique::Bool=false, copycols::Bool=true) where {N} =
-    DataFrame(columns, Symbol.(cnames); makeunique=makeunique, copycols=copycols)
-
-DataFrame(columns::NTuple{N, AbstractVector}; copycols::Bool=true) where {N} =
-    DataFrame(collect(AbstractVector, columns), gennames(length(columns)),
-              copycols=copycols)
-
-DataFrame(columns::AbstractMatrix,
-          cnames::AbstractVector{Symbol} = gennames(size(columns, 2));
-          makeunique::Bool=false) =
-    DataFrame(AbstractVector[columns[:, i] for i in 1:size(columns, 2)], cnames,
-              makeunique=makeunique, copycols=false)
-
-DataFrame(columns::AbstractMatrix, cnames::AbstractVector{<:AbstractString};
-          makeunique::Bool=false) =
-    DataFrame(columns, Symbol.(cnames); makeunique=makeunique)
-
-function DataFrame(column_eltypes::AbstractVector{T}, cnames::AbstractVector{Symbol},
-                   nrows::Integer=0; makeunique::Bool=false)::DataFrame where T<:Type
-    columns = AbstractVector[elty >: Missing ?
-                             fill!(Tables.allocatecolumn(elty, nrows), missing) :
-                             Tables.allocatecolumn(elty, nrows)
-                             for elty in column_eltypes]
-    return DataFrame(columns, Index(convert(Vector{Symbol}, cnames),
-                     makeunique=makeunique), copycols=false)
-end
-
-DataFrame(column_eltypes::AbstractVector{<:Type},
-          cnames::AbstractVector{<:AbstractString},
-          nrows::Integer=0; makeunique::Bool=false) =
-    DataFrame(column_eltypes, Symbol.(cnames), nrows; makeunique=makeunique)
-
 ##############################################################################
 ##
 ## AbstractDataFrame interface
@@ -1177,8 +1151,6 @@ function Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:sete
     return df1
 end
 
-Base.convert(::Type{DataFrame}, A::AbstractMatrix) = DataFrame(A)
-
 Base.convert(::Type{DataFrame}, d::AbstractDict) = DataFrame(d, copycols=false)
 
 function Base.push!(df::DataFrame, row::Union{AbstractDict, NamedTuple};

diff --git a/src/deprecated.jl b/src/deprecated.jl
@@ -105,3 +105,43 @@ function categorical!(df::DataFrame, cols::Union{Type, Nothing}=nothing;
     end
     return transform!(df, names(df, cols) .=> (x -> categorical(x, compress=compress)), renamecols=false)
 end
+
+@deprecate DataFrame(pairs::NTuple{N, Pair}; makeunique::Bool=false,
+          copycols::Bool=true) where {N} DataFrame(pairs..., makeunique=makeunique, copycols=copycols)
+@deprecate DataFrame(columns::NTuple{N, AbstractVector}, cnames::NTuple{N, Symbol}; makeunique::Bool=false,
+          copycols::Bool=true) where {N} DataFrame(collect(columns), collect(cnames);
+              makeunique=makeunique, copycols=copycols)
+@deprecate DataFrame(columns::NTuple{N, AbstractVector}, cnames::NTuple{N, AbstractString}; makeunique::Bool=false,
+                     copycols::Bool=true) where {N} DataFrame(collect(columns), [Symbol(c) for c in cnames];
+                                                              makeunique=makeunique, copycols=copycols)
+@deprecate DataFrame(columns::NTuple{N, AbstractVector};
+                     copycols::Bool=true) where {N} DataFrame(collect(columns),
+                                                              Symbol.(:x, 1:length(columns)), copycols=copycols)
+
+# this is not a 100% correct deprecation as it does not support makeunique, but
+# we leave it as is to show users a recommended way to create a DataFrame from a matrix
+@deprecate DataFrame(columns::AbstractMatrix, cnames::AbstractVector{Symbol} = gennames(size(columns, 2));
+                     makeunique::Bool=false) DataFrame(Tables.table(columns, header=cnames))
+@deprecate DataFrame(columns::AbstractMatrix, cnames::AbstractVector{<:AbstractString};
+                     makeunique::Bool=false) DataFrame(Tables.table(columns, header=cnames))
+
+function DataFrame(column_eltypes::AbstractVector{T}, cnames::AbstractVector{Symbol},
+                   nrows::Integer=0; makeunique::Bool=false)::DataFrame where T<:Type
+    Base.depwarn("`DataFrame` constructor with passed eltypes is deprecated. " *
+                 "Pass explicitly created columns to a `DataFrame` constructor instead.",
+                     :DataFrame)
+    columns = AbstractVector[elty >: Missing ?
+                             fill!(Tables.allocatecolumn(elty, nrows), missing) :
+                             Tables.allocatecolumn(elty, nrows)
+                             for elty in column_eltypes]
+    return DataFrame(columns, Index(convert(Vector{Symbol}, cnames),
+                     makeunique=makeunique), copycols=false)
+end
+
+DataFrame(column_eltypes::AbstractVector{<:Type},
+          cnames::AbstractVector{<:AbstractString},
+          nrows::Integer=0; makeunique::Bool=false) =
+    DataFrame(column_eltypes, Symbol.(cnames), nrows; makeunique=makeunique)
+
+import Base: convert
+@deprecate convert(::Type{DataFrame}, A::AbstractMatrix) DataFrame(Tables.table(A, header=Symbol.(:x, axes(A, 2))))
diff --git a/src/other/tables.jl b/src/other/tables.jl
@@ -43,11 +43,13 @@ fromcolumns(x, names; copycols::Bool=true) =
               copycols=copycols)
 
 function DataFrame(x::T; copycols::Bool=true) where {T}
-    if !Tables.istable(x)
-        if x isa AbstractVector && all(col -> isa(col, AbstractVector), x)
+    if !Tables.istable(x) && x isa AbstractVector && !isempty(x)
+        # here we handle inputs that the standard DataFrame constructors accept,
+        # but whose eltype(x) is more flexible than the signatures of these methods assume
+        if all(col -> isa(col, AbstractVector), x)
             return DataFrame(Vector{AbstractVector}(x), copycols=copycols)
-        elseif (x isa AbstractVector || x isa Tuple) &&
-            all(v -> v isa Pair{Symbol, <:AbstractVector}, x)
+        elseif all(v -> v isa Pair{Symbol, <:AbstractVector}, x) ||
+            all(v -> v isa Pair{<:AbstractString, <:AbstractVector}, x)
             return DataFrame(AbstractVector[last(v) for v in x], [first(v) for v in x],
                              copycols=copycols)
         end