Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve DataFrame constructors and conversions for Vector and Matrix #1325

Merged
merged 3 commits into from
Dec 30, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 14 additions & 12 deletions src/dataframe/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ particularly a Vector or CategoricalVector.

```julia
DataFrame(columns::Vector, names::Vector{Symbol}; makeunique::Bool=false)
DataFrame(columns::Matrix, names::Vector{Symbol}; makeunique::Bool=false)
DataFrame(kwargs...)
DataFrame(pairs::Pair{Symbol}...; makeunique::Bool=false)
DataFrame() # an empty DataFrame
Expand All @@ -20,7 +21,7 @@ DataFrame(ds::Vector{Associative})

**Arguments**

* `columns` : a Vector with each column as contents
* `columns` : a Vector with each column as contents or a Matrix
* `names` : the column names
* `makeunique` : if `false` (the default), an error will be raised
if duplicates in `names` are found; if `true`, duplicate names will be suffixed
Expand Down Expand Up @@ -81,7 +82,8 @@ mutable struct DataFrame <: AbstractDataFrame
if length(columns) == length(colindex) == 0
return new(Vector{Any}(0), Index())
elseif length(columns) != length(colindex)
throw(DimensionMismatch("Number of columns ($(length(columns))) and number of column names ($(length(colindex))) are not equal"))
throw(DimensionMismatch("Number of columns ($(length(columns))) and number of" *
" column names ($(length(colindex))) are not equal"))
end
lengths = [isa(col, AbstractArray) ? length(col) : 1 for col in columns]
minlen, maxlen = extrema(lengths)
Expand Down Expand Up @@ -127,13 +129,20 @@ function DataFrame(; kwargs...)
end
end

function DataFrame(columns::AbstractVector,
cnames::AbstractVector{Symbol} = gennames(length(columns));
# Build a DataFrame from a vector of columns and a matching vector of column names.
# `columns` is converted to `Vector{Any}` and `cnames` to an `Index`; `makeunique`
# is forwarded to the `Index` constructor.
function DataFrame(columns::AbstractVector, cnames::AbstractVector{Symbol};
                   makeunique::Bool=false)::DataFrame
    if any(col -> !isa(col, AbstractVector), columns)
        # Entries that are not AbstractVector are still accepted for now.
        # change to throw(ArgumentError("columns argument must be a vector of AbstractVector objects"))
        Base.depwarn("passing columns argument with non-AbstractVector entries is deprecated", :DataFrame)
    end
    colvec = convert(Vector{Any}, columns)
    colindex = Index(convert(Vector{Symbol}, cnames), makeunique=makeunique)
    return DataFrame(colvec, colindex)
end

# Build a DataFrame from a matrix: each slice `columns[:, j]` becomes one
# DataFrame column. When `cnames` is omitted it defaults to
# `gennames(size(columns, 2))`; `makeunique` is forwarded unchanged.
function DataFrame(columns::AbstractMatrix,
                   cnames::AbstractVector{Symbol} = gennames(size(columns, 2));
                   makeunique::Bool=false)
    ncols = size(columns, 2)
    slices = Any[columns[:, j] for j in 1:ncols]
    return DataFrame(slices, cnames, makeunique=makeunique)
end

# Initialize an empty DataFrame with specific eltypes and names
function DataFrame(column_eltypes::AbstractVector{T}, cnames::AbstractVector{Symbol},
nrows::Integer; makeunique::Bool=false)::DataFrame where T<:Type
Expand Down Expand Up @@ -912,14 +921,7 @@ function Base.append!(df1::DataFrame, df2::AbstractDataFrame)
return df1
end

# Convert a matrix into a DataFrame: every slice `A[:, j]` becomes one column,
# and the column index is built from `gennames` of the column count.
function Base.convert(::Type{DataFrame}, A::AbstractMatrix)
    ncols = size(A, 2)
    cols = Any[A[:, j] for j in 1:ncols]
    return DataFrame(cols, Index(gennames(ncols)))
end
# Converting a matrix to a DataFrame simply calls `DataFrame(A)`.
function Base.convert(::Type{DataFrame}, A::AbstractMatrix)
    return DataFrame(A)
end

function Base.convert(::Type{DataFrame}, d::Associative)
colnames = keys(d)
Expand Down
5 changes: 5 additions & 0 deletions src/deprecated.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import Base: @deprecate

# Deprecated: vector-of-columns constructor called without column names.
# Emits a deprecation warning, then forwards to the two-argument constructor
# with names produced by `gennames(length(columns))`.
function DataFrame(columns::AbstractVector)
    Base.depwarn("calling vector of vectors constructor without passing column names is deprecated", :DataFrame)
    cnames = gennames(length(columns))
    return DataFrame(columns, cnames)
end

@deprecate by(d::AbstractDataFrame, cols, s::Vector{Symbol}) aggregate(d, cols, map(eval, s))
@deprecate by(d::AbstractDataFrame, cols, s::Symbol) aggregate(d, cols, eval(s))

Expand Down
14 changes: 14 additions & 0 deletions test/constructors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,20 @@ module TestConstructors
@test df[:x1] == df2[:x1]
@test df[:x2] == df2[:x2]

# Matrix constructor without names: rename the columns afterwards so they can be
# compared with `df` column by column.
df2 = DataFrame([0.0 1.0; 0.0 1.0; 0.0 1.0])
names!(df2, [:x1, :x2])
for col in (:x1, :x2)
    @test df[col] == df2[col]
end

# Matrix constructor with explicit names: the names must come from the
# constructor itself, so no `names!` call is made before indexing by :a / :b.
# (Re-applying the same names here would let the test pass even if the
# constructor ignored its `cnames` argument.)
df2 = DataFrame([0.0 1.0;
                 0.0 1.0;
                 0.0 1.0], [:a, :b])
@test df[:x1] == df2[:a]
@test df[:x2] == df2[:b]

@test df == DataFrame(x1 = Union{Float64, Missing}[0.0, 0.0, 0.0],
x2 = Union{Float64, Missing}[1.0, 1.0, 1.0])
@test df == DataFrame(x1 = Union{Float64, Missing}[0.0, 0.0, 0.0],
Expand Down