Merge branch 'master' into deprecate_constructors

JuliaData · Oct 16, 2020 · 231f6c0 · 231f6c0
2 parents b52dadb + e07b08d
commit 231f6c0
Show file tree

Hide file tree

Showing 18 changed files with 647 additions and 161 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -74,6 +74,9 @@
   which if set to `true` makes them return a `SubDataFrame` view into the passed
   data frame.
 * add `only` method for `AbstractDataFrame` ([#2449](https://github.com/JuliaData/DataFrames.jl/pull/2449))
+* passing empty sets of columns in `filter`/`filter!` and in `select`/`transform`/`combine`
+  with `ByRow` is now accepted ([#2476](https://github.com/JuliaData/DataFrames.jl/pull/2476))
+* add `permutedims` method for `AbstractDataFrame` ([#2447](https://github.com/JuliaData/DataFrames.jl/pull/2447))
 
 ## Deprecated
 

diff --git a/Project.toml b/Project.toml
@@ -35,7 +35,7 @@ test = ["DataStructures", "DataValues", "Dates", "Logging", "Random", "Test"]
 
 [compat]
 julia = "1"
-CategoricalArrays = "0.8"
+CategoricalArrays = "0.8.3"
 Compat = "3.17"
 DataAPI = "1.2"
 InvertedIndices = "1"

diff --git a/docs/make.jl b/docs/make.jl
@@ -14,7 +14,10 @@ makedocs(
     doctest = false,
     clean = false,
     sitename = "DataFrames.jl",
-    format = Documenter.HTML(canonical = "https://juliadata.github.io/DataFrames.jl/stable/"),
+    format = Documenter.HTML(
+        canonical = "https://juliadata.github.io/DataFrames.jl/stable/",
+        assets = ["assets/favicon.ico"]
+    ),
     pages = Any[
         "Introduction" => "index.md",
         "User Guide" => Any[

diff --git a/docs/src/assets/favicon.ico b/docs/src/assets/favicon.ico
diff --git a/docs/src/lib/functions.md b/docs/src/lib/functions.md
@@ -57,6 +57,7 @@ vcat
 ```@docs
 stack
 unstack
+permutedims
 ```
 
 ## Sorting

diff --git a/docs/src/man/reshaping_and_pivoting.md b/docs/src/man/reshaping_and_pivoting.md
@@ -380,3 +380,53 @@ julia> first(unstack(x, :Species, :vsum), 6)
 │ 4   │ PetalWidth  │ 0.244       │ 1.326           │ 2.026          │
 │ 5   │ id          │ 25.5        │ 75.5            │ 125.5          │
 ```
+
+To turn an `AbstractDataFrame` on its side, use [`permutedims`](@ref).
+
+```jldoctest reshape
+julia> df1 = DataFrame(a=["x", "y"], b=[1.0, 2.0], c=[3, 4], d=[true, false])
+2×4 DataFrame
+│ Row │ a      │ b       │ c     │ d    │
+│     │ String │ Float64 │ Int64 │ Bool │
+├─────┼────────┼─────────┼───────┼──────┤
+│ 1   │ x      │ 1.0     │ 3     │ 1    │
+│ 2   │ y      │ 2.0     │ 4     │ 0    │
+
+julia> permutedims(df1, 1)
+3×3 DataFrame
+│ Row │ a      │ x       │ y       │
+│     │ String │ Float64 │ Float64 │
+├─────┼────────┼─────────┼─────────┤
+│ 1   │ b      │ 1.0     │ 2.0     │
+│ 2   │ c      │ 3.0     │ 4.0     │
+│ 3   │ d      │ 1.0     │ 0.0     │
+```
+
+Note that the values in the column indexed by `src_namescol` in the original `df`
+become the column names in the permuted result,
+and the column names of the original become a new column.
+Typically, this would be used on columns with homogeneous element types,
+since the element types of the other columns
+are the result of `promote_type` on _all_ the permuted columns.
+Note also that, by default, the new column created from the column names
+of the original `df` has the same name as `src_namescol`.
+An optional positional argument `dest_namescol` can alter this:
+
+```jldoctest reshape
+julia> df2 = DataFrame(a=["x", "y"], b=[1, "two"], c=[3, 4], d=[true, false])
+2×4 DataFrame
+│ Row │ a      │ b   │ c     │ d    │
+│     │ String │ Any │ Int64 │ Bool │
+├─────┼────────┼─────┼───────┼──────┤
+│ 1   │ x      │ 1   │ 3     │ 1    │
+│ 2   │ y      │ two │ 4     │ 0    │
+
+julia> permutedims(df2, 1, "different_name")
+3×3 DataFrame
+│ Row │ different_name │ x   │ y   │
+│     │ String         │ Any │ Any │
+├─────┼────────────────┼─────┼─────┤
+│ 1   │ b              │ 1   │ two │
+│ 2   │ c              │ 3   │ 4   │
+│ 3   │ d              │ 1   │ 0   │
+```
diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
@@ -994,9 +994,10 @@ end
 @inline function Base.filter((cols, f)::Pair, df::AbstractDataFrame; view::Bool=false)
     int_cols = index(df)[cols] # it will be AbstractVector{Int} or Int
     if length(int_cols) == 0
-        throw(ArgumentError("At least one column must be passed to filter on"))
+        rowidxs = [f() for _ in axes(df, 1)]
+    else
+        rowidxs = _filter_helper(f, (df[!, i] for i in int_cols)...)
     end
-    rowidxs = _filter_helper(f, (df[!, i] for i in int_cols)...)
     return view ? Base.view(df, rowidxs, :) : df[rowidxs, :]
 end
 
@@ -1006,9 +1007,10 @@ end
                                                      AbstractVector{<:Symbol}}},
                              df::AbstractDataFrame; view::Bool=false)
     if length(cols) == 0
-        throw(ArgumentError("At least one column must be passed to filter on"))
+        rowidxs = [f() for _ in axes(df, 1)]
+    else
+        rowidxs = _filter_helper(f, (df[!, i] for i in cols)...)
     end
-    rowidxs = _filter_helper(f, (df[!, i] for i in cols)...)
     return view ? Base.view(df, rowidxs, :) : df[rowidxs, :]
 end
 
@@ -1018,9 +1020,10 @@ _filter_helper(f, cols...)::BitVector = ((x...) -> f(x...)::Bool).(cols...)
                              view::Bool=false)
     df_tmp = select(df, cols.cols, copycols=false)
     if ncol(df_tmp) == 0
-        throw(ArgumentError("At least one column must be passed to filter on"))
+        rowidxs = [f(NamedTuple()) for _ in axes(df, 1)]
+    else
+        rowidxs = _filter_helper_astable(f, Tables.namedtupleiterator(df_tmp))
     end
-    rowidxs = _filter_helper_astable(f, Tables.namedtupleiterator(df_tmp))
     return view ? Base.view(df, rowidxs, :) : df[rowidxs, :]
 end
 
@@ -1101,7 +1104,7 @@ julia> filter!(AsTable(:) => nt -> nt.x == 1 || nt.y == "b", df)
 │ 3   │ 1     │ b      │
 ```
 """
-Base.filter!(f, df::AbstractDataFrame) = _filter!_helper(df, f, eachrow(df))
+Base.filter!(f, df::AbstractDataFrame) = delete!(df, findall(!f, eachrow(df)))
 Base.filter!((col, f)::Pair{<:ColumnIndex}, df::AbstractDataFrame) =
     _filter!_helper(df, f, df[!, col])
 Base.filter!((cols, f)::Pair{<:AbstractVector{Symbol}}, df::AbstractDataFrame) =
@@ -1115,17 +1118,20 @@ Base.filter!((cols, f)::Pair{<:AbstractVector{Int}}, df::AbstractDataFrame) =
 
 function _filter!_helper(df::AbstractDataFrame, f, cols...)
     if length(cols) == 0
-        throw(ArgumentError("At least one column must be passed to filter on"))
+        rowidxs = findall(x -> !f(), axes(df, 1))
+    else
+        rowidxs = findall(((x...) -> !(f(x...)::Bool)).(cols...))
     end
-    return delete!(df, findall(((x...) -> !(f(x...)::Bool)).(cols...)))
+    return delete!(df, rowidxs)
 end
 
 function Base.filter!((cols, f)::Pair{<:AsTable}, df::AbstractDataFrame)
     dff = select(df, cols.cols, copycols=false)
     if ncol(dff) == 0
-        throw(ArgumentError("At least one column must be passed to filter on"))
+        return delete!(df, findall(x -> !f(NamedTuple()), axes(df, 1)))
+    else
+        return _filter!_helper_astable(df, Tables.namedtupleiterator(dff), f)
     end
-    return _filter!_helper_astable(df, Tables.namedtupleiterator(dff), f)
 end
 
 _filter!_helper_astable(df::AbstractDataFrame, nti::Tables.NamedTupleIterator, f) =

diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl
@@ -411,6 +411,8 @@ julia> df
 """
 function mapcols!(f::Union{Function,Type}, df::DataFrame)
     # note: `f` must return a consistent length
+    ncol(df) == 0 && return df # skip if no columns
+
     vs = AbstractVector[]
     seenscalar = false
     seenvector = false

diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl
@@ -399,3 +399,107 @@ function CategoricalArrays.CategoricalArray(v::RepeatedVector)
     res.refs = repeat(res.refs, inner = [v.inner], outer = [v.outer])
     res
 end
+
+
+# `transpose` is deliberately unsupported for data frames: any call raises an
+# error pointing the user at `permutedims`. The exception has to be thrown
+# explicitly; merely constructing an exception object would hand it back as an
+# ordinary return value, and `MethodError` cannot be built from a lone message
+# string, so `ArgumentError` carries the hint instead.
+Base.transpose(::AbstractDataFrame, args...; kwargs...) =
+    throw(ArgumentError("`transpose` not defined for `AbstractDataFrame`s. Try `permutedims` instead"))
+
+"""
+    permutedims(df::AbstractDataFrame, src_namescol::Union{Int, Symbol, AbstractString},
+                [dest_namescol::Union{Symbol, AbstractString}];
+                makeunique::Bool=false)
+
+Turn `df` on its side such that rows become columns
+and values in the column indexed by `src_namescol` become the names of new columns.
+In the resulting `DataFrame`, column names of `df` will become the first column
+with name specified by `dest_namescol`.
+
+# Arguments
+- `df` : the `AbstractDataFrame`
+- `src_namescol` : the column that will become the new header.
+  This column's element type must be `AbstractString` or `Symbol`.
+- `dest_namescol` : the name of the first column in the returned `DataFrame`.
+  Defaults to the same name as `src_namescol`.
+- `makeunique` : if `false` (the default), an error will be raised
+  if duplicate names are found; if `true`, duplicate names will be suffixed
+  with `_i` (`i` starting at 1 for the first duplicate).
+
+Note: The element types of columns in resulting `DataFrame`
+(other than the first column, which always has element type `String`)
+will depend on the element types of _all_ input columns
+based on the result of `promote_type`.
+That is, if the source data frame contains `Int` and `Float64` columns,
+resulting columns will have element type `Float64`. If the source has
+`Int` and `String` columns, resulting columns will have element type `Any`.
+
+# Examples
+
+```jldoctest
+julia> df1 = DataFrame(a=["x", "y"], b=[1., 2.], c=[3, 4], d=[true,false])
+2×4 DataFrame
+│ Row │ a      │ b       │ c     │ d    │
+│     │ String │ Float64 │ Int64 │ Bool │
+├─────┼────────┼─────────┼───────┼──────┤
+│ 1   │ x      │ 1.0     │ 3     │ 1    │
+│ 2   │ y      │ 2.0     │ 4     │ 0    │
+
+julia> permutedims(df1, 1) # note the column types
+3×3 DataFrame
+│ Row │ a      │ x       │ y       │
+│     │ String │ Float64 │ Float64 │
+├─────┼────────┼─────────┼─────────┤
+│ 1   │ b      │ 1.0     │ 2.0     │
+│ 2   │ c      │ 3.0     │ 4.0     │
+│ 3   │ d      │ 1.0     │ 0.0     │
+
+julia> df2 = DataFrame(a=["x", "y"], b=[1, "two"], c=[3, 4], d=[true, false])
+2×4 DataFrame
+│ Row │ a      │ b   │ c     │ d    │
+│     │ String │ Any │ Int64 │ Bool │
+├─────┼────────┼─────┼───────┼──────┤
+│ 1   │ x      │ 1   │ 3     │ 1    │
+│ 2   │ y      │ two │ 4     │ 0    │
+
+julia> permutedims(df2, 1, "different_name")
+3×3 DataFrame
+│ Row │ different_name │ x   │ y   │
+│     │ String         │ Any │ Any │
+├─────┼────────────────┼─────┼─────┤
+│ 1   │ b              │ 1   │ two │
+│ 2   │ c              │ 3   │ 4   │
+│ 3   │ d              │ 1   │ 0   │
+```
+"""
+function Base.permutedims(df::AbstractDataFrame, src_namescol::ColumnIndex,
+                          dest_namescol::Union{Symbol, AbstractString};
+                          makeunique::Bool=false)
+
+    # An integer position is range-checked up front and reported as a
+    # `BoundsError` against the data frame's index.
+    if src_namescol isa Integer && !(1 <= src_namescol <= ncol(df))
+        throw(BoundsError(index(df), src_namescol))
+    end
+
+    # The values of this column become the column names of the result,
+    # so only `Symbol`/string element types are accepted.
+    header = df[!, src_namescol]
+    if !(eltype(header) <: SymbolOrString)
+        throw(ArgumentError("src_namescol must have eltype `Symbol` or `<:AbstractString`"))
+    end
+
+    # First output column: the names of every column except the header column.
+    df_rest = df[!, Not(src_namescol)]
+    df_names = DataFrame(dest_namescol => names(df_rest))
+
+    if ncol(df_rest) == 0
+        # Nothing to permute: one empty column per source row, named after
+        # the corresponding header value.
+        df_body = DataFrame(AbstractVector[[] for _ in 1:nrow(df)], header,
+                            makeunique=makeunique, copycols=false)
+    else
+        # Go through a `Matrix` so that rows and columns can be swapped,
+        # then label the new columns with the header values.
+        flipped = permutedims(Matrix(df_rest))
+        df_body = rename!(DataFrame(Tables.table(flipped)), header, makeunique=makeunique)
+    end
+    return hcat!(df_names, df_body, makeunique=makeunique, copycols=false)
+end
+
+# Two-argument form: the first column of the result takes its name from the
+# source names column itself, then the work is delegated to the three-argument
+# method.
+function Base.permutedims(df::AbstractDataFrame, src_namescol::ColumnIndex;
+                          makeunique::Bool=false)
+    # A `Symbol` or string selector already is the destination name.
+    if !(src_namescol isa Integer)
+        return permutedims(df, src_namescol, src_namescol; makeunique=makeunique)
+    end
+
+    # An integer selector is range-checked, then translated to the name of
+    # the column at that position.
+    1 <= src_namescol <= ncol(df) || throw(BoundsError(index(df), src_namescol))
+    return permutedims(df, src_namescol, _names(df)[src_namescol]; makeunique=makeunique)
+end