Skip to content

Commit

Permalink
Merge branch 'master' into deprecate_constructors
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins committed Oct 16, 2020
2 parents b52dadb + e07b08d commit 231f6c0
Show file tree
Hide file tree
Showing 18 changed files with 647 additions and 161 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Expand Up @@ -74,6 +74,9 @@
which if set to `true` makes them return a `SubDataFrame` view into the passed
data frame.
* add `only` method for `AbstractDataFrame` ([#2449](https://github.com/JuliaData/DataFrames.jl/pull/2449))
* passing empty sets of columns in `filter`/`filter!` and in `select`/`transform`/`combine`
with `ByRow` is now accepted ([#2476](https://github.com/JuliaData/DataFrames.jl/pull/2476))
* add `permutedims` method for `AbstractDataFrame` ([#2447](https://github.com/JuliaData/DataFrames.jl/pull/2447))

## Deprecated

Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Expand Up @@ -35,7 +35,7 @@ test = ["DataStructures", "DataValues", "Dates", "Logging", "Random", "Test"]

[compat]
julia = "1"
CategoricalArrays = "0.8"
CategoricalArrays = "0.8.3"
Compat = "3.17"
DataAPI = "1.2"
InvertedIndices = "1"
Expand Down
5 changes: 4 additions & 1 deletion docs/make.jl
Expand Up @@ -14,7 +14,10 @@ makedocs(
doctest = false,
clean = false,
sitename = "DataFrames.jl",
format = Documenter.HTML(canonical = "https://juliadata.github.io/DataFrames.jl/stable/"),
format = Documenter.HTML(
canonical = "https://juliadata.github.io/DataFrames.jl/stable/",
assets = ["assets/favicon.ico"]
),
pages = Any[
"Introduction" => "index.md",
"User Guide" => Any[
Expand Down
Binary file added docs/src/assets/favicon.ico
Binary file not shown.
1 change: 1 addition & 0 deletions docs/src/lib/functions.md
Expand Up @@ -57,6 +57,7 @@ vcat
```@docs
stack
unstack
permutedims
```

## Sorting
Expand Down
50 changes: 50 additions & 0 deletions docs/src/man/reshaping_and_pivoting.md
Expand Up @@ -380,3 +380,53 @@ julia> first(unstack(x, :Species, :vsum), 6)
│ 4 │ PetalWidth │ 0.244 │ 1.326 │ 2.026 │
│ 5 │ id │ 25.5 │ 75.5 │ 125.5 │
```

To turn an `AbstractDataFrame` on its side, use [`permutedims`](@ref).

```jldoctest reshape
julia> df1 = DataFrame(a=["x", "y"], b=[1.0, 2.0], c=[3, 4], d=[true, false])
2×4 DataFrame
│ Row │ a │ b │ c │ d │
│ │ String │ Float64 │ Int64 │ Bool │
├─────┼────────┼─────────┼───────┼──────┤
│ 1 │ x │ 1.0 │ 3 │ 1 │
│ 2 │ y │ 2.0 │ 4 │ 0 │
julia> permutedims(df1, 1)
3×3 DataFrame
│ Row │ a │ x │ y │
│ │ String │ Float64 │ Float64 │
├─────┼────────┼─────────┼─────────┤
│ 1 │ b │ 1.0 │ 2.0 │
│ 2 │ c │ 3.0 │ 4.0 │
│ 3 │ d │ 1.0 │ 0.0 │
```

Note that the column indexed by `src_namescol` in the original `df`
becomes the column names in the permuted result,
and the column names of the original become a new column.
Typically, this would be used on columns with homogeneous element types,
since the element types of the other columns
are the result of `promote_type` on _all_ the permuted columns.
Note also that, by default, the new column created from the column names
of the original `df` has the same name as `src_namescol`.
An optional positional argument `dest_namescol` can alter this:

```jldoctest reshape
julia> df2 = DataFrame(a=["x", "y"], b=[1, "two"], c=[3, 4], d=[true, false])
2×4 DataFrame
│ Row │ a │ b │ c │ d │
│ │ String │ Any │ Int64 │ Bool │
├─────┼────────┼─────┼───────┼──────┤
│ 1 │ x │ 1 │ 3 │ 1 │
│ 2 │ y │ two │ 4 │ 0 │
julia> permutedims(df2, 1, "different_name")
3×3 DataFrame
│ Row │ different_name │ x │ y │
│ │ String │ Any │ Any │
├─────┼────────────────┼─────┼─────┤
│ 1 │ b │ 1 │ two │
│ 2 │ c │ 3 │ 4 │
│ 3 │ d │ 1 │ 0 │
```
28 changes: 17 additions & 11 deletions src/abstractdataframe/abstractdataframe.jl
Expand Up @@ -994,9 +994,10 @@ end
# Keep the rows of `df` for which `f` returns `true` when applied to the values of the
# columns selected by `cols`. If `view` is `true` a view into `df` is returned instead of
# a new data frame.
@inline function Base.filter((cols, f)::Pair, df::AbstractDataFrame; view::Bool=false)
    int_cols = index(df)[cols] # it will be AbstractVector{Int} or Int
    if length(int_cols) == 0
        # An empty column selection is accepted: `f` is then called with no
        # arguments, once per row of `df`.
        rowidxs = [f() for _ in axes(df, 1)]
    else
        rowidxs = _filter_helper(f, (df[!, i] for i in int_cols)...)
    end
    return view ? Base.view(df, rowidxs, :) : df[rowidxs, :]
end

Expand All @@ -1006,9 +1007,10 @@ end
AbstractVector{<:Symbol}}},
df::AbstractDataFrame; view::Bool=false)
if length(cols) == 0
throw(ArgumentError("At least one column must be passed to filter on"))
rowidxs = [f() for _ in axes(df, 1)]
else
rowidxs = _filter_helper(f, (df[!, i] for i in cols)...)
end
rowidxs = _filter_helper(f, (df[!, i] for i in cols)...)
return view ? Base.view(df, rowidxs, :) : df[rowidxs, :]
end

Expand All @@ -1018,9 +1020,10 @@ _filter_helper(f, cols...)::BitVector = ((x...) -> f(x...)::Bool).(cols...)
view::Bool=false)
df_tmp = select(df, cols.cols, copycols=false)
if ncol(df_tmp) == 0
throw(ArgumentError("At least one column must be passed to filter on"))
rowidxs = [f(NamedTuple()) for _ in axes(df, 1)]
else
rowidxs = _filter_helper_astable(f, Tables.namedtupleiterator(df_tmp))
end
rowidxs = _filter_helper_astable(f, Tables.namedtupleiterator(df_tmp))
return view ? Base.view(df, rowidxs, :) : df[rowidxs, :]
end

Expand Down Expand Up @@ -1101,7 +1104,7 @@ julia> filter!(AsTable(:) => nt -> nt.x == 1 || nt.y == "b", df)
│ 3 │ 1 │ b │
```
"""
Base.filter!(f, df::AbstractDataFrame) = delete!(df, findall(!f, eachrow(df)))
# The row-wise method above finds the rows for which `f` returns `false` and deletes
# them from `df` directly.
# Single-column form: `f` receives the values of column `col` one at a time.
Base.filter!((col, f)::Pair{<:ColumnIndex}, df::AbstractDataFrame) =
    _filter!_helper(df, f, df[!, col])
Base.filter!((cols, f)::Pair{<:AbstractVector{Symbol}}, df::AbstractDataFrame) =
Expand All @@ -1115,17 +1118,20 @@ Base.filter!((cols, f)::Pair{<:AbstractVector{Int}}, df::AbstractDataFrame) =

# Delete from `df` every row for which `f` returns `false`.
# `cols` are the column vectors whose values are passed to `f` positionally, row by row.
function _filter!_helper(df::AbstractDataFrame, f, cols...)
    if length(cols) == 0
        # No columns were selected: `f` takes no arguments and is called once per row.
        rowidxs = findall(x -> !f(), axes(df, 1))
    else
        # `f` must return a `Bool`; collect the indices of the rows it rejects.
        rowidxs = findall(((x...) -> !(f(x...)::Bool)).(cols...))
    end
    return delete!(df, rowidxs)
end

# In-place `filter!` for an `AsTable(cols) => f` pair: `f` receives each row of the
# selected columns as a `NamedTuple`, and rows for which it returns `false` are deleted.
function Base.filter!((cols, f)::Pair{<:AsTable}, df::AbstractDataFrame)
    dff = select(df, cols.cols, copycols=false)
    if ncol(dff) == 0
        # An empty selection is accepted: `f` is called with an empty `NamedTuple`
        # once per row of `df`.
        return delete!(df, findall(x -> !f(NamedTuple()), axes(df, 1)))
    else
        return _filter!_helper_astable(df, Tables.namedtupleiterator(dff), f)
    end
end

_filter!_helper_astable(df::AbstractDataFrame, nti::Tables.NamedTupleIterator, f) =
Expand Down
2 changes: 2 additions & 0 deletions src/abstractdataframe/iteration.jl
Expand Up @@ -411,6 +411,8 @@ julia> df
"""
function mapcols!(f::Union{Function,Type}, df::DataFrame)
# note: `f` must return a consistent length
ncol(df) == 0 && return df # skip if no columns

vs = AbstractVector[]
seenscalar = false
seenvector = false
Expand Down
104 changes: 104 additions & 0 deletions src/abstractdataframe/reshape.jl
Expand Up @@ -399,3 +399,107 @@ function CategoricalArrays.CategoricalArray(v::RepeatedVector)
res.refs = repeat(res.refs, inner = [v.inner], outer = [v.outer])
res
end


# `transpose` is intentionally unsupported for data frames; point users to `permutedims`.
# The exception has to be thrown explicitly: building an exception object does not raise
# it, and `MethodError` cannot be constructed from a message string alone, so the hint
# would never reach the user.
Base.transpose(::AbstractDataFrame, args...; kwargs...) =
    throw(ArgumentError("`transpose` not defined for `AbstractDataFrame`s. " *
                        "Try `permutedims` instead"))

"""
    permutedims(df::AbstractDataFrame, src_namescol::Union{Int, Symbol, AbstractString},
                [dest_namescol::Union{Symbol, AbstractString}];
                makeunique::Bool=false)

Turn `df` on its side such that rows become columns
and values in the column indexed by `src_namescol` become the names of new columns.
In the resulting `DataFrame`, column names of `df` will become the first column
with name specified by `dest_namescol`.

# Arguments
- `df` : the `AbstractDataFrame`
- `src_namescol` : the column that will become the new header.
  This column's element type must be `AbstractString` or `Symbol`.
- `dest_namescol` : the name of the first column in the returned `DataFrame`.
  Defaults to the same name as `src_namescol`.
- `makeunique` : if `false` (the default), an error will be raised
  if duplicate names are found; if `true`, duplicate names will be suffixed
  with `_i` (`i` starting at 1 for the first duplicate).

Note: The element types of columns in resulting `DataFrame`
(other than the first column, which always has element type `String`)
will depend on the element types of _all_ input columns
based on the result of `promote_type`.
That is, if the source data frame contains `Int` and `Float64` columns,
resulting columns will have element type `Float64`. If the source has
`Int` and `String` columns, resulting columns will have element type `Any`.

# Examples

```jldoctest
julia> df1 = DataFrame(a=["x", "y"], b=[1., 2.], c=[3, 4], d=[true,false])
2×4 DataFrame
│ Row │ a      │ b       │ c     │ d    │
│     │ String │ Float64 │ Int64 │ Bool │
├─────┼────────┼─────────┼───────┼──────┤
│ 1   │ x      │ 1.0     │ 3     │ 1    │
│ 2   │ y      │ 2.0     │ 4     │ 0    │

julia> permutedims(df1, 1) # note the column types
3×3 DataFrame
│ Row │ a      │ x       │ y       │
│     │ String │ Float64 │ Float64 │
├─────┼────────┼─────────┼─────────┤
│ 1   │ b      │ 1.0     │ 2.0     │
│ 2   │ c      │ 3.0     │ 4.0     │
│ 3   │ d      │ 1.0     │ 0.0     │

julia> df2 = DataFrame(a=["x", "y"], b=[1, "two"], c=[3, 4], d=[true, false])
2×4 DataFrame
│ Row │ a      │ b   │ c     │ d    │
│     │ String │ Any │ Int64 │ Bool │
├─────┼────────┼─────┼───────┼──────┤
│ 1   │ x      │ 1   │ 3     │ 1    │
│ 2   │ y      │ two │ 4     │ 0    │

julia> permutedims(df2, 1, "different_name")
3×3 DataFrame
│ Row │ different_name │ x   │ y   │
│     │ String         │ Any │ Any │
├─────┼────────────────┼─────┼─────┤
│ 1   │ b              │ 1   │ two │
│ 2   │ c              │ 3   │ 4   │
│ 3   │ d              │ 1   │ 0   │
```
"""
function Base.permutedims(df::AbstractDataFrame, src_namescol::ColumnIndex,
                          dest_namescol::Union{Symbol, AbstractString};
                          makeunique::Bool=false)
    # Reject out-of-range integer indices up front with a `BoundsError`.
    if src_namescol isa Integer && !(1 <= src_namescol <= ncol(df))
        throw(BoundsError(index(df), src_namescol))
    end

    # The values of this column become the column names of the result.
    header = df[!, src_namescol]
    eltype(header) <: SymbolOrString ||
        throw(ArgumentError("src_namescol must have eltype `Symbol` or `<:AbstractString`"))

    # All remaining columns hold the data to be permuted; their names form the
    # first column of the result.
    rest = df[!, Not(src_namescol)]
    result = DataFrame(dest_namescol => names(rest))

    permuted = if ncol(rest) == 0
        # Nothing to permute: create one empty column per source row so that the
        # header values are still used as column names.
        DataFrame(AbstractVector[[] for _ in 1:nrow(df)], header,
                  makeunique=makeunique, copycols=false)
    else
        # Going through a `Matrix` promotes all values to a common element type.
        mat = permutedims(Matrix(rest))
        rename!(DataFrame(Tables.table(mat)), header, makeunique=makeunique)
    end

    return hcat!(result, permuted, makeunique=makeunique, copycols=false)
end

# Two-argument form of `permutedims`: the first column of the result gets the same
# name as the column selected by `src_namescol`.
function Base.permutedims(df::AbstractDataFrame, src_namescol::ColumnIndex;
                          makeunique::Bool=false)
    # A name (`Symbol` or string) can be reused directly as the destination name.
    src_namescol isa Integer ||
        return permutedims(df, src_namescol, src_namescol; makeunique=makeunique)

    # An integer index has to be validated and translated into the column name.
    1 <= src_namescol <= ncol(df) || throw(BoundsError(index(df), src_namescol))
    return permutedims(df, src_namescol, _names(df)[src_namescol];
                       makeunique=makeunique)
end

0 comments on commit 231f6c0

Please sign in to comment.