Skip to content

Commit

Permalink
Merge 844e867 into eb3d10a
Browse files Browse the repository at this point in the history
  • Loading branch information
cjprybol committed Oct 9, 2017
2 parents eb3d10a + 844e867 commit a408cc0
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 26 deletions.
31 changes: 12 additions & 19 deletions src/groupeddataframe/grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -320,9 +320,6 @@ aggregate(gd::GroupedDataFrame, fs)
* `fs` : a function or vector of functions to be applied to vectors
within groups; expects each argument to be a column vector
Each function in `fs` should return a value or vector. All results must have
the same length.
### Returns
* `::DataFrame`
Expand All @@ -342,16 +339,23 @@ df |> groupby(:a) |> [sum, x->mean(Nulls.skip(x))] # equivalent
"""
function aggregate(d::AbstractDataFrame, fs::Function; sort::Bool=false)
    # A single function is wrapped in a one-element vector and forwarded,
    # together with the `sort` flag, to the vector-of-functions call.
    return aggregate(d, [fs]; sort=sort)
end
# Whole-table aggregation: applies each function in `fs` to each column of `d`
# and returns a DataFrame with one column per (function, column) pair.
#   d    : the table whose columns are aggregated
#   fs   : functions to apply; each is called with a whole column `d[c]`
#   sort : when true, the result is sorted in place with `sort!` before return
function aggregate(d::AbstractDataFrame, fs::Vector{T}; sort::Bool=false) where T<:Function
# NOTE(review): the next two statements compute headers through `_makeheaders`
# and call `_aggregate`, but the value returned by `_aggregate` is never used
# and `headers` is rebound immediately afterwards. This looks like the
# pre-change implementation shown next to its replacement -- confirm which
# version is meant to be live.
headers = _makeheaders(fs, _names(d))
_aggregate(d, fs, headers, sort)
# Column names are "<column>_<function>", ordered with the function as the
# outer loop and the column as the inner loop.
headers = [Symbol(c, "_", f) for f in fs for c in names(d)]
# Values are generated in the same function-outer / column-inner order so that
# each one lines up with its header.
res = DataFrame(Any[f(d[c]) for f in fs for c in names(d)], headers)
sort && sort!(res)
res
end

# Applies aggregate to non-key cols of each SubDataFrame of a GroupedDataFrame
function aggregate(gd::GroupedDataFrame, f::Function; sort::Bool=false)
    # A single function is wrapped in a one-element vector and forwarded,
    # together with the `sort` flag, to the vector-of-functions call.
    return aggregate(gd, [f]; sort=sort)
end
# Per-group aggregation: for every non-key column of the parent table and every
# function in `fs`, computes one value per group and stores it in a column
# named "<column>_<function>" alongside the grouping-key columns.
#   gd   : the grouped table; `gd.cols` are the key columns, `gd.parent` the
#          underlying table
#   fs   : functions to apply; each is called with one group's column `g[c]`
#   sort : when true, the result is sorted in place with `sort!` before return
function aggregate(gd::GroupedDataFrame, fs::Vector{T}; sort::Bool=false) where T<:Function
# NOTE(review): the next three statements build `res` through `_makeheaders`,
# `_aggregate` and `combine`, but `res` is rebound immediately afterwards, so
# that result is discarded. This looks like the pre-change implementation
# shown next to its replacement -- confirm which version is meant to be live.
headers = _makeheaders(fs, setdiff(_names(gd), gd.cols))
res = combine(map(x -> _aggregate(without(x, gd.cols), fs, headers), gd))
sort && sort!(res, cols=headers)
# Start from the key columns, taking one parent row per group.
# Presumably `gd.idx[gd.starts]` is the parent-row index of each group's first
# row -- verify against the GroupedDataFrame definition.
res = gd.parent[gd.idx[gd.starts], gd.cols]
# Every parent column that is not a grouping key gets aggregated.
cols = setdiff(names(gd.parent), gd.cols)
for f in fs
for c in cols
# One entry per group, in the iteration order of `gd`.
res[Symbol(c, "_", f)] = [f(g[c]) for g in gd]
end
end
sort && sort!(res)
res
end

Expand All @@ -365,14 +369,3 @@ function aggregate(d::AbstractDataFrame,
sort::Bool=false) where {S<:ColumnIndex, T <:Function}
aggregate(groupby(d, cols, sort=sort), fs)
end

function _makeheaders(fs::Vector{T}, cn::Vector{Symbol}) where T<:Function
    # Builds one header per (function, column) pair, spelled
    # "<column>_<function name>". Functions form the outer loop and columns
    # the inner one, so all headers for the first function come first.
    headers = Symbol[]
    for fname in _fnames(fs), colname in cn # _fnames: see other/utils.jl
        push!(headers, Symbol(colname, '_', fname))
    end
    return headers
end

function _aggregate(
    d::AbstractDataFrame,
    fs::Vector{T},
    headers::Vector{Symbol},
    sort::Bool=false,
) where T<:Function
    # Applies every function in `fs` to every column of `d` (functions outer,
    # columns inner) and assembles the results into a DataFrame named by
    # `headers`; optionally sorts that DataFrame on all of its columns.
    columns = Any[]
    for f in fs, i in 1:size(d, 2)
        # vcat turns a scalar result into a one-element vector, so every
        # entry handed to the DataFrame constructor is a vector.
        push!(columns, vcat(f(d[i])))
    end
    res = DataFrame(columns, headers)
    if sort
        sort!(res, cols=headers)
    end
    return res
end
36 changes: 36 additions & 0 deletions test/grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,40 @@ module TestGrouping
@test gd[2] == DataFrame(Key1="A", Key2="B", Value=2)
@test gd[3] == DataFrame(Key1="B", Key2="A", Value=3)
@test gd[4] == DataFrame(Key1="B", Key2="B", Value=4)

@testset "aggregate" begin
    # Functions must convert to Symbols usable in column names: named
    # functions by their name, anonymous ones as "#<digits>".
    @test map(Symbol, [mean, sum]) == [:mean, :sum]
    anon_name = string(Symbol(x -> reduce(^, x)))
    @test ismatch(r"#\d+", anon_name)

    # Three groups of four rows each, stored in descending key order.
    dt = DataFrame(group = repeat('c':-1:'a', inner = 4), x = 12:-1:1)

    # Unsorted: groups are reported in the order c, b, a.
    expected_desc = DataFrame(group = 'c':-1:'a', x_sum = [42, 26, 10])
    @test aggregate(groupby(dt, :group), sum) ==
          aggregate(dt, :group, sum) ==
          expected_desc

    # Sorted: groups are reported in the order a, b, c.
    expected_asc = DataFrame(group = 'a':'c', x_sum = [10, 26, 42])
    @test aggregate(groupby(dt, :group), sum, sort = true) ==
          aggregate(dt, :group, sum, sort = true) ==
          expected_asc

    # Ungrouped aggregation covers every column, the key-like one included.
    @test aggregate(dt, length) == DataFrame(group_length = 12, x_length = 12)
    anonymous = x -> length(x)
    expected_anon = DataFrame(Symbol("group_$anonymous") => 12,
                              Symbol("x_$anonymous") => 12)
    @test aggregate(dt, anonymous) == expected_anon

    # Four years of twelve months, with several functions applied at once.
    dt = DataFrame(year = repeat(1:4, inner = 12), month = repeat(1:12, outer = 4),
                   a = 1:48, b = fill(24.5, 48))
    fns = [sum, length]

    expected_all = DataFrame(year_sum = 120, month_sum = 312, a_sum = 1176,
                             b_sum = 1176, year_length = 48, month_length = 48,
                             a_length = 48, b_length = 48)
    @test aggregate(dt, fns) == expected_all

    expected_by_year = DataFrame(year = 1:4, month_sum = fill(78, 4),
                                 a_sum = [78, 222, 366, 510],
                                 b_sum = fill(294, 4), month_length = fill(12, 4),
                                 a_length = fill(12, 4), b_length = fill(12, 4))
    @test aggregate(dt, [:year], fns) == expected_by_year

    expected_by_month = DataFrame(month = 1:12, year_sum = fill(10, 12),
                                  a_sum = collect(76:4:120),
                                  b_sum = fill(98, 12), year_length = fill(4, 12),
                                  a_length = fill(4, 12), b_length = fill(4, 12))
    @test aggregate(dt, [:month], fns) == expected_by_month
end
end
7 changes: 0 additions & 7 deletions test/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,6 @@ module TestUtils
pdata[1:end] = null
@test DataFrames.countnull(pdata) == 20

funs = [mean, sum, var, x -> sum(x)]
# The expected name of the trailing anonymous function depends on how this
# Julia version prints it: "(anonymous function)" (Julia < 0.5) is expected
# to be reported as "λ1", any other printed form is expected verbatim.
@test DataFrames._fnames(funs) ==
      ["mean", "sum", "var",
       string(funs[end]) == "(anonymous function)" ? "λ1" : string(funs[end])]

@testset "describe" begin
io = IOBuffer()
df = DataFrame(Any[collect(1:4), Vector{Union{Int, Null}}(2:5),
Expand Down

0 comments on commit a408cc0

Please sign in to comment.