-
Notifications
You must be signed in to change notification settings - Fork 367
/
sort.jl
103 lines (92 loc) · 3.39 KB
/
sort.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
"""
    sort!(df::DataFrame, cols;
          alg::Union{Algorithm, Nothing}=nothing, lt=isless, by=identity,
          rev::Bool=false, order::Ordering=Forward)

Sort data frame `df` by column(s) `cols`.
`cols` can be either a `Symbol` or `Integer` column index, or
a tuple or vector of such indices.

If `alg` is `nothing` (the default), the most appropriate algorithm is
chosen automatically among `TimSort`, `MergeSort` and `RadixSort` depending
on the type of the sorting columns and on the number of rows in `df`.
If `rev` is `true`, reverse sorting is performed. To enable reverse sorting
only for some columns, pass `order(c, rev=true)` in `cols`, with `c` the
corresponding column index (see example below).
See other methods for a description of other keyword arguments.

# Examples
```jldoctest
julia> df = DataFrame(x = [3, 1, 2, 1], y = ["b", "c", "a", "b"])
4×2 DataFrame
│ Row │ x     │ y      │
│     │ Int64 │ String │
├─────┼───────┼────────┤
│ 1   │ 3     │ b      │
│ 2   │ 1     │ c      │
│ 3   │ 2     │ a      │
│ 4   │ 1     │ b      │

julia> sort!(df, :x)
4×2 DataFrame
│ Row │ x     │ y      │
│     │ Int64 │ String │
├─────┼───────┼────────┤
│ 1   │ 1     │ c      │
│ 2   │ 1     │ b      │
│ 3   │ 2     │ a      │
│ 4   │ 3     │ b      │

julia> sort!(df, (:x, :y))
4×2 DataFrame
│ Row │ x     │ y      │
│     │ Int64 │ String │
├─────┼───────┼────────┤
│ 1   │ 1     │ b      │
│ 2   │ 1     │ c      │
│ 3   │ 2     │ a      │
│ 4   │ 3     │ b      │

julia> sort!(df, (:x, :y), rev=true)
4×2 DataFrame
│ Row │ x     │ y      │
│     │ Int64 │ String │
├─────┼───────┼────────┤
│ 1   │ 3     │ b      │
│ 2   │ 2     │ a      │
│ 3   │ 1     │ c      │
│ 4   │ 1     │ b      │

julia> sort!(df, (:x, order(:y, rev=true)))
4×2 DataFrame
│ Row │ x     │ y      │
│     │ Int64 │ String │
├─────┼───────┼────────┤
│ 1   │ 1     │ c      │
│ 2   │ 1     │ b      │
│ 3   │ 2     │ a      │
│ 4   │ 3     │ b      │
```
"""
function Base.sort!(df::DataFrame, cols_new=[]; cols=[], alg=nothing,
                    lt=isless, by=identity, rev=false, order=Forward)
    # `by` is accepted either as a single function or as a collection whose
    # element type is a subtype of `Function`; anything else is rejected up front.
    by_is_valid = by isa Function || eltype(by) <: Function
    if !by_is_valid
        msg = "'by' must be a Function or a vector of Functions. Perhaps you wanted 'cols'."
        throw(ArgumentError(msg))
    end

    # Deprecated keyword form `sort!(df, cols=...)`: emit a deprecation warning and
    # let the keyword value take precedence over the positional column selection.
    sort_cols = cols_new
    if cols != []
        Base.depwarn("sort!(df, cols=cols) is deprecated, use sort!(df, cols) instead",
                     :sort!)
        sort_cols = cols
    end

    # Build the ordering from the selected columns and the lt/by/rev/order settings,
    # resolve the algorithm through `Sort.defalg` (forwarding the caller's `alg`),
    # then delegate to the (df, algorithm, ordering) method.
    ordering_spec = ordering(df, sort_cols, lt, by, rev, order)
    chosen_alg = Sort.defalg(df, ordering_spec; alg=alg, cols=sort_cols)
    return sort!(df, chosen_alg, ordering_spec)
end
# Sort `df` in place with algorithm `a` and ordering `o`: compute the row
# permutation once via `sortperm`, apply it to every distinct column object,
# and return `df`.
function Base.sort!(df::DataFrame, a::Base.Sort.Algorithm, o::Base.Sort.Ordering)
    perm = sortperm(df, a, o)
    # Scratch buffer refilled from `perm` before every `Base.permute!!` call --
    # presumably because `permute!!` clobbers its permutation argument.
    scratch = similar(perm)
    columns = _columns(df)
    for (idx, column) in enumerate(columns)
        # The same vector object may back several columns; permute it only at
        # its first occurrence so it is not reordered twice.
        first_occurrence = !any(k -> columns[k] === column, 1:idx-1)
        if first_occurrence
            copyto!(scratch, perm)
            Base.permute!!(column, scratch)
        end
    end
    return df
end