From 9d36ce4053b7d1f801832c21af400de7e8ddd817 Mon Sep 17 00:00:00 2001 From: Lukas Schwerdt Date: Fri, 10 Feb 2023 19:23:28 +0100 Subject: [PATCH 01/10] Add PatternDefeatingQuicksort --- Project.toml | 1 + src/SortingAlgorithms.jl | 550 ++++++++++++++++++++++++++++++++++++++- test/runtests.jl | 6 +- 3 files changed, 553 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index 23e0339..928c4e6 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ version = "1.1.0" [deps] DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" [compat] julia = "1" diff --git a/src/SortingAlgorithms.jl b/src/SortingAlgorithms.jl index 9f528fa..a70a708 100644 --- a/src/SortingAlgorithms.jl +++ b/src/SortingAlgorithms.jl @@ -5,16 +5,21 @@ module SortingAlgorithms using DataStructures using Base.Sort using Base.Order +using Base: Cartesian import Base.Sort: sort! import DataStructures: heapify!, percolate_down! +import StaticArrays: MVector -export HeapSort, TimSort, RadixSort, CombSort +export HeapSort, TimSort, RadixSort, CombSort, BranchyPatternDefeatingQuicksort, BranchlessPatternDefeatingQuicksort, BranchyPdqSort, BranchlessPdqSort struct HeapSortAlg <: Algorithm end struct TimSortAlg <: Algorithm end struct RadixSortAlg <: Algorithm end struct CombSortAlg <: Algorithm end +abstract type PatternDefeatingQuicksortAlg <: Algorithm end +struct BranchyPatternDefeatingQuicksortAlg <: PatternDefeatingQuicksortAlg end +struct BranchlessPatternDefeatingQuicksortAlg <: PatternDefeatingQuicksortAlg end function maybe_optimize(x::Algorithm) isdefined(Base.Sort, :InitialOptimizations) ? Base.Sort.InitialOptimizations(x) : x @@ -51,6 +56,59 @@ Characteristics: """ const CombSort = maybe_optimize(CombSortAlg()) +""" + BranchyPatternDefeatingQuicksortAlg + +Quicksort with improved performance on special input patterns. 
+ +Presorted inputs (including reverse and almost presorted ones), as well as inputs with many duplicates are +sorted in less than n log n time. +The code is based closely on the original C++ implementation by Orson Peters (see References). + +Characteristics: + - *not stable* does not preserve the ordering of elements which + compare equal (e.g. "a" and "A" in a sort of letters which + ignores case). + - *in-place* in memory. + - *`n log n` guaranteed runtime* by falling back to heapsort for pathological inputs. + +## References + - https://arxiv.org/pdf/2106.05123.pdf + - https://github.com/orlp/pdqsort +""" +const BranchyPatternDefeatingQuicksort = BranchyPatternDefeatingQuicksortAlg() +const BranchyPdqSort = BranchyPatternDefeatingQuicksort + +""" + BranchlessPatternDefeatingQuicksortAlg + +Quicksort with improved performance on special input patterns. + +Presorted inputs (including reverse and almost presorted ones), as well as inputs with many duplicates are +sorted in less than n log n time. Uses a branchless block partitioning scheme, which is faster for simple types. +The code is based closely on the original C++ implementation by Orson Peters (see References). + +Characteristics: + - *not stable* does not preserve the ordering of elements which + compare equal (e.g. "a" and "A" in a sort of letters which + ignores case). + - *constant* auxiliary memory (approximately 1KiB on 64-bit systems). + - *`n log n` guaranteed runtime* by falling back to heapsort for pathological inputs.
+ +## References + - https://arxiv.org/pdf/2106.05123.pdf + - https://github.com/orlp/pdqsort + - https://dl.acm.org/doi/10.1145/3274660 + - http://arxiv.org/abs/1604.06697 + +""" +const BranchlessPatternDefeatingQuicksort = BranchlessPatternDefeatingQuicksortAlg() +const BranchlessPdqSort = BranchlessPatternDefeatingQuicksort + +const PDQ_SMALL_THRESHOLD = 32 +const PDQ_NINTHER_THRESHOLD = 128 +const PDQ_PARTIAL_INSERTION_SORT_LIMIT = 8 +const PDQ_BLOCK_SIZE = 64 ## Heap sort @@ -631,4 +689,494 @@ else end end +""" + unguarded_insertion_sort!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + +Sorts v[lo:hi] using insertion sort with the given ordering. Assumes +v[lo-1] is an element smaller than or equal to any element in v[lo:hi]. +""" +function unguarded_insertion_sort!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + lo_plus_1 = (lo + 1)::Integer + @inbounds for i = lo_plus_1:hi + j = i + x = v[i] + while true + y = v[j-1] + if !(lt(o, x, y)::Bool) + break + end + v[j] = y + j -= 1 + end + v[j] = x + end +end + +""" + partial_insertion_sort!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + +Attempts to use insertion sort on v[lo:hi]. Will return false if more than +PDQ_PARTIAL_INSERTION_SORT_LIMIT elements were moved, and abort sorting. Otherwise it will +successfully sort and return true. 
+""" +function partial_insertion_sort!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + limit = 0 + lo_plus_1 = (lo + 1)::Integer + @inbounds for i = lo_plus_1:hi + j = i + x = v[i] + while j > lo + y = v[j-1] + if !(lt(o, x, y)::Bool) + break + end + v[j] = y + j -= 1 + end + v[j] = x + limit += i - j + limit > PDQ_PARTIAL_INSERTION_SORT_LIMIT && return false + end + return true +end + +""" + partition_right!(v::AbstractVector, lo::Integer, hi::Integer, a::BranchlessPatternDefeatingQuicksortAlg, o::Ordering, offsets_l::AbstractVector{Integer}, offsets_r::AbstractVector{Integer}) + +Partitions v[lo:hi] around pivot v[lo] using ordering o. + +Elements equal to the pivot are put in the right-hand partition. Returns the position of the pivot +after partitioning and whether the passed sequence already was correctly partitioned. Assumes the +pivot is a median of at least 3 elements and that v[lo:hi] is at least PDQ_SMALL_THRESHOLD long. +Uses branchless partitioning. +""" +function partition_right!(v::AbstractVector, lo::Integer, hi::Integer, a::BranchlessPatternDefeatingQuicksortAlg, o::Ordering, offsets_l::AbstractVector{Int}, offsets_r::AbstractVector{Int}) + # input: + # v[lo] -> pivot + # output: + # v[lo:pivot_position-1] < pivot + # v[pivot_position] == pivot + # v[pivot_position+1:hi] >= pivot + @inbounds begin + pivot = v[lo] + + # swap pointers + # v[lo] is pivot -> start at lo + 1 + left = lo + 1 + right = hi + # Find the first element greater than or equal than the pivot (the median of 3 guarantees + # this exists). + while lt(o, v[left], pivot) + left += 1 + end + # Find the first element strictly smaller than the pivot. We have to guard this search if + # there was no element before v[left]. 
+ if left - 1 == lo + while left < right && !lt(o, v[right], pivot) + right -= 1 + end + else + while !lt(o, v[right], pivot) + right -= 1 + end + end + + # If the first pair of elements that should be swapped to partition are the same element, + # the passed in sequence already was correctly partitioned. + was_already_partitioned = left >= right + if !was_already_partitioned + v[left], v[right] = v[right], v[left] + left += 1 + right -= 1 + + offsets_l_base = left + offsets_r_base = right + start_l = 0; start_r = 0 + num_l = 0; num_r = 0 + + while left < right + 1 + # Fill up offset blocks with elements that are on the wrong side. + # First we determine how much elements are considered for each offset block. + num_unknown = right - left + 1 + left_split = num_l == 0 ? (num_r == 0 ? num_unknown ÷ 2 : num_unknown) : 0 + right_split = num_r == 0 ? (num_unknown - left_split) : 0 + + # Fill the offset blocks. + if left_split >= PDQ_BLOCK_SIZE + i = 0 + while i < PDQ_BLOCK_SIZE + Cartesian.@nexprs 8 _ -> + begin + offsets_l[num_l+1] = i + num_l += Int(!lt(o, v[left], pivot)) + left += 1 + i += 1 + end + end + else + for i in 0:left_split-1 + offsets_l[num_l+1] = i + num_l += Int(!lt(o, v[left], pivot)) + left += 1 + end + end + if right_split >= PDQ_BLOCK_SIZE + i = 0 + while i < PDQ_BLOCK_SIZE + Cartesian.@nexprs 8 _ -> + begin + offsets_r[num_r+1] = i + num_r += Int(lt(o, v[right], pivot)) + right -= 1 + i += 1 + end + end + else + for i in 0:right_split-1 + offsets_r[num_r+1] = i + num_r += Int(lt(o, v[right], pivot)) + right -= 1 + end + end + + # Swap elements and update block sizes and left/right boundaries. 
+ num = min(num_l, num_r) + for i = 1:num + swap!(v, offsets_l_base + offsets_l[i+start_l], offsets_r_base - offsets_r[i+start_r]) + end + num_l -= num; num_r -= num + start_l += num; start_r += num + + if num_l == 0 + start_l = 0 + offsets_l_base = left + end + + if num_r == 0 + start_r = 0 + offsets_r_base = right + end + end + + # We have now fully identified [left, right)'s proper position. Swap the last elements. + if num_l > 0 + while num_l > 0 + swap!(v, offsets_l_base + offsets_l[start_l+num_l], right) + num_l -= 1 + right -= 1 + end + left = right + 1 + end + if num_r > 0 + while num_r > 0 + swap!(v, left, offsets_r_base - offsets_r[start_r+num_r]) + num_r -= 1 + left += 1 + end + right = left + end + + end + + # Put the pivot in the right place. + pivot_position = left - 1 + v[lo] = v[pivot_position] + v[pivot_position] = pivot + end + return pivot_position, was_already_partitioned +end + +""" + partition_right!(v::AbstractVector, lo::Integer, hi::Integer, a::BranchyPatternDefeatingQuicksortAlg, o::Ordering, _, _) + +Partitions v[lo:hi] around pivot v[lo] using ordering o. + +Elements equal to the pivot are put in the right-hand partition. Returns the position of the pivot +after partitioning and whether the passed sequence already was correctly partitioned. Assumes the +pivot is a median of at least 3 elements and that v[lo:hi] is at least PDQ_SMALL_THRESHOLD long. +""" +function partition_right!(v::AbstractVector, lo::Integer, hi::Integer, a::BranchyPatternDefeatingQuicksortAlg, o::Ordering, _, _) + # input: + # v[lo] -> pivot + # output: + # v[lo:pivot_position-1] < pivot + # v[pivot_position] == pivot + # v[pivot_position+1:hi] >= pivot + @inbounds begin + pivot = v[lo] + + # swap pointers + # v[lo] is pivot + left = lo + 1 + right = hi + # Find the left element greater than or equal than the pivot (the median of 3 guarantees + # this exists). + while lt(o, v[left], pivot) + left += 1 + end + # Find the first element strictly smaller than the pivot. 
We have to guard this search if + # there was no element before v[left]. + if left - 1 == lo + while left < right && !lt(o, v[right], pivot) + right -= 1 + end + else + while !lt(o, v[right], pivot) + right -= 1 + end + end + + # If the first pair of elements that should be swapped to partition are the same element, + # the passed in sequence already was correctly partitioned. + was_already_partitioned = left >= right + + # Keep swapping pairs of elements that are on the wrong side of the pivot. Previously + # swapped pairs guard the searches, which is why the first iteration is special-cased + # above. + while left < right + swap!(v, left, right) + left += 1 + right -= 1 + while lt(o, v[left], pivot) + left += 1 + end + while !lt(o, v[right], pivot) + right -= 1 + end + end + + # Put the pivot in the right place. + pivot_position = left - 1 + v[lo] = v[pivot_position] + v[pivot_position] = pivot + + end + return pivot_position, was_already_partitioned + +end + +""" + partition_left!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + +Partitions v[lo:hi] around pivot v[lo] using ordering o. + +Similar function to the one above, except elements equal to the pivot are put to the left of +the pivot and it doesn't check or return if the passed sequence already was partitioned. +Since this is rarely used (the many equal case), and in that case pdqsort already has O(n) +performance, no block quicksort is applied here for simplicity. 
+""" +function partition_left!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + # input: + # v[hi] -> pivot + # output: + # v[lo:pivot_position-1] <= pivot + # v[pivot_position] == pivot + # v[pivot_position+1:hi] > pivot + + @inbounds begin + pivot = v[lo] + left = lo + 1 + right = hi + + while lt(o, pivot, v[right]) + right -= 1 + end + if right == hi + while left < right && !lt(o, pivot, v[left]) + left += 1 + end + else + while !lt(o, pivot, v[left]) + left += 1 + end + end + + while left < right + swap!(v, left, right) + while lt(o, pivot, v[right]) + right -= 1 + end + while !lt(o, pivot, v[left]) + left += 1 + end + end + + # Put the pivot in the right place. + pivot_position = right + v[lo] = v[pivot_position] + v[pivot_position] = pivot + end + return pivot_position + +end + +# midpoint was added to Base.sort in version 1.4 and later moved to Base +# -> redefine for compatibility with earlier versions +_midpoint(lo::Integer, hi::Integer) = lo + ((hi - lo) >>> 0x01) + +# modified from Base.sort +@inline function selectpivot!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + @inbounds begin + # use hi+1 to ensure reverse sorted list is swapped perfectly + mi = _midpoint(lo, hi+1) + # sort v[mi] <= v[lo] <= v[hi] such that the pivot is immediately in place + if lt(o, v[lo], v[mi]) + v[mi], v[lo] = v[lo], v[mi] + end + + if lt(o, v[hi], v[lo]) + if lt(o, v[hi], v[mi]) + v[hi], v[lo], v[mi] = v[lo], v[mi], v[hi] + else + v[hi], v[lo] = v[lo], v[hi] + end + end + end +end + +@inline function swap!(v::AbstractVector, i::Integer, j::Integer) + v[i], v[j] = v[j], v[i] +end + +@inline function sort2!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) + lt(o, v[hi], v[lo]) && swap!(v, lo, hi) +end + +@inline function sort3!(v::AbstractVector, lo::Integer, mid::Integer, hi::Integer, o::Ordering) + sort2!(v, lo, mid, o) + sort2!(v, mid, hi, o) + sort2!(v, lo, mid, o) +end + +@inline function selectpivot_ninther(v::AbstractVector, 
lo::Integer, hi::Integer, len::Integer, o::Ordering) + s2 = len ÷ 2 + sort3!(v, lo, lo + s2, hi, o) + sort3!(v, lo + 1, lo + (s2 - 1), hi - 1, o) + sort3!(v, lo + 2, lo + (s2 + 1), hi - 2, o) + sort3!(v, lo + (s2 - 1), lo + s2, lo + (s2 + 1), o) + swap!(v, lo, lo + s2) +end + +pdqsort_loop!(v::AbstractVector, lo::Integer, hi::Integer, a::BranchlessPatternDefeatingQuicksortAlg, o::Ordering, bad_allowed::Integer, offsets_l::Nothing, offsets_r::Nothing, leftmost=true) = +pdqsort_loop!(v, lo, hi, a, o, bad_allowed, MVector{PDQ_BLOCK_SIZE, Int}(undef), MVector{PDQ_BLOCK_SIZE, Int}(undef), leftmost) + +function pdqsort_loop!(v::AbstractVector, lo::Integer, hi::Integer, a::PatternDefeatingQuicksortAlg, o::Ordering, bad_allowed::Integer, offsets_l, offsets_r, leftmost=true) + # Use a while loop for tail recursion elimination. + @inbounds while true + len = hi - lo + 1 + # Insertion sort is faster for small arrays. + if len < PDQ_SMALL_THRESHOLD + if leftmost + Base.Sort._sort!(v, InsertionSort, o, (;lo, hi)) + else + unguarded_insertion_sort!(v, lo, hi, o) + end + return + end + + # Choose pivot as median of 3 or pseudomedian of 9. + if len > PDQ_NINTHER_THRESHOLD + selectpivot_ninther(v, lo, hi, len, o) + else + selectpivot!(v, lo, hi, o) + end + # If v[lo - 1] is the end of the right partition of a previous partition operation + # there is no element in [begin, end) that is smaller than v[lo - 1]. Then if our + # pivot compares equal to v[lo - 1] we change strategy, putting equal elements in + # the left partition, greater elements in the right partition. We do not have to + # recurse on the left partition, since it's sorted (all equal). + if !leftmost && !lt(o, v[lo-1], v[lo]) + lo = partition_left!(v, lo, hi, o) + 1 + continue + end + + # Partition and get results. + pivot_pos, was_already_partitioned = partition_right!(v, lo, hi, a, o, offsets_l, offsets_r) + + # Check for a highly unbalanced partition. 
+ l_len = pivot_pos - lo; + r_len = hi - (pivot_pos + 1); + is_highly_unbalanced = l_len < len ÷ 8 || r_len < len ÷ 8 + + # If we got a highly unbalanced partition we shuffle elements to break many patterns. + if is_highly_unbalanced + # If we had too many bad partitions, switch to heapsort to guarantee O(n log n). + bad_allowed -= 1 + if bad_allowed == 0 + sort!(v, lo, hi, SortingAlgorithms.HeapSort, o) + return + end + + if l_len > PDQ_SMALL_THRESHOLD + swap!(v, lo, lo + l_len ÷ 4) + swap!(v, pivot_pos - 1, pivot_pos - l_len ÷ 4) + + if (l_len > PDQ_NINTHER_THRESHOLD) + swap!(v, lo + 1, lo + (l_len ÷ 4 + 1)) + swap!(v, lo + 2, lo + (l_len ÷ 4 + 2)) + swap!(v, pivot_pos - 2, pivot_pos - (l_len ÷ 4 + 1)) + swap!(v, pivot_pos - 3, pivot_pos - (l_len ÷ 4 + 2)) + end + end + + if r_len > PDQ_SMALL_THRESHOLD + swap!(v, pivot_pos + 1, pivot_pos + (1 + r_len ÷ 4)) + swap!(v, hi, hi - r_len ÷ 4) + + if (r_len > PDQ_NINTHER_THRESHOLD) + swap!(v, pivot_pos + 2, pivot_pos + (2 + r_len ÷ 4)) + swap!(v, pivot_pos + 3, pivot_pos + (3 + r_len ÷ 4)) + swap!(v, hi - 1 , hi - 1 - r_len ÷ 4) + swap!(v, hi - 2, hi - 2 - r_len ÷ 4) + end + end + else + # If we were decently balanced and we tried to sort an already partitioned + # sequence try to use insertion sort. + if was_already_partitioned && + partial_insertion_sort!(v, lo, pivot_pos, o) && + partial_insertion_sort!(v, pivot_pos + 1, hi, o) + return + end + end + + # Sort the left partition first using recursion and do tail recursion elimination for + # the right-hand partition. 
+ pdqsort_loop!(v, lo, pivot_pos-1, a, o, bad_allowed, offsets_l, offsets_r, leftmost) + lo = pivot_pos + 1 + leftmost = false + end +end + +# integer logarithm base two, ignoring sign +function log2i(n::Integer) + sizeof(n) << 3 - leading_zeros(abs(n)) +end + +sort!(v::AbstractVector, lo::Int, hi::Int, a::PatternDefeatingQuicksortAlg, o::Ordering) = +pdqsort_loop!(v, lo, hi, a, o, log2i(hi + 1 - lo), nothing, nothing) + +#= +This implementation of pattern-defeating quicksort is based on the original code from Orson Peters, +available at https://github.com/orlp/pdqsort. +Original license notice: +""" +Copyright (c) 2021 Orson Peters + +This software is provided 'as-is', without any express or implied warranty. In no event will the +authors be held liable for any damages arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, including commercial +applications, and to alter it and redistribute it freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the + original software. If you use this software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + +2. Altered source versions must be plainly marked as such, and must not be misrepresented as + being the original software. + +3. This notice may not be removed or altered from any source distribution. 
+""" +=# end # module diff --git a/test/runtests.jl b/test/runtests.jl index 5738105..91bd6b7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,7 +5,7 @@ using Random a = rand(1:10000, 1000) -for alg in [TimSort, HeapSort, RadixSort, CombSort] +for alg in [TimSort, HeapSort, RadixSort, CombSort, BranchyPatternDefeatingQuicksort, BranchlessPatternDefeatingQuicksort] b = sort(a, alg=alg) @test issorted(b) ix = sortperm(a, alg=alg) @@ -85,7 +85,7 @@ for n in [0:10..., 100, 101, 1000, 1001] end # unstable algorithms - for alg in [HeapSort, CombSort] + for alg in [HeapSort, CombSort, BranchyPatternDefeatingQuicksort, BranchlessPatternDefeatingQuicksort] p = sortperm(v, alg=alg, order=ord) @test isperm(p) @test v[p] == si @@ -99,7 +99,7 @@ for n in [0:10..., 100, 101, 1000, 1001] v = randn_with_nans(n,0.1) for ord in [Base.Order.Forward, Base.Order.Reverse], - alg in [TimSort, HeapSort, RadixSort, CombSort] + alg in [TimSort, HeapSort, RadixSort, CombSort, BranchyPatternDefeatingQuicksort, BranchlessPatternDefeatingQuicksort] # test float sorting with NaNs s = sort(v, alg=alg, order=ord) @test issorted(s, order=ord) From a53f9894f5cad387d6566abb3c530f54206dc698 Mon Sep 17 00:00:00 2001 From: Lukas Schwerdt Date: Sat, 11 Feb 2023 12:38:43 +0100 Subject: [PATCH 02/10] Use backwards-compatible way to call insertionsort --- src/SortingAlgorithms.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SortingAlgorithms.jl b/src/SortingAlgorithms.jl index a70a708..51fac46 100644 --- a/src/SortingAlgorithms.jl +++ b/src/SortingAlgorithms.jl @@ -1067,7 +1067,7 @@ function pdqsort_loop!(v::AbstractVector, lo::Integer, hi::Integer, a::PatternDe # Insertion sort is faster for small arrays. 
if len < PDQ_SMALL_THRESHOLD if leftmost - Base.Sort._sort!(v, InsertionSort, o, (;lo, hi)) + sort!(v, lo, hi, InsertionSort, o) else unguarded_insertion_sort!(v, lo, hi, o) end From 82a9d52f1f3efe7bb4e20e6b85beb482639cc01c Mon Sep 17 00:00:00 2001 From: Lukas Schwerdt Date: Sat, 11 Feb 2023 12:49:03 +0100 Subject: [PATCH 03/10] Return sorted vector --- src/SortingAlgorithms.jl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/SortingAlgorithms.jl b/src/SortingAlgorithms.jl index 51fac46..26ae1d2 100644 --- a/src/SortingAlgorithms.jl +++ b/src/SortingAlgorithms.jl @@ -710,6 +710,7 @@ function unguarded_insertion_sort!(v::AbstractVector, lo::Integer, hi::Integer, end v[j] = x end + v end """ @@ -952,7 +953,6 @@ function partition_right!(v::AbstractVector, lo::Integer, hi::Integer, a::Branch end return pivot_position, was_already_partitioned - end """ @@ -1007,7 +1007,6 @@ function partition_left!(v::AbstractVector, lo::Integer, hi::Integer, o::Orderin v[pivot_position] = pivot end return pivot_position - end # midpoint was added to Base.sort in version 1.4 and later moved to Base @@ -1071,7 +1070,7 @@ function pdqsort_loop!(v::AbstractVector, lo::Integer, hi::Integer, a::PatternDe else unguarded_insertion_sort!(v, lo, hi, o) end - return + return v end # Choose pivot as median of 3 or pseudomedian of 9. 
@@ -1104,7 +1103,7 @@ function pdqsort_loop!(v::AbstractVector, lo::Integer, hi::Integer, a::PatternDe bad_allowed -= 1 if bad_allowed == 0 sort!(v, lo, hi, SortingAlgorithms.HeapSort, o) - return + return v end if l_len > PDQ_SMALL_THRESHOLD @@ -1136,7 +1135,7 @@ function pdqsort_loop!(v::AbstractVector, lo::Integer, hi::Integer, a::PatternDe if was_already_partitioned && partial_insertion_sort!(v, lo, pivot_pos, o) && partial_insertion_sort!(v, pivot_pos + 1, hi, o) - return + return v end end From bd4f35177a34cf7ca0f99d854ca5bbae88451b5e Mon Sep 17 00:00:00 2001 From: Lukas Schwerdt Date: Tue, 14 Feb 2023 16:11:20 +0100 Subject: [PATCH 04/10] Add initial optimizations --- src/SortingAlgorithms.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SortingAlgorithms.jl b/src/SortingAlgorithms.jl index 26ae1d2..383bebd 100644 --- a/src/SortingAlgorithms.jl +++ b/src/SortingAlgorithms.jl @@ -76,7 +76,7 @@ Characteristics: - https://arxiv.org/pdf/2106.05123.pdf - https://github.com/orlp/pdqsort """ -const BranchyPatternDefeatingQuicksort = BranchyPatternDefeatingQuicksortAlg() +const BranchyPatternDefeatingQuicksort = maybe_optimize(BranchyPatternDefeatingQuicksortAlg()) const BranchyPdqSort = BranchyPatternDefeatingQuicksort """ @@ -102,7 +102,7 @@ Characteristics: - http://arxiv.org/abs/1604.06697 """ -const BranchlessPatternDefeatingQuicksort = BranchlessPatternDefeatingQuicksortAlg() +const BranchlessPatternDefeatingQuicksort = maybe_optimize(BranchlessPatternDefeatingQuicksortAlg()) const BranchlessPdqSort = BranchlessPatternDefeatingQuicksort const PDQ_SMALL_THRESHOLD = 32 From fdd73c508885492e50deb9053cecbd6e9c2e97f6 Mon Sep 17 00:00:00 2001 From: Lukas Schwerdt Date: Wed, 15 Feb 2023 14:37:40 +0100 Subject: [PATCH 05/10] Use HeapSort without initial optimization --- src/SortingAlgorithms.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SortingAlgorithms.jl b/src/SortingAlgorithms.jl index 383bebd..b595295 
100644 --- a/src/SortingAlgorithms.jl +++ b/src/SortingAlgorithms.jl @@ -1101,8 +1101,8 @@ function pdqsort_loop!(v::AbstractVector, lo::Integer, hi::Integer, a::PatternDe if is_highly_unbalanced # If we had too many bad partitions, switch to heapsort to guarantee O(n log n). bad_allowed -= 1 - if bad_allowed == 0 - sort!(v, lo, hi, SortingAlgorithms.HeapSort, o) + if bad_allowed <= 0 + sort!(v, lo, hi, HeapSortAlg(), o) return v end From e6594ec2cb36f12eea69602782f8067cadb3978c Mon Sep 17 00:00:00 2001 From: Lukas Schwerdt Date: Wed, 15 Feb 2023 14:39:37 +0100 Subject: [PATCH 06/10] Add tests with special inputs for PdqSort --- test/runtests.jl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 91bd6b7..44d32de 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -117,3 +117,17 @@ for n in [0:10..., 100, 101, 1000, 1001] @test reinterpret(UInt64,vp) == reinterpret(UInt64,s) end end + +# additional tests to cover special cases of PdqSort +# test partial insertionsort, shuffle elements, partition_left +for v in [[1:1000;10], [1:500;500:-1:1], rand(Int,1000).%4] + for alg in [BranchyPatternDefeatingQuicksort, BranchlessPatternDefeatingQuicksort] + @test issorted(sort(v, alg=alg)) + end +end +# test fallback to HeapSort +let v = [1:500;500:-1:1] + bad_allowed = 1 + SortingAlgorithms.pdqsort_loop!(v, 1, length(v), SortingAlgorithms.BranchyPatternDefeatingQuicksortAlg(), Base.Order.Forward, bad_allowed, nothing, nothing) + @test issorted(v) +end From 76efc098c69226245699bbea9330f78ca8187092 Mon Sep 17 00:00:00 2001 From: Lukas Schwerdt Date: Wed, 15 Feb 2023 22:46:04 +0100 Subject: [PATCH 07/10] Refactor pivot selection - remove selectpivot!
function - use m instead of mid - use lo, m, hi in function signatures - midpoint instead of _midpoint --- src/SortingAlgorithms.jl | 51 +++++++++++++--------------------------- 1 file changed, 16 insertions(+), 35 deletions(-) diff --git a/src/SortingAlgorithms.jl b/src/SortingAlgorithms.jl index b595295..e80daf0 100644 --- a/src/SortingAlgorithms.jl +++ b/src/SortingAlgorithms.jl @@ -1011,27 +1011,7 @@ end # midpoint was added to Base.sort in version 1.4 and later moved to Base # -> redefine for compatibility with earlier versions -_midpoint(lo::Integer, hi::Integer) = lo + ((hi - lo) >>> 0x01) - -# modified from Base.sort -@inline function selectpivot!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) - @inbounds begin - # use hi+1 to ensure reverse sorted list is swapped perfectly - mi = _midpoint(lo, hi+1) - # sort v[mi] <= v[lo] <= v[hi] such that the pivot is immediately in place - if lt(o, v[lo], v[mi]) - v[mi], v[lo] = v[lo], v[mi] - end - - if lt(o, v[hi], v[lo]) - if lt(o, v[hi], v[mi]) - v[hi], v[lo], v[mi] = v[lo], v[mi], v[hi] - else - v[hi], v[lo] = v[lo], v[hi] - end - end - end -end +midpoint(lo::Integer, hi::Integer) = lo + ((hi - lo) >>> 0x01) @inline function swap!(v::AbstractVector, i::Integer, j::Integer) v[i], v[j] = v[j], v[i] @@ -1041,19 +1021,18 @@ end lt(o, v[hi], v[lo]) && swap!(v, lo, hi) end -@inline function sort3!(v::AbstractVector, lo::Integer, mid::Integer, hi::Integer, o::Ordering) - sort2!(v, lo, mid, o) - sort2!(v, mid, hi, o) - sort2!(v, lo, mid, o) +@inline function sort3!(v::AbstractVector, lo::Integer, m::Integer, hi::Integer, o::Ordering) + sort2!(v, lo, m, o) + sort2!(v, m, hi, o) + sort2!(v, lo, m, o) end -@inline function selectpivot_ninther(v::AbstractVector, lo::Integer, hi::Integer, len::Integer, o::Ordering) - s2 = len ÷ 2 - sort3!(v, lo, lo + s2, hi, o) - sort3!(v, lo + 1, lo + (s2 - 1), hi - 1, o) - sort3!(v, lo + 2, lo + (s2 + 1), hi - 2, o) - sort3!(v, lo + (s2 - 1), lo + s2, lo + (s2 + 1), o) - swap!(v, 
lo, lo + s2) +@inline function selectpivot_ninther!(v::AbstractVector, lo::Integer, m::Integer, hi::Integer, o::Ordering) + sort3!(v, lo , m , hi , o) + sort3!(v, lo+1, m-1, hi-1, o) + sort3!(v, lo+2, m+1, hi-2, o) + sort3!(v, m-1, m, m+1, o) + swap!(v, lo, m) end pdqsort_loop!(v::AbstractVector, lo::Integer, hi::Integer, a::BranchlessPatternDefeatingQuicksortAlg, o::Ordering, bad_allowed::Integer, offsets_l::Nothing, offsets_r::Nothing, leftmost=true) = @@ -1074,13 +1053,15 @@ function pdqsort_loop!(v::AbstractVector, lo::Integer, hi::Integer, a::PatternDe end # Choose pivot as median of 3 or pseudomedian of 9. + # use hi+1 to ensure reverse sorted list is swapped perfectly + m = midpoint(lo, hi+1) if len > PDQ_NINTHER_THRESHOLD - selectpivot_ninther(v, lo, hi, len, o) + selectpivot_ninther!(v, lo, m, hi, o) else - selectpivot!(v, lo, hi, o) + sort3!(v, m, lo, hi, o) end # If v[lo - 1] is the end of the right partition of a previous partition operation - # there is no element in [begin, end) that is smaller than v[lo - 1]. Then if our + # there is no element in v[lo:hi] that is smaller than v[lo - 1]. Then if our # pivot compares equal to v[lo - 1] we change strategy, putting equal elements in # the left partition, greater elements in the right partition. We do not have to # recurse on the left partition, since it's sorted (all equal). From 8b44ef3335bc1d71d66994be62ab2661154e46f5 Mon Sep 17 00:00:00 2001 From: Lukas Schwerdt Date: Thu, 16 Feb 2023 16:12:35 +0100 Subject: [PATCH 08/10] Use name pivot_index to match ScratchQuickSort --- src/SortingAlgorithms.jl | 86 +++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 46 deletions(-) diff --git a/src/SortingAlgorithms.jl b/src/SortingAlgorithms.jl index e80daf0..1f5a04c 100644 --- a/src/SortingAlgorithms.jl +++ b/src/SortingAlgorithms.jl @@ -752,12 +752,10 @@ pivot is a median of at least 3 elements and that v[lo:hi] is at least PDQ_SMALL Uses branchless partitioning. 
""" function partition_right!(v::AbstractVector, lo::Integer, hi::Integer, a::BranchlessPatternDefeatingQuicksortAlg, o::Ordering, offsets_l::AbstractVector{Int}, offsets_r::AbstractVector{Int}) - # input: - # v[lo] -> pivot # output: - # v[lo:pivot_position-1] < pivot - # v[pivot_position] == pivot - # v[pivot_position+1:hi] >= pivot + # v[lo:pivot_index-1] < pivot + # v[pivot_index] == pivot + # v[pivot_index+1:hi] >= pivot @inbounds begin pivot = v[lo] @@ -880,11 +878,11 @@ function partition_right!(v::AbstractVector, lo::Integer, hi::Integer, a::Branch end # Put the pivot in the right place. - pivot_position = left - 1 - v[lo] = v[pivot_position] - v[pivot_position] = pivot + pivot_index = left - 1 + v[lo] = v[pivot_index] + v[pivot_index] = pivot end - return pivot_position, was_already_partitioned + return pivot_index, was_already_partitioned end """ @@ -897,12 +895,10 @@ after partitioning and whether the passed sequence already was correctly partiti pivot is a median of at least 3 elements and that v[lo:hi] is at least PDQ_SMALL_THRESHOLD long. """ function partition_right!(v::AbstractVector, lo::Integer, hi::Integer, a::BranchyPatternDefeatingQuicksortAlg, o::Ordering, _, _) - # input: - # v[lo] -> pivot # output: - # v[lo:pivot_position-1] < pivot - # v[pivot_position] == pivot - # v[pivot_position+1:hi] >= pivot + # v[lo:pivot_index-1] < pivot + # v[pivot_index] == pivot + # v[pivot_index+1:hi] >= pivot @inbounds begin pivot = v[lo] @@ -947,12 +943,12 @@ function partition_right!(v::AbstractVector, lo::Integer, hi::Integer, a::Branch end # Put the pivot in the right place. 
- pivot_position = left - 1 - v[lo] = v[pivot_position] - v[pivot_position] = pivot + pivot_index = left - 1 + v[lo] = v[pivot_index] + v[pivot_index] = pivot end - return pivot_position, was_already_partitioned + return pivot_index, was_already_partitioned end """ @@ -966,12 +962,10 @@ Since this is rarely used (the many equal case), and in that case pdqsort alread performance, no block quicksort is applied here for simplicity. """ function partition_left!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering) - # input: - # v[hi] -> pivot # output: - # v[lo:pivot_position-1] <= pivot - # v[pivot_position] == pivot - # v[pivot_position+1:hi] > pivot + # v[lo:pivot_index-1] <= pivot + # v[pivot_index] == pivot + # v[pivot_index+1:hi] > pivot @inbounds begin pivot = v[lo] @@ -1002,11 +996,11 @@ function partition_left!(v::AbstractVector, lo::Integer, hi::Integer, o::Orderin end # Put the pivot in the right place. - pivot_position = right - v[lo] = v[pivot_position] - v[pivot_position] = pivot + pivot_index = right + v[lo] = v[pivot_index] + v[pivot_index] = pivot end - return pivot_position + return pivot_index end # midpoint was added to Base.sort in version 1.4 and later moved to Base @@ -1071,11 +1065,11 @@ function pdqsort_loop!(v::AbstractVector, lo::Integer, hi::Integer, a::PatternDe end # Partition and get results. - pivot_pos, was_already_partitioned = partition_right!(v, lo, hi, a, o, offsets_l, offsets_r) + pivot_index, was_already_partitioned = partition_right!(v, lo, hi, a, o, offsets_l, offsets_r) # Check for a highly unbalanced partition. - l_len = pivot_pos - lo; - r_len = hi - (pivot_pos + 1); + l_len = pivot_index - lo; + r_len = hi - (pivot_index + 1); is_highly_unbalanced = l_len < len ÷ 8 || r_len < len ÷ 8 # If we got a highly unbalanced partition we shuffle elements to break many patterns. 
@@ -1088,42 +1082,42 @@ function pdqsort_loop!(v::AbstractVector, lo::Integer, hi::Integer, a::PatternDe end if l_len > PDQ_SMALL_THRESHOLD - swap!(v, lo, lo + l_len ÷ 4) - swap!(v, pivot_pos - 1, pivot_pos - l_len ÷ 4) + swap!(v, lo, lo + l_len ÷ 4) + swap!(v, pivot_index - 1, pivot_index - l_len ÷ 4) if (l_len > PDQ_NINTHER_THRESHOLD) - swap!(v, lo + 1, lo + (l_len ÷ 4 + 1)) - swap!(v, lo + 2, lo + (l_len ÷ 4 + 2)) - swap!(v, pivot_pos - 2, pivot_pos - (l_len ÷ 4 + 1)) - swap!(v, pivot_pos - 3, pivot_pos - (l_len ÷ 4 + 2)) + swap!(v, lo + 1, lo + (l_len ÷ 4 + 1)) + swap!(v, lo + 2, lo + (l_len ÷ 4 + 2)) + swap!(v, pivot_index - 2, pivot_index - (l_len ÷ 4 + 1)) + swap!(v, pivot_index - 3, pivot_index - (l_len ÷ 4 + 2)) end end if r_len > PDQ_SMALL_THRESHOLD - swap!(v, pivot_pos + 1, pivot_pos + (1 + r_len ÷ 4)) - swap!(v, hi, hi - r_len ÷ 4) + swap!(v, pivot_index + 1, pivot_index + (1 + r_len ÷ 4)) + swap!(v, hi, hi - r_len ÷ 4) if (r_len > PDQ_NINTHER_THRESHOLD) - swap!(v, pivot_pos + 2, pivot_pos + (2 + r_len ÷ 4)) - swap!(v, pivot_pos + 3, pivot_pos + (3 + r_len ÷ 4)) - swap!(v, hi - 1 , hi - 1 - r_len ÷ 4) - swap!(v, hi - 2, hi - 2 - r_len ÷ 4) + swap!(v, pivot_index + 2, pivot_index + (2 + r_len ÷ 4)) + swap!(v, pivot_index + 3, pivot_index + (3 + r_len ÷ 4)) + swap!(v, hi - 1, hi - 1 - r_len ÷ 4) + swap!(v, hi - 2, hi - 2 - r_len ÷ 4) end end else # If we were decently balanced and we tried to sort an already partitioned # sequence try to use insertion sort. if was_already_partitioned && - partial_insertion_sort!(v, lo, pivot_pos, o) && - partial_insertion_sort!(v, pivot_pos + 1, hi, o) + partial_insertion_sort!(v, lo, pivot_index, o) && + partial_insertion_sort!(v, pivot_index + 1, hi, o) return v end end # Sort the left partition first using recursion and do tail recursion elimination for # the right-hand partition. 
- pdqsort_loop!(v, lo, pivot_pos-1, a, o, bad_allowed, offsets_l, offsets_r, leftmost) - lo = pivot_pos + 1 + pdqsort_loop!(v, lo, pivot_index-1, a, o, bad_allowed, offsets_l, offsets_r, leftmost) + lo = pivot_index + 1 leftmost = false end end From b603963ee41703c7aa56264616e6f441c7a6cb6d Mon Sep 17 00:00:00 2001 From: Lukas Schwerdt Date: Thu, 16 Feb 2023 17:26:31 +0100 Subject: [PATCH 09/10] Add compat for StaticArrays Test results: - 0.8.0: fail - 0.8.1: pass - 0.9.0: pass - 0.9.2: pass - 0.10.0: pass - 0.11.0: pass - 0.12.0: pass - 1.0.0: pass - 1.5.15: pass --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index 928c4e6..707065d 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" julia = "1" DataStructures = "0.9, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18" StatsBase = "0.33" +StaticArrays = "0.8.1, 0.9, 0.10, 0.11, 0.12, 1" [extras] Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" From 98166419cda4f9834ee0dca5ce6e6dd08328d5e4 Mon Sep 17 00:00:00 2001 From: Lukas Schwerdt Date: Fri, 17 Feb 2023 15:28:15 +0100 Subject: [PATCH 10/10] Break patterns pseudorandomly ... in case of a highly unbalanced partition. The randomization is inspired by ScratchQuickSort. 
--- src/SortingAlgorithms.jl | 50 ++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/src/SortingAlgorithms.jl b/src/SortingAlgorithms.jl index 1f5a04c..b9e94d8 100644 --- a/src/SortingAlgorithms.jl +++ b/src/SortingAlgorithms.jl @@ -1029,6 +1029,22 @@ end swap!(v, lo, m) end +@inline function swap3consecutive!(v::AbstractVector, i::Integer, j::Integer) + swap!(v, i, j) + swap!(v, i+1, j+1) + swap!(v, i+2, j+2) +end + +# swap first 3 and last 3 elements each with 3 pseudorandomly chosen consecutive elements from v[lo+3:hi-2] +function breakpatterns!(v::AbstractVector, lo::Integer, hi::Integer) + # correct because hi+1-lo > PDQ_SMALL_THRESHOLD > 8 + len8 = hi - lo - 6 # NOTE: length minus 7 (length is hi+1-lo), not minus 8; start index ranges over lo+3:hi-4 + idx_lo = typeof(len8)(hash(lo) % len8) + lo + 3 + idx_hi = typeof(len8)(hash(hi) % len8) + lo + 3 + swap3consecutive!(v, lo, idx_lo) + swap3consecutive!(v, hi-2, idx_hi) +end + pdqsort_loop!(v::AbstractVector, lo::Integer, hi::Integer, a::BranchlessPatternDefeatingQuicksortAlg, o::Ordering, bad_allowed::Integer, offsets_l::Nothing, offsets_r::Nothing, leftmost=true) = pdqsort_loop!(v, lo, hi, a, o, bad_allowed, MVector{PDQ_BLOCK_SIZE, Int}(undef), MVector{PDQ_BLOCK_SIZE, Int}(undef), leftmost) @@ -1068,11 +1084,10 @@ function pdqsort_loop!(v::AbstractVector, lo::Integer, hi::Integer, a::PatternDe pivot_index, was_already_partitioned = partition_right!(v, lo, hi, a, o, offsets_l, offsets_r) # Check for a highly unbalanced partition. - l_len = pivot_index - lo; - r_len = hi - (pivot_index + 1); - is_highly_unbalanced = l_len < len ÷ 8 || r_len < len ÷ 8 + len_r = pivot_index - lo; + len_l = hi - pivot_index; + is_highly_unbalanced = len_r < len ÷ 8 || len_l < len ÷ 8 - # If we got a highly unbalanced partition we shuffle elements to break many patterns. if is_highly_unbalanced # If we had too many bad partitions, switch to heapsort to guarantee O(n log n). 
bad_allowed -= 1 @@ -1080,30 +1095,9 @@ function pdqsort_loop!(v::AbstractVector, lo::Integer, hi::Integer, a::PatternDe sort!(v, lo, hi, HeapSortAlg(), o) return v end - - if l_len > PDQ_SMALL_THRESHOLD - swap!(v, lo, lo + l_len ÷ 4) - swap!(v, pivot_index - 1, pivot_index - l_len ÷ 4) - - if (l_len > PDQ_NINTHER_THRESHOLD) - swap!(v, lo + 1, lo + (l_len ÷ 4 + 1)) - swap!(v, lo + 2, lo + (l_len ÷ 4 + 2)) - swap!(v, pivot_index - 2, pivot_index - (l_len ÷ 4 + 1)) - swap!(v, pivot_index - 3, pivot_index - (l_len ÷ 4 + 2)) - end - end - - if r_len > PDQ_SMALL_THRESHOLD - swap!(v, pivot_index + 1, pivot_index + (1 + r_len ÷ 4)) - swap!(v, hi, hi - r_len ÷ 4) - - if (r_len > PDQ_NINTHER_THRESHOLD) - swap!(v, pivot_index + 2, pivot_index + (2 + r_len ÷ 4)) - swap!(v, pivot_index + 3, pivot_index + (3 + r_len ÷ 4)) - swap!(v, hi - 1, hi - 1 - r_len ÷ 4) - swap!(v, hi - 2, hi - 2 - r_len ÷ 4) - end - end + # If we got a highly unbalanced partition we shuffle elements to break adverse patterns. + len_r > PDQ_SMALL_THRESHOLD && breakpatterns!(v, lo, pivot_index - 1) + len_l > PDQ_SMALL_THRESHOLD && breakpatterns!(v, pivot_index + 1, hi) else # If we were decently balanced and we tried to sort an already partitioned # sequence try to use insertion sort.