From b14d80f489b864b1ef484505c55595c31526901b Mon Sep 17 00:00:00 2001 From: morgen-code <229867032+morgen-code@users.noreply.github.com> Date: Mon, 6 Oct 2025 16:06:38 +0900 Subject: [PATCH 1/2] Create flash_sort.py This pull request adds an implementation of the Flash Sort algorithm in `sorts/flash_sort.py`. **Algorithm overview:** Flash Sort is a distribution-based sorting algorithm especially efficient for large datasets with elements that are uniformly distributed. Its main idea is to classify elements into buckets (classes) using a linear transformation, rearrange the array in-place using a cycle leader permutation, and finally apply insertion sort within each class for local ordering. **Implementation details:** - The number of classes (buckets) is empirically set to `int(0.43 * n)` (where `n` is the length of the array), following recommendations from the original paper and Wikipedia. This balance helps avoid both oversparse and overcrowded buckets. - The implementation includes detailed comments and uses descriptive variable names for clarity. - The function returns a new sorted list and does not modify the input array in-place. **Reference:** - [Wikipedia: Flashsort](https://en.wikipedia.org/wiki/Flashsort) **Use cases:** Most efficient when data is numeric and uniformly distributed. For other distributions, performance may degrade. Closes #13203 --- sorts/flash_sort.py | 79 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 sorts/flash_sort.py diff --git a/sorts/flash_sort.py b/sorts/flash_sort.py new file mode 100644 index 000000000000..d56b5b1a0eaa --- /dev/null +++ b/sorts/flash_sort.py @@ -0,0 +1,79 @@ +# Flash Sort Algorithm +# +# Flash Sort is a distribution sorting algorithm designed for large arrays with elements +# that are relatively uniformly distributed. The algorithm can achieve close to O(n) time +# complexity under favorable conditions. +# +# Main steps: +# 1. 
# Find the minimum and maximum values in the array.
# 2. Choose the number of classes ("buckets") m. The typical choice is
#    m = int(0.43 * n), where n is the array length. The constant 0.43 is an
#    empirical value; the goal is to have enough classes to spread elements
#    evenly, but not so many that classes become sparse.
# 3. Classify each element into one of the m classes using a linear mapping
#    from the value range to class indices.
# 4. Compute prefix sums of the class counts to determine the class boundaries.
# 5. Group elements of the same class together. The classical algorithm does
#    this in place with a cycle-leader permutation; the implementation below
#    scatters into a new list instead, so the caller's data is never modified.
# 6. Finish with an insertion sort, because elements within a class are not
#    guaranteed to be ordered.
#
# Reference:
# https://en.wikipedia.org/wiki/Flashsort


def flash_sort(array) -> list:
    """
    Return a new list with the values of ``array`` in ascending order.

    The values are first distributed into ``max(int(0.43 * n), 2)`` classes by
    a linear mapping of their position between the minimum and the maximum,
    then a single insertion sort pass fixes the order inside each class.
    Close to O(n) behaviour is expected for uniformly distributed data; heavily
    skewed data degrades towards the O(n^2) cost of the final insertion sort.
    See: https://en.wikipedia.org/wiki/Flashsort

    Args:
        array: Iterable of numeric values (list, tuple, range, generator, ...).
            It is copied up front and never modified.

    Returns:
        list: A new list holding the sorted values.

    Raises:
        TypeError: If the values cannot be compared or subtracted.

    >>> flash_sort([5, 2, 9, 1, 5, 6])
    [1, 2, 5, 5, 6, 9]
    >>> flash_sort([])
    []
    >>> flash_sort((7, 7, 7))
    [7, 7, 7]
    >>> flash_sort((3.5, -1.0, 2.25))
    [-1.0, 2.25, 3.5]
    """
    # Snapshot the input once: this accepts any iterable (not only lists) and
    # guarantees that every return path hands back a fresh list.
    items = list(array)
    n = len(items)
    if n < 2:
        return items

    # Step 1: value range. A zero-width range means every value is equal.
    min_value = min(items)
    max_value = max(items)
    if min_value == max_value:
        return items

    # Step 2: number of classes; at least two so the mapping is meaningful.
    number_of_classes = max(int(0.43 * n), 2)

    # Step 3: map each value linearly onto 0 .. number_of_classes - 1.
    # The indices are computed once and reused by the scatter pass below.
    class_coefficient = (number_of_classes - 1) / (max_value - min_value)
    class_indices = [int(class_coefficient * (value - min_value)) for value in items]

    class_boundaries = [0] * number_of_classes
    for class_index in class_indices:
        class_boundaries[class_index] += 1

    # Step 4: prefix sums turn the counts into exclusive upper boundaries.
    for i in range(1, number_of_classes):
        class_boundaries[i] += class_boundaries[i - 1]

    # Step 5: scatter into the output list. Walking the input backwards while
    # decrementing the boundary keeps equal-class values in their input order.
    sorted_array = [0] * n
    for value, class_index in zip(reversed(items), reversed(class_indices)):
        class_boundaries[class_index] -= 1
        sorted_array[class_boundaries[class_index]] = value

    # Step 6: insertion sort; after the scatter each value is already inside
    # its class, so it only has to travel within that class.
    for i in range(1, n):
        key = sorted_array[i]
        j = i - 1
        while j >= 0 and sorted_array[j] > key:
            sorted_array[j + 1] = sorted_array[j]
            j -= 1
        sorted_array[j + 1] = key

    return sorted_array