From 4a6d37c707077ad07e300362985813e8b3974d78 Mon Sep 17 00:00:00 2001 From: yeoshuheng Date: Sat, 23 Sep 2023 00:49:10 +0800 Subject: [PATCH 1/9] docs: readme for radix sort --- .../algorithms/sorting/radixSort/README.md | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 src/main/java/algorithms/sorting/radixSort/README.md diff --git a/src/main/java/algorithms/sorting/radixSort/README.md b/src/main/java/algorithms/sorting/radixSort/README.md new file mode 100644 index 00000000..b8e179dd --- /dev/null +++ b/src/main/java/algorithms/sorting/radixSort/README.md @@ -0,0 +1,40 @@ +# Radix Sort + +Radix Sort is a non-comparison based, stable sorting algorithm. +We first treat each element as a string with *w* digits (Padding elements that have +less than *w* digits). + +From the least-significant digit to the most-significant digit, we constantly +split them into ten queues corresponding to the number range *[0, 9]*. We then move +through the queue and concatenate the elements back into a list at the next iteration. + +This takes advantage of the concept of place value. +(The value of a digit in a number relative to its position within the number) + +![Radix Sort](https://miro.medium.com/v2/resize:fit:661/1*xFnpQ4UNK0TvyxiL8r1svg.png) + +*Source: Level Up Coding* + +## Complexity Analysis +**Time**: +Note that we will always need to iterate through 10 different queues to rebuild our original array, +and we iterate through all *w* positions, this results in: + +- Worst case: O(w * (n + 10)) +- Average case: O(w * (n + 10)) +- Best case (sorted array): O(w * (n + 10)) + +**Space**: O(n + k) + +## Notes +- Radix sort's time complexity is dependent on the maximum number of digits in each element, +hence it is ideal to use it on integers with a large range and with little digits. +- This could mean that Radix Sort might end up performing worst on small sets of data +if any one given element has a in-proportionate amount of digits. 
+- Counting sort is used as a sub-routine within the Radix Sort process. + +### Common Misconception +- While not immediately obvious, we can see that radix sort is a stable sorting algorithm as + they are enqueued in a manner where the first observed element will be at the head of the queue. +- While it is non-comparison based, not that total ordering of elements is still required - + except now this property is forced upon the algorithm in the manner of the queues. \ No newline at end of file From 694b75ef359ecfa24a7fef15ddcd012761bb8f04 Mon Sep 17 00:00:00 2001 From: yeoshuheng Date: Sat, 23 Sep 2023 00:50:38 +0800 Subject: [PATCH 2/9] docs: update readme for radix sort --- src/main/java/algorithms/sorting/radixSort/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/algorithms/sorting/radixSort/README.md b/src/main/java/algorithms/sorting/radixSort/README.md index b8e179dd..719f4de0 100644 --- a/src/main/java/algorithms/sorting/radixSort/README.md +++ b/src/main/java/algorithms/sorting/radixSort/README.md @@ -24,7 +24,7 @@ and we iterate through all *w* positions, this results in: - Average case: O(w * (n + 10)) - Best case (sorted array): O(w * (n + 10)) -**Space**: O(n + k) +**Space**: O(n + 10) ## Notes - Radix sort's time complexity is dependent on the maximum number of digits in each element, From 34742331a6845895f72d941e4d09013874b30b16 Mon Sep 17 00:00:00 2001 From: yeoshuheng Date: Sat, 23 Sep 2023 00:57:40 +0800 Subject: [PATCH 3/9] feat: junit test for radixsort --- .../sorting/radixSort/RadixSortTest.java | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 src/test/java/algorithms/sorting/radixSort/RadixSortTest.java diff --git a/src/test/java/algorithms/sorting/radixSort/RadixSortTest.java b/src/test/java/algorithms/sorting/radixSort/RadixSortTest.java new file mode 100644 index 00000000..c80bfbc9 --- /dev/null +++ b/src/test/java/algorithms/sorting/radixSort/RadixSortTest.java @@ -0,0 
+1,39 @@ +package algorithms.sorting.radixSort; + +import static org.junit.Assert.assertArrayEquals; +import org.junit.Test; +import java.util.Arrays; +public class RadixSortTest { + @Test + public void test_radixSort_shouldReturnSortedArray() { + int[] firstArray = + new int[] {2, 3, 4, 1, 2, 5, 6, 7, 10, 15, 20, 13, 15, 1, 2, + 15, 12, 20, 21, 120, 11, 5, 7, 85, 30}; + int[] firstResult = Arrays.copyOf(firstArray, firstArray.length); + RadixSort.radixSort(firstResult); + + int[] secondArray + = new int[] {9, 1, 2, 8, 7, 3, 4, 6, 5, 5, 9, 8, 7, 6, 5, 4, + 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + int[] secondResult =Arrays.copyOf(secondArray, secondArray.length); + RadixSort.radixSort(secondResult); + + int[] thirdArray = new int[] {}; + int[] thirdResult = Arrays.copyOf(thirdArray, thirdArray.length); + RadixSort.radixSort(thirdResult); + + int[] fourthArray = new int[] {1}; + int[] fourthResult = Arrays.copyOf(fourthArray, fourthArray.length); + RadixSort.radixSort(fourthResult); + + Arrays.sort(firstArray); + Arrays.sort(secondArray); + Arrays.sort(thirdArray); + Arrays.sort(fourthArray); + + assertArrayEquals(firstResult, firstArray); + assertArrayEquals(secondResult, secondArray); + assertArrayEquals(thirdResult, thirdArray); + assertArrayEquals(fourthResult, fourthArray); + } +} From 6d8381b06f4ba43a6912df58a2e074ade173fca6 Mon Sep 17 00:00:00 2001 From: yeoshuheng Date: Sat, 23 Sep 2023 01:19:48 +0800 Subject: [PATCH 4/9] fix: checkstyle errors --- .../sorting/radixSort/RadixSort.java | 102 ++++++++++-------- .../sorting/radixSort/RadixSortTest.java | 14 +-- 2 files changed, 66 insertions(+), 50 deletions(-) diff --git a/src/main/java/algorithms/sorting/radixSort/RadixSort.java b/src/main/java/algorithms/sorting/radixSort/RadixSort.java index f09de775..c5027d31 100644 --- a/src/main/java/algorithms/sorting/radixSort/RadixSort.java +++ b/src/main/java/algorithms/sorting/radixSort/RadixSort.java @@ -1,53 +1,69 @@ package algorithms.sorting.radixSort; /** - * 
Implementation of Radix sort. - * O((b/r)*(N + 2^r)) - * where N is the number of integers, - * b is the total number of bits (32 bits for int), - * and r is the number of bits for each segment. - * Space: O(N) auxiliary space. - */ +* This class implements a Radix Sort Algorithm. +*/ public class RadixSort { - private static final int NUM_BITS = 8; - private static final int NUM_SEGMENTS = 4; - private static int getSegmentMasked(int num, int segment) { - int mask = ((1 << NUM_BITS) - 1) << (segment * NUM_BITS); - return (num & mask) >> (segment * NUM_BITS); - } + private static final int NUM_BITS = 8; + private static final int NUM_SEGMENTS = 4; - private static void radixSort(int[] arr, int[] sorted) { - // sort the N numbers by segments, starting from left-most segment - for (int i = 0; i < NUM_SEGMENTS; i++) { - int[] freqMap = new int[1 << NUM_BITS]; // at most this number of elements + /** + * Creates masking on the segment to obtain the value of the digit. + * + * @param num The number. + * @param segment The segment we are interested in. + * + * @return The value of the digit in the number at the given segment. 
+ */ + private static int getSegmentMasked(int num, int segment) { + int mask = ((1 << NUM_BITS) - 1) << (segment * NUM_BITS); + return (num & mask) >> (segment * NUM_BITS); + } - // count each element - for (int num : arr) { - freqMap[getSegmentMasked(num, i)]++; - } - // get prefix sum - for (int j = 1; j < freqMap.length; j++) { - freqMap[j] += freqMap[j-1]; - } - // place each number in its correct sorted position up until the given segment - for (int k = arr.length-1; k >= 0; k--) { - int curr = arr[k]; - int id = getSegmentMasked(curr, i); - sorted[freqMap[id] - 1] = curr; - freqMap[id]--; - } - // we do a swap so that our results above for this segment is saved and passed as input to the next segment - int[] tmp = arr; - arr = sorted; - sorted = tmp; - } - sorted = arr; - } + /** + * Radix sorts a given input array and updates the output array in-place. + * + * @param arr original input array. + * @param sorted output array. + */ + private static void radixSort(int[] arr, int[] sorted) { + // sort the N numbers by segments, starting from left-most segment + for (int i = 0; i < NUM_SEGMENTS; i++) { + int[] freqMap = new int[1 << NUM_BITS]; // at most this number of elements - public static void radixSort(int[] arr) { - int[] sorted = new int[arr.length]; - radixSort(arr, sorted); - arr = sorted; // swap back lol + // count each element + for (int num : arr) { + freqMap[getSegmentMasked(num, i)]++; + } + // get prefix sum + for (int j = 1; j < freqMap.length; j++) { + freqMap[j] += freqMap[j - 1]; + } + // place each number in its correct sorted position up until the given segment + for (int k = arr.length - 1; k >= 0; k--) { + int curr = arr[k]; + int id = getSegmentMasked(curr, i); + sorted[freqMap[id] - 1] = curr; + freqMap[id]--; + } + // we do a swap so that our results above for this segment is + // saved and passed as input to the next segment + int[] tmp = arr; + arr = sorted; + sorted = tmp; } + sorted = arr; + } + + /** + * Calls radix sort inplace 
on a given array. + * + * @param arr The array to be sorted. + */ + public static void radixSort(int[] arr) { + int[] sorted = new int[arr.length]; + radixSort(arr, sorted); + arr = sorted; // swap back lol + } } diff --git a/src/test/java/algorithms/sorting/radixSort/RadixSortTest.java b/src/test/java/algorithms/sorting/radixSort/RadixSortTest.java index c80bfbc9..c64466ad 100644 --- a/src/test/java/algorithms/sorting/radixSort/RadixSortTest.java +++ b/src/test/java/algorithms/sorting/radixSort/RadixSortTest.java @@ -1,21 +1,21 @@ package algorithms.sorting.radixSort; import static org.junit.Assert.assertArrayEquals; -import org.junit.Test; + import java.util.Arrays; +import org.junit.Test; + public class RadixSortTest { @Test public void test_radixSort_shouldReturnSortedArray() { - int[] firstArray = - new int[] {2, 3, 4, 1, 2, 5, 6, 7, 10, 15, 20, 13, 15, 1, 2, + int[] firstArray = new int[] {2, 3, 4, 1, 2, 5, 6, 7, 10, 15, 20, 13, 15, 1, 2, 15, 12, 20, 21, 120, 11, 5, 7, 85, 30}; int[] firstResult = Arrays.copyOf(firstArray, firstArray.length); RadixSort.radixSort(firstResult); - int[] secondArray - = new int[] {9, 1, 2, 8, 7, 3, 4, 6, 5, 5, 9, 8, 7, 6, 5, 4, - 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - int[] secondResult =Arrays.copyOf(secondArray, secondArray.length); + int[] secondArray = new int[] {9, 1, 2, 8, 7, 3, 4, 6, 5, 5, 9, 8, 7, 6, 5, 4, + 3, 2, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + int[] secondResult = Arrays.copyOf(secondArray, secondArray.length); RadixSort.radixSort(secondResult); int[] thirdArray = new int[] {}; From 86825a61e775700caea68baad3af7869b37719b0 Mon Sep 17 00:00:00 2001 From: yeoshuheng Date: Thu, 28 Sep 2023 23:23:09 +0800 Subject: [PATCH 5/9] docs: updated readme to talk about number systems --- .../algorithms/sorting/radixSort/README.md | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/main/java/algorithms/sorting/radixSort/README.md b/src/main/java/algorithms/sorting/radixSort/README.md 
index 719f4de0..87829bac 100644 --- a/src/main/java/algorithms/sorting/radixSort/README.md +++ b/src/main/java/algorithms/sorting/radixSort/README.md @@ -17,24 +17,29 @@ This takes advantage of the concept of place value. ## Complexity Analysis **Time**: -Note that we will always need to iterate through 10 different queues to rebuild our original array, -and we iterate through all *w* positions, this results in: +Let *k* be the base of the number system being used. For integers in java, it is base 10 +(we have digits 0 to 9), in the case of base 2 (binary), we only have 2 digits (1, 0), +base 3, (0, 1, 2). Hence, we can see that the base of the number system determines the number of +different queues we need to iterate through to rebuild our original array, additionally, we also +need to iterate through all *w* positions (digits), this results in the following complexities: -- Worst case: O(w * (n + 10)) -- Average case: O(w * (n + 10)) -- Best case (sorted array): O(w * (n + 10)) +- Worst case: O(w * (n + k)) +- Average case: O(w * (n + k)) +- Best case (sorted array): O(w * (n + k)) -**Space**: O(n + 10) +**Space**: O(n + k) ## Notes - Radix sort's time complexity is dependent on the maximum number of digits in each element, hence it is ideal to use it on integers with a large range and with little digits. - This could mean that Radix Sort might end up performing worst on small sets of data if any one given element has a in-proportionate amount of digits. -- Counting sort is used as a sub-routine within the Radix Sort process. +- It is interesting to note that counting sort is used as a sub-routine within the +Radix Sort process. ### Common Misconception - While not immediately obvious, we can see that radix sort is a stable sorting algorithm as - they are enqueued in a manner where the first observed element will be at the head of the queue. + they are enqueued in a manner where the first observed element remains at the head of the queue. 
- While it is non-comparison based, not that total ordering of elements is still required - - except now this property is forced upon the algorithm in the manner of the queues. \ No newline at end of file + except now this property is forced upon the algorithm in the manner of how the queues +are structured. \ No newline at end of file From b6add13580d9c2e60fa8cefe63976b518e824ee9 Mon Sep 17 00:00:00 2001 From: yeoshuheng Date: Sat, 21 Oct 2023 20:03:15 +0800 Subject: [PATCH 6/9] docs: added comments on O(N) implementation trick --- src/main/java/algorithms/sorting/radixSort/RadixSort.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/algorithms/sorting/radixSort/RadixSort.java b/src/main/java/algorithms/sorting/radixSort/RadixSort.java index c5027d31..1260ab76 100644 --- a/src/main/java/algorithms/sorting/radixSort/RadixSort.java +++ b/src/main/java/algorithms/sorting/radixSort/RadixSort.java @@ -47,8 +47,11 @@ private static void radixSort(int[] arr, int[] sorted) { sorted[freqMap[id] - 1] = curr; freqMap[id]--; } - // we do a swap so that our results above for this segment is - // saved and passed as input to the next segment + // We do a swap so that our results above for this segment is + // saved and passed as input to the next segment. + // By doing this we no longer need to create a new array + // every time we shift to a new segment to sort. + // We can constantly reuse the array, allowing us to only use O(n) space. 
int[] tmp = arr; arr = sorted; sorted = tmp; From 3b3aacd637720d346756b00c0502ce1ec49e6663 Mon Sep 17 00:00:00 2001 From: yeoshuheng Date: Sat, 21 Oct 2023 20:22:59 +0800 Subject: [PATCH 7/9] docs: added comments on bit masking --- src/main/java/algorithms/sorting/radixSort/RadixSort.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/algorithms/sorting/radixSort/RadixSort.java b/src/main/java/algorithms/sorting/radixSort/RadixSort.java index 1260ab76..100cb73d 100644 --- a/src/main/java/algorithms/sorting/radixSort/RadixSort.java +++ b/src/main/java/algorithms/sorting/radixSort/RadixSort.java @@ -17,6 +17,7 @@ public class RadixSort { * @return The value of the digit in the number at the given segment. */ private static int getSegmentMasked(int num, int segment) { + // Bit masking here to extract each segment from the integer. int mask = ((1 << NUM_BITS) - 1) << (segment * NUM_BITS); return (num & mask) >> (segment * NUM_BITS); } From c2b1bb9d23fbea305b77fc9a30995610528f7a78 Mon Sep 17 00:00:00 2001 From: yeoshuheng Date: Sat, 21 Oct 2023 20:31:51 +0800 Subject: [PATCH 8/9] docs: updated readme --- .../algorithms/sorting/radixSort/README.md | 48 ++++++++++++------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/src/main/java/algorithms/sorting/radixSort/README.md b/src/main/java/algorithms/sorting/radixSort/README.md index 87829bac..c342d78d 100644 --- a/src/main/java/algorithms/sorting/radixSort/README.md +++ b/src/main/java/algorithms/sorting/radixSort/README.md @@ -1,27 +1,46 @@ # Radix Sort Radix Sort is a non-comparison based, stable sorting algorithm. -We first treat each element as a string with *w* digits (Padding elements that have -less than *w* digits). -From the least-significant digit to the most-significant digit, we constantly -split them into ten queues corresponding to the number range *[0, 9]*. We then move -through the queue and concatenate the elements back into a list at the next iteration. 
+Radix Sort continuously sorts based on the least-significant segment of a element +to the most-significant value of a element. -This takes advantage of the concept of place value. -(The value of a digit in a number relative to its position within the number) +The definition of a 'segment' is user defined and defers from implementation to implementation. +Within our implementation, we define each segment as a bit chunk. + +For example, if we aim to sort integers, we can sort each element +from the least to most significant digit, with the digits being our 'segments'. + +While Radix Sort is non-comparison based, +the that total ordering of elements is still required. +This is maintained in how the states are stored after sorting is conducted with respect to each digit. + +This total ordering is needed because once we assigned a element to a order based on a segment, +the order *cannot* change unless deemed by a segment with a higher significance. +Hence a stable sort is required to maintain the order as +the sorting is done with respect to each of the segments. + +Within our implementation, we take the binary representation of the elements and +partition it into 8-bit segments, a integer is represented in 32 bits, +this gives us 4 total segments to sort through. + +Note that the binary representation is weighted positional, +where each bit's value is dependent on its overall position +within the representation (the n-th bit from the right represents *2^n*), +hence we can actually increase / decrease the number segments we wish to conduct a split from. ![Radix Sort](https://miro.medium.com/v2/resize:fit:661/1*xFnpQ4UNK0TvyxiL8r1svg.png) +We place each element into a queue based on the number of possible segments that could be generated. +Suppose the values of our segments are in base-10, (limited to a value within range *[0, 9]*), +we get 10 queues. + *Source: Level Up Coding* ## Complexity Analysis **Time**: -Let *k* be the base of the number system being used. 
For integers in java, it is base 10 -(we have digits 0 to 9), in the case of base 2 (binary), we only have 2 digits (1, 0), -base 3, (0, 1, 2). Hence, we can see that the base of the number system determines the number of -different queues we need to iterate through to rebuild our original array, additionally, we also -need to iterate through all *w* positions (digits), this results in the following complexities: +Let *w* be the number of segments we sort through, *n* be the number of elements +and *k* be the number of queues. - Worst case: O(w * (n + k)) - Average case: O(w * (n + k)) @@ -39,7 +58,4 @@ Radix Sort process. ### Common Misconception - While not immediately obvious, we can see that radix sort is a stable sorting algorithm as - they are enqueued in a manner where the first observed element remains at the head of the queue. -- While it is non-comparison based, not that total ordering of elements is still required - - except now this property is forced upon the algorithm in the manner of how the queues -are structured. \ No newline at end of file + they are enqueued in a manner where the first observed element remains at the head of the queue. \ No newline at end of file From 587a40c6179a390ab35844f066f1d06771cb5570 Mon Sep 17 00:00:00 2001 From: yeoshuheng Date: Thu, 11 Jan 2024 16:09:40 +0800 Subject: [PATCH 9/9] docs: updated read me --- .../algorithms/sorting/radixSort/README.md | 50 ++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/src/main/java/algorithms/sorting/radixSort/README.md b/src/main/java/algorithms/sorting/radixSort/README.md index c342d78d..d0cc3b17 100644 --- a/src/main/java/algorithms/sorting/radixSort/README.md +++ b/src/main/java/algorithms/sorting/radixSort/README.md @@ -1,6 +1,8 @@ # Radix Sort -Radix Sort is a non-comparison based, stable sorting algorithm. +## Background + +Radix Sort is a non-comparison based, stable sorting algorithm with a counting sort subroutine. 
Radix Sort continuously sorts based on the least-significant segment of a element to the most-significant value of a element. @@ -11,15 +13,6 @@ Within our implementation, we define each segment as a bit chunk. For example, if we aim to sort integers, we can sort each element from the least to most significant digit, with the digits being our 'segments'. -While Radix Sort is non-comparison based, -the that total ordering of elements is still required. -This is maintained in how the states are stored after sorting is conducted with respect to each digit. - -This total ordering is needed because once we assigned a element to a order based on a segment, -the order *cannot* change unless deemed by a segment with a higher significance. -Hence a stable sort is required to maintain the order as -the sorting is done with respect to each of the segments. - Within our implementation, we take the binary representation of the elements and partition it into 8-bit segments, a integer is represented in 32 bits, this gives us 4 total segments to sort through. @@ -33,29 +26,40 @@ hence we can actually increase / decrease the number segments we wish to conduct We place each element into a queue based on the number of possible segments that could be generated. Suppose the values of our segments are in base-10, (limited to a value within range *[0, 9]*), -we get 10 queues. +we get 10 queues. We can also see that radix sort is stable, since elements +are enqueued in a manner where the first observed element remains at the head of the queue. *Source: Level Up Coding* +### Implementation Invariant +At the start of the *i-th* segment we are sorting on, the array has already been sorted on the +previous *(i - 1)* segments. + +### Common Misconceptions + +While Radix Sort is non-comparison based, +a total ordering of elements is still required.
+This total ordering is needed because once we have assigned an element to an order based on a segment, +the order *cannot* change unless dictated by a segment with a higher significance. +Hence, a stable sort is required to maintain the order as +the sorting is done with respect to each of the segments. + ## Complexity Analysis **Time**: -Let *w* be the number of segments we sort through, *n* be the number of elements -and *k* be the number of queues. +Let *b* be the length in bits of a single element we are sorting and *r* be the number of bits +in each segment we break an element into. +(Essentially, *b/r* represents the number of segments we +sort on and hence the number of passes we do during our sort). -- Worst case: O(w * (n + k)) -- Average case: O(w * (n + k)) -- Best case (sorted array): O(w * (n + k)) +Note that we derive *(2^r + n)* from the counting sort subroutine, +where *2^r* is the range of values a segment of *r* bits can take. -**Space**: O(n + k) +We get a general time complexity of *O((b/r) * (2^r + n))* + +**Space**: *O(n + 2^r)* ## Notes - Radix sort's time complexity is dependent on the maximum number of digits in each element, hence it is ideal to use it on integers with a large range and with little digits. - This could mean that Radix Sort might end up performing worst on small sets of data if any one given element has a in-proportionate amount of digits. -- It is interesting to note that counting sort is used as a sub-routine within the -Radix Sort process. - -### Common Misconception -- While not immediately obvious, we can see that radix sort is a stable sorting algorithm as - they are enqueued in a manner where the first observed element remains at the head of the queue. \ No newline at end of file