Skip to content

Commit 32e01f7

Browse files
committed
str: impl Three-way quicksort
1 parent 92f9dbf commit 32e01f7

File tree

13 files changed

+295
-128
lines changed

13 files changed

+295
-128
lines changed

Makefile

+3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ bench:
1919
eg_quadratic:
2020
@$(CARGO) run --example quadratic
2121

22+
doc:
23+
@$(CARGO) doc --open
24+
2225
stats:
2326
@echo "codes: "
2427
@cloc . --exclude-dir=target

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ never worry about system crash.
8888

8989
- Character indexed arrays (Alphabet, count.rs)
9090
- Radix sort (LSD, MSD)
91+
- Three-way quicksort (Quick3String, Quick3Way)
9192

9293
### Running
9394

benches/sort.rs

+10
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ extern crate test;
33

44
use algo::sort;
55
use algo::sort::util;
6+
use algo::strings::Quick3Way;
67
use test::Bencher;
78

89
#[bench]
@@ -154,6 +155,15 @@ fn large_quick(b: &mut Bencher) {
154155
});
155156
}
156157

158+
#[bench]
159+
fn large_quick_3way(b: &mut Bencher) {
160+
let data = util::random_data(util::DATA_LEN);
161+
b.iter(|| {
162+
let mut numbs = data.clone();
163+
Quick3Way::sort(&mut numbs);
164+
});
165+
}
166+
157167
#[bench]
158168
fn large_sorted_asc_quick(b: &mut Bencher) {
159169
let data = util::sorted_data_asc(util::DATA_LEN);

benches/strings.rs

+16-5
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
#![feature(test)]
22
extern crate test;
33

4-
use algo::strings::{util, LSD, MSD};
4+
use algo::strings::{util, Quick3String, LSD, MSD};
55
use test::Bencher;
66

77
const WORDS3: &'static str = include_str!("../res/strings/words3.txt");
8+
const SHELLS: &'static str = include_str!("../res/strings/shells.txt");
89

910
#[allow(non_snake_case)]
1011
#[bench]
@@ -36,10 +37,19 @@ fn sort_str_LSD_radix(b: &mut Bencher) {
3637
});
3738
}
3839

40+
#[allow(non_snake_case)]
41+
#[bench]
42+
fn sort_i32_LSD_radix(b: &mut Bencher) {
43+
let mut nums: Vec<i32> = (0..1000).rev().collect();
44+
b.iter(|| {
45+
LSD::sort_i32(&mut nums);
46+
});
47+
}
48+
3949
#[allow(non_snake_case)]
4050
#[bench]
4151
fn sort_str_MSD_radix(b: &mut Bencher) {
42-
let i = WORDS3;
52+
let i = SHELLS;
4353
let mut words = extract_words(i);
4454
b.iter(|| {
4555
MSD::sort(&mut words);
@@ -48,10 +58,11 @@ fn sort_str_MSD_radix(b: &mut Bencher) {
4858

4959
#[allow(non_snake_case)]
5060
#[bench]
51-
fn sort_i32_LSD_radix(b: &mut Bencher) {
52-
let mut nums: Vec<i32> = (0..1000).rev().collect();
61+
fn sort_str_quick3strings(b: &mut Bencher) {
62+
let i = SHELLS;
63+
let mut words = extract_words(i);
5364
b.iter(|| {
54-
LSD::sort_i32(&mut nums);
65+
Quick3String::sort(&mut words);
5566
});
5667
}
5768

note.txt

+2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ todo
1616

1717
4. 画图展示Dijkstra’s algorithm的动态构建过程 (p657)
1818

19+
5. Fix mod/struct/fn doc-comment errors so that `cargo doc` renders correctly, and add more docs
20+
1921
### 后记
2022

2123
项目缘起

src/graph/mod.rs

+11-11
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
//!
22
//! ## 图的典型应用
33
//!
4-
//! | 应用 | 节点 | 连接
5-
//! |-----------|--------------|--------------
6-
//! | 地图 | 十字路口 | 公路
7-
//! | 网络内容 | 网页 | 超链接
8-
//! | 电路 | 元器件 | 导线
9-
//! | 任务调度 | 任务 | 限制条件
10-
//! | 商业交易 | 客户 | 交易
11-
//! | 配对 | 学生 | 申请
12-
//! | 计算机网络 | 网站 | 物理连接
13-
//! | 软件 | 方法 | 调用关系
14-
//! | 社交网络 | 人 | 友谊关系
4+
//! | 应用 | 节点 | 连接 |
5+
//! |-----------|--------------|-----------------|
6+
//! | 地图 | 十字路口 | 公路 |
7+
//! | 网络内容 | 网页 | 超链接 |
8+
//! | 电路 | 元器件 | 导线 |
9+
//! | 任务调度 | 任务 | 限制条件 |
10+
//! | 商业交易 | 客户 | 交易 |
11+
//! | 配对 | 学生 | 申请 |
12+
//! | 计算机网络 | 网站 | 物理连接 |
13+
//! | 软件 | 方法 | 调用关系 |
14+
//! | 社交网络 | 人 | 友谊关系 |
1515
1616
#[macro_use]
1717
pub mod util;

src/sort/insert.rs

+35
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
//!
33
//! 基本思想是将一个value插入到有序表中
44
5+
use std::cmp::Ordering;
6+
57
pub fn sort<T>(a: &mut [T])
68
where
79
T: Ord,
@@ -17,3 +19,36 @@ where
1719
}
1820
}
1921
}
22+
23+
/// Insertion-sorts a[lo..=hi], comparing keys starting at the d-th character.
24+
pub fn sort_dth<T>(a: &mut [T], lo: usize, hi: usize, d: usize)
25+
where
26+
T: AsRef<str> + Copy,
27+
{
28+
for i in lo..=hi {
29+
let mut j = i;
30+
while j > lo && is_less(a[j].as_ref(), a[j - 1].as_ref(), d) {
31+
a.swap(j, j - 1);
32+
j -= 1;
33+
}
34+
}
35+
}
36+
37+
/// is v less than w, starting at character d
38+
fn is_less(v: &str, w: &str, d: usize) -> bool {
39+
for (a, b) in v.bytes().zip(w.bytes()).skip(d) {
40+
match a.cmp(&b) {
41+
Ordering::Less => return true,
42+
Ordering::Equal => (),
43+
Ordering::Greater => return false,
44+
}
45+
}
46+
v.as_bytes().len() < w.as_bytes().len()
47+
}
48+
49+
#[test]
50+
fn t_less() {
51+
assert!(is_less("aaa", "aaaa", 0)); // len less
52+
assert!(is_less("aaa", "aaaa", 1)); // len less
53+
assert!(is_less("aaa", "abaa", 1)); // 'a' < 'b'
54+
}

src/strings/mod.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
pub mod alphabet;
22
mod count;
3-
mod lsd;
4-
mod msd;
5-
mod quick3;
3+
pub mod lsd;
4+
pub mod msd;
5+
pub mod quick3;
66
pub mod util;
77

88
pub use alphabet::Alphabet;
99
pub use count::Count;
1010
pub use lsd::LSD;
1111
pub use msd::MSD;
12+
pub use quick3::{Quick3String, Quick3Way};

src/strings/msd.rs

+29-52
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
1-
//! Performance
1+
//! Sort an array of strings using MSD radix sort.
22
//!
3-
//! The running time of MSD string sort depends on the data.
3+
//! # Performance
44
//!
5-
//! For random inputs, MSD string sort examines just enough
5+
//! - The running time of MSD string sort depends on the data;
6+
//! key length does not play a role. Indeed, the random string
7+
//! model allows key length to approach infinity.
8+
//! - For random inputs, MSD string sort examines just enough
69
//! characters to distinguish among the keys, and the running
710
//! time is sublinear in the number of characters in the data
811
//! (it examines a small fraction of the input characters).
@@ -50,14 +53,31 @@
5053
//! will not be many duplicates and the random model will apply;
5154
//! for a week’s worth of data on a local road, there will be numerous
5255
//! duplicates and performance will be closer to the worst case.
56+
//!
57+
//! The main challenge in getting maximum efficiency from MSD string
58+
//! sort on keys that are long strings is to deal with lack of randomness
59+
//! in the data. Typically, keys may have long stretches of equal data,
60+
//! or parts of them might fall in only a narrow range. For example,
61+
//! an information-processing application for student data might have
62+
//! keys that include graduation year (4 bytes, but one of four different
63+
//! values), state names (perhaps 10 bytes, but one of 50 different values),
64+
//! and gender (1 byte with one of two given values), as well as a person’s
65+
//! name (more similar to random strings, but probably not short, with
66+
//! nonuniform letter distributions, and with trailing blanks in a fixed-length
67+
//! field). Restrictions like these lead to large numbers of empty subarrays
68+
//! during the MSD string sort.
5369
5470
#![allow(clippy::many_single_char_names)]
55-
use std::cmp::Ordering;
71+
use crate::sort;
72+
use crate::strings::util;
5673
use std::marker::PhantomData;
5774

5875
const R: usize = 256; // extended ASCII alphabet size
5976
const CUTOFF: usize = 15; // cutoff to insertion sort
6077

78+
/// The MSD provides static methods for sorting an
79+
/// array of extended ASCII strings using MSD radix
80+
/// sort.
6181
pub struct MSD<T> {
6282
_marker: PhantomData<T>,
6383
}
@@ -66,6 +86,7 @@ impl<T> MSD<T>
6686
where
6787
T: AsRef<str> + Copy,
6888
{
89+
/// Rearranges the array of extended ASCII strings in ascending order.
6990
pub fn sort(a: &mut [T]) {
7091
let n = a.len();
7192
if n > 0 {
@@ -74,17 +95,18 @@ where
7495
}
7596
}
7697

98+
/// Sorts a[lo..=hi], starting at the d-th character.
7799
fn do_sort(a: &mut [T], lo: usize, hi: usize, d: usize, aux: &mut [T]) {
78100
// cutoff to insertion sort for small subarrays
79101
if hi <= lo + CUTOFF {
80-
Self::insertion(a, lo, hi, d);
102+
sort::insert::sort_dth(a, lo, hi, d);
81103
return;
82104
}
83105

84106
// compute frequency counts
85107
let mut count = [0; R + 2];
86108
for it in a.iter().take(hi + 1).skip(lo) {
87-
let c = char_at(it.as_ref(), d);
109+
let c = util::char_at(it.as_ref(), d);
88110
count[(c + 2) as usize] += 1;
89111
}
90112

@@ -95,7 +117,7 @@ where
95117

96118
// distribute
97119
for it in a.iter().take(hi + 1).skip(lo) {
98-
let c = char_at(it.as_ref(), d);
120+
let c = util::char_at(it.as_ref(), d);
99121
aux[count[(c + 1) as usize]] = *it;
100122
count[(c + 1) as usize] += 1;
101123
}
@@ -114,49 +136,4 @@ where
114136
}
115137
}
116138
}
117-
118-
fn insertion(a: &mut [T], lo: usize, hi: usize, d: usize) {
119-
for i in lo..=hi {
120-
let mut j = i;
121-
while j > lo && less(a[j].as_ref(), a[j - 1].as_ref(), d) {
122-
a.swap(j, j - 1);
123-
j -= 1;
124-
}
125-
}
126-
}
127-
}
128-
129-
fn less(v: &str, w: &str, d: usize) -> bool {
130-
for (a, b) in v.bytes().zip(w.bytes()).skip(d) {
131-
match a.cmp(&b) {
132-
Ordering::Less => return true,
133-
Ordering::Equal => (),
134-
Ordering::Greater => return false,
135-
}
136-
}
137-
v.as_bytes().len() < w.as_bytes().len()
138-
}
139-
140-
fn char_at(s: &str, d: usize) -> i32 {
141-
let len = s.as_bytes().len();
142-
if d >= len {
143-
-1
144-
} else {
145-
s.as_bytes()[d] as i32
146-
}
147-
}
148-
149-
#[test]
150-
fn t_less() {
151-
assert!(less("aaa", "aaaa", 0)); // len less
152-
assert!(less("aaa", "aaaa", 1)); // len less
153-
assert!(less("aaa", "abaa", 1)); // 'a' < 'b'
154-
}
155-
156-
#[test]
157-
fn t_char_at() {
158-
assert_eq!(b'a' as i32, char_at("abc", 0));
159-
assert_eq!(b'b' as i32, char_at("abc", 1));
160-
assert_eq!(b'c' as i32, char_at("abc", 2));
161-
assert_eq!(-1, char_at("abc", 3));
162139
}

0 commit comments

Comments
 (0)