-
Notifications
You must be signed in to change notification settings - Fork 5
/
test.rs
150 lines (143 loc) · 4.14 KB
/
test.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
use super::*;
use crate::util::generate_keys;
/// Construct the MPHF and test all keys are mapped to unique indices.
#[test]
fn construct() {
    // Exercise construction across five orders of magnitude of key counts.
    let sizes = [
        2,
        10,
        100,
        1000,
        10_000,
        100_000,
        1_000_000,
        10_000_000,
        100_000_000,
    ];
    for n in sizes {
        let keys = generate_keys(n);
        let ptr_hash = FastPtrHash::new(&keys, Default::default());
        // A minimal perfect hash must map the n keys onto exactly the
        // slots 0..n with no collisions; track occupancy in a bitset.
        let mut seen = bitvec![0; n];
        for key in keys {
            let slot = ptr_hash.index_minimal(&key);
            assert!(!seen[slot]);
            seen.set(slot, true);
        }
    }
}
/// Check the streaming query interface: the minimal indices of all n keys
/// form a permutation of 0..n, so their sum is the triangular number.
#[test]
fn index_stream() {
    for n in [2, 10, 100, 1000, 10_000, 100_000, 1_000_000] {
        let keys = generate_keys(n);
        let ptr_hash = FastPtrHash::new(&keys, Default::default());
        let expected = n * (n - 1) / 2;
        let total: usize = ptr_hash.index_stream::<32, true>(&keys).sum();
        assert_eq!(total, expected);
    }
}
/// Construct the MPHF from a parallel key iterator and verify all keys are
/// mapped to unique minimal indices.
///
/// Previously the constructed hash was discarded, so the test only verified
/// that construction does not panic; we now also check the MPHF property,
/// consistent with `construct` above.
#[test]
fn new_par_iter() {
    let n = 10_000_000;
    let keys = generate_keys(n);
    let ptr_hash = FastPtrHash::new_from_par_iter(n, keys.par_iter(), Default::default());
    // Every key must map to a distinct slot in [0, n).
    let mut done = bitvec![0; n];
    for key in keys {
        let idx = ptr_hash.index_minimal(&key);
        assert!(!done[idx]);
        done.set(idx, true);
    }
}
/// Build an MPHF over 2^29 integer keys using in-memory sharding
/// (2^27 keys per shard), then verify all keys map to unique indices.
#[test]
fn in_memory_sharding() {
    let n = 1 << 29;
    let range = 0..n as u64;
    // Pass the parallel iterator directly: the previous code bound it to a
    // local and then cloned that binding again, cloning the range twice
    // (clippy: redundant_clone). One clone — to keep `range` for the
    // verification loop below — is enough.
    let ptr_hash = FastPtrHash::new_from_par_iter(
        n,
        range.clone().into_par_iter(),
        PtrHashParams {
            keys_per_shard: 1 << 27,
            shard_to_disk: false,
            ..Default::default()
        },
    );
    eprintln!("Checking duplicates...");
    // Every key must map to a distinct slot in [0, n).
    let mut done = bitvec![0; n];
    for key in range {
        let idx = ptr_hash.index_minimal(&key);
        assert!(!done[idx]);
        done.set(idx, true);
    }
}
/// Build an MPHF over 2^29 integer keys with shards spilled to disk
/// (2^27 keys per shard), then verify all keys map to unique indices.
#[test]
fn on_disk_sharding() {
    let n = 1 << 29;
    let range = 0..n as u64;
    // Pass the parallel iterator directly: the previous code bound it to a
    // local and then cloned that binding again, cloning the range twice
    // (clippy: redundant_clone). One clone — to keep `range` for the
    // verification loop below — is enough.
    let ptr_hash = FastPtrHash::new_from_par_iter(
        n,
        range.clone().into_par_iter(),
        PtrHashParams {
            keys_per_shard: 1 << 27,
            shard_to_disk: true,
            ..Default::default()
        },
    );
    eprintln!("Checking duplicates...");
    // Every key must map to a distinct slot in [0, n).
    let mut done = bitvec![0; n];
    for key in range {
        let idx = ptr_hash.index_minimal(&key);
        assert!(!done[idx]);
        done.set(idx, true);
    }
}
/// Test that sharded construction and queries work with more than 2^32 keys.
#[test]
#[ignore = "very slow"]
fn many_keys_memory() {
    let n = 1 << 33;
    let n_query = 1 << 27;
    // The query loop below uses `0..n_query`, not the full key range, so
    // the range can be consumed directly; the previous intermediate
    // binding and its extra `.clone()` (clippy: redundant_clone) are gone.
    let ptr_hash = FastPtrHash::new_from_par_iter(
        n,
        (0..n as u64).into_par_iter(),
        PtrHashParams {
            keys_per_shard: 1 << 30,
            shard_to_disk: false,
            ..Default::default()
        },
    );
    // Since running all queries is super slow, we only check a subset of them.
    // Although this doesn't completely check that there are no duplicate
    // mappings, by the birthday paradox we can be quite sure there are none
    // since we check way more than sqrt(n) of them.
    eprintln!("Checking duplicates...");
    let mut done = bitvec![0; n];
    for key in 0..n_query {
        let idx = ptr_hash.index_minimal(&key);
        assert!(!done[idx]);
        done.set(idx, true);
    }
}
/// Test that sharded construction and queries work with more than 2^32 keys.
#[test]
#[ignore = "very slow; writes 64GB to disk"]
fn many_keys_disk() {
    let n = 1 << 33;
    let n_query = 1 << 27;
    // The query loop below uses `0..n_query`, not the full key range, so
    // the range can be consumed directly; the previous intermediate
    // binding and its extra `.clone()` (clippy: redundant_clone) are gone.
    let ptr_hash = FastPtrHash::new_from_par_iter(
        n,
        (0..n as u64).into_par_iter(),
        PtrHashParams {
            keys_per_shard: 1 << 30,
            shard_to_disk: true,
            ..Default::default()
        },
    );
    // Since running all queries is super slow, we only check a subset of them.
    // Although this doesn't completely check that there are no duplicate
    // mappings, by the birthday paradox we can be quite sure there are none
    // since we check way more than sqrt(n) of them.
    eprintln!("Checking duplicates...");
    let mut done = bitvec![0; n];
    for key in 0..n_query {
        let idx = ptr_hash.index_minimal(&key);
        assert!(!done[idx]);
        done.set(idx, true);
    }
}