Skip to content

Commit f86a275

Browse files
LukeMathWalkerjturner314
authored andcommitted
Add extension traits for percentile and sorting
1 parent 8f823dc commit f86a275

File tree

6 files changed

+443
-0
lines changed

6 files changed

+443
-0
lines changed

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ authors = ["Jim Turner <rust@turner.link>"]
66
[dependencies]
77
ndarray = "0.12"
88
noisy_float = "0.1"
9+
num-traits = "0.2"
10+
rand = "0.5"
911

1012
[dev-dependencies]
1113
quickcheck = "0.7"

src/lib.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
11
#[macro_use(s)]
22
extern crate ndarray;
33
extern crate noisy_float;
4+
extern crate num_traits;
5+
extern crate rand;
46

57
#[cfg(test)]
68
#[macro_use(quickcheck)]
79
extern crate quickcheck;
810

911
pub use maybe_nan::MaybeNan;
1012
pub use min_max::MinMaxExt;
13+
pub use percentile::{interpolate, PercentileExt};
14+
pub use sort::Sort1dExt;
1115

1216
mod maybe_nan;
1317
mod min_max;
18+
mod percentile;
19+
mod sort;

src/percentile.rs

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
use interpolate::Interpolate;
2+
use ndarray::prelude::*;
3+
use ndarray::{Data, DataMut, RemoveAxis};
4+
use sort::Sort1dExt;
5+
6+
pub mod interpolate {
7+
use ndarray::prelude::*;
8+
use num_traits::FromPrimitive;
9+
use std::ops::{Add, Div, Mul, Sub};
10+
11+
/// Used to provide an interpolation strategy to [`percentile_axis_mut`].
12+
///
13+
/// [`percentile_axis_mut`]: ../trait.PercentileExt.html#tymethod.percentile_axis_mut
14+
pub trait Interpolate<T> {
15+
fn float_percentile_index(q: f64, len: usize) -> f64 {
16+
((len - 1) as f64) * q
17+
}
18+
fn lower_index(q: f64, len: usize) -> usize {
19+
Self::float_percentile_index(q, len).floor() as usize
20+
}
21+
fn upper_index(q: f64, len: usize) -> usize {
22+
Self::float_percentile_index(q, len).ceil() as usize
23+
}
24+
fn float_percentile_index_fraction(q: f64, len: usize) -> f64 {
25+
Self::float_percentile_index(q, len) - (Self::lower_index(q, len) as f64)
26+
}
27+
fn needs_lower(q: f64, len: usize) -> bool;
28+
fn needs_upper(q: f64, len: usize) -> bool;
29+
fn interpolate<D>(
30+
lower: Option<Array<T, D>>,
31+
upper: Option<Array<T, D>>,
32+
q: f64,
33+
len: usize,
34+
) -> Array<T, D>
35+
where
36+
D: Dimension;
37+
}
38+
39+
pub struct Upper;
40+
pub struct Lower;
41+
pub struct Nearest;
42+
pub struct Midpoint;
43+
pub struct Linear;
44+
45+
impl<T> Interpolate<T> for Upper {
46+
fn needs_lower(_q: f64, _len: usize) -> bool {
47+
false
48+
}
49+
fn needs_upper(_q: f64, _len: usize) -> bool {
50+
true
51+
}
52+
fn interpolate<D>(
53+
_lower: Option<Array<T, D>>,
54+
upper: Option<Array<T, D>>,
55+
_q: f64,
56+
_len: usize,
57+
) -> Array<T, D> {
58+
upper.unwrap()
59+
}
60+
}
61+
62+
impl<T> Interpolate<T> for Lower {
63+
fn needs_lower(_q: f64, _len: usize) -> bool {
64+
true
65+
}
66+
fn needs_upper(_q: f64, _len: usize) -> bool {
67+
false
68+
}
69+
fn interpolate<D>(
70+
lower: Option<Array<T, D>>,
71+
_upper: Option<Array<T, D>>,
72+
_q: f64,
73+
_len: usize,
74+
) -> Array<T, D> {
75+
lower.unwrap()
76+
}
77+
}
78+
79+
impl<T> Interpolate<T> for Nearest {
80+
fn needs_lower(q: f64, len: usize) -> bool {
81+
let lower = <Self as Interpolate<T>>::lower_index(q, len);
82+
((lower as f64) - <Self as Interpolate<T>>::float_percentile_index(q, len)) <= 0.
83+
}
84+
fn needs_upper(q: f64, len: usize) -> bool {
85+
!<Self as Interpolate<T>>::needs_lower(q, len)
86+
}
87+
fn interpolate<D>(
88+
lower: Option<Array<T, D>>,
89+
upper: Option<Array<T, D>>,
90+
q: f64,
91+
len: usize,
92+
) -> Array<T, D> {
93+
if <Self as Interpolate<T>>::needs_lower(q, len) {
94+
lower.unwrap()
95+
} else {
96+
upper.unwrap()
97+
}
98+
}
99+
}
100+
101+
impl<T> Interpolate<T> for Midpoint
102+
where
103+
T: Add<T, Output = T> + Div<T, Output = T> + Clone + FromPrimitive,
104+
{
105+
fn needs_lower(_q: f64, _len: usize) -> bool {
106+
true
107+
}
108+
fn needs_upper(_q: f64, _len: usize) -> bool {
109+
true
110+
}
111+
fn interpolate<D>(
112+
lower: Option<Array<T, D>>,
113+
upper: Option<Array<T, D>>,
114+
_q: f64,
115+
_len: usize,
116+
) -> Array<T, D>
117+
where
118+
D: Dimension,
119+
{
120+
let denom = T::from_u8(2).unwrap();
121+
(lower.unwrap() + upper.unwrap()).mapv_into(|x| x / denom.clone())
122+
}
123+
}
124+
125+
impl<T> Interpolate<T> for Linear
126+
where
127+
T: Add<T, Output = T> + Sub<T, Output = T> + Mul<T, Output = T> + Clone + FromPrimitive,
128+
{
129+
fn needs_lower(_q: f64, _len: usize) -> bool {
130+
true
131+
}
132+
fn needs_upper(_q: f64, _len: usize) -> bool {
133+
true
134+
}
135+
fn interpolate<D>(
136+
lower: Option<Array<T, D>>,
137+
upper: Option<Array<T, D>>,
138+
q: f64,
139+
len: usize,
140+
) -> Array<T, D>
141+
where
142+
D: Dimension,
143+
{
144+
let fraction = T::from_f64(<Self as Interpolate<T>>::float_percentile_index_fraction(
145+
q, len,
146+
)).unwrap();
147+
let a = lower.unwrap().mapv_into(|x| x * fraction.clone());
148+
let b = upper
149+
.unwrap()
150+
.mapv_into(|x| x * (T::from_u8(1).unwrap() - fraction.clone()));
151+
a + b
152+
}
153+
}
154+
}
155+
156+
pub trait PercentileExt<A, S, D>
157+
where
158+
S: Data<Elem = A>,
159+
D: Dimension,
160+
{
161+
/// Return the qth percentile of the data along the specified axis.
162+
///
163+
/// `q` needs to be a float between 0 and 1, bounds included.
164+
/// The qth percentile for a 1-dimensional lane of length `N` is defined
165+
/// as the element that would be indexed as `(N-1)q` if the lane were to be sorted
166+
/// in increasing order.
167+
/// If `(N-1)q` is not an integer the desired percentile lies between
168+
/// two data points: we return the lower, nearest, higher or interpolated
169+
/// value depending on the type `Interpolate` bound `I`.
170+
///
171+
/// Some examples:
172+
/// - `q=0.` returns the minimum along each 1-dimensional lane;
173+
/// - `q=0.5` returns the median along each 1-dimensional lane;
174+
/// - `q=1.` returns the maximum along each 1-dimensional lane.
175+
/// (`q=0` and `q=1` are considered improper percentiles)
176+
///
177+
/// The array is shuffled **in place** along each 1-dimensional lane in
178+
/// order to produce the required percentile without allocating a copy
179+
/// of the original array. Each 1-dimensional lane is shuffled independently
180+
/// from the others.
181+
/// No assumptions should be made on the ordering of the array elements
182+
/// after this computation.
183+
///
184+
/// Complexity ([quickselect](https://en.wikipedia.org/wiki/Quickselect)):
185+
/// - average case: O(`m`);
186+
/// - worst case: O(`m`^2);
187+
/// where `m` is the number of elements in the array.
188+
///
189+
/// **Panics** if `axis` is out of bounds or if `q` is not between
190+
/// `0.` and `1.` (inclusive).
191+
fn percentile_axis_mut<I>(&mut self, axis: Axis, q: f64) -> Array<A, D::Smaller>
192+
where
193+
D: RemoveAxis,
194+
A: Ord + Clone,
195+
S: DataMut,
196+
I: Interpolate<A>;
197+
}
198+
199+
impl<A, S, D> PercentileExt<A, S, D> for ArrayBase<S, D>
200+
where
201+
S: Data<Elem = A>,
202+
D: Dimension,
203+
{
204+
fn percentile_axis_mut<I>(&mut self, axis: Axis, q: f64) -> Array<A, D::Smaller>
205+
where
206+
D: RemoveAxis,
207+
A: Ord + Clone,
208+
S: DataMut,
209+
I: Interpolate<A>,
210+
{
211+
assert!((0. <= q) && (q <= 1.));
212+
let mut lower = None;
213+
let mut upper = None;
214+
let axis_len = self.len_of(axis);
215+
if I::needs_lower(q, axis_len) {
216+
let lower_index = I::lower_index(q, axis_len);
217+
lower = Some(self.map_axis_mut(axis, |mut x| x.sorted_get_mut(lower_index)));
218+
if I::needs_upper(q, axis_len) {
219+
let upper_index = I::upper_index(q, axis_len);
220+
let relative_upper_index = upper_index - lower_index;
221+
upper = Some(self.map_axis_mut(axis, |mut x| {
222+
x.slice_mut(s![lower_index..])
223+
.sorted_get_mut(relative_upper_index)
224+
}));
225+
};
226+
} else {
227+
upper = Some(
228+
self.map_axis_mut(axis, |mut x| x.sorted_get_mut(I::upper_index(q, axis_len))),
229+
);
230+
};
231+
I::interpolate(lower, upper, q, axis_len)
232+
}
233+
}

src/sort.rs

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
use ndarray::prelude::*;
2+
use ndarray::{Data, DataMut};
3+
use rand::prelude::*;
4+
use rand::thread_rng;
5+
6+
pub trait Sort1dExt<A, S>
7+
where
8+
S: Data<Elem = A>,
9+
{
10+
/// Return the element that would occupy the `i`-th position if
11+
/// the array were sorted in increasing order.
12+
///
13+
/// The array is shuffled **in place** to retrieve the desired element:
14+
/// no copy of the array is allocated.
15+
/// After the shuffling, all elements with an index smaller than `i`
16+
/// are smaller than the desired element, while all elements with
17+
/// an index greater or equal than `i` are greater than or equal
18+
/// to the desired element.
19+
///
20+
/// No other assumptions should be made on the ordering of the
21+
/// elements after this computation.
22+
///
23+
/// Complexity ([quickselect](https://en.wikipedia.org/wiki/Quickselect)):
24+
/// - average case: O(`n`);
25+
/// - worst case: O(`n`^2);
26+
/// where n is the number of elements in the array.
27+
///
28+
/// **Panics** if `i` is greater than or equal to `n`.
29+
fn sorted_get_mut(&mut self, i: usize) -> A
30+
where
31+
A: Ord + Clone,
32+
S: DataMut;
33+
34+
/// Return the index of `self[partition_index]` if `self` were to be sorted
35+
/// in increasing order.
36+
///
37+
/// `self` elements are rearranged in such a way that `self[partition_index]`
38+
/// is in the position it would be in an array sorted in increasing order.
39+
/// All elements smaller than `self[partition_index]` are moved to its
40+
/// left and all elements equal or greater than `self[partition_index]`
41+
/// are moved to its right.
42+
/// The ordering of the elements in the two partitions is undefined.
43+
///
44+
/// `self` is shuffled **in place** to operate the desired partition:
45+
/// no copy of the array is allocated.
46+
///
47+
/// The method uses Hoare's partition algorithm.
48+
/// Complexity: O(`n`), where `n` is the number of elements in the array.
49+
/// Average number of element swaps: n/6 - 1/3 (see
50+
/// [link](https://cs.stackexchange.com/questions/11458/quicksort-partitioning-hoare-vs-lomuto/11550))
51+
///
52+
/// **Panics** if `partition_index` is greater than or equal to `n`.
53+
fn partition_mut(&mut self, pivot_index: usize) -> usize
54+
where
55+
A: Ord + Clone,
56+
S: DataMut;
57+
}
58+
59+
impl<A, S> Sort1dExt<A, S> for ArrayBase<S, Ix1>
60+
where
61+
S: Data<Elem = A>,
62+
{
63+
fn sorted_get_mut(&mut self, i: usize) -> A
64+
where
65+
A: Ord + Clone,
66+
S: DataMut,
67+
{
68+
let n = self.len();
69+
if n == 1 {
70+
self[0].clone()
71+
} else {
72+
let mut rng = thread_rng();
73+
let pivot_index = rng.gen_range(0, n);
74+
let partition_index = self.partition_mut(pivot_index);
75+
if i < partition_index {
76+
self.slice_mut(s![..partition_index]).sorted_get_mut(i)
77+
} else if i == partition_index {
78+
self[i].clone()
79+
} else {
80+
self.slice_mut(s![partition_index + 1..])
81+
.sorted_get_mut(i - (partition_index + 1))
82+
}
83+
}
84+
}
85+
86+
fn partition_mut(&mut self, pivot_index: usize) -> usize
87+
where
88+
A: Ord + Clone,
89+
S: DataMut,
90+
{
91+
let pivot_value = self[pivot_index].clone();
92+
self.swap(pivot_index, 0);
93+
let n = self.len();
94+
let mut i = 1;
95+
let mut j = n - 1;
96+
loop {
97+
loop {
98+
if i > j {
99+
break;
100+
}
101+
if self[i] >= pivot_value {
102+
break;
103+
}
104+
i += 1;
105+
}
106+
while pivot_value <= self[j] {
107+
if j == 1 {
108+
break;
109+
}
110+
j -= 1;
111+
}
112+
if i >= j {
113+
break;
114+
} else {
115+
self.swap(i, j);
116+
i += 1;
117+
j -= 1;
118+
}
119+
}
120+
self.swap(0, i - 1);
121+
i - 1
122+
}
123+
}

0 commit comments

Comments
 (0)