Skip to content

Commit

Permalink
Numerical sorting now treats non-integer numbers as f64.
Browse files Browse the repository at this point in the history
To fix an issue where xsv would not sort non-integers properly, this
commit makes xsv able to consider non-integers as f64. For integers, we
still compare using i64 v i64, while we treat any other number as f64
vs. f64 comparsion.
  • Loading branch information
Anton Hägerstrand authored and BurntSushi committed Sep 21, 2017
1 parent dbddb10 commit 4b308ad
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 5 deletions.
33 changes: 30 additions & 3 deletions src/cmd/sort.rs
Expand Up @@ -6,6 +6,8 @@ use select::SelectColumns;
use util;
use std::str::from_utf8;

use self::Number::{Float, Int};

static USAGE: &'static str = "
Sorts CSV data lexicographically.
Expand Down Expand Up @@ -116,17 +118,42 @@ pub fn iter_cmp_num<'a, L, R>(mut a: L, mut b: R) -> cmp::Ordering
(None, None) => return cmp::Ordering::Equal,
(None, _ ) => return cmp::Ordering::Less,
(_ , None) => return cmp::Ordering::Greater,
(Some(x), Some(y)) => match x.cmp(&y) {
(Some(x), Some(y)) => match compare_num(x, y) {
cmp::Ordering::Equal => (),
non_eq => return non_eq,
},
}
}
}

fn next_num<'a, X>(xs: &mut X) -> Option<i64>
#[derive(Clone, Copy, PartialEq)]
enum Number {
Int(i64),
Float(f64),
}

fn compare_num(n1: Number, n2: Number) -> cmp::Ordering{
match (n1, n2) {
(Int(i1), Int(i2)) => i1.cmp(&i2),
(Int(i1), Float(f2)) => compare_float(i1 as f64, f2),
(Float(f1), Int(i2)) => compare_float(f1, i2 as f64),
(Float(f1), Float(f2)) => compare_float(f1, f2),
}
}

fn compare_float(f1: f64, f2: f64) -> cmp::Ordering {
f1.partial_cmp(&f2).unwrap_or(cmp::Ordering::Equal)
}



fn next_num<'a, X>(xs: &mut X) -> Option<Number>
where X: Iterator<Item=&'a [u8]> {
xs.next()
.and_then(|bytes| from_utf8(bytes).ok())
.and_then(|s| s.parse::<i64>().ok())
.and_then(|s| {
if let Ok(i) = s.parse::<i64>() { Some(Number::Int(i)) }
else if let Ok(f) = s.parse::<f64>() { Some(Number::Float(f)) }
else { None }
})
}
35 changes: 33 additions & 2 deletions tests/test_sort.rs
Expand Up @@ -59,8 +59,9 @@ fn sort_numeric() {
wrk.create("in.csv", vec![
svec!["N", "S"],
svec!["10", "a"],
svec!["LETTER", "b"],
svec!["2", "c"],
svec!["1", "b"],
svec!["1", "d"],
]);

let mut cmd = wrk.command("sort");
Expand All @@ -69,13 +70,43 @@ fn sort_numeric() {
let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
let expected = vec![
svec!["N", "S"],
svec!["1", "b"],
//Non-numerics should be put first
svec!["LETTER", "b"],
svec!["1", "d"],
svec!["2", "c"],
svec!["10", "a"],
];
assert_eq!(got, expected);
}

#[test]
fn sort_numeric_non_natural() {
let wrk = Workdir::new("sort_numeric_non_natural");
wrk.create("in.csv", vec![
svec!["N", "S"],
svec!["8.33", "a"],
svec!["5", "b"],
svec!["LETTER", "c"],
svec!["7.4", "d"],
svec!["3.33", "e"],
]);

let mut cmd = wrk.command("sort");
cmd.arg("-N").arg("in.csv");

let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
let expected = vec![
svec!["N", "S"],
//Non-numerics should be put first
svec!["LETTER", "c"],
svec!["3.33", "e"],
svec!["5", "b"],
svec!["7.4", "d"],
svec!["8.33", "a"],
];
assert_eq!(got, expected);
}

#[test]
fn sort_reverse() {
let wrk = Workdir::new("sort_reverse");
Expand Down

0 comments on commit 4b308ad

Please sign in to comment.