Permalink
Fetching contributors…
Cannot retrieve contributors at this time
executable file 93 lines (83 sloc) 2.43 KB
#!/bin/sh
# This script does some very basic benchmarks with 'xsv' on a city population
# data set (which is a strict subset of the `worldcitiespop` data set). If it
# doesn't exist on your system, it will be downloaded to /tmp for you.
#
# These aren't meant to be overly rigorous, but they should be enough to catch
# significant regressions.
#
# Make sure you're using an `xsv` generated by `cargo build --release`.
set -e

# Optional first argument: an extended regular expression. When given, only
# benchmarks whose name matches it in full are run; when empty or absent,
# every benchmark runs.
pat="${1:-}"

# The benchmark input and the index file that the benchmark helpers delete
# and rebuild around it.
data=/tmp/worldcitiespop_mil.csv
data_idx=/tmp/worldcitiespop_mil.csv.idx

if [ ! -r "$data" ]; then
  # Download to a scratch file and move it into place only on success.
  # Otherwise an HTTP error page or an interrupted transfer would be left at
  # "$data" and accepted as valid input by every later run.
  data_part="$data.part.$$"
  # --fail: treat HTTP errors as failures; --location: follow redirects.
  if ! curl -fsSL http://burntsushi.net/stuff/worldcitiespop_mil.csv > "$data_part"; then
    rm -f "$data_part"
    echo "failed to download benchmark data to $data" >&2
    exit 1
  fi
  mv "$data_part" "$data"
fi

# Size of the data set in bytes, used for the MB/sec figure. `wc -c` is used
# instead of GNU-only `stat --format`; `tr` strips the padding that some `wc`
# implementations add.
data_size=$(wc -c < "$data" | tr -d '[:space:]')
# Time a single run of a command and print the elapsed wall-clock seconds.
#
# Arguments: the command and its arguments. They are joined with single
#            spaces into one string that is re-parsed by `sh -c`, so an
#            argument that must survive that second parse has to carry its
#            own embedded quotes.
# Outputs:   the "real" value reported by `time -p`, raised to 0.01 when it
#            is smaller (presumably so later divisions never see zero).
# Returns:   non-zero, with a message on stderr, when no timing was reported.
#
# POSIX function syntax is used because the script runs under #!/bin/sh.
# Variables are plain globals since `local` is not POSIX.
real_seconds() {
  # "$*" rather than an unquoted $@: the arguments must not be glob-expanded
  # against the current directory (a regex such as '(?i)us' contains `?`),
  # and `echo` must not get a chance to interpret them as its own options.
  cmd="$* > /dev/null 2>&1"

  # `command time` runs the external time utility without depending on the
  # non-standard `which`; a bare `time` would be the shell keyword in bash.
  # A single awk picks the "real" line and applies the 0.01 floor.
  t=$(
    command time -p sh -c "$cmd" 2>&1 |
      awk '/^real/ { if ($2 < 0.01) $2 = "0.01"; print $2 }'
  )

  if [ -z "$t" ]; then
    echo "real_seconds: no timing reported for: $*" >&2
    return 1
  fi
  echo "$t"
}
# Run a command three times without an index and print the mean wall-clock
# time in seconds.
#
# Globals:   data_idx (read) - index file removed before every timed run.
# Arguments: the command and its arguments, forwarded to real_seconds.
# Outputs:   the average of the three timings, computed by bc with scale=2.
# Returns:   non-zero when any run fails to produce a timing.
#
# POSIX function syntax is used because the script runs under #!/bin/sh.
# Variables are plain globals since `local` is not POSIX.
benchmark() {
  bm_sum=
  for bm_pass in 1 2 3; do
    # Each run starts without an index so all three measure the same work.
    rm -f "$data_idx"
    bm_t=$(real_seconds "$@") || return 1
    if [ -z "$bm_t" ]; then
      # Without this check bc would be handed a malformed expression.
      echo "benchmark: run $bm_pass produced no timing for: $*" >&2
      return 1
    fi
    bm_sum="${bm_sum:+$bm_sum + }$bm_t"
  done
  echo "scale=2; ($bm_sum) / 3" | bc
}
# Build an index once, run a command three times against it, and print the
# mean wall-clock time in seconds. The index is removed again afterwards.
#
# Globals:   data (read)     - file passed to `xsv index`.
#            data_idx (read) - index file removed before and after the runs.
# Arguments: the command and its arguments, forwarded to real_seconds.
# Outputs:   the average of the three timings, computed by bc with scale=2.
# Returns:   non-zero when indexing fails or a run produces no timing.
#
# POSIX function syntax is used because the script runs under #!/bin/sh.
# Variables are plain globals since `local` is not POSIX.
benchmark_with_index() {
  rm -f "$data_idx"
  # stdout is discarded: callers capture this function's output, so only the
  # final average may appear on it. Index creation is not part of the timing.
  xsv index "$data" > /dev/null || return 1

  bwi_sum=
  for bwi_pass in 1 2 3; do
    if ! bwi_t=$(real_seconds "$@") || [ -z "$bwi_t" ]; then
      echo "benchmark_with_index: run $bwi_pass produced no timing for: $*" >&2
      rm -f "$data_idx"
      return 1
    fi
    bwi_sum="${bwi_sum:+$bwi_sum + }$bwi_t"
  done

  rm -f "$data_idx"
  echo "scale=2; ($bwi_sum) / 3" | bc
}
# Run one named benchmark and print a result line.
#
# Usage:     run [--index] NAME COMMAND [ARG...]
# Globals:   pat (read)       - optional extended regex; when non-empty the
#                               benchmark runs only if NAME matches it in full.
#            data_size (read) - input size in bytes, for the MB/sec figure.
# Arguments: --index  time the command via benchmark_with_index instead of
#                     benchmark.
#            NAME     label printed in the first column.
#            COMMAND  command line to time.
# Outputs:   "NAME<TAB>N.NN seconds<TAB>M MB/sec", or nothing when NAME is
#            filtered out by pat.
# Returns:   0 on success or when filtered out; non-zero on bad usage or when
#            no timing could be obtained.
#
# POSIX function syntax is used because the script runs under #!/bin/sh.
# Variables are plain globals since `local` is not POSIX.
run() {
  index=
  while [ "${1:-}" = "--index" ]; do
    index="yes"
    shift
  done

  if [ "$#" -lt 2 ]; then
    echo "usage: run [--index] NAME COMMAND [ARG...]" >&2
    return 2
  fi
  name="$1"
  shift

  # Skip benchmarks excluded by the user's pattern.
  if [ -n "${pat:-}" ] && ! printf '%s\n' "$name" | grep -E -q "^${pat}\$"; then
    return 0
  fi

  if [ -z "$index" ]; then
    t=$(benchmark "$@") || return 1
  else
    t=$(benchmark_with_index "$@") || return 1
  fi
  if [ -z "$t" ]; then
    echo "run: no timing obtained for benchmark '$name'" >&2
    return 1
  fi

  mb_per=$(echo "scale=2; ($data_size / $t) / 2^20" | bc)
  # Arguments are quoted so an empty or space-containing value cannot shift
  # the fields of the report line.
  printf "%s\t%0.02f seconds\t%s MB/sec\n" "$name" "$t" "$mb_per"
}
# The benchmark suite. Every entry is `run [--index] NAME COMMAND...`; entries
# flagged with --index are timed against a pre-built index, the rest start
# each timed run with the index removed.
run_all_benchmarks() {
  # Whole-file passes.
  run count xsv count "$data"
  run flatten xsv flatten "$data"
  run flatten_condensed xsv flatten "$data" --condense 50
  run frequency xsv frequency "$data"
  run index xsv index "$data"

  # Sampling at increasing sample sizes.
  run sample_10 xsv sample 10 "$data"
  run sample_1000 xsv sample 1000 "$data"
  run sample_100000 xsv sample 100000 "$data"

  # Filtering, projection and ordering. The regex carries its own single
  # quotes because the command line is re-parsed by `sh -c` when it is timed.
  run search xsv search -s Country "'(?i)us'" "$data"
  run select xsv select Country "$data"
  run sort xsv sort -s AccentCity "$data"

  # Fetching one record from the middle, without and with an index.
  run slice_one_middle xsv slice -i 500000 "$data"
  run --index slice_one_middle_index xsv slice -i 500000 "$data"

  # Statistics, without and with an index.
  run stats xsv stats "$data"
  run --index stats_index xsv stats "$data"
  run stats_everything xsv stats "$data" --everything
  run --index stats_everything_index xsv stats "$data" --everything
}

run_all_benchmarks