Permalink
Cannot retrieve contributors at this time
Join GitHub today
GitHub is home to over 28 million developers working together to host and review code, manage projects, and build software together.
Sign up
Fetching contributors…
| #!/bin/sh | |
| # This script does some very basic benchmarks with 'xsv' on a city population | |
| # data set (which is a strict subset of the `worldcitiespop` data set). If it | |
| # doesn't exist on your system, it will be downloaded to /tmp for you. | |
| # | |
| # These aren't meant to be overly rigorous, but they should be enough to catch | |
| # significant regressions. | |
| # | |
| # Make sure you're using an `xsv` generated by `cargo build --release`. | |
# Abort on the first command that fails.
set -e

# Optional first argument: an extended regular expression. When non-empty, only
# benchmarks whose name matches it are run; when empty, all of them run.
pat="$1"

# Cached copy of the data set.
data=/tmp/worldcitiespop_mil.csv
# Index file that the benchmark helpers delete around their runs (presumably
# the path `xsv index` writes for $data).
data_idx=/tmp/worldcitiespop_mil.csv.idx

# Download the data set on first use.
#   -f  makes an HTTP error fail the script instead of saving the server's
#       error page as if it were the CSV.
# The download goes to a side file and is only moved into place once complete,
# so an interrupted transfer cannot be mistaken for a valid cache next time.
if [ ! -r "$data" ]; then
  if ! curl -fsS http://burntsushi.net/stuff/worldcitiespop_mil.csv \
    > "${data}.part"; then
    rm -f "${data}.part"
    exit 1
  fi
  mv "${data}.part" "$data"
fi

# Size of the data set in bytes, used to report throughput. `wc -c` is used
# because `stat --format` only exists in GNU coreutils; the arithmetic
# expansion strips the padding some `wc` implementations print.
data_size=$(wc -c < "$data")
data_size=$((data_size))
#######################################
# Run a command once and print how long it took, in wall-clock seconds.
#
# The arguments are joined with single spaces into one command line that is
# passed to `sh -c`, so they are parsed by a shell a second time. An argument
# that must survive that second parse as one word has to carry its own quotes.
# The command's stdout and stderr are discarded.
#
# Arguments: $@ - the command line to time
# Outputs:   the `real` value reported by `time -p`; values below 0.01 are
#            reported as 0.01 so the result is never zero
# Returns:   1, with a message on stderr, when no timing could be read
#######################################
real_seconds() {
  cmd="$* > /dev/null 2>&1"
  # `time` writes its report to stderr; the braces let the redirection apply
  # to the report itself whether `time` is a shell keyword or an external
  # utility. A single awk picks the `real` line and applies the 0.01 floor.
  t=$(
    { time -p sh -c "$cmd"; } 2>&1 \
      | awk '$1 == "real" {
          if ($2 + 0 < 0.01) print "0.01"; else print $2
          exit
        }'
  )
  if [ -z "$t" ]; then
    printf 'real_seconds: could not time: %s\n' "$*" >&2
    return 1
  fi
  printf '%s\n' "$t"
}
#######################################
# Time a command three times and print the mean duration.
#
# The file named by $data_idx is deleted before each of the three runs
# (presumably so that no run starts with an index already in place).
#
# Globals:   data_idx (read)
# Arguments: $@ - the command line, forwarded unchanged to real_seconds
# Outputs:   the mean of the three timings as computed by bc with scale=2
#######################################
benchmark() {
  sum=0
  for pass in 1 2 3; do
    rm -f "$data_idx"
    elapsed=$(real_seconds "$@")
    sum="$sum + $elapsed"
  done
  # bc does the arithmetic because the timings are decimal fractions.
  echo "scale=2; ($sum) / 3" | bc
}
#######################################
# Time a command three times against a freshly built index and print the mean
# duration.
#
# Any existing $data_idx is removed, `xsv index` is run once on $data, the
# command is timed three times, and $data_idx is removed again afterwards.
# Building the index is not part of the measured time.
#
# Globals:   data (read), data_idx (read)
# Arguments: $@ - the command line, forwarded unchanged to real_seconds
# Outputs:   the mean of the three timings as computed by bc with scale=2
#######################################
benchmark_with_index() {
  rm -f "$data_idx"
  # Callers capture this function's stdout as the timing, so anything the
  # indexing step might print there is discarded; stderr stays visible.
  xsv index "$data" > /dev/null

  sum=0
  for pass in 1 2 3; do
    elapsed=$(real_seconds "$@")
    sum="$sum + $elapsed"
  done

  rm -f "$data_idx"
  echo "scale=2; ($sum) / 3" | bc
}
#######################################
# Run one named benchmark and print a result line for it.
#
# Usage: run [--index] NAME COMMAND [ARG...]
#
# The benchmark is skipped when $pat is non-empty and NAME does not match it.
# $pat is an extended regular expression that must match the whole name.
#
# Globals:   pat (read), data_size (read)
# Arguments: --index  time with benchmark_with_index instead of benchmark
#            NAME     label printed in the first column and matched by $pat
#            COMMAND  command line to time
# Outputs:   "NAME<TAB>T seconds<TAB>R MB/sec", where T is the mean time and
#            R is data_size / T / 2^20
#######################################
run() {
  index=
  while [ "$1" = "--index" ]; do
    index=yes
    shift
  done
  name="$1"
  shift

  # -x anchors the complete pattern to the complete name. Wrapping the pattern
  # as "^$pat$" would not: for an alternation such as 'a|b' the anchors bind
  # to the first and last branch only.
  if [ -n "$pat" ] \
    && ! printf '%s\n' "$name" | grep -E -q -x -e "$pat"; then
    return 0
  fi

  if [ -n "$index" ]; then
    t=$(benchmark_with_index "$@")
  else
    t=$(benchmark "$@")
  fi
  mb_per=$(echo "scale=2; ($data_size / $t) / 2^20" | bc)
  printf "%s\t%0.02f seconds\t%s MB/sec\n" "$name" "$t" "$mb_per"
}
# The benchmarks themselves. Each entry has the form
#   run [--index] <name> <command line to time>

run count xsv count "$data"
run flatten xsv flatten "$data"
run flatten_condensed xsv flatten "$data" --condense 50
run frequency xsv frequency "$data"
run index xsv index "$data"

# Sampling at three sample sizes.
for sample_size in 10 1000 100000; do
  run "sample_${sample_size}" xsv sample "$sample_size" "$data"
done

# The timed command line is handed to `sh -c`, so the search pattern carries
# its own single quotes to reach xsv as one word.
run search xsv search -s Country "'(?i)us'" "$data"
run select xsv select Country "$data"
run sort xsv sort -s AccentCity "$data"

# Each of the following is measured without and with --index.
run slice_one_middle xsv slice -i 500000 "$data"
run --index slice_one_middle_index xsv slice -i 500000 "$data"
run stats xsv stats "$data"
run --index stats_index xsv stats "$data"
run stats_everything xsv stats "$data" --everything
run --index stats_everything_index xsv stats "$data" --everything