From e19de571b13d967bde43b10cbfca107d2e9fd1fe Mon Sep 17 00:00:00 2001 From: Ragnar Groot Koerkamp Date: Thu, 4 Jan 2024 23:35:01 +0100 Subject: [PATCH] manual SIMD --- src/main.rs | 101 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 81 insertions(+), 20 deletions(-) diff --git a/src/main.rs b/src/main.rs index a17539b..c785ab3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,11 @@ -#![feature(slice_split_once)] +#![feature(slice_split_once, portable_simd, slice_as_chunks)] use fxhash::{FxHashMap, FxHasher}; -use memchr::arch::x86_64::avx2::memchr::One; -use std::{env::args, hash::Hasher, io::Read}; +use std::{ + env::args, + hash::Hasher, + io::Read, + simd::{Simd, SimdPartialEq, ToBitMask}, +}; struct Record { count: u32, @@ -68,6 +72,7 @@ fn to_key_fx(name: &[u8]) -> u64 { h.write(name); h.finish() } +#[allow(unused)] fn to_key(name: &[u8]) -> u64 { let mut key = [0u8; 8]; let l = name.len().min(8); @@ -79,32 +84,88 @@ fn to_key(name: &[u8]) -> u64 { k ^ name.len() as u64 } +/// Number of SIMD lanes. AVX2 has 256 bits, so 32 lanes. +const L: usize = 32; +/// The Simd type. NOTE(review): the generic arguments look stripped here and in the turbofish calls below; presumably u8 elements and L lanes, confirm against the original patch. +type S = Simd; + +/// Finds the regions between \n and ; (names) and between ; and \n (values), +/// and calls `callback` with the name and value slices of each line. +#[inline(always)] +fn iter_lines<'a>(data: &'a [u8], mut callback: impl FnMut(&'a [u8], &'a [u8])) { + unsafe { + // TODO: Handle the tail. Only the aligned middle part of `data` is scanned, while positions are computed relative to `data`, so this assumes the unaligned prefix is empty. + let simd_data: &[S] = data.align_to::().1; + + let sep = S::splat(b';'); + let end = S::splat(b'\n'); + let mut start_pos = 0; + let mut i = 0; + let mut eq_sep = sep.simd_eq(simd_data[i]).to_bitmask(); + let mut eq_end = end.simd_eq(simd_data[i]).to_bitmask(); + + // TODO: Handle the tail. The outer loop exits once i reaches the last two blocks. NOTE(review): the length minus 2 underflows for fewer than two blocks, the inner loops advance i with no length check of their own, and the first block is indexed above even when `simd_data` is empty. + while i < simd_data.len() - 2 { + // Find the next ; separator, loading further blocks until one contains a match. + // TODO if? 
+ while eq_sep == 0 { + i += 1; + eq_sep = sep.simd_eq(simd_data[i]).to_bitmask(); + eq_end = end.simd_eq(simd_data[i]).to_bitmask(); + } + let offset = eq_sep.trailing_zeros(); + eq_sep ^= 1 << offset; + let sep_pos = L * i + offset as usize; + + // Find the next \n newline the same way. + // TODO if? + while eq_end == 0 { + i += 1; + eq_sep = sep.simd_eq(simd_data[i]).to_bitmask(); + eq_end = end.simd_eq(simd_data[i]).to_bitmask(); + } + let offset = eq_end.trailing_zeros(); + eq_end ^= 1 << offset; + let end_pos = L * i + offset as usize; + + callback( + data.get_unchecked(start_pos..sep_pos), + data.get_unchecked(sep_pos + 1..end_pos), + ); + + start_pos = end_pos + 1; + } + } +} + fn main() { let filename = &args().nth(1).unwrap_or("measurements.txt".to_string()); let mut data = vec![]; + let offset; { let stat = std::fs::metadata(filename).unwrap(); - data.reserve(stat.len() as usize + 1); + data.reserve(stat.len() as usize + 2 * L); + // Some hacky stuff to make sure data is aligned to simd lanes: measure the length of the unaligned prefix of the buffer, then shrink to that length so the file contents appended by read_to_end start on an aligned address. NOTE(review): assumes the buffer is not reallocated while reading. + data.resize(4 * L, 0); + let pre_aligned = unsafe { data.align_to::().0 }; + offset = pre_aligned.len(); + assert!(offset < L); + data.resize(offset, 0); let mut file = std::fs::File::open(filename).unwrap(); file.read_to_end(&mut data).unwrap(); } + // Guaranteed to be aligned for SIMD. 
+ let data = &data[offset..]; + let mut h = FxHashMap::default(); - let mut data = &data[..]; - let sep = One::new(b';').unwrap(); - let newline = One::new(b'\n').unwrap(); - unsafe { - while !data.is_empty() { - let separator = sep.find(data).unwrap(); - let end = newline.find(data.get_unchecked(separator..)).unwrap(); - let name = data.get_unchecked(..separator); - let value = data.get_unchecked(separator + 1..separator + end); - h.entry(to_key_fx(name)) - .or_insert((Record::default(), name)) - .0 - .add(parse(value)); - data = data.get_unchecked(separator + end + 1..); - } - } + + let callback = |name, value| { + h.entry(to_key_fx(name)) + .or_insert((Record::default(), name)) + .0 + .add(parse(value)); + }; + iter_lines(data, callback); let mut v = h.into_iter().collect::>(); v.sort_unstable_by_key(|p| p.0);