Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFC Decode/Encode with multiple bytes per chunk #84

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
9 changes: 9 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ alloc = []
check = ["sha2"]

[dependencies]
bytemuck = "1.9.1"
sha2 = { version = "0.9.0", optional = true, default-features = false }

[dev_dependencies]
Expand Down
6 changes: 6 additions & 0 deletions benches/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ macro_rules! group_decode {
let mut output = [0; $decoded_length];
b.iter(|| bs58::decode($encoded).into(&mut output).unwrap());
});
group.bench_function("decode_bs58_unsafe", |b| {
b.iter(|| bs58::decode($encoded).into_vec_unsafe().unwrap())
});
group.finish();
}};
}
Expand All @@ -44,6 +47,9 @@ macro_rules! group_decode_long {
let mut output = [0; $decoded_length];
b.iter(|| bs58::decode($encoded).into(&mut output[..]).unwrap());
});
group.bench_function("decode_bs58_unsafe", |b| {
b.iter(|| bs58::decode($encoded).into_vec_unsafe().unwrap())
});
// bs58_noalloc_array is not possible because of limited array lengths in trait impls
group.finish();
}};
Expand Down
6 changes: 6 additions & 0 deletions benches/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ macro_rules! group_encode {
let mut output = String::with_capacity($encoded.len());
b.iter(|| bs58::encode($decoded).into(&mut output));
});
group.bench_function("encode_bs58_vec", |b| {
b.iter(|| bs58::encode($decoded).into_vec())
});
group.bench_function("encode_bs58_vec_unsafe", |b| {
b.iter(|| bs58::encode($decoded).into_vec_unsafe())
});
group.finish();
}};
}
Expand Down
86 changes: 86 additions & 0 deletions src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,22 @@ impl<'a, I: AsRef<[u8]>> DecodeBuilder<'a, I> {
Ok(output)
}

/// Decode into a new vector of bytes.
///
/// This method decodes several input characters at a time and allocates a constant number of
/// bytes more than strictly necessary for its scratch space. Note that it does **not** obey
/// the `Check::Enabled` flag.
///
/// # Errors
///
/// Propagates any error returned by the limb-based decoder (for example when the input
/// contains a non-ASCII byte or a character that is not part of the alphabet).
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(any(feature = "alloc", feature = "std"))))]
pub fn into_vec_unsafe(self) -> Result<Vec<u8>> {
    let input = self.input.as_ref();

    // Round the worst-case decoded length up to whole 32-bit limbs, and reserve 3 extra
    // bytes: the decoder views the buffer as `u32`s and may have to skip up to 3 leading
    // bytes to reach a 4-byte aligned address. Without this slack a misaligned allocation
    // could yield a spurious `BufferTooSmall`.
    let mut output = Vec::new();
    output.resize((input.len() + 3) / 4 * 4 + 3, 0);

    let len = decode_into_limbs(input, &mut output, self.alpha)?;
    output.truncate(len);
    Ok(output)
}

/// Decode into the given buffer.
///
/// Returns the length written into the buffer.
Expand Down Expand Up @@ -306,6 +322,76 @@ fn decode_into(input: &[u8], output: &mut [u8], alpha: &Alphabet) -> Result<usiz
Ok(index)
}

fn decode_into_limbs(input: &[u8], output: &mut [u8], alpha: &Alphabet) -> Result<usize> {
let input_bytes_per_limb = 5; // 58**5 < 2**32

let decode_input_byte = |i: usize, c: u8| -> Result<usize> {
if c > 127 {
return Err(Error::NonAsciiCharacter { index: i });
}

let val = alpha.decode[c as usize] as usize;
if val == 0xFF {
return Err(Error::InvalidCharacter {
character: c as char,
index: i,
});
}
Ok(val)
};

let mut index = 0;

let (prefix, output_as_limbs, _) = bytemuck::pod_align_to_mut::<u8, u32>(output);
let prefix_len = prefix.len();

for (chunk_idx, chunk) in input.chunks(input_bytes_per_limb).enumerate() {
let mut next_limb = 0;
let mut last_limb_multiplier = 1;
for (byte_idx, input_byte) in chunk.into_iter().enumerate() {
next_limb = next_limb * 58 + decode_input_byte(chunk_idx * 4 + byte_idx, *input_byte)?;
last_limb_multiplier = last_limb_multiplier * 58;
}

for limb in &mut output_as_limbs[..index] {
next_limb += (*limb as usize) * last_limb_multiplier;
*limb = (next_limb & 0xFFFFFFFF) as u32;
next_limb >>= 32;
}

while next_limb > 0 {
let limb = output_as_limbs.get_mut(index).ok_or(Error::BufferTooSmall)?;
*limb = (next_limb & 0xFFFFFFFF) as u32;
index += 1;
next_limb >>= 32;
}
}

// rescale for the remainder
index = index * 4;
{
let output = &mut output[prefix_len..];
while index > 0 && output[index - 1] == 0 {
index -= 1;
}

let zero = alpha.encode[0];
for _ in input.iter().take_while(|c| **c == zero) {
let byte = output.get_mut(index).ok_or(Error::BufferTooSmall)?;
*byte = 0;
index += 1;
}

output[..index].reverse();
}

if prefix_len > 0 {
output.copy_within(prefix_len..prefix_len + index, 0);
}

Ok(index)
}

#[cfg(feature = "check")]
fn decode_check_into(
input: &[u8],
Expand Down
100 changes: 100 additions & 0 deletions src/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,17 @@ impl<'a, I: AsRef<[u8]>> EncodeBuilder<'a, I> {
output
}

/// Encode into a new owned vector.
///
/// This method encodes several input bytes at a time and allocates a constant number of
/// bytes more than strictly necessary for its scratch space.
///
/// # Panics
///
/// Panics only if the internally sized scratch buffer turns out to be too small, which
/// would indicate a bug in the size computation rather than a problem with the input.
#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(any(feature = "alloc", feature = "std"))))]
pub fn into_vec_unsafe(self) -> Vec<u8> {
    let input = self.input.as_ref();
    let max_encoded_len = (input.len() / 5 + 1) * 8;

    // Round up to whole 5-character groups, and reserve 3 extra bytes: the encoder views
    // the buffer as `u32`s and may have to skip up to 3 leading bytes to reach a 4-byte
    // aligned address.
    let mut output = Vec::new();
    output.resize((max_encoded_len + 4) / 5 * 5 + 3, 0);

    let len = encode_into_limbs(input, &mut output, self.alpha)
        .expect("scratch buffer is sized for the worst-case encoded length");
    output.truncate(len);
    output
}

/// Encode into the given buffer.
///
/// Returns the length written into the buffer.
Expand Down Expand Up @@ -370,6 +381,95 @@ where
Ok(index)
}

fn encode_into_limbs(input: &[u8], output: &mut [u8], alpha: &Alphabet) -> Result<usize>
{
let input_bytes_per_limb = 4;
let (prefix, output_as_limbs, _) = bytemuck::pod_align_to_mut::<u8, u32>(output);
let prefix_len = prefix.len();

let mut index = 0;
let next_limb_divisor = 58 * 58 * 58 * 58 * 58;
for chunk in input.chunks(input_bytes_per_limb) {
let mut carry = 0;
let mut shift_size = 0;
for input_byte in chunk {
carry = (carry << 8) + *input_byte as usize;
shift_size = shift_size + 8;
}

for limb in &mut output_as_limbs[..index] {
carry += (*limb as usize) << shift_size;
*limb = (carry % next_limb_divisor) as u32;
carry /= next_limb_divisor;
}

while carry > 0 {
let limb = output_as_limbs.get_mut(index).ok_or(Error::BufferTooSmall)?;
*limb = (carry % next_limb_divisor) as u32;
index += 1;
carry /= next_limb_divisor;
}
}

// shouldn't happen since we control the output buffer passed in...
if output.len() < prefix_len + index * 5 {
return Err(Error::BufferTooSmall);
}

for index in (0..index).rev() {
let limb_offset = prefix_len + index * 4;
let mut limb_bytes = [0; 4];
limb_bytes.copy_from_slice(&output[limb_offset..limb_offset+4]);
let limb = if cfg!(target_endian = "little") {
u32::from_le_bytes(limb_bytes)
} else {
u32::from_be_bytes(limb_bytes)
};

let output_byte4 = limb / (58 * 58 * 58 * 58);
let output_byte3 = (limb / (58 * 58 * 58)) % 58;
let output_byte2 = (limb / (58 * 58)) % 58;
let output_byte1 = (limb / 58) % 58;
let output_byte0 = limb % 58;

let output_offset = prefix_len + index * 5;
output[output_offset..output_offset+5].copy_from_slice(&[
output_byte0 as u8,
output_byte1 as u8,
output_byte2 as u8,
output_byte3 as u8,
output_byte4 as u8,
]);
}

// rescale for the remainder
index = index * 5;
{
let output = &mut output[prefix_len..];
while index > 0 && output[index - 1] == 0 {
index -= 1;
}

for _ in input.into_iter().take_while(|v| **v == 0) {
let byte = output.get_mut(index).ok_or(Error::BufferTooSmall)?;
*byte = 0;
index += 1;
}

for val in &mut output[..index] {
*val = alpha.encode[*val as usize];
}

output[..index].reverse();
}

if prefix_len > 0 {
output.copy_within(prefix_len..prefix_len + index, 0);
}

Ok(index)
}

#[cfg(feature = "check")]
fn encode_check_into(
input: &[u8],
Expand Down
1 change: 1 addition & 0 deletions tests/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use assert_matches::assert_matches;
fn test_decode() {
    // Each fixture must decode to the same bytes through both the byte-wise decoder and the
    // limb-based decoder.
    for &(val, s) in cases::TEST_CASES.iter() {
        let expected = val.to_vec();

        let bytewise = bs58::decode(s).into_vec().unwrap();
        assert_eq!(expected, bytewise);

        let limbwise = bs58::decode(s).into_vec_unsafe().unwrap();
        assert_eq!(expected, limbwise);
    }
}

Expand Down
1 change: 1 addition & 0 deletions tests/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ fn test_encode() {
assert_eq!(s, bs58::encode(val).into_string());

assert_eq!(s.as_bytes(), &*bs58::encode(val).into_vec());
assert_eq!(s.as_bytes(), &*bs58::encode(val).into_vec_unsafe());

{
let mut bytes = FILLER;
Expand Down