Nullus157 · lwus · May 11, 2022 · May 11, 2022 · May 11, 2022 · May 11, 2022
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -23,6 +23,7 @@ alloc = []
 check = ["sha2"]
 
 [dependencies]
+bytemuck = "1.9.1"
 sha2 = { version = "0.9.0", optional = true, default-features = false }
 
 [dev_dependencies]

diff --git a/benches/decode.rs b/benches/decode.rs
@@ -24,6 +24,9 @@ macro_rules! group_decode {
             let mut output = [0; $decoded_length];
             b.iter(|| bs58::decode($encoded).into(&mut output).unwrap());
         });
+        group.bench_function("decode_bs58_unsafe", |b| {
+            b.iter(|| bs58::decode($encoded).into_vec_unsafe().unwrap())
+        });
         group.finish();
     }};
 }
@@ -44,6 +47,9 @@ macro_rules! group_decode_long {
             let mut output = [0; $decoded_length];
             b.iter(|| bs58::decode($encoded).into(&mut output[..]).unwrap());
         });
+        group.bench_function("decode_bs58_unsafe", |b| {
+            b.iter(|| bs58::decode($encoded).into_vec_unsafe().unwrap())
+        });
         // bs58_noalloc_array is not possible because of limited array lengths in trait impls
         group.finish();
     }};

diff --git a/benches/encode.rs b/benches/encode.rs
@@ -20,6 +20,12 @@ macro_rules! group_encode {
             let mut output = String::with_capacity($encoded.len());
             b.iter(|| bs58::encode($decoded).into(&mut output));
         });
+        group.bench_function("encode_bs58_vec", |b| {
+            b.iter(|| bs58::encode($decoded).into_vec())
+        });
+        group.bench_function("encode_bs58_vec_unsafe", |b| {
+            b.iter(|| bs58::encode($decoded).into_vec_unsafe())
+        });
         group.finish();
     }};
 }

diff --git a/src/decode.rs b/src/decode.rs
@@ -219,6 +219,22 @@ impl<'a, I: AsRef<[u8]>> DecodeBuilder<'a, I> {
         Ok(output)
     }
 
+    /// Decode into a new vector of bytes.
+    ///
+    /// This method decodes several input characters per step and allocates somewhat more memory
+    /// than the decoded data needs. Note that it does not obey the `Check::Enabled` flag: the
+    /// input is decoded as plain base-58 data.
+    #[cfg(feature = "alloc")]
+    #[cfg_attr(docsrs, doc(cfg(any(feature = "alloc", feature = "std"))))]
+    pub fn into_vec_unsafe(self) -> Result<Vec<u8>> {
+        let mut output = Vec::new();
+        // Whole `u32` limbs, plus 3 bytes of slack in case the allocation is not 4-byte aligned.
+        output.resize((self.input.as_ref().len() + 3) / 4 * 4 + 3, 0);
+        let len = decode_into_limbs(self.input.as_ref(), &mut output, self.alpha)?;
+        output.truncate(len);
+        Ok(output)
+    }
+
     /// Decode into the given buffer.
     ///
     /// Returns the length written into the buffer.
@@ -306,6 +322,76 @@ fn decode_into(input: &[u8], output: &mut [u8], alpha: &Alphabet) -> Result<usiz
     Ok(index)
 }
 
+/// Decodes base-58 `input` into `output`, consuming five input characters per step.
+///
+/// The number is accumulated in little-endian `u32` limbs stored inside `output` and finally
+/// reversed into big-endian bytes at the front of `output`. Returns the decoded length, or an
+/// error for a non-ASCII byte, a byte `alpha` does not map, or an `output` that is too short.
+fn decode_into_limbs(input: &[u8], output: &mut [u8], alpha: &Alphabet) -> Result<usize> {
+    let input_bytes_per_limb = 5; // 58**5 < 2**32, so five digits always fit in one limb
+
+    // Maps the character `c` found at position `i` of `input` to its base-58 digit value.
+    let decode_input_byte = |i: usize, c: u8| -> Result<u64> {
+        if c > 127 {
+            return Err(Error::NonAsciiCharacter { index: i });
+        }
+        let val = alpha.decode[c as usize];
+        if val == 0xFF {
+            return Err(Error::InvalidCharacter { character: c as char, index: i });
+        }
+        Ok(u64::from(val))
+    };
+
+    let mut index = 0; // number of limbs in use
+    let (prefix, output_as_limbs, _) = bytemuck::pod_align_to_mut::<u8, u32>(output);
+    let prefix_len = prefix.len(); // unaligned leading bytes that cannot be part of a limb
+
+    for (chunk_idx, chunk) in input.chunks(input_bytes_per_limb).enumerate() {
+        // 64-bit arithmetic: `limb * 58**5` does not fit a `usize` on 32-bit targets.
+        let mut next_limb: u64 = 0;
+        let mut last_limb_multiplier: u64 = 1;
+        for (byte_idx, input_byte) in chunk.iter().enumerate() {
+            let char_idx = chunk_idx * input_bytes_per_limb + byte_idx;
+            next_limb = next_limb * 58 + decode_input_byte(char_idx, *input_byte)?;
+            last_limb_multiplier *= 58;
+        }
+
+        // number = number * 58**chunk.len() + chunk value, least significant limb first.
+        for limb in &mut output_as_limbs[..index] {
+            next_limb += u64::from(u32::from_le(*limb)) * last_limb_multiplier;
+            *limb = ((next_limb & 0xFFFF_FFFF) as u32).to_le();
+            next_limb >>= 32;
+        }
+
+        // Spill whatever carry remains into fresh limbs.
+        while next_limb > 0 {
+            let limb = output_as_limbs.get_mut(index).ok_or(Error::BufferTooSmall)?;
+            *limb = ((next_limb & 0xFFFF_FFFF) as u32).to_le();
+            index += 1;
+            next_limb >>= 32;
+        }
+    }
+
+    // From here on `index` counts bytes; the limbs are stored little-endian, so the bytes are too.
+    index *= 4;
+    let decoded = &mut output[prefix_len..];
+    // Drop the zero padding of the most significant limb.
+    while index > 0 && decoded[index - 1] == 0 {
+        index -= 1;
+    }
+    // Every leading zero digit of the input stands for one leading zero byte.
+    let zero = alpha.encode[0];
+    for _ in input.iter().take_while(|c| **c == zero) {
+        let byte = decoded.get_mut(index).ok_or(Error::BufferTooSmall)?;
+        *byte = 0;
+        index += 1;
+    }
+
+    decoded[..index].reverse(); // little-endian -> big-endian
+    output.copy_within(prefix_len..prefix_len + index, 0); // close the alignment gap, if any
+    Ok(index)
+}
+
 #[cfg(feature = "check")]
 fn decode_check_into(
     input: &[u8],

diff --git a/src/encode.rs b/src/encode.rs
@@ -248,6 +248,17 @@ impl<'a, I: AsRef<[u8]>> EncodeBuilder<'a, I> {
         output
     }
 
+    /// Encode into a new owned vector, converting four input bytes per step.
+    #[cfg(feature = "alloc")]
+    pub fn into_vec_unsafe(self) -> Vec<u8> {
+        let mut output = Vec::new();
+        // Worst case in whole 5-digit groups plus 3 bytes of alignment slack, so `unwrap` holds.
+        output.resize(((self.input.as_ref().len() / 5 + 1) * 8 + 4) / 5 * 5 + 3, 0);
+        let len = encode_into_limbs(self.input.as_ref(), &mut output, self.alpha).unwrap();
+        output.truncate(len);
+        output
+    }
+
     /// Encode into the given buffer.
     ///
     /// Returns the length written into the buffer.
@@ -370,6 +381,95 @@ where
     Ok(index)
 }
 
+/// Encodes `input` as base-58 text into `output`, consuming four input bytes per step.
+///
+/// The number is accumulated in `u32` limbs stored inside `output`, each holding five base-58
+/// digits; the limbs are then expanded in place into digits, mapped through `alpha` and
+/// reversed. Returns the encoded length, or `Error::BufferTooSmall` if `output` is too short.
+fn encode_into_limbs(input: &[u8], output: &mut [u8], alpha: &Alphabet) -> Result<usize> {
+    let input_bytes_per_limb = 4;
+    let (prefix, output_as_limbs, _) = bytemuck::pod_align_to_mut::<u8, u32>(output);
+    let prefix_len = prefix.len(); // unaligned leading bytes that cannot be part of a limb
+
+    let mut index = 0; // number of limbs in use
+    let next_limb_divisor: u64 = 58 * 58 * 58 * 58 * 58; // each limb stores five digits
+    for chunk in input.chunks(input_bytes_per_limb) {
+        // 64-bit arithmetic: `limb << 32` does not fit a `usize` on 32-bit targets.
+        let mut carry: u64 = 0;
+        let mut shift_size = 0;
+        for input_byte in chunk {
+            carry = (carry << 8) + u64::from(*input_byte);
+            shift_size += 8;
+        }
+
+        // number = number * 256**chunk.len() + chunk value, least significant limb first.
+        for limb in &mut output_as_limbs[..index] {
+            carry += u64::from(*limb) << shift_size;
+            *limb = (carry % next_limb_divisor) as u32;
+            carry /= next_limb_divisor;
+        }
+
+        // Spill whatever carry remains into fresh limbs.
+        while carry > 0 {
+            let limb = output_as_limbs.get_mut(index).ok_or(Error::BufferTooSmall)?;
+            *limb = (carry % next_limb_divisor) as u32;
+            index += 1;
+            carry /= next_limb_divisor;
+        }
+    }
+
+    // shouldn't happen since we control the output buffer passed in...
+    if output.len() < prefix_len + index * 5 {
+        return Err(Error::BufferTooSmall);
+    }
+
+    // Expand every 4-byte limb into its five digits, highest limb first: the five bytes written
+    // for limb `i` start at `5 * i`, past every lower limb (those end at or before `4 * i`).
+    for index in (0..index).rev() {
+        let limb_offset = prefix_len + index * 4;
+        let mut limb_bytes = [0; 4];
+        limb_bytes.copy_from_slice(&output[limb_offset..limb_offset + 4]);
+        // The limbs were written through the `u32` view above, i.e. in native byte order.
+        let limb = u32::from_ne_bytes(limb_bytes);
+
+        let output_byte4 = limb / (58 * 58 * 58 * 58);
+        let output_byte3 = (limb / (58 * 58 * 58)) % 58;
+        let output_byte2 = (limb / (58 * 58)) % 58;
+        let output_byte1 = (limb / 58) % 58;
+        let output_byte0 = limb % 58;
+
+        let output_offset = prefix_len + index * 5;
+        output[output_offset..output_offset + 5].copy_from_slice(&[
+            output_byte0 as u8,
+            output_byte1 as u8,
+            output_byte2 as u8,
+            output_byte3 as u8,
+            output_byte4 as u8,
+        ]);
+    }
+
+    // From here on `index` counts digits rather than limbs.
+    index *= 5;
+    let digits = &mut output[prefix_len..];
+    while index > 0 && digits[index - 1] == 0 { // drop the high zero digits
+        index -= 1;
+    }
+
+    // Every leading zero byte of the input becomes one leading zero digit.
+    for _ in input.iter().take_while(|v| **v == 0) {
+        let byte = digits.get_mut(index).ok_or(Error::BufferTooSmall)?;
+        *byte = 0;
+        index += 1;
+    }
+
+    for val in &mut digits[..index] {
+        *val = alpha.encode[*val as usize];
+    }
+    digits[..index].reverse(); // least significant first -> most significant first
+    output.copy_within(prefix_len..prefix_len + index, 0); // close the alignment gap, if any
+    Ok(index)
+}
+
 #[cfg(feature = "check")]
 fn encode_check_into(
     input: &[u8],

diff --git a/tests/decode.rs b/tests/decode.rs
@@ -7,6 +7,7 @@ use assert_matches::assert_matches;
 fn test_decode() {
     for &(val, s) in cases::TEST_CASES.iter() {
         assert_eq!(val.to_vec(), bs58::decode(s).into_vec().unwrap());
+        assert_eq!(val.to_vec(), bs58::decode(s).into_vec_unsafe().unwrap()); // limb-based path
     }
 }
 

diff --git a/tests/encode.rs b/tests/encode.rs
@@ -8,6 +8,7 @@ fn test_encode() {
         assert_eq!(s, bs58::encode(val).into_string());
 
         assert_eq!(s.as_bytes(), &*bs58::encode(val).into_vec());
+        assert_eq!(s.as_bytes(), &*bs58::encode(val).into_vec_unsafe());
 
         {
             let mut bytes = FILLER;