Skip to content

Commit

Permalink
added extra deserialization optimization for types within certain limits
Browse files Browse the repository at this point in the history
  • Loading branch information
DarrenLevine committed Jul 4, 2021
1 parent c9b709e commit ea1d4b0
Show file tree
Hide file tree
Showing 2 changed files with 201 additions and 2 deletions.
180 changes: 180 additions & 0 deletions include/bitcpy_common.h
Expand Up @@ -274,6 +274,186 @@ namespace serdes
{
static constexpr size_t value = 1u;
};

/// @brief Primary template: produce one T value from the front of a T2 array.
///        Only declared here; the explicit specializations that follow supply the
///        definitions, each treating data[0] as the most significant part of the result.
/// @tparam T: type of the value to produce
/// @tparam T2: element type of the source array
/// @return T: the assembled (or truncated) value
template <typename T, typename T2>
CONSTEXPR_ABOVE_CPP11 inline T big_endian_memcpy(const T2 *const);

//
// uint8_t[] section
//

/// @brief uint8_t[] -> uint8_t: same width, so the result is the first element.
/// @param data: source array, element 0 is read
/// @return uint8_t: data[0]
template <>
CONSTEXPR_ABOVE_CPP11 inline uint8_t big_endian_memcpy(const uint8_t *const data)
{
    return *data;
}
/// @brief uint8_t[] -> uint16_t: data[0] becomes the high byte, data[1] the low byte.
/// @param data: source array, elements 0..1 are read
/// @return uint16_t: (data[0] << 8) | data[1]
template <>
CONSTEXPR_ABOVE_CPP11 inline uint16_t big_endian_memcpy(const uint8_t *const data)
{
    // operands narrower than int are promoted before << and |, so the expression
    // is not a uint16_t; narrow it explicitly instead of relying on an implicit
    // conversion at the return
    return static_cast<uint16_t>((static_cast<uint32_t>(data[0]) << 8) |
                                 static_cast<uint32_t>(data[1]));
}
/// @brief uint8_t[] -> uint32_t: data[0] is the most significant byte, data[3] the least.
/// @param data: source array, elements 0..3 are read
/// @return uint32_t: the four bytes combined, most significant first
template <>
CONSTEXPR_ABOVE_CPP11 inline uint32_t big_endian_memcpy(const uint8_t *const data)
{
    // every term is widened to the result type before shifting so no bits are lost
    return (static_cast<uint32_t>(data[0]) << 24) |
           (static_cast<uint32_t>(data[1]) << 16) |
           (static_cast<uint32_t>(data[2]) << 8) |
           static_cast<uint32_t>(data[3]);
}
/// @brief uint8_t[] -> uint64_t: data[0] is the most significant byte, data[7] the least.
/// @param data: source array, elements 0..7 are read
/// @return uint64_t: the eight bytes combined, most significant first
template <>
CONSTEXPR_ABOVE_CPP11 inline uint64_t big_endian_memcpy(const uint8_t *const data)
{
    // build each 32 bit half separately, then join them
    const uint64_t upper_half = (static_cast<uint64_t>(data[0]) << 24) |
                                (static_cast<uint64_t>(data[1]) << 16) |
                                (static_cast<uint64_t>(data[2]) << 8) |
                                static_cast<uint64_t>(data[3]);
    const uint64_t lower_half = (static_cast<uint64_t>(data[4]) << 24) |
                                (static_cast<uint64_t>(data[5]) << 16) |
                                (static_cast<uint64_t>(data[6]) << 8) |
                                static_cast<uint64_t>(data[7]);
    return (upper_half << 32) | lower_half;
}

//
// uint16_t[] section
//

/// @brief uint16_t[] -> uint8_t: keeps only the most significant 8 bits of data[0].
/// @param data: source array, element 0 is read
/// @return uint8_t: data[0] >> 8
template <>
CONSTEXPR_ABOVE_CPP11 inline uint8_t big_endian_memcpy(const uint16_t *const data)
{
    const uint16_t first_element = *data;
    return static_cast<uint8_t>(first_element >> 8);
}
/// @brief uint16_t[] -> uint16_t: same width, so the result is the first element.
/// @param data: source array, element 0 is read
/// @return uint16_t: data[0]
template <>
CONSTEXPR_ABOVE_CPP11 inline uint16_t big_endian_memcpy(const uint16_t *const data)
{
    return *data;
}
/// @brief uint16_t[] -> uint32_t: data[0] fills the upper 16 bits, data[1] the lower 16.
/// @param data: source array, elements 0..1 are read
/// @return uint32_t: (data[0] << 16) | data[1]
template <>
CONSTEXPR_ABOVE_CPP11 inline uint32_t big_endian_memcpy(const uint16_t *const data)
{
    const uint32_t upper = data[0];
    const uint32_t lower = data[1];
    return (upper << 16) | lower;
}
/// @brief uint16_t[] -> uint64_t: data[0] is the most significant 16 bits, data[3] the least.
/// @param data: source array, elements 0..3 are read
/// @return uint64_t: the four elements combined, most significant first
template <>
CONSTEXPR_ABOVE_CPP11 inline uint64_t big_endian_memcpy(const uint16_t *const data)
{
    // shift the accumulated value up by one element and append the next one
    uint64_t value = data[0];
    value = (value << 16) | static_cast<uint64_t>(data[1]);
    value = (value << 16) | static_cast<uint64_t>(data[2]);
    value = (value << 16) | static_cast<uint64_t>(data[3]);
    return value;
}

//
// uint32_t[] section
//

/// @brief uint32_t[] -> uint8_t: keeps only the most significant 8 bits of data[0].
/// @param data: source array, element 0 is read
/// @return uint8_t: data[0] >> 24
template <>
CONSTEXPR_ABOVE_CPP11 inline uint8_t big_endian_memcpy(const uint32_t *const data)
{
    const uint32_t first_element = *data;
    return static_cast<uint8_t>(first_element >> 24);
}
/// @brief uint32_t[] -> uint16_t: keeps only the most significant 16 bits of data[0].
/// @param data: source array, element 0 is read
/// @return uint16_t: data[0] >> 16
template <>
CONSTEXPR_ABOVE_CPP11 inline uint16_t big_endian_memcpy(const uint32_t *const data)
{
    const uint32_t first_element = *data;
    return static_cast<uint16_t>(first_element >> 16);
}
/// @brief uint32_t[] -> uint32_t: same width, so the result is the first element.
/// @param data: source array, element 0 is read
/// @return uint32_t: data[0]
template <>
CONSTEXPR_ABOVE_CPP11 inline uint32_t big_endian_memcpy(const uint32_t *const data)
{
    return *data;
}
/// @brief uint32_t[] -> uint64_t: data[0] fills the upper 32 bits, data[1] the lower 32.
/// @param data: source array, elements 0..1 are read
/// @return uint64_t: (data[0] << 32) | data[1]
template <>
CONSTEXPR_ABOVE_CPP11 inline uint64_t big_endian_memcpy(const uint32_t *const data)
{
    const uint64_t upper = data[0];
    const uint64_t lower = data[1];
    return (upper << 32) | lower;
}

//
// uint64_t[] section
//

/// @brief uint64_t[] -> uint8_t: keeps only the most significant 8 bits of data[0].
/// @param data: source array, element 0 is read
/// @return uint8_t: data[0] >> 56
template <>
CONSTEXPR_ABOVE_CPP11 inline uint8_t big_endian_memcpy(const uint64_t *const data)
{
    const uint64_t first_element = *data;
    return static_cast<uint8_t>(first_element >> 56);
}
/// @brief uint64_t[] -> uint16_t: keeps only the most significant 16 bits of data[0].
/// @param data: source array, element 0 is read
/// @return uint16_t: data[0] >> 48
template <>
CONSTEXPR_ABOVE_CPP11 inline uint16_t big_endian_memcpy(const uint64_t *const data)
{
    const uint64_t first_element = *data;
    return static_cast<uint16_t>(first_element >> 48);
}
/// @brief uint64_t[] -> uint32_t: keeps only the most significant 32 bits of data[0].
/// @param data: source array, element 0 is read
/// @return uint32_t: data[0] >> 32
template <>
CONSTEXPR_ABOVE_CPP11 inline uint32_t big_endian_memcpy(const uint64_t *const data)
{
    const uint64_t first_element = *data;
    return static_cast<uint32_t>(first_element >> 32);
}
/// @brief uint64_t[] -> uint64_t: same width, so the result is the first element.
/// @param data: source array, element 0 is read
/// @return uint64_t: data[0]
template <>
CONSTEXPR_ABOVE_CPP11 inline uint64_t big_endian_memcpy(const uint64_t *const data)
{
    return *data;
}

BITCPY_INT128_CONDITIONAL_DEFINE_C(

/// @brief uint8_t[] -> __uint128_t: built from two 64 bit reads. The first 8 bytes
///        form the upper half and the following 8 bytes form the lower half.
/// @param data: source array, elements 0..15 are read
/// @return __uint128_t: (upper 64 bits << 64) | lower 64 bits
template <>
CONSTEXPR_ABOVE_CPP11 inline __uint128_t big_endian_memcpy(const uint8_t *const data)
{
    // number of source elements that make up the first 8 bytes
    const uint8_t *const lower_source = data + (8 / sizeof(uint8_t));
    const __uint128_t upper = static_cast<__uint128_t>(big_endian_memcpy<uint64_t, uint8_t>(data));
    const __uint128_t lower = static_cast<__uint128_t>(big_endian_memcpy<uint64_t, uint8_t>(lower_source));
    return (upper << 64) | lower;
}

/// @brief uint16_t[] -> __uint128_t: built from two 64 bit reads. The first 8 bytes
///        worth of elements form the upper half and the next 8 bytes worth the lower half.
/// @param data: source array, read in two 64 bit pieces
/// @return __uint128_t: (upper 64 bits << 64) | lower 64 bits
template <>
CONSTEXPR_ABOVE_CPP11 inline __uint128_t big_endian_memcpy(const uint16_t *const data)
{
    // skip the elements consumed by the upper 64 bit read
    const uint16_t *const lower_source = data + (8 / sizeof(uint16_t));
    const __uint128_t upper = static_cast<__uint128_t>(big_endian_memcpy<uint64_t, uint16_t>(data));
    const __uint128_t lower = static_cast<__uint128_t>(big_endian_memcpy<uint64_t, uint16_t>(lower_source));
    return (upper << 64) | lower;
}

/// @brief uint32_t[] -> __uint128_t: built from two 64 bit reads. The first 8 bytes
///        worth of elements form the upper half and the next 8 bytes worth the lower half.
/// @param data: source array, read in two 64 bit pieces
/// @return __uint128_t: (upper 64 bits << 64) | lower 64 bits
template <>
CONSTEXPR_ABOVE_CPP11 inline __uint128_t big_endian_memcpy(const uint32_t *const data)
{
    // skip the elements consumed by the upper 64 bit read
    const uint32_t *const lower_source = data + (8 / sizeof(uint32_t));
    const __uint128_t upper = static_cast<__uint128_t>(big_endian_memcpy<uint64_t, uint32_t>(data));
    const __uint128_t lower = static_cast<__uint128_t>(big_endian_memcpy<uint64_t, uint32_t>(lower_source));
    return (upper << 64) | lower;
}

/// @brief uint64_t[] -> __uint128_t: built from two 64 bit reads. The first 8 bytes
///        worth of elements form the upper half and the next 8 bytes worth the lower half.
/// @param data: source array, read in two 64 bit pieces
/// @return __uint128_t: (upper 64 bits << 64) | lower 64 bits
template <>
CONSTEXPR_ABOVE_CPP11 inline __uint128_t big_endian_memcpy(const uint64_t *const data)
{
    // skip the elements consumed by the upper 64 bit read
    const uint64_t *const lower_source = data + (8 / sizeof(uint64_t));
    const __uint128_t upper = static_cast<__uint128_t>(big_endian_memcpy<uint64_t, uint64_t>(data));
    const __uint128_t lower = static_cast<__uint128_t>(big_endian_memcpy<uint64_t, uint64_t>(lower_source));
    return (upper << 64) | lower;
}

/// @brief __uint128_t[] -> __uint128_t: same width, so the result is the first element.
/// @param data: source array, element 0 is read
/// @return __uint128_t: data[0]
template <>
CONSTEXPR_ABOVE_CPP11 inline __uint128_t big_endian_memcpy(const __uint128_t *const data)
{
    return *data;
}

/// @brief __uint128_t[] -> uint64_t: keeps only the most significant 64 bits of data[0].
/// @param data: source array, element 0 is read
/// @return uint64_t: data[0] >> 64
template <>
CONSTEXPR_ABOVE_CPP11 inline uint64_t big_endian_memcpy(const __uint128_t *const data)
{
    const __uint128_t first_element = *data;
    return static_cast<uint64_t>(first_element >> 64);
}

/// @brief __uint128_t[] -> uint32_t: keeps only the most significant 32 bits of data[0].
/// @param data: source array, element 0 is read
/// @return uint32_t: data[0] >> 96
template <>
CONSTEXPR_ABOVE_CPP11 inline uint32_t big_endian_memcpy(const __uint128_t *const data)
{
    const __uint128_t first_element = *data;
    return static_cast<uint32_t>(first_element >> 96);
}

/// @brief __uint128_t[] -> uint16_t: keeps only the most significant 16 bits of data[0].
/// @param data: source array, element 0 is read
/// @return uint16_t: data[0] >> 112
template <>
CONSTEXPR_ABOVE_CPP11 inline uint16_t big_endian_memcpy(const __uint128_t *const data)
{
    const __uint128_t first_element = *data;
    return static_cast<uint16_t>(first_element >> 112);
}

/// @brief __uint128_t[] -> uint8_t: keeps only the most significant 8 bits of data[0].
/// @param data: source array, element 0 is read
/// @return uint8_t: data[0] >> 120
template <>
CONSTEXPR_ABOVE_CPP11 inline uint8_t big_endian_memcpy(const __uint128_t *const data)
{
    const __uint128_t first_element = *data;
    return static_cast<uint8_t>(first_element >> 120);
}

);

}
}
#endif // _BITCPY_COMMON_H_
23 changes: 21 additions & 2 deletions include/bitcpy_from_array.h
Expand Up @@ -51,11 +51,26 @@ namespace serdes
return bits;
}

// shortcut 3: if there is no left 0 padding, and we wouldn't exceed the buffer
// safety limit by possibly reading past the end of the array, we can avoid the loop
// by using big_endian_memcpy
constexpr size_t bits_per_T_val = sizeof(T_val) * 8u;
if (number_of_bits_after_index <= bits_per_T_val)
{
const size_t alignment_shift = bits_per_T_val - number_of_bits_after_index;
if (alignment_shift < bits_per_T_array)
{
dest = detail::big_endian_memcpy<T_val, T_array>(&source[array_read_index]);
const T_val unaligned_mask = detail::bitmask<T_val>(bits);
dest = static_cast<T_val>(dest >> alignment_shift) & unaligned_mask;
return bits;
}
}

// if the read DOES need to be split across > 1 array elements, or left 0 padding is needed
const size_t num_array_elements_touched_minus_one = (number_of_bits_after_index + bits_per_T_array - 1u) / bits_per_T_array - 1u;
const size_t bits_in_first_element = bits_per_T_array - bit_offset_from_start_index;
size_t bits_remaining = bits - bits_in_first_element;
constexpr size_t bits_per_T_val = sizeof(T_val) * 8u;
size_t start_index = 1u;
if (bits_remaining < bits_per_T_val)
{
Expand Down Expand Up @@ -221,7 +236,11 @@ namespace serdes
/// @param bits: number of bits to copy from
/// @return size_t: number of bits copied
template <typename T_array, typename T_val, detail::requires_large_non_integral_type<T_val> * = nullptr>
CONSTEXPR_ABOVE_CPP11 size_t bitcpy(T_val &dest, const T_array *const source, const size_t bit_offset = 0, const size_t bits = detail::default_bitsize<T_val>::value) noexcept
#ifndef __clang__
CONSTEXPR_ABOVE_CPP11
#endif
size_t
bitcpy(T_val &dest, const T_array *const source, const size_t bit_offset = 0, const size_t bits = detail::default_bitsize<T_val>::value) noexcept
{
sized_pointer<uint8_t> dest_array(reinterpret_cast<uint8_t *>(&dest), sizeof(T_val));
return bitcpy(dest_array, source, bit_offset, bits);
Expand Down

0 comments on commit ea1d4b0

Please sign in to comment.