Skip to content

Commit

Permalink
Create and use 32 and 64 bit msbit_pos() functions
Browse files Browse the repository at this point in the history
The existing code to determine the position of the most significant 1
bit in a word is extracted from variant_byte_number(), and generalized
to use the de Bruijn method previously added that works on any bit in the
word, rather than the existing method which looks just at the msb of
each byte.  The code is moved to a new function in preparation for being
called from other places.

A U32 version is created, and on 64 bit platforms, a second, parallel,
version taking a U64 argument is also created.  This is because future
commits may care about the word size differences.
  • Loading branch information
khwilliamson committed Jul 30, 2021
1 parent b5288ed commit 330cd0c
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 39 deletions.
2 changes: 2 additions & 0 deletions embed.fnc
Expand Up @@ -1143,9 +1143,11 @@ ATidRp |bool |is_utf8_invariant_string_loc|NN const U8* const s \
|NULLOK const U8 ** ep
CTiRp |unsigned|single_1bit_pos32|U32 word
CTiRp |unsigned|lsbit_pos32|U32 word
CTiRp |unsigned|msbit_pos32|U32 word
#ifdef U64TYPE /* HAS_QUAD undefined outside of core */
CTiRp |unsigned|single_1bit_pos64|U64 word
CTiRp |unsigned|lsbit_pos64|U64 word
CTiRp |unsigned|msbit_pos64|U64 word
#endif
#ifndef EBCDIC
CTiRp |unsigned int|variant_byte_number|PERL_UINTMAX_T word
Expand Down
2 changes: 2 additions & 0 deletions embed.h
Expand Up @@ -326,6 +326,7 @@
#define mortal_getenv Perl_mortal_getenv
#define mro_get_linear_isa(a) Perl_mro_get_linear_isa(aTHX_ a)
#define mro_method_changed_in(a) Perl_mro_method_changed_in(aTHX_ a)
#define msbit_pos32 Perl_msbit_pos32
#define my_atof(a) Perl_my_atof(aTHX_ a)
#define my_atof3(a,b,c) Perl_my_atof3(aTHX_ a,b,c)
#define my_dirfd Perl_my_dirfd
Expand Down Expand Up @@ -873,6 +874,7 @@
#endif
#if defined(U64TYPE) /* HAS_QUAD undefined outside of core */
#define lsbit_pos64 Perl_lsbit_pos64
#define msbit_pos64 Perl_msbit_pos64
#define single_1bit_pos64 Perl_single_1bit_pos64
#endif
#if defined(UNLINK_ALL_VERSIONS)
Expand Down
104 changes: 65 additions & 39 deletions inline.h
Expand Up @@ -664,7 +664,7 @@ Perl_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
return TRUE;
}

/* Below are functions to find the final or only set bit in a word. On
/* Below are functions to find the first, last, or only set bit in a word. On
* platforms with 64-bit capability, there is a pair for each operation; the
* first taking a 64 bit operand, and the second a 32 bit one. The logic is
* the same in each pair, so the second is stripped of most comments. */
Expand Down Expand Up @@ -715,6 +715,59 @@ Perl_lsbit_pos32(U32 word)

#ifdef U64TYPE /* HAS_QUAD not usable outside the core */

PERL_STATIC_INLINE unsigned
Perl_msbit_pos64(U64 word)
{
    /* Return the position (0..63) of the highest-order 1 bit in 'word'.
     * The input must have at least one bit set. */

    U64 smeared = word;
    unsigned shift;

    ASSUME(word != 0);

    /* Propagate the top set bit into every lower position, using the
     * technique from http://codeforces.com/blog/entry/10330
     *
     * Bits below the most significant set one are irrelevant to the answer.
     * OR-ing the value with itself shifted right by 1 yields two adjacent
     * 1-bits starting at that position; doing the same with a shift of 2
     * yields four, and so on, doubling each time.  After the shifts of
     * 1, 2, 4, 8, 16 and 32, the top set bit and everything below it are
     * all 1. */
    for (shift = 1; shift < 64; shift <<= 1) {
        smeared |= (smeared >> shift);
    }

    /* Subtracting the value shifted right by 1 removes every 1-bit except
     * the left-most, leaving just the bit we are after */
    smeared -= (smeared >> 1);

    /* Exactly one bit is now set; let the single-bit helper locate it */
    return single_1bit_pos64(smeared);
}

# define msbit_pos_uintmax_(word) msbit_pos64(word)
#else /* ! QUAD */
# define msbit_pos_uintmax_(word) msbit_pos32(word)
#endif

PERL_STATIC_INLINE unsigned
Perl_msbit_pos32(U32 word)
{
    /* Return the position (0..31) of the highest-order 1 bit in 'word'.
     * The input must have at least one bit set. */

    U32 smeared = word;
    unsigned shift;

    ASSUME(word != 0);

    /* OR in right shifts of 1, 2, 4, 8 and 16 so that the top set bit and
     * every bit below it become 1 */
    for (shift = 1; shift < 32; shift <<= 1) {
        smeared |= (smeared >> shift);
    }

    /* Drop all but the left-most 1-bit */
    smeared -= (smeared >> 1);

    /* Exactly one bit is now set; let the single-bit helper locate it */
    return single_1bit_pos32(smeared);
}

#ifdef U64TYPE /* HAS_QUAD not usable outside the core */

PERL_STATIC_INLINE unsigned
Perl_single_1bit_pos64(U64 word)
{
Expand Down Expand Up @@ -786,51 +839,24 @@ Perl_variant_byte_number(PERL_UINTMAX_T word)
/* Bytes are stored like
* Byte1 Byte2 ... Byte8
* 63..56 55..48 ... 7...0
*
* Isolate the msb; http://codeforces.com/blog/entry/10330
*
* Only the most significant set bit matters. Or'ing word with its right
* shift of 1 makes that bit and the next one to its right both 1. Then
* right shifting by 2 makes for 4 1-bits in a row. ... We end with the
* msb and all to the right being 1. */
word |= word >> 1;
word |= word >> 2;
word |= word >> 4;
word |= word >> 8;
word |= word >> 16;
word |= word >> 32; /* This should get optimized out on 32-bit systems. */

/* Then subtracting the right shift by 1 clears all but the left-most of
* the 1 bits, which is our desired result */
word -= (word >> 1);

# else
# error Unexpected byte order
# endif
* so getting the msb of the whole modified word is getting the msb of the
* first byte that has its msb set */
word = msbit_pos_uintmax_(word);

/* Here 'word' has a single bit set: the msb of the first byte in which it
* is set. Calculate that position in the word. We can use this
* specialized solution: https://stackoverflow.com/a/32339674/1626653,
* assumes an 8-bit byte. (On a 32-bit machine, the larger numbers should
* just get shifted off at compile time) */
word = (word >> 7) * ((UINTMAX_C( 7) << 56) | (UINTMAX_C(15) << 48)
| (UINTMAX_C(23) << 40) | (UINTMAX_C(31) << 32)
| (39 << 24) | (47 << 16)
| (55 << 8) | (63 << 0));
word >>= PERL_WORDSIZE * 7; /* >> by either 56 or 24 */

/* Here, word contains the position 7,15,23,...,63 of that bit. Convert to
* 0..7 */
/* Here, word contains the position 63,55,...,23,15,7 of that bit. Convert
* to 0..7 */
word = ((word + 1) >> 3) - 1;

# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321

/* And invert the result */
/* And invert the result because of the reversed byte order on this
* platform */
word = CHARBITS - word - 1;

return (unsigned int) word;

# else
# error Unexpected byte order
# endif

return (unsigned int) word;
}

#endif
Expand Down
12 changes: 12 additions & 0 deletions proto.h
Expand Up @@ -2152,6 +2152,12 @@ PERL_CALLCONV void Perl_mro_set_mro(pTHX_ struct mro_meta *const meta, SV *const
PERL_CALLCONV SV* Perl_mro_set_private_data(pTHX_ struct mro_meta *const smeta, const struct mro_alg *const which, SV *const data);
#define PERL_ARGS_ASSERT_MRO_SET_PRIVATE_DATA \
assert(smeta); assert(which); assert(data)
#ifndef PERL_NO_INLINE_FUNCTIONS
PERL_STATIC_INLINE unsigned Perl_msbit_pos32(U32 word)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_MSBIT_POS32
#endif

PERL_CALLCONV SV* Perl_multiconcat_stringify(pTHX_ const OP* o);
#define PERL_ARGS_ASSERT_MULTICONCAT_STRINGIFY \
assert(o)
Expand Down Expand Up @@ -6723,6 +6729,12 @@ PERL_STATIC_INLINE unsigned Perl_lsbit_pos64(U64 word)
#define PERL_ARGS_ASSERT_LSBIT_POS64
#endif

#ifndef PERL_NO_INLINE_FUNCTIONS
PERL_STATIC_INLINE unsigned Perl_msbit_pos64(U64 word)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_MSBIT_POS64
#endif

#ifndef PERL_NO_INLINE_FUNCTIONS
PERL_STATIC_INLINE unsigned Perl_single_1bit_pos64(U64 word)
__attribute__warn_unused_result__;
Expand Down

0 comments on commit 330cd0c

Please sign in to comment.