Skip to content

Commit

Permalink
Merge 23ceeed into 310f47b
Browse files Browse the repository at this point in the history
  • Loading branch information
khwilliamson committed Jul 3, 2021
2 parents 310f47b + 23ceeed commit ece7859
Show file tree
Hide file tree
Showing 9 changed files with 279 additions and 108 deletions.
3 changes: 3 additions & 0 deletions embed.fnc
Expand Up @@ -1141,6 +1141,9 @@ ATdmoR |bool |is_utf8_invariant_string|NN const U8* const s \
ATidRp |bool |is_utf8_invariant_string_loc|NN const U8* const s \
|STRLEN len \
|NULLOK const U8 ** ep
CTiRp |unsigned|my_ffs|PERL_UINTMAX_T word
CTiRp |unsigned|my_msbit_pos|PERL_UINTMAX_T word
CTiRp |unsigned|single_1bit_pos|PERL_UINTMAX_T word
#ifndef EBCDIC
CTiRp |unsigned int|variant_byte_number|PERL_UINTMAX_T word
#endif
Expand Down
3 changes: 3 additions & 0 deletions embed.h
Expand Up @@ -331,7 +331,9 @@
#define my_exit(a) Perl_my_exit(aTHX_ a)
#define my_failure_exit() Perl_my_failure_exit(aTHX)
#define my_fflush_all() Perl_my_fflush_all(aTHX)
#define my_ffs Perl_my_ffs
#define my_fork Perl_my_fork
#define my_msbit_pos Perl_my_msbit_pos
#define my_popen_list(a,b,c) Perl_my_popen_list(aTHX_ a,b,c)
#define my_setenv(a,b) Perl_my_setenv(aTHX_ a,b)
#define my_socketpair Perl_my_socketpair
Expand Down Expand Up @@ -561,6 +563,7 @@
#define set_context Perl_set_context
#define setdefout(a) Perl_setdefout(aTHX_ a)
#define share_hek(a,b,c) Perl_share_hek(aTHX_ a,b,c)
#define single_1bit_pos Perl_single_1bit_pos
#define sortsv(a,b,c) Perl_sortsv(aTHX_ a,b,c)
#define sortsv_flags(a,b,c,d) Perl_sortsv_flags(aTHX_ a,b,c,d)
#define stack_grow(a,b,c) Perl_stack_grow(aTHX_ a,b,c)
Expand Down
1 change: 1 addition & 0 deletions globvar.sym
Expand Up @@ -12,6 +12,7 @@ PL_c9_utf8_dfa_tab
PL_charclass
PL_check
PL_core_reg_engine
PL_deBruijn_bitpos_tab
PL_EXACTFish_bitmask
PL_EXACT_REQ8_bitmask
PL_extended_utf8_dfa_tab
Expand Down
225 changes: 165 additions & 60 deletions inline.h
Expand Up @@ -664,6 +664,150 @@ Perl_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
return TRUE;
}

PERL_STATIC_INLINE unsigned
Perl_single_1bit_pos(PERL_UINTMAX_T word)
{
/* Given a word known to contain all zero bits except one 1 bit, find and
* return the 1's position: 0..63 */

#ifdef PERL_CORE /* macro not exported */
ASSUME(isPOWER_OF_2(word));
#endif

#ifdef PERL_USE_CLZ

return my_msbit_pos(word);

#elif defined(PERL_USE_FFS)

return my_ffs(word);

#else

/* The position of the only set bit in a word can be quickly calculated
* using deBruijn sequences. See for example
* https://en.wikipedia.org/wiki/De_Bruijn_sequence */
return PL_deBruijn_bitpos_tab[(word * PERL_deBruijnMagic_)
>> PERL_deBruijnShift_];
#endif

}

#if defined(WIN32) && (defined(PERL_USE_FFS) || defined(PERL_USE_CLZ))
# include <intrin.h>
# pragma intrinsic(_BitScanReverse,_BitScanReverse64)
# pragma intrinsic(_BitScanForward,_BitScanForward64)
#endif

PERL_STATIC_INLINE unsigned
Perl_my_msbit_pos(PERL_UINTMAX_T word)
{
/* Find the position (0..63) of the most significant set bit in the input
* word */

ASSUME(word != 0);

#ifdef PERL_USE_CLZ
# ifndef WIN32

/* First set bit is the complement of how many leading unset bits */
return (PERL_UINTMAX_SIZE * CHARBITS) - 1 - PERL_USE_CLZ(word);

# else

{
unsigned long Index;

PERL_USE_CLZ(&Index, word);

return Index;
}

# endif
#else

/* Isolate the msb; http://codeforces.com/blog/entry/10330
*
* Only the most significant set bit matters. Or'ing word with its right
* shift of 1 makes that bit and the next one to its right both 1.
* Repeating that with the right shift of 2 makes for 4 1-bits in a row.
* ... We end with the msb and all to the right being 1. */
word |= (word >> 1);
word |= (word >> 2);
word |= (word >> 4);
word |= (word >> 8);
word |= (word >> 16);

# if PERL_UINTMAX_SIZE > 4

word |= (word >> 32);

# endif

/* Then subtracting the right shift by 1 clears all but the left-most of
* the 1 bits, which is our desired result */
word -= (word >> 1);

/* Now we have a single bit set */
return single_1bit_pos(word);

#endif

}

PERL_STATIC_INLINE unsigned
Perl_my_ffs(PERL_UINTMAX_T word)
{
/* Find the position (0..63) of the least significant set bit in the input
* word */

ASSUME(word != 0);

/* If we have clz, it may be just two inlined single machine instructions,
* coupled with the stuff below: an addition, complement and AND. It's
* hard to beat that. On the other hand ffs(), while likely faster than
* the hand-rolled code we otherwise would execute, may very well incur
* function call overhead. So use it only if no clz */
#if defined(PERL_USE_FFS) && ! defined(PERL_USE_CLZ)
# ifndef WIN32

/* ffs() returns bit position indexed from 1 */
return PERL_USE_FFS(word) - 1;

# else

{
unsigned long Index;

PERL_USE_FFS(&Index, word);

return Index;
}

# endif
#else

/* Isolate the lsb;
* https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
*
* The word will look like this, with a rightmost set bit in position 's':
* ('x's are don't cares, and 'y's are their complements)
* s
* x..x100..00
* y..y011..11 Complement
* y..y100..00 Add 1
* 0..0100..00 And with the original
*
* (Yes, complementing and adding 1 is just taking the negative on 2's
* complement machines, but not on 1's complement ones, and some compilers
* complain about negating an unsigned.)
*/
return single_1bit_pos(word & (~word + 1));

#endif

}

#ifndef EBCDIC

PERL_STATIC_INLINE unsigned int
Expand All @@ -673,88 +817,49 @@ Perl_variant_byte_number(PERL_UINTMAX_T word)
/* This returns the position in a word (0..7) of the first variant byte in
* it. This is a helper function. Note that there are no branches */

assert(word);

/* Get just the msb bits of each byte */
word &= PERL_VARIANTS_WORD_MASK;

/* This should only be called if we know there is a variant byte in the
* word */
assert(word);

# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678

/* Bytes are stored like
* Byte8 ... Byte2 Byte1
* 63..56...15...8 7...0
*
* Isolate the lsb;
* https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
*
* The word will look like this, with a rightmost set bit in position 's':
* ('x's are don't cares)
* s
* x..x100..0
* x..xx10..0 Right shift (rightmost 0 is shifted off)
* x..xx01..1 Subtract 1, turns all the trailing zeros into 1's and
* the 1 just to their left into a 0; the remainder is
* untouched
* 0..0011..1 The xor with the original, x..xx10..0, clears that
* remainder, sets the bottom to all 1
* 0..0100..0 Add 1 to clear the word except for the bit in 's'
*
* Another method is to do 'word &= -word'; but it generates a compiler
* message on some platforms about taking the negative of an unsigned */
* so getting the lsb of the whole modified word is getting the msb of the
* first byte that has its msb set */
word = my_ffs(word);

word >>= 1;
word = 1 + (word ^ (word - 1));
/* Here, word contains the position 7,15,23,...55,63 of that bit. Convert
* to 0..7 */
return (unsigned int) ((word + 1) >> 3) - 1;

# elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321

/* Bytes are stored like
* Byte1 Byte2 ... Byte8
* 63..56 55..47 ... 7...0
*
* Isolate the msb; http://codeforces.com/blog/entry/10330
*
* Only the most significant set bit matters. Or'ing word with its right
* shift of 1 makes that bit and the next one to its right both 1. Then
* right shifting by 2 makes for 4 1-bits in a row. ... We end with the
* msb and all to the right being 1. */
word |= word >> 1;
word |= word >> 2;
word |= word >> 4;
word |= word >> 8;
word |= word >> 16;
word |= word >> 32; /* This should get optimized out on 32-bit systems. */

/* Then subtracting the right shift by 1 clears all but the left-most of
* the 1 bits, which is our desired result */
word -= (word >> 1);

# else
# error Unexpected byte order
# endif
* so getting the msb of the whole modified word is getting the msb of the
* first byte that has its msb set */
word = my_msbit_pos(word);

/* Here 'word' has a single bit set: the msb of the first byte in which it
* is set. Calculate that position in the word. We can use this
* specialized solution: https://stackoverflow.com/a/32339674/1626653,
* assumes an 8-bit byte. (On a 32-bit machine, the larger numbers should
* just get shifted off at compile time) */
word = (word >> 7) * ((UINTMAX_C( 7) << 56) | (UINTMAX_C(15) << 48)
| (UINTMAX_C(23) << 40) | (UINTMAX_C(31) << 32)
| (39 << 24) | (47 << 16)
| (55 << 8) | (63 << 0));
word >>= PERL_WORDSIZE * 7; /* >> by either 56 or 24 */

/* Here, word contains the position 7,15,23,...,63 of that bit. Convert to
* 0..7 */
/* Here, word contains the position 63,55,...,23,15,7 of that bit. Convert
* to 0..7 */
word = ((word + 1) >> 3) - 1;

# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321

/* And invert the result */
/* And invert the result because of the reversed byte order on this
* platform */
word = CHARBITS - word - 1;

return (unsigned int) word;

# else
# error Unexpected byte order
# endif

return (unsigned int) word;
}

#endif
Expand Down
62 changes: 62 additions & 0 deletions perl.h
Expand Up @@ -1202,6 +1202,7 @@ Use L</UV> to declare variables of the maximum usable size on this platform.
typedef I64TYPE PERL_INTMAX_T;
typedef U64TYPE PERL_UINTMAX_T;
# endif
# define PERL_UINTMAX_SIZE U64SIZE
# ifndef INTMAX_C
# define INTMAX_C(c) INT64_C(c)
# endif
Expand All @@ -1216,6 +1217,7 @@ Use L</UV> to declare variables of the maximum usable size on this platform.
typedef I32TYPE PERL_INTMAX_T;
typedef U32TYPE PERL_UINTMAX_T;
# endif
# define PERL_UINTMAX_SIZE U32SIZE;
# ifndef INTMAX_C
# define INTMAX_C(c) INT32_C(c)
# endif
Expand Down Expand Up @@ -3863,6 +3865,36 @@ hint to the compiler that this condition is likely to be false.
# define __has_builtin(x) 0 /* not a clang style compiler */
#endif

#if defined(WIN32) || defined(WIN64)
# if defined(_MSC_VER) && _MSC_VER >= 1400
# ifdef WIN64
# define PERL_USE_CLZ(i, x) _BitScanReverse64(i, x)
# define PERL_USE_FFS(i, x) _BitScanForward64(i, x)
# else
# define PERL_USE_CLZ(i, x) _BitScanReverse(i, x)
# define PERL_USE_FFS(i, x) _BitScanForward(i, x)
# endif
# endif
#elif PERL_UINTMAX_SIZE == INTSIZE
# if __has_builtin(__builtin_clz) \
|| (defined(__GNUC__) && ( __GNUC__ > 3 \
|| __GNUC__ == 3 && __GNUC_MINOR__ >= 4))
# define PERL_USE_CLZ(x) __builtin_clz(x)
# endif
# ifdef HAS_FFS
# define PERL_USE_FFS(x) ffs(x)
# endif
#elif PERL_UINTMAX_SIZE == LONGSIZE
# if __has_builtin(__builtin_clzl) \
|| (defined(__GNUC__) && ( __GNUC__ > 3 \
|| __GNUC__ == 3 && __GNUC_MINOR__ >= 4))
# define PERL_USE_CLZ(x) __builtin_clzl(x)
# endif
# ifdef HAS_FFSL
# define PERL_USE_FFS(x) ffsl(x)
# endif
#endif

/*
=for apidoc Am||ASSUME|bool expr
C<ASSUME> is like C<assert()>, but it has a benefit in a release build. It is a
Expand Down Expand Up @@ -5836,6 +5868,26 @@ PL_valid_types_IV_set[] = { 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1 };
EXTCONST bool
PL_valid_types_NV_set[] = { 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };

EXTCONST U8
PL_deBruijn_bitpos_tab[] = {

# if PERL_UINTMAX_SIZE == 4
/* https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn */
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9

# else

/* https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers */
63, 0, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54, 33, 42, 3,
61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4,
62, 57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21,
56, 45, 25, 31, 35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5

# endif

};

#else

EXTCONST bool PL_valid_types_IVX[];
Expand All @@ -5844,9 +5896,19 @@ EXTCONST bool PL_valid_types_PVX[];
EXTCONST bool PL_valid_types_RV[];
EXTCONST bool PL_valid_types_IV_set[];
EXTCONST bool PL_valid_types_NV_set[];
EXTCONST U8 PL_deBruijn_bitpos_tab[];

#endif

/* The constants for using PL_deBruijn_bitpos_tab */
#if PERL_UINTMAX_SIZE == 4
# define PERL_deBruijnMagic_ 0x077CB531
# define PERL_deBruijnShift_ 27
#else
# define PERL_deBruijnMagic_ 0x07EDD5E59A4E28C2
# define PERL_deBruijnShift_ 58
#endif

/* In C99 we could use designated (named field) union initializers.
* In C89 we need to initialize the member declared first.
* In C++ we need extern C initializers.
Expand Down

0 comments on commit ece7859

Please sign in to comment.