diff --git a/embed.fnc b/embed.fnc index 55096fd9ea96..a883f0cfacb2 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1141,6 +1141,14 @@ ATdmoR |bool |is_utf8_invariant_string|NN const U8* const s \ ATidRp |bool |is_utf8_invariant_string_loc|NN const U8* const s \ |STRLEN len \ |NULLOK const U8 ** ep +CTiRp |unsigned|single_1bit_pos32|U32 word +CTiRp |unsigned|lsbit_pos32|U32 word +CTiRp |unsigned|msbit_pos32|U32 word +#ifdef U64TYPE /* HAS_QUAD undefined outside of core */ +CTiRp |unsigned|single_1bit_pos64|U64 word +CTiRp |unsigned|lsbit_pos64|U64 word +CTiRp |unsigned|msbit_pos64|U64 word +#endif #ifndef EBCDIC CTiRp |unsigned int|variant_byte_number|PERL_UINTMAX_T word #endif diff --git a/embed.h b/embed.h index 62c9e68455ad..e32b06076cd5 100644 --- a/embed.h +++ b/embed.h @@ -302,6 +302,7 @@ #define load_module Perl_load_module #endif #define looks_like_number(a) Perl_looks_like_number(aTHX_ a) +#define lsbit_pos32 Perl_lsbit_pos32 #define magic_dump(a) Perl_magic_dump(aTHX_ a) #define markstack_grow() Perl_markstack_grow(aTHX) #ifndef MULTIPLICITY @@ -325,6 +326,7 @@ #define mortal_getenv Perl_mortal_getenv #define mro_get_linear_isa(a) Perl_mro_get_linear_isa(aTHX_ a) #define mro_method_changed_in(a) Perl_mro_method_changed_in(aTHX_ a) +#define msbit_pos32 Perl_msbit_pos32 #define my_atof(a) Perl_my_atof(aTHX_ a) #define my_atof3(a,b,c) Perl_my_atof3(aTHX_ a,b,c) #define my_dirfd Perl_my_dirfd @@ -561,6 +563,7 @@ #define set_context Perl_set_context #define setdefout(a) Perl_setdefout(aTHX_ a) #define share_hek(a,b,c) Perl_share_hek(aTHX_ a,b,c) +#define single_1bit_pos32 Perl_single_1bit_pos32 #define sortsv(a,b,c) Perl_sortsv(aTHX_ a,b,c) #define sortsv_flags(a,b,c,d) Perl_sortsv_flags(aTHX_ a,b,c,d) #define stack_grow(a,b,c) Perl_stack_grow(aTHX_ a,b,c) @@ -869,6 +872,11 @@ #if defined(PERL_USE_3ARG_SIGHANDLER) #define csighandler Perl_csighandler #endif +#if defined(U64TYPE) /* HAS_QUAD undefined outside of core */ +#define lsbit_pos64 Perl_lsbit_pos64 +#define msbit_pos64 Perl_msbit_pos64 +#define single_1bit_pos64 Perl_single_1bit_pos64 +#endif #if defined(UNLINK_ALL_VERSIONS) #define unlnk(a) Perl_unlnk(aTHX_ a) #endif diff --git a/globvar.sym b/globvar.sym index 03c324868ba4..22e353f34d33 100644 --- a/globvar.sym +++ b/globvar.sym @@ -12,6 +12,8 @@ PL_c9_utf8_dfa_tab PL_charclass PL_check PL_core_reg_engine +PL_deBruijn_bitpos_tab32 +PL_deBruijn_bitpos_tab64 PL_EXACTFish_bitmask PL_EXACT_REQ8_bitmask PL_extended_utf8_dfa_tab diff --git a/inline.h b/inline.h index 55b26e0d7810..5ed7692984e6 100644 --- a/inline.h +++ b/inline.h @@ -664,97 +664,412 @@ Perl_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep) return TRUE; } -#ifndef EBCDIC +/* See if the platform has builtins for finding the most/least significant bit, + * and which one is right for using on 32 and 64 bit operands */ +#if (__has_builtin(__builtin_clz) || PERL_GCC_VERSION_GE(3,4,0)) +# if U32SIZE == INTSIZE +# define PERL_CLZ_32 __builtin_clz +# endif +# if defined(U64TYPE) && U64SIZE == INTSIZE +# define PERL_CLZ_64 __builtin_clz +# endif +#endif +#if (__has_builtin(__builtin_ctz) || PERL_GCC_VERSION_GE(3,4,0)) +# if U32SIZE == INTSIZE +# define PERL_CTZ_32 __builtin_ctz +# endif +# if defined(U64TYPE) && U64SIZE == INTSIZE +# define PERL_CTZ_64 __builtin_ctz +# endif +#endif -PERL_STATIC_INLINE unsigned int -Perl_variant_byte_number(PERL_UINTMAX_T word) +#if (__has_builtin(__builtin_clzl) || PERL_GCC_VERSION_GE(3,4,0)) +# if U32SIZE == LONGSIZE && ! defined(PERL_CLZ_32) +# define PERL_CLZ_32 __builtin_clzl +# endif +# if defined(U64TYPE) && U64SIZE == LONGSIZE && ! defined(PERL_CLZ_64) +# define PERL_CLZ_64 __builtin_clzl +# endif +#endif +#if (__has_builtin(__builtin_ctzl) || PERL_GCC_VERSION_GE(3,4,0)) +# if U32SIZE == LONGSIZE && ! defined(PERL_CTZ_32) +# define PERL_CTZ_32 __builtin_ctzl +# endif +# if defined(U64TYPE) && U64SIZE == LONGSIZE && ! defined(PERL_CTZ_64) +# define PERL_CTZ_64 __builtin_ctzl +# endif +#endif + +#if (__has_builtin(__builtin_clzll) || PERL_GCC_VERSION_GE(3,4,0)) +# if U32SIZE == LONGLONGSIZE && ! defined(PERL_CLZ_32) +# define PERL_CLZ_32 __builtin_clzll +# endif +# if defined(U64TYPE) && U64SIZE == LONGLONGSIZE && ! defined(PERL_CLZ_64) +# define PERL_CLZ_64 __builtin_clzll +# endif +#endif +#if (__has_builtin(__builtin_ctzll) || PERL_GCC_VERSION_GE(3,4,0)) +# if U32SIZE == LONGLONGSIZE && ! defined(PERL_CTZ_32) +# define PERL_CTZ_32 __builtin_ctzll +# endif +# if defined(U64TYPE) && U64SIZE == LONGLONGSIZE && ! defined(PERL_CTZ_64) +# define PERL_CTZ_64 __builtin_ctzll +# endif +#endif + +#if defined(_MSC_VER) && _MSC_VER >= 1400 +# include +# pragma intrinsic(_BitScanForward) +# pragma intrinsic(_BitScanReverse) +# ifdef _WIN64 +# pragma intrinsic(_BitScanForward64) +# pragma intrinsic(_BitScanReverse64) +# endif +#endif + +/* The reason there are not checks to see if ffs() and ffsl() are available for + * determining the lsb, is because these don't improve on the deBruijn method + * fallback, which is just a branchless integer multiply, array element + * retrieval, and shift. The others, even if the function call overhead is + * optimized out, have to cope with the possibility of the input being all + * zeroes, and almost certainly will have conditionals for this eventuality. + * khw, at the time of this commit, looked at the source for both gcc and clang + * to verify this. (gcc used a method inferior to deBruijn.) */ + +/* Below are functions to find the first, last, or only set bit in a word. On + * platforms with 64-bit capability, there is a pair for each operation; the + * first taking a 64 bit operand, and the second a 32 bit one. The logic is + * the same in each pair, so the second is stripped of most comments. */ + +#ifdef U64TYPE /* HAS_QUAD not usable outside the core */ + +PERL_STATIC_INLINE unsigned +Perl_lsbit_pos64(U64 word) { + /* Find the position (0..63) of the least significant set bit in the input + * word */ - /* This returns the position in a word (0..7) of the first variant byte in - * it. This is a helper function. Note that there are no branches */ + ASSUME(word != 0); - assert(word); + /* If we can determine that the platform has a usable fast method to get + * this info, use that */ - /* Get just the msb bits of each byte */ - word &= PERL_VARIANTS_WORD_MASK; +# if defined(PERL_CTZ_64) +# define PERL_HAS_FAST_GET_LSB_POS64 -# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 + return (unsigned) PERL_CTZ_64(word); - /* Bytes are stored like - * Byte8 ... Byte2 Byte1 - * 63..56...15...8 7...0 +# elif U64SIZE == 8 && defined(_MSC_VER) && _MSC_VER >= 1400 +# define PERL_HAS_FAST_GET_LSB_POS64 + + { + unsigned long index; + _BitScanForward64(&index, word); + return (unsigned)index; + } + +# else + + /* Here, we didn't find a fast method for finding the lsb. Fall back to + * making the lsb the only set bit in the word, and use our function that + * works on words with a single bit set. * - * Isolate the lsb; + * Isolate the lsb; * https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set * * The word will look like this, with a rightmost set bit in position 's': - * ('x's are don't cares) + * ('x's are don't cares, and 'y's are their complements) * s - * x..x100..0 - * x..xx10..0 Right shift (rightmost 0 is shifted off) - * x..xx01..1 Subtract 1, turns all the trailing zeros into 1's and - * the 1 just to their left into a 0; the remainder is - * untouched - * 0..0011..1 The xor with the original, x..xx10..0, clears that - * remainder, sets the bottom to all 1 - * 0..0100..0 Add 1 to clear the word except for the bit in 's' + * x..x100..00 + * y..y011..11 Complement + * y..y100..00 Add 1 + * 0..0100..00 And with the original * - * Another method is to do 'word &= -word'; but it generates a compiler - * message on some platforms about taking the negative of an unsigned */ + * (Yes, complementing and adding 1 is just taking the negative on 2's + * complement machines, but not on 1's complement ones, and some compilers + * complain about negating an unsigned.) + */ + return single_1bit_pos64(word & (~word + 1)); - word >>= 1; - word = 1 + (word ^ (word - 1)); +# endif -# elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 +} - /* Bytes are stored like - * Byte1 Byte2 ... Byte8 - * 63..56 55..47 ... 7...0 +# define lsbit_pos_uintmax_(word) lsbit_pos64(word) +#else /* ! QUAD */ +# define lsbit_pos_uintmax_(word) lsbit_pos32(word) +#endif + +PERL_STATIC_INLINE unsigned /* Like above for 32 bit word */ +Perl_lsbit_pos32(U32 word) +{ + /* Find the position (0..31) of the least significant set bit in the input + * word */ + + ASSUME(word != 0); + +#if defined(PERL_CTZ_32) +# define PERL_HAS_FAST_GET_LSB_POS32 + + return (unsigned) PERL_CTZ_32(word); + +#elif U32SIZE == 4 && defined(_MSC_VER) && _MSC_VER >= 1400 +# define PERL_HAS_FAST_GET_LSB_POS32 + + { + unsigned long index; + _BitScanForward(&index, word); + return (unsigned)index; + } + +#else + + return single_1bit_pos32(word & (~word + 1)); + +#endif + +} + + +/* Convert the leading zeros count to the bit position of the first set bit. + * This just subtracts from the highest position, 31 or 63. But some compilers + * don't optimize this optimally, and so a bit of bit twiddling encourages them + * to do the right thing. It turns out that subracting a smaller non-negative + * number 'x' from 2**n-1 for any n is the same as taking the exclusive-or of the + * two numbers. This is because '11111...1 ^ x' is just the complement of x, + * call it 'y'. Adding a number and its complement yields all ones. So y is + * actually the same as subtracting x from the ones, our desired value */ +#define LZC_TO_MSBIT_POS_(size, lzc) ((size##SIZE * CHARBITS - 1) ^ (lzc)) + +#ifdef U64TYPE /* HAS_QUAD not usable outside the core */ + +PERL_STATIC_INLINE unsigned +Perl_msbit_pos64(U64 word) +{ + /* Find the position (0..63) of the most significant set bit in the input + * word */ + + ASSUME(word != 0); + + /* If we can determine that the platform has a usable fast method to get + * this, use that */ + +# if defined(PERL_CLZ_64) +# define PERL_HAS_FAST_GET_MSB_POS64 + + return (unsigned) LZC_TO_MSBIT_POS_(U64, PERL_CLZ_64(word)); + +# elif U64SIZE == 8 && defined(_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1400 +# define PERL_HAS_FAST_GET_MSB_POS64 + + { + unsigned long index; + _BitScanReverse64(&index, word); + return (unsigned)index; + } + +# else + + /* Here, we didn't find a fast method for finding the msb. Fall back to + * making the msb the only set bit in the word, and use our function that + * works on words with a single bit set. * * Isolate the msb; http://codeforces.com/blog/entry/10330 * * Only the most significant set bit matters. Or'ing word with its right - * shift of 1 makes that bit and the next one to its right both 1. Then - * right shifting by 2 makes for 4 1-bits in a row. ... We end with the - * msb and all to the right being 1. */ - word |= word >> 1; - word |= word >> 2; - word |= word >> 4; - word |= word >> 8; - word |= word >> 16; - word |= word >> 32; /* This should get optimized out on 32-bit systems. */ + * shift of 1 makes that bit and the next one to its right both 1. + * Repeating that with the right shift of 2 makes for 4 1-bits in a row. + * ... We end with the msb and all to the right being 1. */ + word |= (word >> 1); + word |= (word >> 2); + word |= (word >> 4); + word |= (word >> 8); + word |= (word >> 16); + word |= (word >> 32); /* Then subtracting the right shift by 1 clears all but the left-most of * the 1 bits, which is our desired result */ word -= (word >> 1); + /* Now we have a single bit set */ + return single_1bit_pos64(word); + +# endif + +} + +# define msbit_pos_uintmax_(word) msbit_pos64(word) +#else /* ! QUAD */ +# define msbit_pos_uintmax_(word) msbit_pos32(word) +#endif + +PERL_STATIC_INLINE unsigned +Perl_msbit_pos32(U32 word) +{ + /* Find the position (0..31) of the most significant set bit in the input + * word */ + + ASSUME(word != 0); + +#if defined(PERL_CLZ_32) +# define PERL_HAS_FAST_GET_MSB_POS32 + + return (unsigned) LZC_TO_MSBIT_POS_(U32, PERL_CLZ_32(word)); + +#elif U32SIZE == 4 && defined(_MSC_VER) && _MSC_VER >= 1400 +# define PERL_HAS_FAST_GET_MSB_POS32 + + { + unsigned long index; + _BitScanReverse(&index, word); + return (unsigned)index; + } + +#else + + word |= (word >> 1); + word |= (word >> 2); + word |= (word >> 4); + word |= (word >> 8); + word |= (word >> 16); + word -= (word >> 1); + return single_1bit_pos32(word); + +#endif + +} + +#ifdef U64TYPE /* HAS_QUAD not usable outside the core */ + +PERL_STATIC_INLINE unsigned +Perl_single_1bit_pos64(U64 word) +{ + /* Given a 64-bit word known to contain all zero bits except one 1 bit, + * find and return the 1's position: 0..63 */ + +# ifdef PERL_CORE /* macro not exported */ + ASSUME(isPOWER_OF_2(word)); # else -# error Unexpected byte order + ASSUME(word && (word & (word-1)) == 0); # endif - /* Here 'word' has a single bit set: the msb of the first byte in which it - * is set. Calculate that position in the word. We can use this - * specialized solution: https://stackoverflow.com/a/32339674/1626653, - * assumes an 8-bit byte. (On a 32-bit machine, the larger numbers should - * just get shifted off at compile time) */ - word = (word >> 7) * ((UINTMAX_C( 7) << 56) | (UINTMAX_C(15) << 48) - | (UINTMAX_C(23) << 40) | (UINTMAX_C(31) << 32) - | (39 << 24) | (47 << 16) - | (55 << 8) | (63 << 0)); - word >>= PERL_WORDSIZE * 7; /* >> by either 56 or 24 */ - - /* Here, word contains the position 7,15,23,...,63 of that bit. Convert to - * 0..7 */ - word = ((word + 1) >> 3) - 1; + /* The only set bit is both the most and least significant bit. If we have + * a fast way of finding either one, use that. + * + * It may appear at first glance that those functions call this one, but + * they don't if the corresponding #define is set */ -# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 +# ifdef PERL_HAS_FAST_GET_MSB_POS64 - /* And invert the result */ - word = CHARBITS - word - 1; + return msbit_pos64(word); + +# elif defined(PERL_HAS_FAST_GET_LSB_POS64) + + return lsbit_pos64(word); +# else + + /* The position of the only set bit in a word can be quickly calculated + * using deBruijn sequences. See for example + * https://en.wikipedia.org/wiki/De_Bruijn_sequence */ + return PL_deBruijn_bitpos_tab64[(word * PERL_deBruijnMagic64_) + >> PERL_deBruijnShift64_]; # endif +} + +#endif + +PERL_STATIC_INLINE unsigned +Perl_single_1bit_pos32(U32 word) +{ + /* Given a 32-bit word known to contain all zero bits except one 1 bit, + * find and return the 1's position: 0..31 */ + +#ifdef PERL_CORE /* macro not exported */ + ASSUME(isPOWER_OF_2(word)); +#else + ASSUME(word && (word & (word-1)) == 0); +#endif +#ifdef PERL_HAS_FAST_GET_MSB_POS32 + + return msbit_pos32(word); + +#elif defined(PERL_HAS_FAST_GET_LSB_POS32) + + return lsbit_pos32(word); + +/* Unlikely, but possible for the platform to have a wider fast operation but + * not a narrower one. But easy enough to handle the case by widening the + * parameter size. (Going the other way, emulating 64 bit by two 32 bit ops + * would be slower than the deBruijn method.) */ +#elif defined(PERL_HAS_FAST_GET_MSB_POS64) + + return msbit_pos64(word); + +#elif defined(PERL_HAS_FAST_GET_LSB_POS64) + + return lsbit_pos64(word); + +#else + + return PL_deBruijn_bitpos_tab32[(word * PERL_deBruijnMagic32_) + >> PERL_deBruijnShift32_]; +#endif + +} + +#ifndef EBCDIC + +PERL_STATIC_INLINE unsigned int +Perl_variant_byte_number(PERL_UINTMAX_T word) +{ + /* This returns the position in a word (0..7) of the first variant byte in + * it. This is a helper function. Note that there are no branches */ + + /* Get just the msb bits of each byte */ + word &= PERL_VARIANTS_WORD_MASK; + + /* This should only be called if we know there is a variant byte in the + * word */ + assert(word); + +# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 + + /* Bytes are stored like + * Byte8 ... Byte2 Byte1 + * 63..56...15...8 7...0 + * so getting the lsb of the whole modified word is getting the msb of the + * first byte that has its msb set */ + word = lsbit_pos_uintmax_(word); + + /* Here, word contains the position 7,15,23,...55,63 of that bit. Convert + * to 0..7 */ + return (unsigned int) ((word + 1) >> 3) - 1; + +# elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 + + /* Bytes are stored like + * Byte1 Byte2 ... Byte8 + * 63..56 55..47 ... 7...0 + * so getting the msb of the whole modified word is getting the msb of the + * first byte that has its msb set */ + word = msbit_pos_uintmax_(word); + + /* Here, word contains the position 63,55,...,23,15,7 of that bit. Convert + * to 0..7 */ + word = ((word + 1) >> 3) - 1; + + /* And invert the result because of the reversed byte order on this + * platform */ + word = CHARBITS - word - 1; + return (unsigned int) word; + +# else +# error Unexpected byte order +# endif + } #endif diff --git a/perl.h b/perl.h index 917ed5fcbb31..aa2f6c7fcba0 100644 --- a/perl.h +++ b/perl.h @@ -285,6 +285,19 @@ Now a no-op. # define PERL_IS_GCC 1 #endif +#define PERL_GCC_VERSION_GE(major,minor,patch) \ + (((100000 * __GNUC__) + (1000 * __GNUC_MINOR__) + __GNUC_PATCHLEVEL__) \ + >= ((100000 * (major)) + (1000 * (minor)) + (patch))) +#define PERL_GCC_VERSION_GT(major,minor,patch) \ + (((100000 * __GNUC__) + (1000 * __GNUC_MINOR__) + __GNUC_PATCHLEVEL__) \ + > ((100000 * (major)) + (1000 * (minor)) + (patch))) +#define PERL_GCC_VERSION_LE(major,minor,patch) \ + (((100000 * __GNUC__) + (1000 * __GNUC_MINOR__) + __GNUC_PATCHLEVEL__) \ + <= ((100000 * (major)) + (1000 * (minor)) + (patch))) +#define PERL_GCC_VERSION_LT(major,minor,patch) \ + (((100000 * __GNUC__) + (1000 * __GNUC_MINOR__) + __GNUC_PATCHLEVEL__) \ + < ((100000 * (major)) + (1000 * (minor)) + (patch))) + /* In case Configure was not used (we are using a "canned config" * such as Win32, or a cross-compilation setup, for example) try going * by the gcc major and minor versions. One useful URL is @@ -301,38 +314,38 @@ Now a no-op. #ifndef PERL_MICRO # if defined __GNUC__ && !defined(__INTEL_COMPILER) -# if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 || __GNUC__ > 3 /* 3.1 -> */ +# if PERL_GCC_VERSION_GE(3,1,0) # define HASATTRIBUTE_DEPRECATED # endif -# if __GNUC__ >= 3 /* 3.0 -> */ /* XXX Verify this version */ +# if PERL_GCC_VERSION_GE(3,0,0) /* XXX Verify this version */ # define HASATTRIBUTE_FORMAT # if defined __MINGW32__ # define PRINTF_FORMAT_NULL_OK # endif # endif -# if __GNUC__ >= 3 /* 3.0 -> */ +# if PERL_GCC_VERSION_GE(3,0,0) # define HASATTRIBUTE_MALLOC # endif -# if __GNUC__ == 3 && __GNUC_MINOR__ >= 3 || __GNUC__ > 3 /* 3.3 -> */ +# if PERL_GCC_VERSION_GE(3,3,0) # define HASATTRIBUTE_NONNULL # endif -# if __GNUC__ == 2 && __GNUC_MINOR__ >= 5 || __GNUC__ > 2 /* 2.5 -> */ +# if PERL_GCC_VERSION_GE(2,5,0) # define HASATTRIBUTE_NORETURN # endif -# if __GNUC__ >= 3 /* gcc 3.0 -> */ +# if PERL_GCC_VERSION_GE(3,0,0) # define HASATTRIBUTE_PURE # endif -# if __GNUC__ == 3 && __GNUC_MINOR__ >= 4 || __GNUC__ > 3 /* 3.4 -> */ +# if PERL_GCC_VERSION_GE(3,4,0) # define HASATTRIBUTE_UNUSED # endif # if __GNUC__ == 3 && __GNUC_MINOR__ == 3 && !defined(__cplusplus) # define HASATTRIBUTE_UNUSED /* gcc-3.3, but not g++-3.3. */ # endif -# if __GNUC__ == 3 && __GNUC_MINOR__ >= 4 || __GNUC__ > 3 /* 3.4 -> */ +# if PERL_GCC_VERSION_GE(3,4,0) # define HASATTRIBUTE_WARN_UNUSED_RESULT # endif /* always_inline is buggy in gcc <= 4.6 and causes compilation errors */ -# if __GNUC__ == 4 && __GNUC_MINOR__ >= 7 || __GNUC__ > 4 /* 4.7 -> */ +# if PERL_GCC_VERSION_GE(4,7,0) # define HASATTRIBUTE_ALWAYS_INLINE # endif # endif @@ -364,7 +377,7 @@ Now a no-op. #endif #ifdef HASATTRIBUTE_ALWAYS_INLINE /* always_inline is buggy in gcc <= 4.6 and causes compilation errors */ -# if !defined(PERL_IS_GCC) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7 || __GNUC__ > 4) +# if !defined(PERL_IS_GCC) || PERL_GCC_VERSION_GE(4,7,0) # define __attribute__always_inline__ __attribute__((always_inline)) # endif #endif @@ -482,7 +495,7 @@ compilation causes it be used just some times. */ #if defined(PERL_GCC_PEDANTIC) || \ (defined(__GNUC__) && defined(__cplusplus) && \ - ((__GNUC__ < 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 2)))) + (PERL_GCC_VERSION_LT(4,2,0))) # ifndef PERL_GCC_BRACE_GROUPS_FORBIDDEN # define PERL_GCC_BRACE_GROUPS_FORBIDDEN # endif @@ -558,8 +571,7 @@ __typeof__ and nothing else. * */ -#if defined(__clang__) || defined(__clang) || \ - (defined( __GNUC__) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406) +#if defined(__clang__) || defined(__clang) || PERL_GCC_VERSION_GE(4,6,0) # define GCC_DIAG_PRAGMA(x) _Pragma (#x) /* clang has "clang diagnostic" pragmas, but also understands gcc. */ # define GCC_DIAG_IGNORE(x) _Pragma("GCC diagnostic push") \ @@ -3873,8 +3885,7 @@ intrinsic function, see its documents for more details. #if __has_builtin(__builtin_unreachable) # define HAS_BUILTIN_UNREACHABLE -#elif (defined(__GNUC__) && ( __GNUC__ > 4 \ - || __GNUC__ == 4 && __GNUC_MINOR__ >= 5)) +#elif PERL_GCC_VERSION_GE(4,5,0) # define HAS_BUILTIN_UNREACHABLE #endif @@ -5831,6 +5842,22 @@ PL_valid_types_IV_set[] = { 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1 }; EXTCONST bool PL_valid_types_NV_set[] = { 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; +EXTCONST U8 +PL_deBruijn_bitpos_tab32[] = { + /* https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn */ + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 +}; + +EXTCONST U8 +PL_deBruijn_bitpos_tab64[] = { + /* https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers */ + 63, 0, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54, 33, 42, 3, + 61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4, + 62, 57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21, + 56, 45, 25, 31, 35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5 +}; + #else EXTCONST bool PL_valid_types_IVX[]; @@ -5839,9 +5866,17 @@ EXTCONST bool PL_valid_types_PVX[]; EXTCONST bool PL_valid_types_RV[]; EXTCONST bool PL_valid_types_IV_set[]; EXTCONST bool PL_valid_types_NV_set[]; +EXTCONST U8 PL_deBruijn_bitpos_tab32[]; +EXTCONST U8 PL_deBruijn_bitpos_tab64[]; #endif +/* The constants for using PL_deBruijn_bitpos_tab */ +#define PERL_deBruijnMagic32_ 0x077CB531 +#define PERL_deBruijnShift32_ 27 +#define PERL_deBruijnMagic64_ 0x07EDD5E59A4E28C2 +#define PERL_deBruijnShift64_ 58 + /* In C99 we could use designated (named field) union initializers. * In C89 we need to initialize the member declared first. * In C++ we need extern C initializers. diff --git a/proto.h b/proto.h index 3900f0671736..92c371a6493f 100644 --- a/proto.h +++ b/proto.h @@ -1861,6 +1861,12 @@ PERL_CALLCONV I32 Perl_looks_like_number(pTHX_ SV *const sv) #define PERL_ARGS_ASSERT_LOOKS_LIKE_NUMBER \ assert(sv) +#ifndef PERL_NO_INLINE_FUNCTIONS +PERL_STATIC_INLINE unsigned Perl_lsbit_pos32(U32 word) + __attribute__warn_unused_result__; +#define PERL_ARGS_ASSERT_LSBIT_POS32 +#endif + PERL_CALLCONV int Perl_magic_clear_all_env(pTHX_ SV* sv, MAGIC* mg); #define PERL_ARGS_ASSERT_MAGIC_CLEAR_ALL_ENV \ assert(sv); assert(mg) @@ -2146,6 +2152,12 @@ PERL_CALLCONV void Perl_mro_set_mro(pTHX_ struct mro_meta *const meta, SV *const PERL_CALLCONV SV* Perl_mro_set_private_data(pTHX_ struct mro_meta *const smeta, const struct mro_alg *const which, SV *const data); #define PERL_ARGS_ASSERT_MRO_SET_PRIVATE_DATA \ assert(smeta); assert(which); assert(data) +#ifndef PERL_NO_INLINE_FUNCTIONS +PERL_STATIC_INLINE unsigned Perl_msbit_pos32(U32 word) + __attribute__warn_unused_result__; +#define PERL_ARGS_ASSERT_MSBIT_POS32 +#endif + PERL_CALLCONV SV* Perl_multiconcat_stringify(pTHX_ const OP* o); #define PERL_ARGS_ASSERT_MULTICONCAT_STRINGIFY \ assert(o) @@ -3246,6 +3258,12 @@ PERL_CALLCONV Signal_t Perl_sighandler1(int sig); #define PERL_ARGS_ASSERT_SIGHANDLER1 PERL_CALLCONV Signal_t Perl_sighandler3(int sig, Siginfo_t *info, void *uap); #define PERL_ARGS_ASSERT_SIGHANDLER3 +#ifndef PERL_NO_INLINE_FUNCTIONS +PERL_STATIC_INLINE unsigned Perl_single_1bit_pos32(U32 word) + __attribute__warn_unused_result__; +#define PERL_ARGS_ASSERT_SINGLE_1BIT_POS32 +#endif + PERL_CALLCONV char* Perl_skipspace_flags(pTHX_ char *s, U32 flags) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_SKIPSPACE_FLAGS \ @@ -6703,6 +6721,26 @@ PERL_CALLCONV Signal_t Perl_csighandler(int sig, Siginfo_t *info, void *uap); #define PERL_ARGS_ASSERT_CSIGHANDLER PERL_CALLCONV Signal_t Perl_sighandler(int sig, Siginfo_t *info, void *uap); #define PERL_ARGS_ASSERT_SIGHANDLER +#endif +#if defined(U64TYPE) /* HAS_QUAD undefined outside of core */ +#ifndef PERL_NO_INLINE_FUNCTIONS +PERL_STATIC_INLINE unsigned Perl_lsbit_pos64(U64 word) + __attribute__warn_unused_result__; +#define PERL_ARGS_ASSERT_LSBIT_POS64 +#endif + +#ifndef PERL_NO_INLINE_FUNCTIONS +PERL_STATIC_INLINE unsigned Perl_msbit_pos64(U64 word) + __attribute__warn_unused_result__; +#define PERL_ARGS_ASSERT_MSBIT_POS64 +#endif + +#ifndef PERL_NO_INLINE_FUNCTIONS +PERL_STATIC_INLINE unsigned Perl_single_1bit_pos64(U64 word) + __attribute__warn_unused_result__; +#define PERL_ARGS_ASSERT_SINGLE_1BIT_POS64 +#endif + #endif #if defined(UNLINK_ALL_VERSIONS) PERL_CALLCONV I32 Perl_unlnk(pTHX_ const char* f); diff --git a/regcomp.c b/regcomp.c index c5e54cc69a29..5de0aab1b698 100644 --- a/regcomp.c +++ b/regcomp.c @@ -19398,17 +19398,8 @@ S_optimize_regclass(pTHX_ bool already_inverted; bool are_equivalent; - /* Compute which bit is set, which is the same thing as, e.g., - * ANYOF_CNTRL. From - * https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn - * */ - static const int MultiplyDeBruijnBitPosition2[32] = { - 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 - }; - namedclass = MultiplyDeBruijnBitPosition2[(posixl - * 0x077CB531U) >> 27]; + namedclass = single_1bit_pos32(posixl); classnum = namedclass_to_classnum(namedclass); /* The named classes are such that the inverted number is one diff --git a/regcomp.h b/regcomp.h index b92a08820f97..ec3c756148bc 100644 --- a/regcomp.h +++ b/regcomp.h @@ -680,30 +680,32 @@ struct regnode_ssc { #define ANYOF_BIT(c) (1U << ((c) & 7)) +#define ANYOF_POSIXL_BITMAP(p) (((regnode_charclass_posixl*) (p))->classflags) + #define POSIXL_SET(field, c) ((field) |= (1U << (c))) -#define ANYOF_POSIXL_SET(p, c) POSIXL_SET(((regnode_charclass_posixl*) (p))->classflags, (c)) +#define ANYOF_POSIXL_SET(p, c) POSIXL_SET(ANYOF_POSIXL_BITMAP(p), (c)) #define POSIXL_CLEAR(field, c) ((field) &= ~ (1U <<(c))) -#define ANYOF_POSIXL_CLEAR(p, c) POSIXL_CLEAR(((regnode_charclass_posixl*) (p))->classflags, (c)) +#define ANYOF_POSIXL_CLEAR(p, c) POSIXL_CLEAR(ANYOF_POSIXL_BITMAP(p), (c)) #define POSIXL_TEST(field, c) ((field) & (1U << (c))) -#define ANYOF_POSIXL_TEST(p, c) POSIXL_TEST(((regnode_charclass_posixl*) (p))->classflags, (c)) +#define ANYOF_POSIXL_TEST(p, c) POSIXL_TEST(ANYOF_POSIXL_BITMAP(p), (c)) #define POSIXL_ZERO(field) STMT_START { (field) = 0; } STMT_END -#define ANYOF_POSIXL_ZERO(ret) POSIXL_ZERO(((regnode_charclass_posixl*) (ret))->classflags) +#define ANYOF_POSIXL_ZERO(ret) POSIXL_ZERO(ANYOF_POSIXL_BITMAP(ret)) #define ANYOF_POSIXL_SET_TO_BITMAP(p, bits) \ - STMT_START { \ - ((regnode_charclass_posixl*) (p))->classflags = (bits); \ - } STMT_END + STMT_START { ANYOF_POSIXL_BITMAP(p) = (bits); } STMT_END /* Shifts a bit to get, eg. 0x4000_0000, then subtracts 1 to get 0x3FFF_FFFF */ -#define ANYOF_POSIXL_SETALL(ret) STMT_START { ((regnode_charclass_posixl*) (ret))->classflags = nBIT_MASK(ANYOF_POSIXL_MAX); } STMT_END +#define ANYOF_POSIXL_SETALL(ret) \ + STMT_START { \ + ANYOF_POSIXL_BITMAP(ret) = nBIT_MASK(ANYOF_POSIXL_MAX); \ + } STMT_END #define ANYOF_CLASS_SETALL(ret) ANYOF_POSIXL_SETALL(ret) #define ANYOF_POSIXL_TEST_ANY_SET(p) \ - ((ANYOF_FLAGS(p) & ANYOF_MATCHES_POSIXL) \ - && (((regnode_charclass_posixl*)(p))->classflags)) + ((ANYOF_FLAGS(p) & ANYOF_MATCHES_POSIXL) && ANYOF_POSIXL_BITMAP(p)) #define ANYOF_CLASS_TEST_ANY_SET(p) ANYOF_POSIXL_TEST_ANY_SET(p) /* Since an SSC always has this field, we don't have to test for that; nor do @@ -716,8 +718,7 @@ struct regnode_ssc { #define ANYOF_POSIXL_TEST_ALL_SET(p) \ ((ANYOF_FLAGS(p) & ANYOF_MATCHES_POSIXL) \ - && ((regnode_charclass_posixl*) (p))->classflags \ - == nBIT_MASK(ANYOF_POSIXL_MAX)) + && ANYOF_POSIXL_BITMAP(p) == nBIT_MASK(ANYOF_POSIXL_MAX)) #define ANYOF_POSIXL_OR(source, dest) STMT_START { (dest)->classflags |= (source)->classflags ; } STMT_END #define ANYOF_CLASS_OR(source, dest) ANYOF_POSIXL_OR((source), (dest)) diff --git a/regexec.c b/regexec.c index b06b6b0ea37a..512da8b77c62 100644 --- a/regexec.c +++ b/regexec.c @@ -10561,26 +10561,11 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const else if ( ANYOF_POSIXL_TEST_ANY_SET(n) && c <= U8_MAX /* param to isFOO_lc() */ ) { - /* The data structure is arranged so bits 0, 2, 4, ... are set * if the class includes the Posix character class given by * bit/2; and 1, 3, 5, ... are set if the class includes the - * complemented Posix class given by int(bit/2). So we loop - * through the bits, each time changing whether we complement - * the result or not. Suppose for the sake of illustration - * that bits 0-3 mean respectively, \w, \W, \s, \S. If bit 0 - * is set, it means there is a match for this ANYOF node if the - * character is in the class given by the expression (0 / 2 = 0 - * = \w). If it is in that class, isFOO_lc() will return 1, - * and since 'to_complement' is 0, the result will stay TRUE, - * and we exit the loop. Suppose instead that bit 0 is 0, but - * bit 1 is 1. That means there is a match if the character - * matches \W. We won't bother to call isFOO_lc() on bit 0, - * but will on bit 1. On the second iteration 'to_complement' - * will be 1, so the exclusive or will reverse things, so we - * are testing for \W. On the third iteration, 'to_complement' - * will be 0, and we would be testing for \s; the fourth - * iteration would test for \S, etc. + * complemented Posix class given by int(bit/2), so the + * remainder modulo 2 tells us if to complement or not. * * Note that this code assumes that all the classes are closed * under folding. For example, if a character matches \w, then @@ -10592,19 +10577,21 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const * loop could be used below to iterate over both the source * character, and its fold (if different) */ - int count = 0; - int to_complement = 0; + U32 posixl_bits = ANYOF_POSIXL_BITMAP(n); - while (count < ANYOF_MAX) { - if (ANYOF_POSIXL_TEST(n, count) - && to_complement ^ cBOOL(isFOO_lc(count/2, (U8) c))) - { + do { + /* Find the next set bit indicating a class to try matching + * against */ + U8 bit_pos = lsbit_pos32(posixl_bits); + + if (bit_pos % 2 ^ cBOOL(isFOO_lc(bit_pos/2, (U8) c))) { match = TRUE; break; } - count++; - to_complement ^= 1; - } + + /* Remove this class from consideration; repeat */ + POSIXL_CLEAR(posixl_bits, bit_pos); + } while(posixl_bits != 0); } } }