Permalink
Browse files

l

  • Loading branch information...
khwilliamson committed Jan 13, 2018
1 parent a8f13f4 commit e416bfefd089fdb116a854cb1bd0a43a6b4887ac
Showing with 104 additions and 3 deletions.
  1. +3 −0 embed.fnc
  2. +3 −0 embed.h
  3. +1 −1 ext/XS-APItest/APItest.pm
  4. +75 −1 inline.h
  5. +7 −0 proto.h
  6. +15 −1 regexec.c
View
@@ -806,6 +806,9 @@ AndmoR |bool |is_utf8_invariant_string|NN const U8* const s \
AnidR |bool |is_utf8_invariant_string_loc|NN const U8* const s \
|STRLEN len \
|NULLOK const U8 ** ep
#ifndef EBCDIC
AniR |unsigned int|_variant_byte_number|PERL_UINTMAX_T word
#endif
#if defined(PERL_CORE) || defined(PERL_EXT)
EinR |Size_t |variant_under_utf8_count|NN const U8* const s \
|NN const U8* const e
View
@@ -775,6 +775,9 @@
#if !(defined(HAS_SIGACTION) && defined(SA_SIGINFO))
#define csighandler Perl_csighandler
#endif
#if !defined(EBCDIC)
#define _variant_byte_number S__variant_byte_number
#endif
#if !defined(HAS_TRUNCATE) && !defined(HAS_CHSIZE) && defined(F_FREESP)
#define my_chsize(a,b) Perl_my_chsize(aTHX_ a,b)
#endif
@@ -5,7 +5,7 @@ use strict;
use warnings;
use Carp;
our $VERSION = '0.94';
our $VERSION = '0.95';
require XSLoader;
View
@@ -438,10 +438,24 @@ S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
return FALSE;
}
/* Otherwise fall into final loop to find which byte it is */
#if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 \
|| BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
*ep = x + _variant_byte_number(* (PERL_UINTMAX_T *) x);
assert(*ep >= s && *ep < send);
return FALSE;
#else /* If weird byte order, drop into next loop to do byte-at-a-time
checks. */
break;
#endif
}
x += PERL_WORDSIZE;
} while (x + PERL_WORDSIZE <= send);
}
@@ -463,6 +477,64 @@ S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
return TRUE;
}
#ifndef EBCDIC
PERL_STATIC_INLINE unsigned int
S__variant_byte_number(PERL_UINTMAX_T word)
{
/* This returns the position in a word (0..7) of the first variant byte in
* it. This is a helper function. Note that there are no conditionals */
assert(word);
/* Get just the msb bits of each byte */
word &= PERL_VARIANTS_WORD_MASK;
# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
/* Isolate the lsb;
* https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set */
word &= -word;
# elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
/* Isolate the msb; http://codeforces.com/blog/entry/10330 */
word |= word >> 1;
word |= word >> 2;
word |= word >> 4;
word |= word >> 8;
word |= word >> 16;
word |= word >> 32;
word -= (word >> 1);
# else
# error Unexpected byte order
# endif
/* Here word has a single bit set, the msb is of the first byte which has
* it set. Calculate the position in the word. We can use this
* specialized solution: https://stackoverflow.com/a/32339674/1626653 */
/* XXX need protable ULL */
word = (word >> 7) * (( 7ULL << 56) | (15ULL << 48) | (23ULL << 40)
| (31ULL << 32) | (39ULL << 24) | (47ULL << 16)
| (55ULL << 8) | (63ULL << 0));
word >>= PERL_WORDSIZE * (CHARBITS - 1);
/* Here, word contains the position 0..63 of that bit. Convert to 0..7 */
word = (word + 1) / CHARBITS - 1;
# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
word = CHARBITS - word;
# endif
return (unsigned int) word;
}
#endif /* ! EBCDIC */
#if defined(PERL_CORE) || defined(PERL_EXT)
/*
@@ -502,6 +574,8 @@ S_variant_under_utf8_count(const U8* const s, const U8* const e)
# ifndef EBCDIC
/* Test if the string is long enough to use word-at-a-time. (Logic is the
* same as for is_utf8_invariant_string()) */
if ((STRLEN) (e - x) >= PERL_WORDSIZE
+ PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
- (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
View
@@ -3864,6 +3864,13 @@ PERL_CALLCONV_NO_RET int Perl_magic_regdatum_set(pTHX_ SV* sv, MAGIC* mg)
#define PERL_ARGS_ASSERT_MAGIC_REGDATUM_SET \
assert(sv); assert(mg)
#endif
#if !defined(EBCDIC)
#ifndef PERL_NO_INLINE_FUNCTIONS
PERL_STATIC_INLINE unsigned int S__variant_byte_number(PERL_UINTMAX_T word)
__attribute__warn_unused_result__;
#endif
#endif
#if !defined(HAS_GETENV_LEN)
PERL_CALLCONV char* Perl_getenv_len(pTHX_ const char *env_elem, unsigned long *len);
View
@@ -584,10 +584,24 @@ S_find_next_ascii(char * s, const char * send, const bool utf8_target)
/* Here, we know we have at least one full word to process. Process
* per-word as long as we have at least a full word left */
do {
if ((* (PERL_UINTMAX_T *) s) & ~ PERL_VARIANTS_WORD_MASK) {
PERL_UINTMAX_T complemented = ~ * (PERL_UINTMAX_T *) s;
if (complemented & PERL_VARIANTS_WORD_MASK) {
#if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 \
|| BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
s += _variant_byte_number(complemented);
return s;
#else /* If weird byte order, drop into next loop to do byte-at-a-time
checks. */
break;
#endif
}
s += PERL_WORDSIZE;
} while (s + PERL_WORDSIZE <= send);
}

0 comments on commit e416bfe

Please sign in to comment.