Skip to content

Commit

Permalink
utf8.h: Add symbol for easing EBCDIC handling
Browse files Browse the repository at this point in the history
This is then used in regcomp.c to avoid an #ifdef EBCDIC
  • Loading branch information
khwilliamson committed Jun 14, 2021
1 parent 79bb2ea commit bd78ed1
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 5 deletions.
11 changes: 6 additions & 5 deletions regcomp.c
Expand Up @@ -19815,11 +19815,12 @@ S_optimize_regclass(pTHX_
* invariant bytes, because they have the same bit patterns under UTF-8
* as not. */
PERL_UINT_FAST8_T inverted = 0;
#ifdef EBCDIC
const PERL_UINT_FAST8_T max_permissible = 0xFF;
#else
const PERL_UINT_FAST8_T max_permissible = 0x7F;
#endif

/* Highest possible UTF-8 invariant is 7F on ASCII platforms; FF on
* EBCDIC */
const PERL_UINT_FAST8_T max_permissible
= nBIT_UMAX(7 + ONE_IF_EBCDIC_ZERO_IF_NOT);

/* If it doesn't fit the criteria for ANYOFM, invert and try again. If
* that works we will instead later generate an NANYOFM, and invert
* back when through */
Expand Down
5 changes: 5 additions & 0 deletions utf8.h
Expand Up @@ -265,6 +265,11 @@ are in the character. */
/* True if byte 'c' is 0xED or higher.  The argument is parenthesized before
 * the cast so that an expression argument (e.g. 'x & 0xFF') is converted to
 * U8 as a whole rather than having the cast bind to its first operand only. */
#define isUTF8_POSSIBLY_PROBLEMATIC(c) (__ASSERT_(FITS_IN_8_BITS(c)) \
(U8) (c) >= 0xED)

/* It turns out that in a number of cases, handling ASCII vs EBCDIC is a
* matter of being off-by-one.  So this is a convenience macro, used to avoid
* some #ifdefs.  Here, in the non-EBCDIC case, it is 0. */
# define ONE_IF_EBCDIC_ZERO_IF_NOT 0

/* True when 'uv', converted to UV, is above 0x7FFFFFFF; the comparison is
 * wrapped in UNLIKELY(). */
#define UNICODE_IS_PERL_EXTENDED(uv) UNLIKELY((UV) (uv) > 0x7FFFFFFF)

#endif /* EBCDIC vs ASCII */
Expand Down
1 change: 1 addition & 0 deletions utfebcdic.h
Expand Up @@ -228,6 +228,7 @@ explicitly forbidden, and the shortest possible encoding should always be used

/* True when 'uv', converted to UV, is above 0x3FFFFFFF; the comparison is
 * wrapped in UNLIKELY(). */
#define UNICODE_IS_PERL_EXTENDED(uv) UNLIKELY((UV) (uv) > 0x3FFFFFFF)

/* Convenience macro for code where the ASCII and EBCDIC cases differ by one,
 * used to avoid some #ifdefs.  It is 1 here, in the EBCDIC header. */
#define ONE_IF_EBCDIC_ZERO_IF_NOT 1

/*
* ex: set ts=8 sts=4 sw=4 et:
Expand Down

0 comments on commit bd78ed1

Please sign in to comment.