From e4faa4ba4acdd0f04d496b4a6ce5c3260b419f6f Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 10 Apr 2021 13:32:00 -0600 Subject: [PATCH] locale.c/inline.h: Add fold calc debug statements Under verbose debugging, this shows non-standard folds --- inline.h | 8 +++++- locale.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/inline.h b/inline.h index 9c494eba62b7..18ae2ef71cbf 100644 --- a/inline.h +++ b/inline.h @@ -2573,8 +2573,14 @@ Perl_foldEQ_locale(const char *s1, const char *s2, I32 len) assert(len >= 0); while (len--) { - if (*a != *b && *a != PL_fold_locale[*b]) + if (*a != *b && *a != PL_fold_locale[*b]) { + DEBUG_L(PerlIO_printf(Perl_debug_log, + "%s:%d: Our records indicate %02x is not a fold of %02x" + " or its mate %02x\n", + __FILE__, __LINE__, *a, *b, PL_fold_locale[*b])); + return 0; + } a++,b++; } return 1; diff --git a/locale.c b/locale.c index 3c2c45559a4e..a339850c3db2 100644 --- a/locale.c +++ b/locale.c @@ -1933,6 +1933,27 @@ S_new_ctype(pTHX_ const char *newctype) } } else { /* Not a canned locale we know the values for. Compute them */ + +# ifdef DEBUGGING + + bool has_non_ascii_fold = FALSE; + bool found_unexpected = FALSE; + + if (DEBUG_Lv_TEST) { + for (i = 128; i < 256; i++) { + if ( toU8_LOWER_LC(LATIN1_TO_NATIVE(i)) + != LATIN1_TO_NATIVE(i) + || toU8_UPPER_LC(LATIN1_TO_NATIVE(i)) + != LATIN1_TO_NATIVE(i)) + { + has_non_ascii_fold = TRUE; + break; + } + } + } + +# endif + for (i = 0; i < 256; i++) { if (isU8_UPPER_LC(i)) PL_fold_locale[i] = (U8) toU8_LOWER_LC(i); @@ -1940,6 +1961,69 @@ S_new_ctype(pTHX_ const char *newctype) PL_fold_locale[i] = (U8) toU8_UPPER_LC(i); else PL_fold_locale[i] = (U8) i; + +# ifdef DEBUGGING + + if (DEBUG_Lv_TEST) { + bool unexpected = FALSE; + + if (isUPPER_L1(i)) { + if (isUPPER_A(i)) { + if (PL_fold_locale[i] != toLOWER_A(i)) { + unexpected = TRUE; + } + } + else if (has_non_ascii_fold) { + if (PL_fold_locale[i] != toLOWER_L1(i)) { + unexpected = TRUE; + } + } + else if (PL_fold_locale[i] != i) { + unexpected = TRUE; + } + } + else if ( isLOWER_L1(i) + && i != LATIN_SMALL_LETTER_SHARP_S + && i != MICRO_SIGN) + { + if (isLOWER_A(i)) { + if (PL_fold_locale[i] != toUPPER_A(i)) { + unexpected = TRUE; + } + } + else if (has_non_ascii_fold) { + if (PL_fold_locale[i] != toUPPER_LATIN1_MOD(i)) { + unexpected = TRUE; + } + } + else if (PL_fold_locale[i] != i) { + unexpected = TRUE; + } + } + else if (PL_fold_locale[i] != i) { + unexpected = TRUE; + } + + if (unexpected) { + found_unexpected = TRUE; + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "For %s, fold of %02x is %02x\n", + newctype, i, PL_fold_locale[i])); + } + } + } + + if (found_unexpected) { + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "All bytes not mentioned above either fold to" + " themselves or are the expected ASCII or" + " Latin1 ones\n")); + } + else { + DEBUG_Lv(PerlIO_printf(Perl_debug_log, + "No nonstandard folds were found\n")); +# endif + } }