diff --git a/NON-AUTOTOOLS-BUILD b/NON-AUTOTOOLS-BUILD index 2f984eba4..10bfd69db 100644 --- a/NON-AUTOTOOLS-BUILD +++ b/NON-AUTOTOOLS-BUILD @@ -171,7 +171,7 @@ example. src/config.h) . Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the appropriate library/ies. If you compiled an 8-bit library, pcre2test also - needs the pcre2posix wrapper library. + needs the pcre2posix wrapper library when linking. (9) Run pcre2test on the testinput files in the testdata directory, and check that the output matches the corresponding testoutput files. There are diff --git a/configure.ac b/configure.ac index be383775d..f394dcd92 100644 --- a/configure.ac +++ b/configure.ac @@ -22,7 +22,7 @@ m4_define(libpcre2_posix_version, [3:6:0]) # NOTE: The CMakeLists.txt file searches for the above variables in the first # 50 lines of this file. Please update that if the variables above are moved. -AC_PREREQ([2.60]) +AC_PREREQ([2.62]) AC_INIT([PCRE2],pcre2_major.pcre2_minor[]pcre2_prerelease,[],[pcre2]) AC_CONFIG_SRCDIR([src/pcre2.h.in]) AM_INIT_AUTOMAKE([dist-bzip2 dist-zip foreign]) diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 index 0453caabd..cd41b187d 100644 --- a/doc/pcre2api.3 +++ b/doc/pcre2api.3 @@ -1,4 +1,4 @@ -.TH PCRE2API 3 "19 July 2025" "PCRE2 10.47-DEV" +.TH PCRE2API 3 "05 October 2025" "PCRE2 10.47-DEV" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .sp @@ -1296,7 +1296,7 @@ documentation for more details. .sp PCRE2_CONFIG_JITTARGET .sp -The \fIwhere\fP argument should point to a buffer that is at least 48 code +The \fIwhere\fP argument should point to a buffer that is at least 64 code units long. (The exact length required can be found by calling \fBpcre2_config()\fP with \fBwhere\fP set to NULL.) The buffer is filled with a string that contains the name of the architecture for which the JIT compiler is @@ -4598,6 +4598,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 19 July 2025 +Last updated: 05 October 2025 Copyright (c) 1997-2024 University of Cambridge. .fi diff --git a/doc/pcre2build.3 b/doc/pcre2build.3 index 03c2610db..06402e508 100644 --- a/doc/pcre2build.3 +++ b/doc/pcre2build.3 @@ -134,8 +134,8 @@ UTF support allows the libraries to process character code points up to 0x10ffff in the strings that they handle. Unicode support also gives access to the Unicode properties of characters, using pattern escapes such as \eP, \ep, and \eX. Only the general category properties such as \fILu\fP and \fINd\fP, -script names, and some bi-directional properties are supported. Details are -given in the +script names, and some bi-directional and binary properties are supported. +Details are given in the .\" HREF \fBpcre2pattern\fP .\" @@ -152,8 +152,8 @@ request this by starting with (*UCP). .sp The \eC escape sequence, which matches a single code unit, even in a UTF mode, can cause unpredictable behaviour because it may leave the current matching -point in the middle of a multi-code-unit character. The application can lock it -out by setting the PCRE2_NEVER_BACKSLASH_C option when calling +point in the middle of a multi-code-unit character. The application can lock +it out by setting the PCRE2_NEVER_BACKSLASH_C option when calling \fBpcre2_compile()\fP. There is also a build-time option .sp --enable-never-backslash-C @@ -517,7 +517,7 @@ use), some extra configuration may be necessary. The INSTALL file for If your environment has not been set up so that an appropriate library is automatically included, you may need to add something like .sp - LIBS="-ncurses" + LIBS="-lncurses" .sp immediately before the \fBconfigure\fP command. . diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index 895915ece..3ab4592c3 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -6888,7 +6888,7 @@ if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); } - else + else { fast_forward_char_simd(common, common->newline, common->newline, 0); diff --git a/src/pcre2_jit_simd_inc.h b/src/pcre2_jit_simd_inc.h index 31b6b295b..0a24cdad8 100644 --- a/src/pcre2_jit_simd_inc.h +++ b/src/pcre2_jit_simd_inc.h @@ -101,7 +101,7 @@ return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00); } #endif -#endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_S390X */ +#endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_ARM_64 || SLJIT_CONFIG_S390X || SLJIT_CONFIG_LOONGARCH_64 */ #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) @@ -229,7 +229,14 @@ switch (step) } } +/* The AVX2 code path is currently disabled. #define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD)) +*/ +#if defined(SLJIT_CONFIG_X86_64) && SLJIT_CONFIG_X86_64 +#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1 +#else +#define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_FPU)) +#endif static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) { @@ -247,10 +254,10 @@ struct sljit_jump *quit; struct sljit_jump *partial_quit[2]; vector_compare_type compare_type = vector_compare_match1; sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1); -sljit_s32 data_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0); -sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1); -sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2); -sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3); +sljit_s32 data_ind = sljit_get_register_index(reg_type, SLJIT_VR0); +sljit_s32 cmp1_ind = sljit_get_register_index(reg_type, SLJIT_VR1); +sljit_s32 cmp2_ind = sljit_get_register_index(reg_type, SLJIT_VR2); +sljit_s32 tmp_ind = sljit_get_register_index(reg_type, SLJIT_VR3); sljit_u32 bit = 0; int i; @@ -366,7 +373,14 @@ if (common->utf && offset > 0) #endif } +/* The AVX2 code path is currently disabled. #define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD)) +*/ +#if defined(SLJIT_CONFIG_X86_64) && SLJIT_CONFIG_X86_64 +#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD 1 +#else +#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_FPU)) +#endif static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) { @@ -381,10 +395,10 @@ struct sljit_jump *quit; jump_list *not_found = NULL; vector_compare_type compare_type = vector_compare_match1; sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1); -sljit_s32 data_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0); -sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1); -sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2); -sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3); +sljit_s32 data_ind = sljit_get_register_index(reg_type, SLJIT_VR0); +sljit_s32 cmp1_ind = sljit_get_register_index(reg_type, SLJIT_VR1); +sljit_s32 cmp2_ind = sljit_get_register_index(reg_type, SLJIT_VR2); +sljit_s32 tmp_ind = sljit_get_register_index(reg_type, SLJIT_VR3); sljit_u32 bit = 0; int i; @@ -472,7 +486,14 @@ return not_found; #ifndef _WIN64 +/* The AVX2 code path is currently disabled. #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD)) +*/ +#if defined(SLJIT_CONFIG_X86_64) && SLJIT_CONFIG_X86_64 +#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1 +#else +#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_FPU)) +#endif static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) @@ -489,14 +510,14 @@ sljit_u32 bit1 = 0; sljit_u32 bit2 = 0; sljit_u32 diff = IN_UCHARS(offs1 - offs2); sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1); -sljit_s32 data1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0); -sljit_s32 data2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1); -sljit_s32 cmp1a_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2); -sljit_s32 cmp2a_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3); -sljit_s32 cmp1b_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR4); -sljit_s32 cmp2b_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR5); -sljit_s32 tmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR6); -sljit_s32 tmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_TMP_DEST_VREG); +sljit_s32 data1_ind = sljit_get_register_index(reg_type, SLJIT_VR0); +sljit_s32 data2_ind = sljit_get_register_index(reg_type, SLJIT_VR1); +sljit_s32 cmp1a_ind = sljit_get_register_index(reg_type, SLJIT_VR2); +sljit_s32 cmp2a_ind = sljit_get_register_index(reg_type, SLJIT_VR3); +sljit_s32 cmp1b_ind = sljit_get_register_index(reg_type, SLJIT_VR4); +sljit_s32 cmp2b_ind = sljit_get_register_index(reg_type, SLJIT_VR5); +sljit_s32 tmp1_ind = sljit_get_register_index(reg_type, SLJIT_VR6); +sljit_s32 tmp2_ind = sljit_get_register_index(reg_type, SLJIT_TMP_DEST_VREG); struct sljit_label *start; #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 struct sljit_label *restart; diff --git a/testdata/testinput12 b/testdata/testinput12 index 8f8491446..c618f025b 100644 --- a/testdata/testinput12 +++ b/testdata/testinput12 @@ -368,7 +368,7 @@ /\p{BC: Aሴ}/utf -# A special extra option allows excaped surrogate code points in 32-bit mode, +# A special extra option allows escaped surrogate code points in 32-bit mode, # but subjects containing them must not be UTF-checked. These patterns give # errors in 16-bit mode. diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 index 7ff32c4d8..4883f5edf 100644 --- a/testdata/testoutput12-16 +++ b/testdata/testoutput12-16 @@ -1428,7 +1428,7 @@ Failed: error 146 at offset 7: malformed \P or \p sequence Failed: error 146 at offset 9: malformed \P or \p sequence here: \p{BC: Aሴ |<--| } -# A special extra option allows excaped surrogate code points in 32-bit mode, +# A special extra option allows escaped surrogate code points in 32-bit mode, # but subjects containing them must not be UTF-checked. These patterns give # errors in 16-bit mode. diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 index 6d4041e37..5a4255f8f 100644 --- a/testdata/testoutput12-32 +++ b/testdata/testoutput12-32 @@ -1421,7 +1421,7 @@ Failed: error 146 at offset 7: malformed \P or \p sequence Failed: error 146 at offset 9: malformed \P or \p sequence here: \p{BC: Aሴ |<--| } -# A special extra option allows excaped surrogate code points in 32-bit mode, +# A special extra option allows escaped surrogate code points in 32-bit mode, # but subjects containing them must not be UTF-checked. These patterns give # errors in 16-bit mode.