From 7adca3f3973689d4123c6f2c6f825888cd95bbe7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?=
Date: Sun, 17 Aug 2025 20:45:17 -0700
Subject: [PATCH 1/2] maint: introduce LintMan to aid in tracking/updating
values
Allow tagging the documentation with a `#define` value that can
then be updated programmatically.
Update the value for MAX_NAME_SIZE in pcre2limits.3 that was missing
since ced3b0f0 (Increase name length to 128, 2024-03-11) and while
at it, improve on its description and add a tag for a related
variable.
For completeness, also add a tag to the same value in pcre2pattern.3,
update the VMS configuration, where the new value was missing since
6c670c78 (Update overlooked cmake update of name size to 128, 2024-03-11),
and add LintMan to UpdateAlways so it can be used in a developer tree.
---
doc/html/pcre2limits.html | 9 ++++--
doc/html/pcre2pattern.html | 6 ++--
doc/pcre2.txt | 10 +++---
doc/pcre2limits.3 | 11 +++++--
doc/pcre2pattern.3 | 7 ++--
maint/CheckMan | 1 +
maint/LintMan | 66 ++++++++++++++++++++++++++++++++++++++
maint/README | 4 +++
maint/UpdateAlways | 7 ++++
vms/configure.com | 2 +-
10 files changed, 106 insertions(+), 17 deletions(-)
create mode 100755 maint/LintMan
diff --git a/doc/html/pcre2limits.html b/doc/html/pcre2limits.html
index eabc4e873..1afb44eca 100644
--- a/doc/html/pcre2limits.html
+++ b/doc/html/pcre2limits.html
@@ -64,8 +64,11 @@
a compile context.
-The maximum length of name for a named capture group is 32 code units, and the
-maximum number of such groups is 10000.
+The maximum length of the name for a named capture group as well as the number
+of such groups is configurable at build time. The maximum length for the name
+defaults to
+128 code units, and the maximum number of such groups to
+10000.
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
@@ -96,7 +99,7 @@
REVISION
-Last updated: 16 August 2023
+Last updated: 17 August 2025
Copyright © 1997-2023 University of Cambridge.
diff --git a/doc/html/pcre2pattern.html b/doc/html/pcre2pattern.html
index 0bbd71adf..f301be267 100644
--- a/doc/html/pcre2pattern.html
+++ b/doc/html/pcre2pattern.html
@@ -2007,8 +2007,8 @@
In PCRE2, a capture group can be named in one of three ways: (?<name>...) or
-(?'name'...) as in Perl, or (?P<name>...) as in Python. Names may be up to 128
-code units long. When PCRE2_UTF is not set, they may contain only ASCII
+(?'name'...) as in Perl, or (?P<name>...) as in Python. Names may be up to
+128 code units long. When PCRE2_UTF is not set, they may contain only ASCII
alphanumeric characters and underscores, but must start with a non-digit. When
PCRE2_UTF is set, the syntax of group names is extended to allow any Unicode
letter or Unicode decimal digit. In other words, group names must match one of
@@ -4183,7 +4183,7 @@
-Last updated: 28 March 2025
+Last updated: 17 August 2025
Copyright © 1997-2024 University of Cambridge.
diff --git a/doc/pcre2.txt b/doc/pcre2.txt
index 40db85f00..247941c6e 100644
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
@@ -6238,8 +6238,10 @@ SIZE AND OTHER LIMITATIONS
is set to 250. An application can change this limit by calling
pcre2_set_parens_nest_limit() to set the limit in a compile context.
- The maximum length of name for a named capture group is 32 code units,
- and the maximum number of such groups is 10000.
+ The maximum length of the name for a named capture group as well as the
+ number of such groups is configurable at build time. The maximum length
+ for the name defaults to 128 code units, and the maximum number of such
+ groups to 10000.
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or
(*THEN) verb is 255 code units for the 8-bit library and 65535 code
@@ -6262,7 +6264,7 @@ AUTHOR
REVISION
- Last updated: 16 August 2023
+ Last updated: 17 August 2025
Copyright (c) 1997-2023 University of Cambridge.
@@ -10747,7 +10749,7 @@ AUTHOR
REVISION
- Last updated: 28 March 2025
+ Last updated: 17 August 2025
Copyright (c) 1997-2024 University of Cambridge.
diff --git a/doc/pcre2limits.3 b/doc/pcre2limits.3
index 423dedf60..5e51e5869 100644
--- a/doc/pcre2limits.3
+++ b/doc/pcre2limits.3
@@ -47,8 +47,13 @@ when PCRE2 is built; if not, the default is set to 250. An application can
change this limit by calling pcre2_set_parens_nest_limit() to set the limit in
a compile context.
.P
-The maximum length of name for a named capture group is 32 code units, and the
-maximum number of such groups is 10000.
+The maximum length of the name for a named capture group as well as the number
+of such groups is configurable at build time. The maximum length for the name
+defaults to
+.\" DEFINE MAX_NAME_SIZE
+128 code units, and the maximum number of such groups to
+.\" DEFINE MAX_NAME_COUNT
+10000.
.P
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
@@ -76,6 +81,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 16 August 2023
+Last updated: 17 August 2025
Copyright (c) 1997-2023 University of Cambridge.
.fi
diff --git a/doc/pcre2pattern.3 b/doc/pcre2pattern.3
index e49b5a338..45b6f01c1 100644
--- a/doc/pcre2pattern.3
+++ b/doc/pcre2pattern.3
@@ -2015,8 +2015,9 @@ the naming of capture groups. This feature was not added to Perl until release
using the Python syntax. PCRE2 supports both the Perl and the Python syntax.
.P
In PCRE2, a capture group can be named in one of three ways: (?<name>...) or
-(?'name'...) as in Perl, or (?P<name>...) as in Python. Names may be up to 128
-code units long. When PCRE2_UTF is not set, they may contain only ASCII
+(?'name'...) as in Perl, or (?P<name>...) as in Python. Names may be up to
+.\" DEFINE MAX_NAME_SIZE
+128 code units long. When PCRE2_UTF is not set, they may contain only ASCII
alphanumeric characters and underscores, but must start with a non-digit. When
PCRE2_UTF is set, the syntax of group names is extended to allow any Unicode
letter or Unicode decimal digit. In other words, group names must match one of
@@ -4229,6 +4230,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 28 March 2025
+Last updated: 17 August 2025
Copyright (c) 1997-2024 University of Cambridge.
.fi
diff --git a/maint/CheckMan b/maint/CheckMan
index 75cae62f1..b34651e96 100755
--- a/maint/CheckMan
+++ b/maint/CheckMan
@@ -39,6 +39,7 @@ while (scalar(@ARGV) > 0)
^\.P\s*$|
^\.PP\s*$|
^\.\\"(?:\ HREF)?\s*$|
+ ^\.\\"\sDEFINE\s\w+$|
^\.\\"\sHTML\s\s*$|
^\.\\"\sHTML\s<\/a>\s*$|
^\.\\"\s<\/a>\s*$|
diff --git a/maint/LintMan b/maint/LintMan
new file mode 100755
index 000000000..b0bc5f88b
--- /dev/null
+++ b/maint/LintMan
@@ -0,0 +1,66 @@
+#!/usr/bin/perl
+
+use warnings;
+use strict;
+use Getopt::Long;
+use vars qw /$opt_verbose/;
+
+# A script to scan PCRE2's man pages to check for values that might need to
+# be updatd to match the code.
+#
+# It updates numerical values after \" DEFINE or errors if name is
+# not found.
+
+my $file;
+my %defs;
+
+foreach $file ("../src/config.h")
+ {
+ open (INCLUDE, $file) or die "Failed to open include $file\n";
+
+ while (<INCLUDE>)
+ {
+ next unless /^#define ([[:upper:]_\d]+)\s+(\d+)/a;
+ $defs{$1} = $2;
+ }
+
+ close(INCLUDE);
+ }
+
+GetOptions("verbose");
+while (scalar(@ARGV) > 0)
+ {
+ $file = shift @ARGV;
+
+ open my $fh, "+<", $file or die "Failed to open $file\n";
+
+ my @lines = <$fh>;
+ my $updated = 0;
+
+ foreach my $index (0 .. $#lines)
+ {
+ if ($lines[$index] =~ /^\.\\"\sDEFINE\s([[:upper:]_\d]+)$/a)
+ {
+ my $l = $index + 1;
+ die "Invalid DEFINE line $l of $file\n" unless defined $lines[$l];
+
+ my $key = $1;
+ die "Bad DEFINE key $key line $l of $file\n" unless exists $defs{$key};
+
+ my $value = $defs{$key};
+ if ($lines[$index + 1] !~ /^$value\b/)
+ {
+ $updated += $lines[$index + 1] =~ s/^\d+/$value/a;
+ print "Updated $key in $file to $value\n" if $opt_verbose;
+ }
+ }
+ }
+
+ if ($updated > 0)
+ {
+ seek($fh, 0, 0);
+ print $fh @lines;
+ truncate($fh, tell($fh));
+ }
+ close($fh);
+ }
diff --git a/maint/README b/maint/README
index 7db649844..9763d2346 100644
--- a/maint/README
+++ b/maint/README
@@ -60,6 +60,10 @@ GenerateUcpTables.py
GenerateCommon.py and Unicode data files. The generated file contains tables
for looking up Unicode property names.
+LintMan
+ A Perl script to check and update magic numbers in the documentation that
+ correspond to configurable settings in the codebase.
+
manifest-*
Data files used to verify the contents of the distribution tarball and
`make install` file lists.
diff --git a/maint/UpdateAlways b/maint/UpdateAlways
index ca89fa05a..13cf27197 100755
--- a/maint/UpdateAlways
+++ b/maint/UpdateAlways
@@ -19,6 +19,8 @@
# Detrail A Perl script that removes trailing spaces from files.
+# LintMan A Perl script that lints man pages looking for inconsistencies.
+
# doc/index.html.src
# A file that is copied as index.html into the doc/html directory
# when the HTML documentation is built. It works like this so that
@@ -54,6 +56,11 @@ echo Processing documentation
perl ../maint/CheckMan *.1 *.3
if [ $? != 0 ] ; then exit 1; fi
+if [ -f ../src/config.h ] ; then
+ perl ../maint/LintMan -v *.3
+ if [ $? != 0 ] ; then exit 1; fi
+fi
+
# Verify the version number in the man pages
for file in *.1 *.3 ; do
diff --git a/vms/configure.com b/vms/configure.com
index eccb61980..19c9ff5bb 100644
--- a/vms/configure.com
+++ b/vms/configure.com
@@ -905,7 +905,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PCRE2_EXPORT
#define LINK_SIZE 2
#define MAX_NAME_COUNT 10000
-#define MAX_NAME_SIZE 32
+#define MAX_NAME_SIZE 128
#define MATCH_LIMIT 10000000
#define HEAP_LIMIT 20000000
#define NEWLINE_DEFAULT 2
From bac89d5b2ed7270ca044f773a76b074860d811cb Mon Sep 17 00:00:00 2001
From: Nicholas Wilson
Date: Fri, 29 Aug 2025 09:57:39 +0000
Subject: [PATCH 2/2] Make documentation match config.h.generic
---
maint/LintMan | 4 ++--
maint/UpdateAlways | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/maint/LintMan b/maint/LintMan
index b0bc5f88b..924e9533d 100755
--- a/maint/LintMan
+++ b/maint/LintMan
@@ -6,7 +6,7 @@ use Getopt::Long;
use vars qw /$opt_verbose/;
# A script to scan PCRE2's man pages to check for values that might need to
-# be updatd to match the code.
+# be updated to match the code.
#
# It updates numerical values after \" DEFINE or errors if name is
# not found.
@@ -14,7 +14,7 @@ use vars qw /$opt_verbose/;
my $file;
my %defs;
-foreach $file ("../src/config.h")
+foreach $file ("../src/config.h.generic")
{
open (INCLUDE, $file) or die "Failed to open include $file\n";
diff --git a/maint/UpdateAlways b/maint/UpdateAlways
index 13cf27197..6ae1c8d8b 100755
--- a/maint/UpdateAlways
+++ b/maint/UpdateAlways
@@ -56,7 +56,7 @@ echo Processing documentation
perl ../maint/CheckMan *.1 *.3
if [ $? != 0 ] ; then exit 1; fi
-if [ -f ../src/config.h ] ; then
+if [ -f ../src/config.h.generic ] ; then
perl ../maint/LintMan -v *.3
if [ $? != 0 ] ; then exit 1; fi
fi