-
Notifications
You must be signed in to change notification settings - Fork 558
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Slow global pattern match in taint mode with input from utf8 #14238
Comments
From @hknutzenThere is a massive slowdown in global pattern match with Perl 5.20.1 in Create test data with this shell command line: $ perlbrew use perl-5.20.1 This slowdown also appears with Perl 5.21.5. I had to revert an upgrade of a production system from 5.16.3 to 5.20.1 |
From @jkeenanOn Thu Nov 13 09:17:13 2014, heinz.knutzen@gmail.com wrote:
Confirmed: [123202] 54 $ perlbrew switch perl-5.18.4 -- |
The RT System itself - Status changed from 'new' to 'open' |
From @tonycozOn Thu Nov 13 09:17:13 2014, heinz.knutzen@gmail.com wrote:
This appears to be caused by: commit 25fdce4 Stop pos() from being confused by changing utf8ness Since taint magic is GMAGIC, MgBYTEPOS_set() always sets mg_len to the character offset, slow in itself since it needs to translate the byte offset to a character offset, but then needs to translate it back on the next \G regex. This is reasonable for most types of magic, since the string may change based on the magic, but taint magic just sets a flag, so this is unnecessary. The attached patch appears to fix the problem, though if someone has a better name for the function... Tony |
From @tonycoz0001-perl-123202-speed-up-scalar-g-against-tainted-string.patchFrom 93e5e5c27a3edf0d96d690812ddca07adba5eadb Mon Sep 17 00:00:00 2001
From: Tony Cook <tony@develop-help.com>
Date: Wed, 4 Feb 2015 15:49:24 +1100
Subject: [PATCH] [perl #123202] speed up scalar //g against tainted strings
---
embed.fnc | 1 +
embed.h | 1 +
inline.h | 24 ++++++++++++++++++++++++
mg.h | 2 +-
proto.h | 5 +++++
5 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/embed.fnc b/embed.fnc
index cfe634f..c7b5f1d 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1447,6 +1447,7 @@ Apd |void |sv_magic |NN SV *const sv|NULLOK SV *const obj|const int how \
Apd |MAGIC *|sv_magicext |NN SV *const sv|NULLOK SV *const obj|const int how \
|NULLOK const MGVTBL *const vtbl|NULLOK const char *const name \
|const I32 namlen
+Ei |bool |sv_only_taint_gmagic|NN SV *sv
: exported for re.pm
EXp |MAGIC *|sv_magicext_mglob|NN SV *sv
ApdbamR |SV* |sv_mortalcopy |NULLOK SV *const oldsv
diff --git a/embed.h b/embed.h
index 802b624..a3b94d3 100644
--- a/embed.h
+++ b/embed.h
@@ -914,6 +914,7 @@
#define reg_temp_copy(a,b) Perl_reg_temp_copy(aTHX_ a,b)
#define report_uninit(a) Perl_report_uninit(aTHX_ a)
#define sv_magicext_mglob(a) Perl_sv_magicext_mglob(aTHX_ a)
+#define sv_only_taint_gmagic(a) S_sv_only_taint_gmagic(aTHX_ a)
#define validate_proto(a,b,c) Perl_validate_proto(aTHX_ a,b,c)
#define vivify_defelem(a) Perl_vivify_defelem(aTHX_ a)
#define yylex() Perl_yylex(aTHX)
diff --git a/inline.h b/inline.h
index cde2c54..1124412 100644
--- a/inline.h
+++ b/inline.h
@@ -378,6 +378,30 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
}
/*
+
+Return false if any get magic is on the SV other than taint magic.
+
+*/
+
+PERL_STATIC_INLINE bool
+S_sv_only_taint_gmagic(SV *sv) {
+ MAGIC *mg = SvMAGIC(sv);
+
+ PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
+
+ while (mg) {
+ if (mg->mg_type != PERL_MAGIC_taint
+ && !(mg->mg_flags & MGf_GSKIP)
+ && mg->mg_virtual->svt_get) {
+ return FALSE;
+ }
+ mg = mg->mg_moremagic;
+ }
+
+ return TRUE;
+}
+
+/*
* Local variables:
* c-indentation-style: bsd
* c-basic-offset: 4
diff --git a/mg.h b/mg.h
index 3aa2401..becef4a 100644
--- a/mg.h
+++ b/mg.h
@@ -65,7 +65,7 @@ struct magic {
/* assumes get-magic and stringification have already occurred */
# define MgBYTEPOS_set(mg,sv,pv,off) ( \
assert_((mg)->mg_type == PERL_MAGIC_regex_global) \
- SvPOK(sv) && !SvGMAGICAL(sv) \
+ SvPOK(sv) && (!SvGMAGICAL(sv) || sv_only_taint_gmagic(sv)) \
? (mg)->mg_len = (off), (mg)->mg_flags |= MGf_BYTES \
: ((mg)->mg_len = DO_UTF8(sv) \
? (SSize_t)utf8_length((U8 *)(pv), (U8 *)(pv)+(off)) \
diff --git a/proto.h b/proto.h
index 966c6d8..9ee0ecc 100644
--- a/proto.h
+++ b/proto.h
@@ -4475,6 +4475,11 @@ PERL_CALLCONV NV Perl_sv_nv(pTHX_ SV* sv)
#define PERL_ARGS_ASSERT_SV_NV \
assert(sv)
+PERL_STATIC_INLINE bool S_sv_only_taint_gmagic(pTHX_ SV *sv)
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC \
+ assert(sv)
+
PERL_CALLCONV char* Perl_sv_peek(pTHX_ SV* sv);
PERL_CALLCONV void Perl_sv_pos_b2u(pTHX_ SV *const sv, I32 *const offsetp)
__attribute__nonnull__(pTHX_2);
--
1.7.10.4
|
From @tonycozOn Tue Feb 03 20:54:58 2015, tonyc wrote:
This one with a test. Tony |
From @tonycoz0001-perl-123202-speed-up-scalar-g-against-tainted-string.patchFrom 4b20267a831816c776aec796a5fd5e8ec140acf6 Mon Sep 17 00:00:00 2001
From: Tony Cook <tony@develop-help.com>
Date: Tue, 17 Feb 2015 17:22:25 +1100
Subject: [PATCH] [perl #123202] speed up scalar //g against tainted strings
---
MANIFEST | 1 +
embed.fnc | 1 +
embed.h | 1 +
inline.h | 24 ++++++++++++++++++++++++
mg.h | 2 +-
proto.h | 5 +++++
t/perf/taint.t | 42 ++++++++++++++++++++++++++++++++++++++++++
7 files changed, 75 insertions(+), 1 deletion(-)
create mode 100644 t/perf/taint.t
diff --git a/MANIFEST b/MANIFEST
index 7a6ab41..b163e5c 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -5446,6 +5446,7 @@ t/perf/benchmarks.t test t/perf/benchmarks syntax
t/perf/opcount.t See if optimised subs have the right op counts
t/perf/optree.t Test presence of some op optimisations
t/perf/speed.t See if optimisations are keeping things fast
+t/perf/taint.t See if optimisations are keeping things fast (taint issues)
t/perl.supp Perl valgrind suppressions
t/porting/args_assert.t Check that all PERL_ARGS_ASSERT* macros are used
t/porting/authors.t Check that all authors have been acknowledged
diff --git a/embed.fnc b/embed.fnc
index cfe634f..c7b5f1d 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1447,6 +1447,7 @@ Apd |void |sv_magic |NN SV *const sv|NULLOK SV *const obj|const int how \
Apd |MAGIC *|sv_magicext |NN SV *const sv|NULLOK SV *const obj|const int how \
|NULLOK const MGVTBL *const vtbl|NULLOK const char *const name \
|const I32 namlen
+Ei |bool |sv_only_taint_gmagic|NN SV *sv
: exported for re.pm
EXp |MAGIC *|sv_magicext_mglob|NN SV *sv
ApdbamR |SV* |sv_mortalcopy |NULLOK SV *const oldsv
diff --git a/embed.h b/embed.h
index 802b624..a3b94d3 100644
--- a/embed.h
+++ b/embed.h
@@ -914,6 +914,7 @@
#define reg_temp_copy(a,b) Perl_reg_temp_copy(aTHX_ a,b)
#define report_uninit(a) Perl_report_uninit(aTHX_ a)
#define sv_magicext_mglob(a) Perl_sv_magicext_mglob(aTHX_ a)
+#define sv_only_taint_gmagic(a) S_sv_only_taint_gmagic(aTHX_ a)
#define validate_proto(a,b,c) Perl_validate_proto(aTHX_ a,b,c)
#define vivify_defelem(a) Perl_vivify_defelem(aTHX_ a)
#define yylex() Perl_yylex(aTHX)
diff --git a/inline.h b/inline.h
index cde2c54..1124412 100644
--- a/inline.h
+++ b/inline.h
@@ -378,6 +378,30 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
}
/*
+
+Return false if any get magic is on the SV other than taint magic.
+
+*/
+
+PERL_STATIC_INLINE bool
+S_sv_only_taint_gmagic(SV *sv) {
+ MAGIC *mg = SvMAGIC(sv);
+
+ PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
+
+ while (mg) {
+ if (mg->mg_type != PERL_MAGIC_taint
+ && !(mg->mg_flags & MGf_GSKIP)
+ && mg->mg_virtual->svt_get) {
+ return FALSE;
+ }
+ mg = mg->mg_moremagic;
+ }
+
+ return TRUE;
+}
+
+/*
* Local variables:
* c-indentation-style: bsd
* c-basic-offset: 4
diff --git a/mg.h b/mg.h
index 3aa2401..becef4a 100644
--- a/mg.h
+++ b/mg.h
@@ -65,7 +65,7 @@ struct magic {
/* assumes get-magic and stringification have already occurred */
# define MgBYTEPOS_set(mg,sv,pv,off) ( \
assert_((mg)->mg_type == PERL_MAGIC_regex_global) \
- SvPOK(sv) && !SvGMAGICAL(sv) \
+ SvPOK(sv) && (!SvGMAGICAL(sv) || sv_only_taint_gmagic(sv)) \
? (mg)->mg_len = (off), (mg)->mg_flags |= MGf_BYTES \
: ((mg)->mg_len = DO_UTF8(sv) \
? (SSize_t)utf8_length((U8 *)(pv), (U8 *)(pv)+(off)) \
diff --git a/proto.h b/proto.h
index 966c6d8..9ee0ecc 100644
--- a/proto.h
+++ b/proto.h
@@ -4475,6 +4475,11 @@ PERL_CALLCONV NV Perl_sv_nv(pTHX_ SV* sv)
#define PERL_ARGS_ASSERT_SV_NV \
assert(sv)
+PERL_STATIC_INLINE bool S_sv_only_taint_gmagic(pTHX_ SV *sv)
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC \
+ assert(sv)
+
PERL_CALLCONV char* Perl_sv_peek(pTHX_ SV* sv);
PERL_CALLCONV void Perl_sv_pos_b2u(pTHX_ SV *const sv, I32 *const offsetp)
__attribute__nonnull__(pTHX_2);
diff --git a/t/perf/taint.t b/t/perf/taint.t
new file mode 100644
index 0000000..386d97e
--- /dev/null
+++ b/t/perf/taint.t
@@ -0,0 +1,42 @@
+#!./perl -T
+#
+# All the tests in this file are ones that run exceptionally slowly
+# (each test taking seconds or even minutes) in the absence of particular
+# optimisations. Thus it is a sort of canary for optimisations being
+# broken.
+#
+# Although it includes a watchdog timeout, this is set to a generous limit
+# to allow for running on slow systems; therefore a broken optimisation
+# might be indicated merely by this test file taking unusually long to
+# run, rather than actually timing out.
+#
+# This is similar to t/perf/speed.t but tests performance regressions specific
+# to taint.
+#
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = ('../lib');
+ require Config; import Config;
+ require './test.pl';
+}
+
+use strict;
+use warnings;
+use Scalar::Util qw(tainted);
+
+$| = 1;
+
+plan tests => 2;
+
+watchdog(60);
+
+{
+ my $in = substr($ENV{PATH}, 0, 0) . ( "ab" x 200_000 );
+ utf8::upgrade($in);
+ ok(tainted($in), "performance issue only when tainted");
+ while ($in =~ /\Ga+b/g) { }
+ pass("\\G on tainted string");
+}
+
+1;
--
1.7.10.4
|
@tonycoz - Status changed from 'open' to 'pending release' |
From @khwilliamsonThanks for submitting this ticket The issue should be resolved with the release today of Perl v5.22, available at http://www.perl.org/get.html -- |
@khwilliamson - Status changed from 'pending release' to 'resolved' |
Migrated from rt.perl.org#123202 (status was 'resolved')
Searchable as RT123202$
The text was updated successfully, but these errors were encountered: