From 933133ff98f491ec090c5e3c1f4e412ba7c030aa Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Tue, 6 Dec 2022 13:28:43 +0100 Subject: [PATCH 01/12] podcheck.t - make error message less confusing podcheck.t assumes all non =head text will be indented at least 4 spaces, but this wasn't explicit in the output from podcheck for verbatim line length checks. --- t/porting/podcheck.t | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/t/porting/podcheck.t b/t/porting/podcheck.t index e00af6efcddc..1b5cd65402b3 100644 --- a/t/porting/podcheck.t +++ b/t/porting/podcheck.t @@ -1135,7 +1135,8 @@ package My::Pod::Checker { # Extend Pod::Checker $self->poderror({ -line => $start_line{$addr} + $i, -msg => $line_length, - parameter => "+$exceeds (including " . ($indent - $INDENT) . " from =over's)", + parameter => "+$exceeds (including " . ($indent - $INDENT) . + " from =over's and $INDENT as base indent)", }); } From 3832602bcf35354f7f3eacec5f80252ccac6ce8c Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Sat, 26 Nov 2022 11:51:54 +0100 Subject: [PATCH 02/12] t/ - INC hardening tests These tests are pretty much all marked TODO. In the following patches we will remove the TODO as we fix the bug or implement the feature. --- t/op/inccode.t | 24 ++++++-- t/op/require_errors.t | 127 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 145 insertions(+), 6 deletions(-) diff --git a/t/op/inccode.t b/t/op/inccode.t index 0dbd46ef1827..6b34284415c5 100644 --- a/t/op/inccode.t +++ b/t/op/inccode.t @@ -21,7 +21,7 @@ unless (is_miniperl()) { use strict; -plan(tests => 68 + !is_miniperl() * (3 + 14 * $can_fork)); +plan(tests => 68 + !is_miniperl() * (4 + 14 * $can_fork)); sub get_temp_fh { my $f = tempfile(); @@ -294,14 +294,18 @@ SKIP: { $$t = sub { $called ++; !1 }; delete $INC{'foo.pm'}; # in case another test uses foo eval { require foo }; - is $INCtie::count, 2, # 2nd time for "Can't locate" -- XXX correct? + { local $::TODO = "Will be fixed in a follow up patch"; + is $INCtie::count, 1, 'FETCH is called once on undef scalar-tied @INC elem'; + } is $called, 1, 'sub in scalar-tied @INC elem is called'; () = "$INC[0]"; # force a fetch, so the SV is ROK $INCtie::count = 0; eval { require foo }; - is $INCtie::count, 2, + { local $::TODO = "Will be fixed in a follow up patch"; + is $INCtie::count, 1, 'FETCH is called once on scalar-tied @INC elem holding ref'; + } is $called, 2, 'sub in scalar-tied @INC elem holding ref is called'; $$t = []; $INCtie::count = 0; @@ -311,8 +315,10 @@ SKIP: { $$t = "string"; $INCtie::count = 0; eval { require foo }; - is $INCtie::count, 2, + { local $::TODO = "Will be fixed in a follow up patch"; + is $INCtie::count, 1, 'FETCH called once on scalar-tied @INC elem returning string'; + } } @@ -397,3 +403,13 @@ if ($can_fork) { is ("@::bbblplast", "0 1 2 3 4 5", "All ran with a filter"); } +SKIP:{ + skip "need fork",1 unless $can_fork; + local $::TODO = "Pending"; + fresh_perl_like('@INC=("A",bless({},"Hook"),"D"); ' + .'sub Hook::INCDIR { return "B","C"} ' + .'eval "require Frobnitz" or print $@;', + qr/\(\@INC contains: A Hook=HASH\(0x[A-Fa-f0-9]+\) B C D\)/, + {}, + "Check if INCDIR hook works as expected"); +} diff --git a/t/op/require_errors.t b/t/op/require_errors.t index e20916241aa2..4cc15c66e201 100644 --- a/t/op/require_errors.t +++ b/t/op/require_errors.t @@ -9,7 +9,13 @@ BEGIN { use strict; use warnings; -plan(tests => 59); +plan(tests => 71); + + +# Dedupe @INC. In a future patch we /may/ refuse to process items +# more than once and deduping here will prevent the tests from failing +# should we make that change. +my %seen; @INC = grep {!$seen{$_}++} @INC; my $nonfile = tempfile(); @@ -297,9 +303,126 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, # Older perls will output "error at line 1". fresh_perl_like( - 'use lib qq(./lib); BEGIN{ unshift @INC, sub { if ($_[1] eq "CannotParse.pm" and !$seen++) { ' + 'use lib qq(./lib); BEGIN{ unshift @INC, ' + .'sub { if ($_[1] eq "CannotParse.pm" and !$seen++) { ' .'eval q(require $_[1]); warn $@; my $code= qq[die qq(error)];' .'open my $fh,"<", q(lib/Dies.pm); return $fh } } } require CannotParse;', qr!\Asyntax error.*?^error at /loader/0x[A-Fa-f0-9]+/CannotParse\.pm line 1\.!ms, { }, 'Inc hooks have the correct cop_file'); } +{ + local $::TODO = "Pending segfault fix"; + # this can segfault or assert prior to @INC hardening. + fresh_perl_like( + 'unshift @INC, sub { *INC=["a","b"] }; ' + .'eval "require Frobnitz" or print $@', + qr!\(\@INC contains: CODE\(0x[A-Fa-f0-9]+\) b\)!, + { }, 'INC hooks do not segfault when overwritten'); +} +{ + local $::TODO = "Pending error message improvement"; + # this is the defined behavior, but in older perls the error message + # would lie and say "contains: a b", which is true in the sense that + # it is the value of @INC after the require, but not the directory + # list that was looked at. + fresh_perl_like( + '@INC = (sub { @INC=("a","b"); () }, "z"); ' + .'eval "require Frobnitz" or print $@', + qr!\(\@INC contains: CODE\(0x[A-Fa-f0-9]+\) b\)!, + { }, 'INC hooks that overwrite @INC continue as expected (skips a and z)'); +} +{ + local $::TODO = "Pending new feature \$INC"; + # as of 5.37.7 + fresh_perl_like( + '@INC = (sub { @INC=qw(a b); undef $INC }, "z"); ' + .'eval "require Frobnitz" or print $@', + qr!\(\@INC contains: CODE\(0x[A-Fa-f0-9]+\) a b\)!, + { }, 'INC hooks that overwrite @INC and undef $INC continue at start'); +} +{ + local $::TODO = "Pending new feature: INCDIR"; + # as of 5.37.7 + fresh_perl_like( + 'sub CB::INCDIR { return "b", "c","d" }; ' + .'@INC = ("a",bless({},"CB"),"e");' + .'eval "require Frobnitz" or print $@', + qr!\(\@INC contains: a CB=HASH\(0x[A-Fa-f0-9]+\) b c d e\)!, + { }, 'INCDIR works as expected'); +} +{ + local $::TODO = "Pending object handling improvements"; + # as of 5.37.7 + fresh_perl_like( + '@INC = ("a",bless({},"CB"),"e");' + .'eval "require Frobnitz" or print $@', + qr!\(\@INC contains: a CB=HASH\(0x[A-Fa-f0-9]+\) e\)!, + { }, 'Objects with no INC or INCDIR method are stringified'); +} +{ + local $::TODO = "Pending object handling improvements"; + # as of 5.37.7 + fresh_perl_like( + '{package CB; use overload qw("")=>sub { "blorg"};} ' + .'@INC = ("a",bless({},"CB"),"e");' + .'eval "require Frobnitz" or print $@', + qr!\(\@INC contains: a blorg e\)!, + { }, 'Objects with overload and no INC or INCDIR method are stringified'); +} +{ + local $::TODO = "Pending object handling improvments"; + # as of 5.37.7 + fresh_perl_like( + '@INC = ("a",bless(sub { warn "blessed sub called" },"CB"),"e");' + .'eval "require Frobnitz" or print $@', + qr!blessed sub called.*\(\@INC contains: a CB=CODE\(0x[a-fA-F0-9]+\) e\)!s, + { }, 'Blessed subs with no hook methods are executed'); +} +{ + local $::TODO = "Pending better error messages (eval)"; + # as of 5.37.7 + fresh_perl_like( + '@INC = ("a",bless(sub { die "blessed sub called" },"CB"),"e");' + .'eval "require Frobnitz" or print $@', + qr!INC sub hook died--halting \@INC search!s, + { }, 'Blessed subs that die produce expected extra message'); +} +{ + local $::TODO = "Pending better error messages (eval)"; + # as of 5.37.7 + fresh_perl_like( + 'sub CB::INC { die "bad mojo" } ' + .'@INC = ("a",bless(sub { die "blessed sub called" },"CB"),"e");' + .'eval "require Frobnitz" or print $@', + qr!bad mojo.*INC method hook died--halting \@INC search!s, + { }, 'Blessed subs with methods call method and produce expected message'); +} +{ + local $::TODO = "Pending object handling improvments"; + # as of 5.37.7 + fresh_perl_like( + '@INC = ("a",[bless([],"CB"),1],"e");' + .'eval "require Frobnitz" or print $@', + qr!Object with arguments in \@INC does not support a hook method!s, + { }, 'Blessed objects with no hook methods in array form produce expected exception'); +} +{ + local $::TODO = "Pending new feature: INCDIR"; + # as of 5.37.7 + fresh_perl_like( + 'sub CB::INCDIR { "i" } sub CB2::INCDIR { }' + .'@INC = ("a",bless(sub{"b"},"CB"),bless(sub{"c"},"CB2"),"e");' + .'eval "require Frobnitz" or print $@', + qr!\(\@INC contains: a CB=CODE\(0x[a-fA-F0-9]+\) i CB2=CODE\(0x[a-fA-F0-9]+\) e\)!s, + { }, 'Blessed subs with INCDIR methods call INCDIR'); +} +{ + local $::TODO = "Pending new feature: INCDIR"; + # as of 5.37.7 + fresh_perl_like( + 'sub CB::INCDIR { return @{$_[2]} }' + .'@INC = ("a",[bless([],"CB"),"b"],"c");' + .'eval "require Frobnitz" or print $@', + qr!\(\@INC contains: a ARRAY\(0x[a-fA-F0-9]+\) CB=ARRAY\(0x[a-fA-F0-9]+\) b c\)!s, + { }, 'INCDIR ref returns are stringified'); +} From a9e7fb11c231293da6780db764c42c4a491207c1 Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Fri, 25 Nov 2022 19:23:51 +0100 Subject: [PATCH 03/12] pp_ctl.c - rename vars to something more descriptive in require_file() "i" and "ar" is a bit too minimal, and I will be refactoring this code somewhat in following patches, so this preps it for future changes. --- pp_ctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pp_ctl.c b/pp_ctl.c index fb760ee18324..d9d1a748de4a 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -4260,16 +4260,16 @@ S_require_file(pTHX_ SV *sv) * For searchable paths, just search @INC normally */ if (!tryrsfp && !(errno == EACCES && !path_searchable)) { - AV * const ar = GvAVn(PL_incgv); - SSize_t i; + AV * const inc_ar = GvAVn(PL_incgv); + SSize_t inc_idx; #ifdef VMS if (vms_unixname) #endif { SV *nsv = sv; namesv = newSV_type(SVt_PV); - for (i = 0; i <= AvFILL(ar); i++) { - SV * const dirsv = *av_fetch(ar, i, TRUE); + for (inc_idx = 0; inc_idx <= AvFILL(inc_ar); inc_idx++) { + SV * const dirsv = *av_fetch(inc_ar, inc_idx, TRUE); SvGETMAGIC(dirsv); if (SvROK(dirsv)) { From 88f33c91b1602acaa383c44b41f4de78f481b57e Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Fri, 25 Nov 2022 19:29:38 +0100 Subject: [PATCH 04/12] pp_ctl.c - make ENTER debug data a bit more clear We will have more than one INC hook in the future. --- pp_ctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pp_ctl.c b/pp_ctl.c index d9d1a748de4a..02c67fc91c30 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -4294,7 +4294,7 @@ S_require_file(pTHX_ SV *sv) SvSetSV_nosteal(nsv,sv); } - ENTER_with_name("call_INC"); + ENTER_with_name("call_INC_hook"); SAVETMPS; EXTEND(SP, 2); @@ -4374,7 +4374,7 @@ S_require_file(pTHX_ SV *sv) PUTBACK; FREETMPS; - LEAVE_with_name("call_INC"); + LEAVE_with_name("call_INC_hook"); /* Now re-mortalize it. */ sv_2mortal(filter_cache); From 9d7961ffd458549cb5152749ed777c9305fcbb7f Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Fri, 25 Nov 2022 19:32:29 +0100 Subject: [PATCH 05/12] pp_ctl.c - refetch @INC from *INC after hook The original value may have been freed by the time the hook returns, so we have to refetch it immediately after execution. We also move the declaration into a more minimal scope. --- pp_ctl.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pp_ctl.c b/pp_ctl.c index 02c67fc91c30..1b0be6f19678 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -4260,7 +4260,6 @@ S_require_file(pTHX_ SV *sv) * For searchable paths, just search @INC normally */ if (!tryrsfp && !(errno == EACCES && !path_searchable)) { - AV * const inc_ar = GvAVn(PL_incgv); SSize_t inc_idx; #ifdef VMS if (vms_unixname) @@ -4268,6 +4267,7 @@ S_require_file(pTHX_ SV *sv) { SV *nsv = sv; namesv = newSV_type(SVt_PV); + AV *inc_ar = GvAVn(PL_incgv); for (inc_idx = 0; inc_idx <= AvFILL(inc_ar); inc_idx++) { SV * const dirsv = *av_fetch(inc_ar, inc_idx, TRUE); @@ -4376,6 +4376,13 @@ S_require_file(pTHX_ SV *sv) FREETMPS; LEAVE_with_name("call_INC_hook"); + /* + It is possible that @INC has been replaced and that inc_ar + now points at a freed AV. So we have to refresh it from + the GV to be sure. + */ + inc_ar = GvAVn(PL_incgv); + /* Now re-mortalize it. */ sv_2mortal(filter_cache); From 4b23889803519cdee0e770aa67ca7bd030feedb0 Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Fri, 25 Nov 2022 19:35:38 +0100 Subject: [PATCH 06/12] pp_ctl.c - eval INC hooks, and rethrow errors with more useful message When an INC hook blows up debugging what is going on can be somewhat difficult. This adds some debugging data if the error message does not seem to be customized. --- pp_ctl.c | 35 +++++++++++++++++++++++++++++++---- t/op/require_errors.t | 1 - 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/pp_ctl.c b/pp_ctl.c index 1b0be6f19678..68ee99bd4ad0 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -4307,10 +4307,13 @@ S_require_file(pTHX_ SV *sv) sv_setsv_nomg(l, loader); loader = l; } - if (sv_isobject(loader)) - count = call_method("INC", G_LIST); - else - count = call_sv(loader, G_LIST); + const char *method = NULL; + if (sv_isobject(loader)) { + method = "INC"; + count = call_method(method, G_LIST|G_EVAL); + } else { + count = call_sv(loader, G_LIST|G_EVAL); + } SPAGAIN; if (count > 0) { @@ -4367,6 +4370,30 @@ S_require_file(pTHX_ SV *sv) PERL_SCRIPT_MODE); } SP--; + } else { + SV *errsv= ERRSV; + if (SvTRUE(errsv) && !SvROK(errsv)) { + STRLEN l; + char *pv= SvPV(errsv,l); + /* Heuristic to tell if this error message + * includes the standard line number info: + * check if the line ends in digit dot newline. + * If it does then we add some extra info so + * its obvious this is coming from a hook. + * If it is a user generated error we try to + * leave it alone. l>12 is to ensure the + * other checks are in string, but also + * accounts for "at ... line 1.\n" to a + * certain extent. Really we should check + * further, but this is good enough for back + * compat I think. + */ + if (l>=12 && pv[l-1] == '\n' && pv[l-2] == '.' && isDIGIT(pv[l-3])) + sv_catpvf(errsv, "%s %s hook died--halting @INC search", + method ? method : "INC", + method ? "method" : "sub"); + croak_sv(errsv); + } } /* FREETMPS may free our filter_cache */ diff --git a/t/op/require_errors.t b/t/op/require_errors.t index 4cc15c66e201..32cd6dbd8c32 100644 --- a/t/op/require_errors.t +++ b/t/op/require_errors.t @@ -388,7 +388,6 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, { }, 'Blessed subs that die produce expected extra message'); } { - local $::TODO = "Pending better error messages (eval)"; # as of 5.37.7 fresh_perl_like( 'sub CB::INC { die "bad mojo" } ' From 012640444841e6662e53db003dbac73a814a9e65 Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Fri, 25 Nov 2022 20:05:24 +0100 Subject: [PATCH 07/12] pp_ctl.c - require_file: truthful errors and tests: ties are called once We need to keep track of what we actually checked. We cannot simply report the state of @INC at the end of the require, as it might have changed, possibly several times during the require. This also accounts for most "silly" stuff that might upset our internal assumptions, for instance where a tie might report one value to the code doing the directory check and another in the error message. We had long standing tests to see that @INC tie elements where called "once" but they actually tested they were called twice despite claiming otherwise. This fixes all of those test so that a tied @INC entry is called exactly once, and whatever it returned the first time is placed in the error message. This includes a change to the require error message, so that where it once said "@INC contains:" it now says "@INC entries checked:". Note this patch requires parent v0.239 to be available (which was done in the previous commit). --- pod/perldelta.pod | 31 +++++++++++++++++++++++++++++++ pp_ctl.c | 28 +++++++++++++++++++++------- t/op/inccode.t | 8 +------- t/op/require_errors.t | 30 ++++++++++++++---------------- 4 files changed, 67 insertions(+), 30 deletions(-) diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 47b3cc0641e2..5aa97e5cc2b9 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -232,6 +232,37 @@ XXX Changes (i.e. rewording) of diagnostic messages go here =item * +The error message that is produced when a C or C statement +fails has been changed. It used to contain the words C<@INC contains:>, +and it used to show the state of C<@INC> *after* the require had +completed and failed. The error message has been changed to say C<@INC +entries checked:> and to reflect the actual directories or hooks that +were executed during the require statement. For example: + + perl -e'push @INC, sub {@INC=()}; eval "require Frobnitz" + or die $@' + Can't locate Frobnitz.pm in @INC (you may need to install the + Frobnitz module) (@INC contains:) at (eval 1) line 1. + +Will change to (with some output elided for clarity): + + perl -e'push @INC, sub {@INC=()}; eval "require Frobnitz" + or die $@' + Can't locate Frobnitz.pm in @INC (you may need to install the + Frobnitz module) (@INC entries checked: + .../site_perl/5.37.7/x86_64-linux .../site_perl/5.37.7 + .../5.37.7/x86_64-linux .../5.37.7 CODE(0x562745e684b8)) + at (eval 1) line 1. + +thus showing the actual directories checked. Code that checks for +C<@INC contains:> in error messages should be hardened against any future +wording changes between the C<@INC> and C<:>, for instance use +C instead of using C or +C in tests as this will ensure both forward +and backward compatibility. + +=item * + XXX Describe change here =back diff --git a/pp_ctl.c b/pp_ctl.c index 68ee99bd4ad0..618349f9a56f 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -4259,6 +4259,7 @@ S_require_file(pTHX_ SV *sv) * * For searchable paths, just search @INC normally */ + AV *inc_checked = (AV*)sv_2mortal((SV*)newAV()); if (!tryrsfp && !(errno == EACCES && !path_searchable)) { SSize_t inc_idx; #ifdef VMS @@ -4269,9 +4270,21 @@ S_require_file(pTHX_ SV *sv) namesv = newSV_type(SVt_PV); AV *inc_ar = GvAVn(PL_incgv); for (inc_idx = 0; inc_idx <= AvFILL(inc_ar); inc_idx++) { - SV * const dirsv = *av_fetch(inc_ar, inc_idx, TRUE); + SV *dirsv = *av_fetch(inc_ar, inc_idx, TRUE); + + if (SvGMAGICAL(dirsv)) { + SvGETMAGIC(dirsv); + dirsv = newSVsv_nomg(dirsv); + } else { + /* on the other hand, since we aren't copying we do need + * to increment */ + SvREFCNT_inc(dirsv); + } + if (!SvOK(dirsv)) + continue; + + av_push(inc_checked, dirsv); - SvGETMAGIC(dirsv); if (SvROK(dirsv)) { int count; SV **svp; @@ -4536,14 +4549,15 @@ S_require_file(pTHX_ SV *sv) DIE(aTHX_ "Can't locate %s: %s: %s", name, tryname, Strerror(saved_errno)); } else { - if (path_searchable) { /* did we lookup @INC? */ - AV * const ar = GvAVn(PL_incgv); + if (path_searchable) { /* did we lookup @INC? */ SSize_t i; SV *const msg = newSVpvs_flags("", SVs_TEMP); SV *const inc = newSVpvs_flags("", SVs_TEMP); - for (i = 0; i <= AvFILL(ar); i++) { + for (i = 0; i <= AvFILL(inc_checked); i++) { + SV **svp= av_fetch(inc_checked, i, TRUE); + if (!svp || !*svp) continue; sv_catpvs(inc, " "); - sv_catsv(inc, *av_fetch(ar, i, TRUE)); + sv_catsv(inc, *svp); } if (memENDPs(name, len, ".pm")) { const char *e = name + len - (sizeof(".pm") - 1); @@ -4597,7 +4611,7 @@ S_require_file(pTHX_ SV *sv) /* diag_listed_as: Can't locate %s */ DIE(aTHX_ - "Can't locate %s in @INC%" SVf " (@INC contains:%" SVf ")", + "Can't locate %s in @INC%" SVf " (@INC entries checked:%" SVf ")", name, msg, inc); } } diff --git a/t/op/inccode.t b/t/op/inccode.t index 6b34284415c5..728855178421 100644 --- a/t/op/inccode.t +++ b/t/op/inccode.t @@ -294,18 +294,14 @@ SKIP: { $$t = sub { $called ++; !1 }; delete $INC{'foo.pm'}; # in case another test uses foo eval { require foo }; - { local $::TODO = "Will be fixed in a follow up patch"; is $INCtie::count, 1, 'FETCH is called once on undef scalar-tied @INC elem'; - } is $called, 1, 'sub in scalar-tied @INC elem is called'; () = "$INC[0]"; # force a fetch, so the SV is ROK $INCtie::count = 0; eval { require foo }; - { local $::TODO = "Will be fixed in a follow up patch"; is $INCtie::count, 1, 'FETCH is called once on scalar-tied @INC elem holding ref'; - } is $called, 2, 'sub in scalar-tied @INC elem holding ref is called'; $$t = []; $INCtie::count = 0; @@ -315,10 +311,8 @@ SKIP: { $$t = "string"; $INCtie::count = 0; eval { require foo }; - { local $::TODO = "Will be fixed in a follow up patch"; is $INCtie::count, 1, 'FETCH called once on scalar-tied @INC elem returning string'; - } } @@ -409,7 +403,7 @@ SKIP:{ fresh_perl_like('@INC=("A",bless({},"Hook"),"D"); ' .'sub Hook::INCDIR { return "B","C"} ' .'eval "require Frobnitz" or print $@;', - qr/\(\@INC contains: A Hook=HASH\(0x[A-Fa-f0-9]+\) B C D\)/, + qr/\(\@INC[\w ]+: A Hook=HASH\(0x[A-Fa-f0-9]+\) B C D\)/, {}, "Check if INCDIR hook works as expected"); } diff --git a/t/op/require_errors.t b/t/op/require_errors.t index 32cd6dbd8c32..6ea300254f15 100644 --- a/t/op/require_errors.t +++ b/t/op/require_errors.t @@ -27,7 +27,7 @@ for my $file ($nonfile, ' ') { require $file; }; - like $@, qr/^Can't locate $file in \@INC \(\@INC contains: \Q@INC\E\) at/, + like $@, qr/^Can't locate $file in \@INC \(\@INC[\w ]+: \Q@INC\E\) at/, "correct error message for require '$file'"; } @@ -91,7 +91,7 @@ for my $file ($nonfile, ' ') { $hint =~ s/\.pm$//; $exp .= " (you may need to install the $hint module)"; } - $exp .= " (\@INC contains: @INC) at"; + $exp .= " (\@INC entries checked: @INC) at"; } else { # undef implies a require which doesn't compile, @@ -139,14 +139,14 @@ eval { require "$nonfile.ph"; }; -like $@, qr/^Can't locate $nonfile\.ph in \@INC \(did you run h2ph\?\) \(\@INC contains: @INC\) at/; +like $@, qr/^Can't locate $nonfile\.ph in \@INC \(did you run h2ph\?\) \(\@INC[\w ]+: @INC\) at/; for my $file ("$nonfile.h", ".h") { eval { require $file }; - like $@, qr/^Can't locate \Q$file\E in \@INC \(change \.h to \.ph maybe\?\) \(did you run h2ph\?\) \(\@INC contains: @INC\) at/, + like $@, qr/^Can't locate \Q$file\E in \@INC \(change \.h to \.ph maybe\?\) \(did you run h2ph\?\) \(\@INC[\w ]+: @INC\) at/, "correct error message for require '$file'"; } @@ -155,7 +155,7 @@ for my $file ("$nonfile.ph", ".ph") { require $file }; - like $@, qr/^Can't locate \Q$file\E in \@INC \(did you run h2ph\?\) \(\@INC contains: @INC\) at/, + like $@, qr/^Can't locate \Q$file\E in \@INC \(did you run h2ph\?\) \(\@INC[\w ]+: @INC\) at/, "correct error message for require '$file'"; } @@ -311,16 +311,14 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, { }, 'Inc hooks have the correct cop_file'); } { - local $::TODO = "Pending segfault fix"; # this can segfault or assert prior to @INC hardening. fresh_perl_like( 'unshift @INC, sub { *INC=["a","b"] }; ' .'eval "require Frobnitz" or print $@', - qr!\(\@INC contains: CODE\(0x[A-Fa-f0-9]+\) b\)!, + qr!\(\@INC[\w ]+: CODE\(0x[A-Fa-f0-9]+\) b\)!, { }, 'INC hooks do not segfault when overwritten'); } { - local $::TODO = "Pending error message improvement"; # this is the defined behavior, but in older perls the error message # would lie and say "contains: a b", which is true in the sense that # it is the value of @INC after the require, but not the directory @@ -328,7 +326,7 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, fresh_perl_like( '@INC = (sub { @INC=("a","b"); () }, "z"); ' .'eval "require Frobnitz" or print $@', - qr!\(\@INC contains: CODE\(0x[A-Fa-f0-9]+\) b\)!, + qr!\(\@INC[\w ]+: CODE\(0x[A-Fa-f0-9]+\) b\)!, { }, 'INC hooks that overwrite @INC continue as expected (skips a and z)'); } { @@ -337,7 +335,7 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, fresh_perl_like( '@INC = (sub { @INC=qw(a b); undef $INC }, "z"); ' .'eval "require Frobnitz" or print $@', - qr!\(\@INC contains: CODE\(0x[A-Fa-f0-9]+\) a b\)!, + qr!\(\@INC[\w ]+: CODE\(0x[A-Fa-f0-9]+\) a b\)!, { }, 'INC hooks that overwrite @INC and undef $INC continue at start'); } { @@ -347,7 +345,7 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, 'sub CB::INCDIR { return "b", "c","d" }; ' .'@INC = ("a",bless({},"CB"),"e");' .'eval "require Frobnitz" or print $@', - qr!\(\@INC contains: a CB=HASH\(0x[A-Fa-f0-9]+\) b c d e\)!, + qr!\(\@INC[\w ]+: a CB=HASH\(0x[A-Fa-f0-9]+\) b c d e\)!, { }, 'INCDIR works as expected'); } { @@ -356,7 +354,7 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, fresh_perl_like( '@INC = ("a",bless({},"CB"),"e");' .'eval "require Frobnitz" or print $@', - qr!\(\@INC contains: a CB=HASH\(0x[A-Fa-f0-9]+\) e\)!, + qr!\(\@INC[\w ]+: a CB=HASH\(0x[A-Fa-f0-9]+\) e\)!, { }, 'Objects with no INC or INCDIR method are stringified'); } { @@ -366,7 +364,7 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, '{package CB; use overload qw("")=>sub { "blorg"};} ' .'@INC = ("a",bless({},"CB"),"e");' .'eval "require Frobnitz" or print $@', - qr!\(\@INC contains: a blorg e\)!, + qr!\(\@INC[\w ]+: a blorg e\)!, { }, 'Objects with overload and no INC or INCDIR method are stringified'); } { @@ -375,7 +373,7 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, fresh_perl_like( '@INC = ("a",bless(sub { warn "blessed sub called" },"CB"),"e");' .'eval "require Frobnitz" or print $@', - qr!blessed sub called.*\(\@INC contains: a CB=CODE\(0x[a-fA-F0-9]+\) e\)!s, + qr!blessed sub called.*\(\@INC[\w ]+: a CB=CODE\(0x[a-fA-F0-9]+\) e\)!s, { }, 'Blessed subs with no hook methods are executed'); } { @@ -412,7 +410,7 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, 'sub CB::INCDIR { "i" } sub CB2::INCDIR { }' .'@INC = ("a",bless(sub{"b"},"CB"),bless(sub{"c"},"CB2"),"e");' .'eval "require Frobnitz" or print $@', - qr!\(\@INC contains: a CB=CODE\(0x[a-fA-F0-9]+\) i CB2=CODE\(0x[a-fA-F0-9]+\) e\)!s, + qr!\(\@INC[\w ]+: a CB=CODE\(0x[a-fA-F0-9]+\) i CB2=CODE\(0x[a-fA-F0-9]+\) e\)!s, { }, 'Blessed subs with INCDIR methods call INCDIR'); } { @@ -422,6 +420,6 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, 'sub CB::INCDIR { return @{$_[2]} }' .'@INC = ("a",[bless([],"CB"),"b"],"c");' .'eval "require Frobnitz" or print $@', - qr!\(\@INC contains: a ARRAY\(0x[a-fA-F0-9]+\) CB=ARRAY\(0x[a-fA-F0-9]+\) b c\)!s, + qr!\(\@INC[\w ]+: a ARRAY\(0x[a-fA-F0-9]+\) CB=ARRAY\(0x[a-fA-F0-9]+\) b c\)!s, { }, 'INCDIR ref returns are stringified'); } From 6c363f64503d9d85a6a18a3c3d6b529663eb578f Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Fri, 25 Nov 2022 20:10:05 +0100 Subject: [PATCH 08/12] pp_ctl.c - support $INC in require_file @INC hook $INC is localized to be the C level index of the loop over the @INC array. At the end of the hook its value is assigned back to the C level loop iterator (inc_idx). This allows a hook to control where in the @INC array the loop should continue, for instance -1 represents "reprocess from the beginning" (and as a convenience so does undef). This can be useful if the @INC array is modified by a hook. Normally we would just "continue along", but this may or may not be the right thing to do, so we let the user decide. --- pp_ctl.c | 18 ++++++++++++++++++ t/op/require_errors.t | 1 - 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/pp_ctl.c b/pp_ctl.c index 618349f9a56f..6872b8c025b9 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -4307,6 +4307,9 @@ S_require_file(pTHX_ SV *sv) SvSetSV_nosteal(nsv,sv); } + SV * inc_idx_sv = save_scalar(PL_incgv); + sv_setiv(inc_idx_sv,inc_idx); + ENTER_with_name("call_INC_hook"); SAVETMPS; EXTEND(SP, 2); @@ -4412,6 +4415,21 @@ S_require_file(pTHX_ SV *sv) /* FREETMPS may free our filter_cache */ SvREFCNT_inc_simple_void(filter_cache); + /* + Let the hook override which @INC entry we visit + next by setting $INC to a different value than it + was before we called the hook. If they have + completely rewritten the array they might want us + to start traversing from the beginning, which is + represented by -1. We use undef as an equivalent of + -1. This can't be used as a way to call a hook + twice, as we still dedupe. + We have to do this before we LEAVE, as we localized + $INC before we called the hook. + */ + inc_idx_sv = GvSVn(PL_incgv); + inc_idx = SvOK(inc_idx_sv) ? SvIV(inc_idx_sv) : -1; + PUTBACK; FREETMPS; LEAVE_with_name("call_INC_hook"); diff --git a/t/op/require_errors.t b/t/op/require_errors.t index 6ea300254f15..849d57fc325b 100644 --- a/t/op/require_errors.t +++ b/t/op/require_errors.t @@ -330,7 +330,6 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, { }, 'INC hooks that overwrite @INC continue as expected (skips a and z)'); } { - local $::TODO = "Pending new feature \$INC"; # as of 5.37.7 fresh_perl_like( '@INC = (sub { @INC=qw(a b); undef $INC }, "z"); ' From 2a59bf1d31c90d149473426e75383b246d359472 Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Fri, 25 Nov 2022 20:32:53 +0100 Subject: [PATCH 09/12] pp_ctl.c - handle objects in @INC a bit more gracefully If an object doesn't have an INC hook then don't call it. Either simply stringify the ref (think overloads), OR, if it is a blessed coderef, then just execute it like it was an unblessed coderef. Also handle when an object is passed as the first argument of the array form of call. Previously this would throw an exception as the first argument on the stack when we call_method() would not be blessed. When this is the scenario we pass in the array as the third argument to the method. --- pod/perldelta.pod | 5 +++++ pod/perldiag.pod | 8 ++++++++ pp_ctl.c | 47 +++++++++++++++++++++++++++++++++++-------- t/op/require_errors.t | 5 ----- 4 files changed, 52 insertions(+), 13 deletions(-) diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 5aa97e5cc2b9..dc7cd8343db4 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -210,6 +210,11 @@ and New Warnings =item * +L + +=item * + XXX L =back diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 6b3d132a23ef..6e538cf70266 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -4461,6 +4461,14 @@ and you mentioned a variable that starts with 0 that has more than one digit. You probably want to remove the leading 0, or if the intent was to express a variable name in octal you should convert to decimal. +=item Object with arguments in @INC does not support a hook method + +(F) You pushed an array reference hook into C<@INC> which has an object +as the first argument, but the object doesn't support any known hooks. +Since you used the array form of creating a hook, you should have supplied +an object that supports either the C or C methods. You +could also use a coderef instead of an object. + =item Octal number > 037777777777 non-portable (W portable) The octal number you specified is larger than 2**32-1 diff --git a/pp_ctl.c b/pp_ctl.c index 6872b8c025b9..6c3a34b20c94 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -4307,25 +4307,55 @@ S_require_file(pTHX_ SV *sv) SvSetSV_nosteal(nsv,sv); } + const char *method = NULL; SV * inc_idx_sv = save_scalar(PL_incgv); sv_setiv(inc_idx_sv,inc_idx); + if (sv_isobject(loader)) { + /* if it is an object and it has an INC method, then + * call the method. + */ + HV *pkg = SvSTASH(SvRV(loader)); + GV * gv = gv_fetchmethod_pvn_flags(pkg, "INC", 3, 0); + if (gv && isGV(gv)) { + method = "INC"; + } + /* But if we have no method, check if this is a + * coderef, if it is then we treat it as an + * unblessed coderef would be treated: we + * execute it. If it is some other and it is in + * an array ref wrapper, then really we don't + * know what to do with it, (why use the + * wrapper?) and we throw an exception to help + * debug. If it is not in a wrapper assume it + * has an overload and treat it as a string. + * Maybe in the future we can detect if it does + * have overloading and throw an error if not. + */ + if (!method) { + if (SvTYPE(SvRV(loader)) != SVt_PVCV) { + if (dirsv != loader) + croak("Object with arguments in @INC does not support a hook method"); + else + goto treat_as_string; + } + } + } ENTER_with_name("call_INC_hook"); SAVETMPS; - EXTEND(SP, 2); - + EXTEND(SP, 2 + ((method && (loader != dirsv)) ? 1 : 0)); PUSHMARK(SP); - PUSHs(dirsv); + PUSHs(method ? loader : dirsv); /* always use the object for method calls */ PUSHs(nsv); + if (method && (loader != dirsv)) /* add the args array for method calls */ + PUSHs(dirsv); PUTBACK; if (SvGMAGICAL(loader)) { SV *l = sv_newmortal(); sv_setsv_nomg(l, loader); loader = l; } - const char *method = NULL; - if (sv_isobject(loader)) { - method = "INC"; + if (method) { count = call_method(method, G_LIST|G_EVAL); } else { count = call_sv(loader, G_LIST|G_EVAL); @@ -4482,12 +4512,13 @@ S_require_file(pTHX_ SV *sv) filter_sub = NULL; } } - else if (path_searchable) { + else + treat_as_string: + if (path_searchable) { /* match against a plain @INC element (non-searchable * paths are only matched against refs in @INC) */ const char *dir; STRLEN dirlen; - if (SvOK(dirsv)) { dir = SvPV_nomg_const(dirsv, dirlen); } else { diff --git a/t/op/require_errors.t b/t/op/require_errors.t index 849d57fc325b..b4d07016d790 100644 --- a/t/op/require_errors.t +++ b/t/op/require_errors.t @@ -348,7 +348,6 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, { }, 'INCDIR works as expected'); } { - local $::TODO = "Pending object handling improvements"; # as of 5.37.7 fresh_perl_like( '@INC = ("a",bless({},"CB"),"e");' @@ -357,7 +356,6 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, { }, 'Objects with no INC or INCDIR method are stringified'); } { - local $::TODO = "Pending object handling improvements"; # as of 5.37.7 fresh_perl_like( '{package CB; use overload qw("")=>sub { "blorg"};} ' @@ -367,7 +365,6 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, { }, 'Objects with overload and no INC or INCDIR method are stringified'); } { - local $::TODO = "Pending object handling improvments"; # as of 5.37.7 fresh_perl_like( '@INC = ("a",bless(sub { warn "blessed sub called" },"CB"),"e");' @@ -376,7 +373,6 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, { }, 'Blessed subs with no hook methods are executed'); } { - local $::TODO = "Pending better error messages (eval)"; # as of 5.37.7 fresh_perl_like( '@INC = ("a",bless(sub { die "blessed sub called" },"CB"),"e");' @@ -394,7 +390,6 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, { }, 'Blessed subs with methods call method and produce expected message'); } { - local $::TODO = "Pending object handling improvments"; # as of 5.37.7 fresh_perl_like( '@INC = ("a",[bless([],"CB"),1],"e");' From ff72302a3c28539b9b183c9937184f670ea06936 Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Fri, 25 Nov 2022 20:38:04 +0100 Subject: [PATCH 10/12] pp_ctl.c - add support for an INCDIR hook This hook returns a list of directories for Perl to search. If it returns an empty list it acts like a no-op (except for the error message). The return from INCDIR is always stringified, they are not treated the same as normal @INC entries so no hooks returning hooks. --- pp_ctl.c | 80 ++++++++++++++++++++++++++++++++++++++++--- t/op/inccode.t | 1 - t/op/require_errors.t | 3 -- 3 files changed, 76 insertions(+), 8 deletions(-) diff --git a/pp_ctl.c b/pp_ctl.c index 6c3a34b20c94..e5db2ba1541a 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -4266,11 +4266,32 @@ S_require_file(pTHX_ SV *sv) if (vms_unixname) #endif { - SV *nsv = sv; + AV *incdir_av = (AV*)sv_2mortal((SV*)newAV()); + SV *nsv = sv; /* non const copy we can change if necessary */ namesv = newSV_type(SVt_PV); AV *inc_ar = GvAVn(PL_incgv); - for (inc_idx = 0; inc_idx <= AvFILL(inc_ar); inc_idx++) { - SV *dirsv = *av_fetch(inc_ar, inc_idx, TRUE); + SSize_t incdir_continue_inc_idx = -1; + + for ( + inc_idx = 0; + (AvFILL(incdir_av)>=0 /* we have INCDIR items pending */ + || inc_idx <= AvFILL(inc_ar)); /* @INC entries remain */ + inc_idx++ + ) { + SV *dirsv; + + /* do we have any pending INCDIR items? */ + if (AvFILL(incdir_av)>=0) { + /* yep, shift it out */ + dirsv = av_shift(incdir_av); + if (AvFILL(incdir_av)<0) { + /* incdir is now empty, continue from where + * we left off after we process this entry */ + inc_idx = incdir_continue_inc_idx; + } + } else { + dirsv = *av_fetch(inc_ar, inc_idx, TRUE); + } if (SvGMAGICAL(dirsv)) { SvGETMAGIC(dirsv); @@ -4289,6 +4310,7 @@ S_require_file(pTHX_ SV *sv) int count; SV **svp; SV *loader = dirsv; + UV diruv = PTR2UV(SvRV(dirsv)); if (SvTYPE(SvRV(loader)) == SVt_PVAV && !SvOBJECT(SvRV(loader))) @@ -4298,7 +4320,7 @@ S_require_file(pTHX_ SV *sv) } Perl_sv_setpvf(aTHX_ namesv, "/loader/0x%" UVxf "/%s", - PTR2UV(SvRV(dirsv)), name); + diruv, name); tryname = SvPVX_const(namesv); tryrsfp = NULL; @@ -4308,6 +4330,7 @@ S_require_file(pTHX_ SV *sv) } const char *method = NULL; + bool is_incdir = FALSE; SV * inc_idx_sv = save_scalar(PL_incgv); sv_setiv(inc_idx_sv,inc_idx); if (sv_isobject(loader)) { @@ -4318,6 +4341,12 @@ S_require_file(pTHX_ SV *sv) GV * gv = gv_fetchmethod_pvn_flags(pkg, "INC", 3, 0); if (gv && isGV(gv)) { method = "INC"; + } else { + gv = gv_fetchmethod_pvn_flags(pkg, "INCDIR", 6, 0); + if (gv && isGV(gv)) { + method = "INCDIR"; + is_incdir = TRUE; + } } /* But if we have no method, check if this is a * coderef, if it is then we treat it as an @@ -4367,6 +4396,48 @@ S_require_file(pTHX_ SV *sv) SV *arg; SP -= count - 1; + + if (is_incdir) { + /* push the stringified returned items into the + * incdir_av array for processing immediately + * afterwards. we deliberately stringify or copy + * "special" arguments, so that overload logic for + * instance applies, but so that the end result is + * stable. We speficially do *not* support returning + * coderefs from an INCDIR call. */ + while (count-->0) { + arg = SP[i++]; + SvGETMAGIC(arg); + if (!SvOK(arg)) + continue; + if (SvROK(arg)) { + STRLEN l; + char *pv = SvPV(arg,l); + arg = newSVpvn(pv,l); + } + else if (SvGMAGICAL(arg)) { + arg = newSVsv_nomg(arg); + } + else { + SvREFCNT_inc(arg); + } + av_push(incdir_av, arg); + } + /* We copy $INC into incdir_continue_inc_idx + * so that when we finish processing the items + * we just inserted into incdir_av we can continue + * as though we had just finished executing the INCDIR + * hook. We honour $INC here just like we would for + * an INC hook, the hook might have rewritten @INC + * at the same time as returning something to us. + */ + inc_idx_sv = GvSVn(PL_incgv); + incdir_continue_inc_idx = SvOK(inc_idx_sv) + ? SvIV(inc_idx_sv) : -1; + + goto done_hook; + } + arg = SP[i++]; if (SvROK(arg) && (SvTYPE(SvRV(arg)) <= SVt_PVLV) @@ -4415,6 +4486,7 @@ S_require_file(pTHX_ SV *sv) tryrsfp = PerlIO_open(BIT_BUCKET, PERL_SCRIPT_MODE); } + done_hook: SP--; } else { SV *errsv= ERRSV; diff --git a/t/op/inccode.t b/t/op/inccode.t index 728855178421..26b6993f299f 100644 --- a/t/op/inccode.t +++ b/t/op/inccode.t @@ -399,7 +399,6 @@ if ($can_fork) { } SKIP:{ skip "need fork",1 unless $can_fork; - local $::TODO = "Pending"; fresh_perl_like('@INC=("A",bless({},"Hook"),"D"); ' .'sub Hook::INCDIR { return "B","C"} ' .'eval "require Frobnitz" or print $@;', diff --git a/t/op/require_errors.t b/t/op/require_errors.t index b4d07016d790..a7ce7c1cefce 100644 --- a/t/op/require_errors.t +++ b/t/op/require_errors.t @@ -338,7 +338,6 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, { }, 'INC hooks that overwrite @INC and undef $INC continue at start'); } { - local $::TODO = "Pending new feature: INCDIR"; # as of 5.37.7 fresh_perl_like( 'sub CB::INCDIR { return "b", "c","d" }; ' @@ -398,7 +397,6 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, { }, 'Blessed objects with no hook methods in array form produce expected exception'); } { - local $::TODO = "Pending new feature: INCDIR"; # as of 5.37.7 fresh_perl_like( 'sub CB::INCDIR { "i" } sub CB2::INCDIR { }' @@ -408,7 +406,6 @@ like $@, qr/^Can't locate \Q$nonsearch\E at/, { }, 'Blessed subs with INCDIR methods call INCDIR'); } { - local $::TODO = "Pending new feature: INCDIR"; # as of 5.37.7 fresh_perl_like( 'sub CB::INCDIR { return @{$_[2]} }' From 9cb789966e9b6acee596f1878324fcc4c97e7d03 Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Fri, 25 Nov 2022 20:39:42 +0100 Subject: [PATCH 11/12] pp_ctl.c - move logic closer to where it is used No sense doing something that has not effect. --- pp_ctl.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pp_ctl.c b/pp_ctl.c index e5db2ba1541a..07dd227b4fab 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -4319,11 +4319,6 @@ S_require_file(pTHX_ SV *sv) SvGETMAGIC(loader); } - Perl_sv_setpvf(aTHX_ namesv, "/loader/0x%" UVxf "/%s", - diruv, name); - tryname = SvPVX_const(namesv); - tryrsfp = NULL; - if (SvPADTMP(nsv)) { nsv = sv_newmortal(); SvSetSV_nosteal(nsv,sv); @@ -4370,6 +4365,11 @@ S_require_file(pTHX_ SV *sv) } } + Perl_sv_setpvf(aTHX_ namesv, "/loader/0x%" UVxf "/%s", + diruv, name); + tryname = SvPVX_const(namesv); + tryrsfp = NULL; + ENTER_with_name("call_INC_hook"); SAVETMPS; EXTEND(SP, 2 + ((method && (loader != dirsv)) ? 1 : 0)); From 9db465e391d25c4461e1df92826c0d16cfbe3a69 Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Tue, 6 Dec 2022 12:33:11 +0100 Subject: [PATCH 12/12] pod/ - document $INC and INCDIR --- pod/perldelta.pod | 31 +++++++++++ pod/perlfunc.pod | 131 +++++++++++++++++++++++++++++++++++++--------- pod/perlvar.pod | 17 ++++++ 3 files changed, 153 insertions(+), 26 deletions(-) diff --git a/pod/perldelta.pod b/pod/perldelta.pod index dc7cd8343db4..f2bd3d00b63a 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -35,6 +35,37 @@ the caller provided an undefined or false value (respectively), rather than simply when the parameter is missing entirely. For more detail see the documentation in L. +=head2 @INC Hook Enhancements and $INC and INCDIR + +The internals for C<@INC> hooks have been hardened to handle various +edge cases and should no longer segfault or throw assert failures when +hooks modify C<@INC> during a require operation. As part of this we +now ensure that any given hook is executed at most once during a require +call, and that any duplicate directories do not trigger additional +directories probes. + +To provide developers more control over dynamic module lookup a new hook +method C is now supported. An object supporting this method may be +injected into the C<@INC> array, and when it is encountered in the module +search process it will be executed, just like how INC hooks are executed, +and its return value used as a list of directories to search for the +module. Returning an empty list acts as a no-op. Note that any references +returned by this hook will be stringified and used as strings, you may not +return a hook to be executed later via this API. + +When an C<@INC> hook (either C or C) is called during +require the C<$INC> variable will be localized to be the value of the +index of C<@INC> that the hook came from. If the hook wishes to override +what the "next" index in C<@INC> should be it may update C<$INC> to be one +less than the desired index (C is equivalent to C<-1>). This +allows an C<@INC> hook to completely rewrite the C<@INC> array and have +perl restart its directory probes from the beginning of C<@INC>. + +Blessed CODE references in C<@INC> that do not support the C or +C methods will no longer trigger an exception, and instead will +be treated the same as unblessed coderefs are, and executed as though +they were an C hook. + =head1 Security XXX Any security-related notices go here. In particular, any security diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index 39f749977dd4..81cd68eb1e0c 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -6716,8 +6716,14 @@ would have semantics similar to the following: croak "Compilation failed in require"; } - foreach $prefix (@INC) { - if (ref($prefix)) { + local $INC; + # this type of loop lets a hook overwrite $INC if they wish + for($INC = 0; $INC < @INC; $INC++) { + my $prefix = $INC[$INC]; + if (!defined $prefix) { + next; + } + if (ref $prefix) { #... do other stuff - see text below .... } # (see text below about possible appending of .pmc @@ -6800,16 +6806,20 @@ F<.pmc> extension. If this file is found, it will be loaded in place of any file ending in a F<.pm> extension. This applies to both the explicit C form and the C form. -You can also insert hooks into the import facility by putting Perl code -directly into the L|perlvar/@INC> array. There are three forms -of hooks: subroutine references, array references, and blessed objects. +You can also insert hooks into the import facility by putting Perl +coderefs or objects directly into the L|perlvar/@INC> array. +There are two types of hooks, INC filters, and INCDIR hooks, and there +are three forms of representing a hook: subroutine references, array +references, and blessed objects. Subroutine references are the simplest case. When the inclusion system -walks through L|perlvar/@INC> and encounters a subroutine, this -subroutine gets called with two parameters, the first a reference to -itself, and the second the name of the file to be included (e.g., -F). The subroutine should return either nothing or else a -list of up to four values in the following order: +walks through L|perlvar/@INC> and encounters a subroutine, unless +this subroutine is blessed and supports an INCDIR hook this +subroutine will be assumed to be an INC hook will be called with two +parameters, the first a reference to itself, and the second the name of +the file to be included (e.g., F). The subroutine should +return either nothing or else a list of up to four values in the +following order: =over @@ -6848,10 +6858,37 @@ Note that this filehandle must be a real filehandle (strictly a typeglob or reference to a typeglob, whether blessed or unblessed); tied filehandles will be ignored and processing will stop there. -If the hook is an array reference, its first element must be a subroutine -reference. This subroutine is called as above, but the first parameter is -the array reference. This lets you indirectly pass arguments to -the subroutine. +If the hook is an object, it should provide an C or C +method that will be called as above, the first parameter being the +object itself. If it does not provide either method, and the object is +not CODE ref then an exception will be thrown, otherwise it will simply +be executed like an unblessed CODE ref would. Note that you must fully +qualify the method name when you declare an C sub (unlike the +C sub), as the unqualified symbol C is always forced into +package C
. Here is a typical code layout for an C hook: + + # In Foo.pm + package Foo; + sub new { ... } + sub Foo::INC { + my ($self, $filename) = @_; + ... + } + + # In the main program + push @INC, Foo->new(...); + +If the hook is an array reference, its first element must be a +subroutine reference or an object as described above. When the first +element is an object that supports an C or C method then +the method will be called with the object as the first argument, the +filename requested as the second, and the hook array reference as the +the third. When the first element is a subroutine then it will be +called with the array as the first argument, and the filename as the +second, no third parameter will be passed in. In both forms you can +modify the contents of the array to provide state between calls, or +whatever you like. + In other words, you can write: @@ -6871,24 +6908,66 @@ or: ... } -If the hook is an object, it must provide an C method that will be -called as above, the first parameter being the object itself. (Note that -you must fully qualify the sub's name, as unqualified C is always forced -into package C
.) Here is a typical code layout: +or: - # In Foo.pm - package Foo; - sub new { ... } - sub Foo::INC { - my ($self, $filename) = @_; + push @INC, [ HookObj->new(), $x, $y, ... ]; + sub HookObj::INC { + my ($self, $filename, $arrayref)= @_; + my (undef, @parameters) = @$arrayref; ... } - # In the main program - push @INC, Foo->new(...); - These hooks are also permitted to set the L|perlvar/%INC> entry corresponding to the files they have loaded. See L. +Should an C hook not do this then perl will set the C<%INC> entry +to be the hook reference itself. + +A hook may also be used to rewrite the C<@INC> array. While this might +sound strange, there are situations where it can be very useful to do +this. Such hooks usually just return undef and do not mix filtering and +C<@INC> modifications. While in older versions of perl having a hook +modify C<@INC> was fraught with issues and could even result in +segfaults or assert failures, as of 5.37.7 the logic has been made much +more robust and the hook now has control over the loop iteration if it +wishes to do so. + +There is a now a facility to control the iterator for the C<@INC> array +traversal that is performed during require. The C<$INC> variable will be +initialized with the index of the currently executing hook. Once the +hook returns the next slot in C<@INC> that will be checked will be the +integer successor of value in C<$INC> (or -1 if it is undef). For example +the following code + + push @INC, sub { + splice @INC, $INC, 1; # remove this hook from @INC + unshift @INC, sub { warn "A" }; + undef $INC; # reset the $INC iterator so we + # execute the newly installed sub + # immediately. + }; + +would install a sub into C<@INC> that when executed as a hook (by for +instance a require of a file that does not exist), the hook will splice +itself out of C<@INC>, and add a new sub to the front that will warn +whenever someone does a require operation that requires an C<@INC> +search, and then immediately execute that hook. + +Prior to 5.37.7, there was no way to cause perl to use the newly +installed hook immediately, or to inspect any changed items in C<@INC> to +the left of the iterator, and so the warning would only be generated on +the second call to require. In more recent perl the presence of the last +statement which undefines C<$INC> will cause perl to restart the +traversal of the C<@INC> array at the beginning and execute the newly +installed sub immediately. + +Whatever value C<$INC> held, if any, will be restored at the end of the +require. Any changes made to C<$INC> during the lifetime of the hook +will be unrolled after the hook exits, and its value only has meaning +immediately after execution of the hook, thus setting C<$INC> to some +value prior to executing a C will have no effect on how the +require executes at all. + +As of 5.37.7 C<@INC> values of undef will be silently ignored. For a yet-more-powerful import facility, see L|/use Module VERSION LIST> and L. diff --git a/pod/perlvar.pod b/pod/perlvar.pod index e74bf4dae5ab..86ea6fc9cc52 100644 --- a/pod/perlvar.pod +++ b/pod/perlvar.pod @@ -542,6 +542,23 @@ by default inserted into C<%INC> in place of a filename. Note, however, that the hook may have set the C<%INC> entry by itself to provide some more specific info. +=item $INC +X<$INC> + +As of 5.37.7 when an C<@INC> hook is executed the index of the C<@INC> +array that holds the hook will be localized into the C<$INC> variable. +When the hook returns the integer successor of its value will be used to +determine the next index in C<@INC> that will be checked, thus if it is +set to -1 (or C) the traversal over the C<@INC> array will be +restarted from its beginning. + +Normally traversal through the C<@INC> array is from beginning to end +(C<0 .. $#INC>), and if the C<@INC> array is modified by the hook the +iterator may be left in a state where newly added entries are skipped. +Changing this value allows an C<@INC> hook to rewrite the C<@INC> array +and tell Perl where to continue afterwards. See L for +details on C<@INC> hooks. + =item $INPLACE_EDIT =item $^I