From c885aa2beb87b082941c129fd6dedf0b994320fa Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Tue, 11 Nov 2025 11:06:50 +0000 Subject: [PATCH 1/3] propagate correct ref context to both ?: branches GH #18669 In something like @{ expr } = ... the expression is expected to return an array ref. If the expression is something like $h{foo}, then the helem op needs to know both that: - it is in lvalue context, so should autovivify the foo element if not present; - it is in array ref context, so it should autovivify the value to an empty array ref, rather than just to undef. The function Perl_doref() is used to propagate this ref context at compile time, e.g. by setting the OPf_MOD and OPpDEREF_AV flags on the OP_HELEM op. My commit v5.31.1-87-ge9b0092a10 made this function non-recursive (so that deep expressions wouldn't SEGV during compilation), but introduced a bug when the expression included the ternary condition operator, '?:'. In particular, since '?:' is the only OP where doref() needs to recurse down *two* branches, I made the function just iterate down the tree, and then have special handling for OP_COND_EXPR. This involved, once having finished iterating down the tree, to work back up the tree looking for OP_COND_EXPR nodes, and if found, iterate back down the second branch. This had a fatal flaw: a 'type' variable indicated what context to apply. For example in @{$h{expr}} = ..., type would start off as OP_RV2AV, but as the tree was walked, would change to OP_HELEM and then to OP_RV2HV. When walking back up the tree, this value wasn't being restored. The specific bug in the ticket boiled down to something like @{ $cond ? $h{p} : $h{q} } = ...; where the correct OPpDEREF_AV flag was being set on the first helem op, but an incorrect OPpDEREF_HV on the second. Since I can't think of anything better, the fix in this commit restores some limited recursion to doref(). Namely, for an OP_COND_EXPR op, it now recurses down that op's first branch, then after it returns, iterates as normal down the second branch. Thus extremely deeply nested ternary code like: @{ $c1 ? $c2 ? $c3 ? .... } ... could start to SEGV during compilation again. --- op.c | 26 +++++++++++--------------- t/op/ref.t | 43 ++++++++++++++++++++++++++++++++++++++++++- t/run/todo.t | 35 ----------------------------------- 3 files changed, 53 insertions(+), 51 deletions(-) diff --git a/op.c b/op.c index 37062d237ee5..6af254cefdee 100644 --- a/op.c +++ b/op.c @@ -3774,7 +3774,13 @@ Perl_doref(pTHX_ OP *o, I32 type, bool set_op_ref) break; case OP_COND_EXPR: + /* OP_COND_EXPR is the only op where we have to propagate + * context to *both* branches. Recurse on the first branch, + * then iterate on the second branch. + */ o = OpSIBLING(cUNOPo->op_first); + doref(o, type, set_op_ref); + o = OpSIBLING(o); continue; case OP_RV2SV: @@ -3847,22 +3853,12 @@ Perl_doref(pTHX_ OP *o, I32 type, bool set_op_ref) break; } /* switch */ - while (1) { - if (o == top_op) - return scalar(top_op); /* at top; no parents/siblings to try */ - if (OpHAS_SIBLING(o)) { - o = o->op_sibparent; - /* Normally skip all siblings and go straight to the parent; - * the only op that requires two children to be processed - * is OP_COND_EXPR */ - if (!OpHAS_SIBLING(o) - && o->op_sibparent->op_type == OP_COND_EXPR) - break; - continue; - } - o = o->op_sibparent; /* try parent's next sibling */ - } + /* whole tree has been scanned for ref stuff; now propagate + * scalar context */ + return scalar(top_op); + } /* while */ + } diff --git a/t/op/ref.t b/t/op/ref.t index 3cf6ab047259..a44cff17dc23 100644 --- a/t/op/ref.t +++ b/t/op/ref.t @@ -8,7 +8,7 @@ BEGIN { use strict qw(refs subs); -plan(257); +plan(265); # Test this first before we extend the stack with other operations. # This caused an asan failure due to a bad write past the end of the stack. @@ -913,6 +913,47 @@ EOF 'rt#130861: heap uaf in pp_rv2sv'); } +# GH 18669 +# The correct autovivification lvalue ref context should be propagated to +# both branches of a ?:. So in something like: +# @{ $cond ? $h{a} : $h{b} } = ...; +# the helem ops on *both* sides of the conditional should get the DREFAV +# flag set, indicating that if the hash element doesn't exist, it should +# be autovivified as an *array ref*. +# + +{ + my $x = { arr => undef }; + eval { + push(@{ $x->{ decide } ? $x->{ not_here } : $x->{ new } }, "mana"); + }; + + is($@, "", "GH 18669: push on non-existent hash ref entry: no errors"); + is(eval {$x->{new}[0] }, 'mana', + "GH 18669: push on non-existent hash ref entry: autovivifies" + ); + + $x = { arr => undef }; + eval { + push(@{ $x->{ decide } ? $x->{ not_here } : $x->{ arr } }, "mana"); + }; + + is($@, "", "GH 18669: push on undef hash ref entry: no errors"); + is(eval { $x->{arr}[0] }, 'mana', + "GH 18669: push on undef hash ref entry: autovivifies" + ); + + # try both branches + for my $cond (0, 1) { + my %h; + eval { @{ $cond ? $h{p} : $h{q} } = 99; }; + is($@, "", "GH 18669: array assign on $cond cond: no errors"); + is($h{$cond ? 'p' : 'q'}[0], 99, + "GH 18669: array assign on $cond cond: autovivifies" + ); + } +} + # Bit of a hack to make test.pl happy. There are 3 more tests after it leaves. $test = curr_test(); curr_test($test + 3); diff --git a/t/run/todo.t b/t/run/todo.t index dae83f56e7f8..9eb865c1ad6e 100644 --- a/t/run/todo.t +++ b/t/run/todo.t @@ -352,41 +352,6 @@ TODO: { is($?, 0, "No panic; GH 16971"); } -TODO: { - local $::TODO = 'GH 18669'; - - my $x = { arr => undef }; - eval { - push(@{ $x->{ decide } ? $x->{ not_here } : $x->{ new } }, "mana"); - }; - unlike( - $@, - qr/Not an ARRAY reference/, - "push on non-existent hash entry does not throw 'Not an ARRAY reference' error; GH 18669" - ); - is( - eval { $x->{ new }[0] }, - 'mana', - 'push on non-existent hash entry from ternary autovivifies array ref; GH 18669' - ); - - $x = { arr => undef }; - eval { - push(@{ $x->{ decide } ? $x->{ not_here } : $x->{ arr } }, "mana"); - }; - unlike( - $@, - qr/Not an ARRAY reference/, - "push on undef hash entry does not throw 'Not an ARRAY reference' error; GH 18669" - ); - is( - eval { $x->{ arr }[0] }, - 'mana', - 'push on undef hash entry from ternary autovivifies array ref; GH 18669' - ); - -} - TODO: { local $::TODO = 'GH 19378'; fresh_perl_like( From 2faee6b142a7cfa20e67eae0383cd1f8b3c9ed5f Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Tue, 11 Nov 2025 11:57:14 +0000 Subject: [PATCH 2/3] Perl_doref(): eliminate duplicated code This compile-time function propagates lvalue ref context down a chain of ops. It does the same thing (setting OPf_MOD and OPpDEREF_XV flags) in three places. Consolidate this code into a single place. Should be no functional changes. Technically the code is slightly different in that OP_[AH]ELEM now checks for kids before following them, but since they always have kids, this makes no difference (except being infinitesimally slower during compilation). --- op.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/op.c b/op.c index 6af254cefdee..1b41db2e3938 100644 --- a/op.c +++ b/op.c @@ -3764,12 +3764,8 @@ Perl_doref(pTHX_ OP *o, I32 type, bool set_op_ref) op_null(cLISTOPx(cUNOPo->op_first)->op_first); o->op_flags |= OPf_SPECIAL; } - else if (type == OP_RV2SV || type == OP_RV2AV || type == OP_RV2HV){ - o->op_private |= (type == OP_RV2AV ? OPpDEREF_AV - : type == OP_RV2HV ? OPpDEREF_HV - : OPpDEREF_SV); - o->op_flags |= OPf_MOD; - } + else + goto set_cxt; break; @@ -3787,14 +3783,19 @@ Perl_doref(pTHX_ OP *o, I32 type, bool set_op_ref) if (type == OP_DEFINED) o->op_flags |= OPf_SPECIAL; /* don't create GV */ /* FALLTHROUGH */ + case OP_AELEM: + case OP_HELEM: case OP_PADSV: + set_cxt: + /* if the parent wants an SV/AV/HV ref, set flags indicating + * that this op should autovivify such a value if need be */ if (type == OP_RV2SV || type == OP_RV2AV || type == OP_RV2HV) { o->op_private |= (type == OP_RV2AV ? OPpDEREF_AV : type == OP_RV2HV ? OPpDEREF_HV : OPpDEREF_SV); o->op_flags |= OPf_MOD; } - if (o->op_flags & OPf_KIDS) { + if (o->op_flags & OPf_KIDS && o->op_type != OP_ENTERSUB) { type = o->op_type; o = cUNOPo->op_first; continue; @@ -3826,18 +3827,6 @@ Perl_doref(pTHX_ OP *o, I32 type, bool set_op_ref) o = cBINOPo->op_first; continue; - case OP_AELEM: - case OP_HELEM: - if (type == OP_RV2SV || type == OP_RV2AV || type == OP_RV2HV) { - o->op_private |= (type == OP_RV2AV ? OPpDEREF_AV - : type == OP_RV2HV ? OPpDEREF_HV - : OPpDEREF_SV); - o->op_flags |= OPf_MOD; - } - type = o->op_type; - o = cBINOPo->op_first; - continue;; - case OP_SCOPE: case OP_LEAVE: set_op_ref = FALSE; From 859c02d48888d2475109ed05a0858f91d64daea9 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Tue, 11 Nov 2025 12:45:56 +0000 Subject: [PATCH 3/3] Perl_doref(): improve code comments Having just messed with this function, I understand it better, so can comment it better. --- op.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/op.c b/op.c index 1b41db2e3938..d0e481b614f3 100644 --- a/op.c +++ b/op.c @@ -3731,16 +3731,25 @@ S_refkids(pTHX_ OP *o, I32 type) } -/* Apply reference (autovivification) context to the subtree at o. - * For example in - * push @{expression}, ....; - * o will be the head of 'expression' and type will be OP_RV2AV. - * It marks the op o (or a suitable child) as autovivifying, e.g. by - * setting OPf_MOD. - * For OP_RV2AV/OP_PADAV and OP_RV2HV/OP_PADHV sets OPf_REF too if - * set_op_ref is true. +/* doref(): apply reference autovivification context (and scalar and + * lvalue context) to a subtree. For example, in: + * + * @{expression} = ...; + * + * the expression is expected to return an AV ref. If the expression + * is (for example) $h{foo}, then the OP_HELEM op associated with the + * expression needs to be flagged with: + * - OPf_MOD to indicate that it should autovivify if the element + * doesn't exist + * - OPpDEREF_AV to indicate that the autovivified return value should + * be [] rather than undef. + * + * The 'o' parameter is the head of the expression and 'type' is the + * context to apply (OP_RV2AV in the example above). + * + * If 'set_op_ref' is true, it also sets the OPf_REF flag on OP_RV2[AH]V + * and OP_PAD[AH]V ops * - * Also calls scalar(o). */ OP * @@ -3753,11 +3762,16 @@ Perl_doref(pTHX_ OP *o, I32 type, bool set_op_ref) if (PL_parser && PL_parser->error_count) return o; + /* iterate down the tree */ + while (1) { switch (o->op_type) { case OP_ENTERSUB: if ((type == OP_EXISTS || type == OP_DEFINED) && - !(o->op_flags & OPf_STACKED)) { + !(o->op_flags & OPf_STACKED)) + { + /* 'defined &foo' etc: downgrade from a func call + * to just a special CV retrieval */ OpTYPE_set(o, OP_RV2CV); /* entersub => rv2cv */ assert(cUNOPo->op_first->op_type == OP_NULL); /* disable pushmark */ @@ -3796,6 +3810,7 @@ Perl_doref(pTHX_ OP *o, I32 type, bool set_op_ref) o->op_flags |= OPf_MOD; } if (o->op_flags & OPf_KIDS && o->op_type != OP_ENTERSUB) { + /* propagate the context of *this* op to its children */ type = o->op_type; o = cUNOPo->op_first; continue;