diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 062eef0480db..7ff321cdfb9d 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -348,7 +348,119 @@ well. =item * -XXX +Reading the string form of an integer value no longer sets the flag C. +The string form is still cached internally, and still re-read directly by the +macros C I (inline, without calling a C function). XS code that +already the APIs to get values will not be affected by this change. XS code +that accesses flags directly instead of using API calls to express its intent +I break, but such code likely is already buggy if passed some other +values, such as floating point values or objects with string overloading. + +This small change permits code (such as JSON serealisers) to reliably determine +between + +=over 4 + +=item * + +a value that was initially B as an integer, but then B as a string + + my $answer = 42; + print "The answer is $answer\n"; + +=item * + +that same value that was initially B as a string, but then B as an integer + + my $answer = "42"; + print "That doesn't look right\n" + unless $answer == 6 * 9; + +=back + +For the first case (originally written as an integer), we now have: + + use Devel::Peek; + my $answer = 42; + Dump ($answer); + my $void = "$answer"; + print STDERR "\n"; + Dump($answer) + + + SV = IV(0x562538925778) at 0x562538925788 + REFCNT = 1 + FLAGS = (IOK,pIOK) + IV = 42 + + SV = PVIV(0x5625389263c0) at 0x562538925788 + REFCNT = 1 + FLAGS = (IOK,pIOK,pPOK) + IV = 42 + PV = 0x562538919b50 "42"\0 + CUR = 2 + LEN = 10 + +For the second (originally written as a string), we now have: + + use Devel::Peek; + my $answer = "42"; + Dump ($answer); + my $void = $answer == 6 * 9; + print STDERR "\n"; + Dump($answer)' + + + SV = PV(0x5586ffe9bfb0) at 0x5586ffec0788 + REFCNT = 1 + FLAGS = (POK,IsCOW,pPOK) + PV = 0x5586ffee7fd0 "42"\0 + CUR = 2 + LEN = 10 + COW_REFCNT = 1 + + SV = PVIV(0x5586ffec13c0) at 0x5586ffec0788 + REFCNT = 1 + FLAGS = (IOK,POK,IsCOW,pIOK,pPOK) + IV = 42 + PV = 0x5586ffee7fd0 "42"\0 + CUR = 2 + LEN = 10 + COW_REFCNT = 1 + +(One can't rely on the presence or absence of the flag C to +determine the history of operations on a scalar.) + +Previously both cases would be indistinguishable, with all 4 flags set: + + SV = PVIV(0x55d4d62edaf0) at 0x55d4d62f0930 + REFCNT = 1 + FLAGS = (IOK,POK,pIOK,pPOK) + IV = 42 + PV = 0x55d4d62e1740 "42"\0 + CUR = 2 + LEN = 10 + +(and possibly C, but not always) + +This now means that if XS code I needs to determine which form a value +was first written as, it should implement logic roughly + + if (flags & SVf_IOK|SVf_NOK) && !(flags & SVf_POK) + serealise as number + else if (flags & SVf_POK) + serealise as string + else + the existing guesswork ... + +Note that this doesn't cover "dualvars" - scalars that report different +values when asked for their string form or number form (such as C<$!>). +Most serialisation formats cannot represent such duplicity. + +I remains because as well as dualvars, values might +be C, references, overloaded references, typeglobs and other things that +Perl itself can represent but do not map one-to-one into external formats, so +need some amount of approximation or encapsulation. =back diff --git a/sv.c b/sv.c index b199e1a94735..eed290d15bb9 100644 --- a/sv.c +++ b/sv.c @@ -3141,7 +3141,19 @@ Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const U32 flags) Move(ptr, s, len, char); s += len; *s = '\0'; - SvPOK_on(sv); + /* We used to call SvPOK_on(). Whilst this is fine for (most) Perl code, + it means that after this stringification is cached, there is no way + to distinguish between values originally assigned as $a = 42; and + $a = "42"; (or results of string operators vs numeric operators) + where the value has subsequently been used in the other "context" + and had a value cached. + This (somewhat) hack means that we retain the cached stringification, + but the existing SvPV() macros end up entering this function (which + returns the cached value) instead of using it directly inline. + However, the result is that if a value is SVf_IOK|SVf_POK then it + originated as "42", whereas if it's SVf_IOK then it originated as 42. + (ignore SVp_IOK and SVp_POK) */ + SvPOKp_on(sv); } else if (SvNOK(sv)) { if (SvTYPE(sv) < SVt_PVNV) @@ -4697,6 +4709,12 @@ Perl_sv_setsv_flags(pTHX_ SV *dsv, SV* ssv, const I32 flags) SvIV_set(dsv, SvIVX(ssv)); if (sflags & SVf_IVisUV) SvIsUV_on(dsv); + if ((sflags & SVf_IOK) && !(sflags & SVf_POK)) { + /* Source was SVf_IOK|SVp_IOK|SVp_POK but not SVf_POK, meaning + an value set as an integer and later stringified. So mark + destination the same: */ + SvFLAGS(dsv) &= ~SVf_POK; + } } SvFLAGS(dsv) |= sflags & (SVf_IOK|SVp_IOK|SVf_NOK|SVp_NOK|SVf_UTF8); { diff --git a/sv.h b/sv.h index 17d376cb161f..44520cf55e6f 100644 --- a/sv.h +++ b/sv.h @@ -1844,19 +1844,22 @@ scalar. #define SvPV_const(sv, len) SvPV_flags_const(sv, len, SV_GMAGIC) #define SvPV_mutable(sv, len) SvPV_flags_mutable(sv, len, SV_GMAGIC) -#define SvPV_flags(sv, len, flags) \ - (SvPOK_nog(sv) \ +#define SvPOK_or_cached_IV(sv) \ + (((SvFLAGS(sv) & (SVf_POK|SVs_GMG)) == SVf_POK) || ((SvFLAGS(sv) & (SVf_IOK|SVp_POK|SVs_GMG)) == (SVf_IOK|SVp_POK))) + +#define SvPV_flags(sv, len, flags) \ + (SvPOK_or_cached_IV(sv) \ ? ((len = SvCUR(sv)), SvPVX(sv)) : sv_2pv_flags(sv, &len, flags)) #define SvPV_flags_const(sv, len, flags) \ - (SvPOK_nog(sv) \ + (SvPOK_or_cached_IV(sv) \ ? ((len = SvCUR(sv)), SvPVX_const(sv)) : \ (const char*) sv_2pv_flags(sv, &len, (flags|SV_CONST_RETURN))) #define SvPV_flags_const_nolen(sv, flags) \ - (SvPOK_nog(sv) \ + (SvPOK_or_cached_IV(sv) \ ? SvPVX_const(sv) : \ (const char*) sv_2pv_flags(sv, 0, (flags|SV_CONST_RETURN))) #define SvPV_flags_mutable(sv, len, flags) \ - (SvPOK_nog(sv) \ + (SvPOK_or_cached_IV(sv) \ ? ((len = SvCUR(sv)), SvPVX_mutable(sv)) : \ sv_2pv_flags(sv, &len, (flags|SV_MUTABLE_RETURN))) @@ -1881,16 +1884,16 @@ scalar. : sv_pvn_force_flags(sv, &len, flags|SV_MUTABLE_RETURN)) #define SvPV_nolen(sv) \ - (SvPOK_nog(sv) \ + (SvPOK_or_cached_IV(sv) \ ? SvPVX(sv) : sv_2pv_flags(sv, 0, SV_GMAGIC)) /* "_nomg" in these defines means no mg_get() */ #define SvPV_nomg_nolen(sv) \ - (SvPOK_nog(sv) \ + (SvPOK_or_cached_IV(sv) \ ? SvPVX(sv) : sv_2pv_flags(sv, 0, 0)) #define SvPV_nolen_const(sv) \ - (SvPOK_nog(sv) \ + (SvPOK_or_cached_IV(sv) \ ? SvPVX_const(sv) : sv_2pv_flags(sv, 0, SV_GMAGIC|SV_CONST_RETURN)) #define SvPV_nomg(sv, len) SvPV_flags(sv, len, 0) diff --git a/t/porting/known_pod_issues.dat b/t/porting/known_pod_issues.dat index 2641fb9ce040..27883839a48b 100644 --- a/t/porting/known_pod_issues.dat +++ b/t/porting/known_pod_issues.dat @@ -389,6 +389,7 @@ pod/perlandroid.pod Verbatim line length including indents exceeds 78 by 3 pod/perlbook.pod Verbatim line length including indents exceeds 78 by 1 pod/perldebguts.pod Verbatim line length including indents exceeds 78 by 24 pod/perldebtut.pod Verbatim line length including indents exceeds 78 by 2 +pod/perldelta.pod line containing nothing but whitespace in paragraph 6 pod/perldtrace.pod Verbatim line length including indents exceeds 78 by 7 pod/perlgit.pod ? Should you be using F<...> or maybe L<...> instead of 3 pod/perlgit.pod Verbatim line length including indents exceeds 78 by 1