-
Notifications
You must be signed in to change notification settings - Fork 601
Part 1 of implementing RFC0013 (join) #20503
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -657,16 +657,67 @@ Magic and tainting are handled. | |
| =cut | ||
| */ | ||
|
|
||
| PERL_STATIC_INLINE SV * | ||
| S_do_join_inner(pTHX_ SV *lhs, SV *rhs) | ||
| { | ||
| PERL_ARGS_ASSERT_DO_JOIN_INNER; | ||
|
|
||
| /* lhs has already had SvGETMAGIC called */ | ||
| if (SvGAMAGIC(rhs)) { | ||
| SvGETMAGIC(rhs); | ||
| rhs = sv_mortalcopy_flags(rhs, 0); | ||
| } | ||
|
|
||
| int applies_flags = 0; | ||
| /* LD_LIBRARY_PATH=`pwd` perf stat ./perl -e ' | ||
| my @x = ("a" x 100) x 1000; | ||
| for my $i (0 .. 10000) { my $z = join(",", @x); }' | ||
| * We speed this up by 20% (about) with a check for both ROK and RMG, | ||
| * because those must be set if an SV has any overload. */ | ||
| U32 flags = SvFLAGS(lhs)|SvFLAGS(rhs); | ||
| if (UNLIKELY((flags & (SVf_ROK|SVs_GMG))) && | ||
| UNLIKELY(amagic_applies(lhs, concat_amg, applies_flags) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and then replace this call to amagic_applies() with lhs_has_mg. This will halve the number of times this code calls amagic_applies() for a given string. |
||
| || amagic_applies(rhs, concat_amg, applies_flags))) { | ||
|
|
||
| SV *tmpsv = amagic_call(lhs, rhs, concat_amg, 0); | ||
| assert(tmpsv); | ||
| if (!tmpsv) | ||
| croak("panic: do_join_inner expected amagic_call to succeed\n"); | ||
| return tmpsv; | ||
| } | ||
|
|
||
| /* We speed this up another 10% (about) using sv_catpvn_flags instead of | ||
| * sv_catsv_nomg(lhs, rhs). Passing in the *delims to reduce calls | ||
| * to SvPV_nomg_const for the delim made no measurable difference. */ | ||
| STRLEN len; | ||
| const char * const s = SvPV_nomg_const(rhs, len); | ||
| sv_catpvn_flags(lhs, s, len, DO_UTF8(rhs) ? SV_CATUTF8 : SV_CATBYTES); | ||
book marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| return lhs; | ||
| } | ||
|
|
||
| void | ||
| Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp) | ||
| { | ||
| PERL_ARGS_ASSERT_DO_JOIN; | ||
|
|
||
| SV ** const oldmark = mark; | ||
| SV *orig_sv = sv; | ||
| if (SvGAMAGIC(delim)) { | ||
| SvGETMAGIC(delim); | ||
| delim = sv_mortalcopy_flags(delim, 0); | ||
| } | ||
| int applies_flags = 0; | ||
| bool delim_has_concat = amagic_applies(delim, concat_amg, applies_flags); | ||
| I32 items = sp - mark; | ||
| STRLEN len; | ||
| STRLEN delimlen; | ||
| const char * const delims = SvPV_const(delim, delimlen); | ||
| const char * const delims = SvPV_nomg_const(delim, delimlen); | ||
|
|
||
| PERL_ARGS_ASSERT_DO_JOIN; | ||
| /* stringify once and use that unless the delim has concat_amg */ | ||
| if (!delim_has_concat) { | ||
| delim = newSVpvn_flags(delims, delimlen, SVs_TEMP | SvUTF8(delim)); | ||
| } | ||
|
|
||
| mark++; | ||
| len = (items > 0 ? (delimlen * (items - 1) ) : 0); | ||
|
|
@@ -696,30 +747,27 @@ Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp) | |
|
|
||
| if (items-- > 0) { | ||
| if (*mark) | ||
| sv_catsv(sv, *mark); | ||
| sv = do_join_inner(sv, *mark); | ||
| mark++; | ||
| } | ||
|
|
||
| if (delimlen) { | ||
| const U32 delimflag = DO_UTF8(delim) ? SV_CATUTF8 : SV_CATBYTES; | ||
| for (; items > 0; items--,mark++) { | ||
| STRLEN len; | ||
| const char *s; | ||
| sv_catpvn_flags(sv,delims,delimlen,delimflag); | ||
| s = SvPV_const(*mark,len); | ||
| sv_catpvn_flags(sv,s,len, | ||
| DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES); | ||
| sv = do_join_inner(sv, delim); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Notice that here... |
||
| sv = do_join_inner(sv, *mark); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ... here ...
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it would also be interesting to see if the compiler did anything different with or |
||
| } | ||
| } | ||
| else { | ||
| for (; items > 0; items--,mark++) | ||
| { | ||
| STRLEN len; | ||
| const char *s = SvPV_const(*mark,len); | ||
| sv_catpvn_flags(sv,s,len, | ||
| DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES); | ||
| for (; items > 0; items--,mark++) { | ||
| sv = do_join_inner(sv, *mark); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and here we call do_join_inner() with the same SV as the lhs. So we can avoid the need to check it for magic each time we call by passing that state in as a flag.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't the reason we assign the sv each time is that the return value may not be the same sv, especially in the case where the first element didn't have overload concat magic, but the second element does, after which the lhs gets the concat magic? Checking once would break that, right?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, magic isnt viral like that. See comment below. |
||
| } | ||
| } | ||
|
|
||
| if (sv != orig_sv) { | ||
| sv_setsv_nomg(orig_sv, sv); | ||
| sv = orig_sv; | ||
| } | ||
|
|
||
| SvSETMAGIC(sv); | ||
| } | ||
|
|
||
|
|
||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,173 @@ | ||
| use v5.36; | ||
|
|
||
| BEGIN { | ||
| chdir 't' if -d 't'; | ||
| @INC = '../lib'; | ||
| require './test.pl'; | ||
| } | ||
|
|
||
| # This class: | ||
| # - adds * around non-whitespace "words" when stringified | ||
| # - assimilates strings when concatenated (i.e. always return an object) | ||
|
|
||
| package BoldStars; | ||
|
|
||
| use overload | ||
| '""' => sub { | ||
| return ${ $_[0] } =~ s{(\S+)}{*$1*}gr; | ||
| }, | ||
| '.' => sub { | ||
| my ( $left, $right, $swapped ) = @_; | ||
| if ( ref $right eq __PACKAGE__ ) { | ||
| return __PACKAGE__->new( $swapped ? "$$right$$left" : "$$left$$right" ); | ||
| } | ||
| else { | ||
| return __PACKAGE__->new( $swapped ? "$right$$left" : "$$left$right" ); | ||
| } | ||
| }, | ||
| fallback => 1, | ||
| ; | ||
|
|
||
| sub new { | ||
| my ( $class, $string ) = @_; | ||
| return bless \$string, $class; | ||
| } | ||
|
|
||
| package TiedCounter { | ||
| sub TIESCALAR { my $class = shift; bless [''], $class } | ||
| sub FETCH { $_[0][0] .= "-"; return BoldStars->new( $_[0][0] ) } | ||
| } | ||
|
|
||
|
|
||
| # test script starts here | ||
|
|
||
| package main; | ||
| use Data::Dumper; | ||
| use List::Util qw( reduce ); | ||
|
|
||
| # test our test class behaves as expected | ||
| my $kayo = BoldStars->new('kayo'); | ||
| my $biff = BoldStars->new('biff'); | ||
| my $ouch = BoldStars->new('ouch'); | ||
| my $sock = BoldStars->new('sock'); | ||
|
|
||
| is( "$kayo", "*kayo*", 'BoldStars stringification' ); | ||
| is( $kayo . " zamm", "*kayo* *zamm*", 'BoldStars concatenation' ); | ||
| is( "zamm " . $kayo, "*zamm* *kayo*", 'BoldStars concatenation (swapped)' ); | ||
| is( "$kayo + $biff", "*kayo* *+* *biff*", 'BoldStars stringification' ); | ||
|
|
||
| my $obj = "$kayo "; | ||
| is( $obj, '*kayo* ', 'Contatenation with space (right)' ); | ||
| isa_ok( $obj, 'BoldStars' ); | ||
|
|
||
| $obj = " $kayo"; | ||
| is( $obj, ' *kayo*', 'Contatenation with space (left)' ); | ||
| isa_ok( $obj, 'BoldStars' ); | ||
|
|
||
| $obj = $kayo . $biff; | ||
| is( "$obj", "*kayobiff*", "BoldStars concatenation" ); | ||
| isa_ok( $obj, 'BoldStars' ); | ||
|
|
||
| # test that join uses concat overload | ||
| { | ||
| my $expected = BoldStars->new('kayo biff'); | ||
| my $expected_str = "$expected"; | ||
|
|
||
| my $interpolated = "$kayo $biff"; | ||
| is( $interpolated, $expected, 'interpolation (space)' ); | ||
| is( $interpolated, $expected_str, 'interpolation str (space)' ); | ||
| isa_ok( $interpolated, 'BoldStars' ); | ||
|
|
||
| my $reduced = reduce { $a . ' ' . $b } $kayo, $biff; | ||
| is( $reduced, $expected, 'reduce (space)' ); | ||
| is( $reduced, $expected_str, 'reduce str (space)' ); | ||
| isa_ok( $reduced, 'BoldStars' ); | ||
|
|
||
| my $joined = join( ' ', $kayo, $biff ); | ||
| is( $joined, $expected, "join (space)" ); | ||
| is( $joined, $expected_str, "join str (space)" ); | ||
| isa_ok( $joined, 'BoldStars' ); | ||
| } | ||
|
|
||
| { | ||
| my $expected = BoldStars->new('kayo + sock'); | ||
| my $expected_str = "$expected"; | ||
|
|
||
| my $interpolated = "$kayo + $sock"; | ||
| is( $interpolated, $expected, 'interpolation (non-space)' ); | ||
| is( $interpolated, $expected_str, 'interpolation str (non-space)' ); | ||
| isa_ok( $interpolated, 'BoldStars' ); | ||
|
|
||
| my $reduced = reduce { $a . ' + ' . $b } $kayo, $sock; | ||
| is( $reduced, $expected, 'reduce (non-space)' ); | ||
| is( $reduced, $expected_str, 'reduce str (non-space)' ); | ||
| isa_ok( $reduced, 'BoldStars' ); | ||
|
|
||
| my $joined = join( ' + ', $kayo, $sock ); | ||
| is( $joined, $expected, "join (non-space)" ); | ||
| is( $joined, $expected_str, "join str (non-space)" ); | ||
| isa_ok( $joined, 'BoldStars' ); | ||
|
|
||
| } | ||
|
|
||
| # join with a overloaded delim | ||
| { | ||
| my $expected = BoldStars->new('ouch + sock'); | ||
| my $expected_str = "$expected"; | ||
|
|
||
| my $delim = BoldStars->new(' + '); | ||
| my $interpolated = "ouch${delim}sock"; | ||
| is( $interpolated, $expected, 'interpolation (overload-delim)' ); | ||
| is( $interpolated, $expected_str, 'interpolation str (overload-delim)' ); | ||
| isa_ok( $interpolated, 'BoldStars' ); | ||
|
|
||
| my $reduced = reduce { $a . $delim . $b } 'ouch', 'sock'; | ||
| is( $reduced, $expected, 'reduce (overload-delim)' ); | ||
| is( $reduced, $expected_str, 'reduce str (overload-delim)' ); | ||
| isa_ok( $reduced, 'BoldStars' ); | ||
|
|
||
| my $joined = join( $delim, 'ouch', 'sock' ); | ||
| is( $joined, $expected, "join (overload-delim)" ); | ||
| is( $joined, $expected_str, "join str (overload-delim)" ); | ||
| isa_ok( $joined, 'BoldStars' ); | ||
| } | ||
|
|
||
| # join with overloaded delim *and* list values | ||
| { | ||
| my $expected = BoldStars->new('kayo + sock'); | ||
| my $expected_str = "$expected"; | ||
|
|
||
| my $delim = BoldStars->new(' + '); | ||
| my $interpolated = "$kayo${delim}sock"; | ||
| is( $interpolated, $expected, 'interpolation (delim and list)' ); | ||
| is( $interpolated, $expected_str, 'interpolation str (delim and list)' ); | ||
| isa_ok( $interpolated, 'BoldStars' ); | ||
|
|
||
| my $reduced = reduce { $a . $delim . $b } $kayo, 'sock'; | ||
| is( $reduced, $expected, 'reduce (delim and list)' ); | ||
| is( $reduced, $expected_str, 'reduce str (delim and list)' ); | ||
| isa_ok( $reduced, 'BoldStars' ); | ||
|
|
||
| my $joined = join( $delim, $kayo, 'sock' ); | ||
| is( $joined, $expected, "join (delim and list)" ); | ||
| is( $joined, $expected_str, "join str (delim and list)" ); | ||
| isa_ok( $joined, 'BoldStars' ); | ||
| } | ||
|
|
||
| # tied overload as the delim should run FETCH only once | ||
| { | ||
| tie my $delim, "TiedCounter"; | ||
| my $joined = join( $delim, 'whiz', 'bang', 'biff' ); | ||
| is( $joined, "*whiz-bang-biff*", "joined tied" ); | ||
| } | ||
|
|
||
| # tied overload as list item | ||
| { | ||
| tie my $dashes, "TiedCounter"; | ||
| my $joined = join( '.', 'whiz', $dashes, 'bang' ); | ||
| is( $joined, "*whiz.-.bang*", "joined tied in list" ); | ||
| } | ||
|
|
||
| done_testing; | ||
|
|
||
| __END__ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I recommend adding a flag parameter, lhs_has_mg, which is preinitialized before calling do_join_inner() with: