Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -5778,6 +5778,7 @@ t/lib/mypragma.pm An example user pragma
t/lib/mypragma.t Test the example user pragma
t/lib/no_load.t Test that some modules don't load others
t/lib/overload_fallback.t Test that using overload 2x in a scope doesn't clobber fallback
t/lib/overload_join.t Test that join supports concat overload
t/lib/overload_nomethod.t Test that nomethod works as expected
t/lib/proxy_constant_subs.t Test that Proxy Constant Subs behave correctly
t/lib/Sans_mypragma.pm Test module for t/lib/mypragma.t
Expand Down
80 changes: 64 additions & 16 deletions doop.c
Original file line number Diff line number Diff line change
Expand Up @@ -657,16 +657,67 @@ Magic and tainting are handled.
=cut
*/

PERL_STATIC_INLINE SV *
S_do_join_inner(pTHX_ SV *lhs, SV *rhs)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I recommend adding a flag parameter, lhs_has_mg, which is preinitialized before calling do_join_inner() with:

lhs_has_mg = amagic_applies(lhs, concat_amg, applies_flags);

{
PERL_ARGS_ASSERT_DO_JOIN_INNER;

/* lhs has already had SvGETMAGIC called */
if (SvGAMAGIC(rhs)) {
SvGETMAGIC(rhs);
rhs = sv_mortalcopy_flags(rhs, 0);
}

int applies_flags = 0;
/* LD_LIBRARY_PATH=`pwd` perf stat ./perl -e '
my @x = ("a" x 100) x 1000;
for my $i (0 .. 10000) { my $z = join(",", @x); }'
* We speed this up by 20% (about) with a check for both ROK and RMG,
* because those must be set if an SV has any overload. */
U32 flags = SvFLAGS(lhs)|SvFLAGS(rhs);
if (UNLIKELY((flags & (SVf_ROK|SVs_GMG))) &&
UNLIKELY(amagic_applies(lhs, concat_amg, applies_flags)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and then replace this call to amagic_applies() with lhs_has_mg.

This will halve the number of times this code calls amagic_applies() for a given string.

|| amagic_applies(rhs, concat_amg, applies_flags))) {

SV *tmpsv = amagic_call(lhs, rhs, concat_amg, 0);
assert(tmpsv);
if (!tmpsv)
croak("panic: do_join_inner expected amagic_call to succeed\n");
return tmpsv;
}

/* We speed this up another 10% (about) using sv_catpvn_flags instead of
* sv_catsv_nomg(lhs, rhs). Passing in the *delims to reduce calls
* to SvPV_nomg_const for the delim made no measurable difference. */
STRLEN len;
const char * const s = SvPV_nomg_const(rhs, len);
sv_catpvn_flags(lhs, s, len, DO_UTF8(rhs) ? SV_CATUTF8 : SV_CATBYTES);

return lhs;
}

void
Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp)
{
PERL_ARGS_ASSERT_DO_JOIN;

SV ** const oldmark = mark;
SV *orig_sv = sv;
if (SvGAMAGIC(delim)) {
SvGETMAGIC(delim);
delim = sv_mortalcopy_flags(delim, 0);
}
int applies_flags = 0;
bool delim_has_concat = amagic_applies(delim, concat_amg, applies_flags);
I32 items = sp - mark;
STRLEN len;
STRLEN delimlen;
const char * const delims = SvPV_const(delim, delimlen);
const char * const delims = SvPV_nomg_const(delim, delimlen);

PERL_ARGS_ASSERT_DO_JOIN;
/* stringify once and use that unless the delim has concat_amg */
if (!delim_has_concat) {
delim = newSVpvn_flags(delims, delimlen, SVs_TEMP | SvUTF8(delim));
}

mark++;
len = (items > 0 ? (delimlen * (items - 1) ) : 0);
Expand Down Expand Up @@ -696,30 +747,27 @@ Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp)

if (items-- > 0) {
if (*mark)
sv_catsv(sv, *mark);
sv = do_join_inner(sv, *mark);
mark++;
}

if (delimlen) {
const U32 delimflag = DO_UTF8(delim) ? SV_CATUTF8 : SV_CATBYTES;
for (; items > 0; items--,mark++) {
STRLEN len;
const char *s;
sv_catpvn_flags(sv,delims,delimlen,delimflag);
s = SvPV_const(*mark,len);
sv_catpvn_flags(sv,s,len,
DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES);
sv = do_join_inner(sv, delim);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Notice that here...

sv = do_join_inner(sv, *mark);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... here ...

Copy link
Collaborator

@demerphq demerphq Jan 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it would also be interesting to see if the compiler did anything different with

sv = do_join_inner(do_join_inner(sv, delim),*mark);

or

sv = do_join_inner(do_join_inner(sv, delim,lhs_has_mg),*mark,lhs_has_mg);

}
}
else {
for (; items > 0; items--,mark++)
{
STRLEN len;
const char *s = SvPV_const(*mark,len);
sv_catpvn_flags(sv,s,len,
DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES);
for (; items > 0; items--,mark++) {
sv = do_join_inner(sv, *mark);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and here we call do_join_inner() with the same SV as the lhs. So we can avoid the need to check it for magic each time we call by passing that state in as a flag.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't the reason we assign the sv each time is that the return value may not be the same sv, especially in the case where the first element didn't have overload concat magic, but the second element does, after which the lhs gets the concat magic? Checking once would break that, right?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, magic isnt viral like that. See comment below.

}
}

if (sv != orig_sv) {
sv_setsv_nomg(orig_sv, sv);
sv = orig_sv;
}

SvSETMAGIC(sv);
}

Expand Down
2 changes: 2 additions & 0 deletions embed.fnc
Original file line number Diff line number Diff line change
Expand Up @@ -4031,6 +4031,8 @@ S |IO * |openn_setup |NN GV *gv \
|NN char *savetype
#endif /* defined(PERL_IN_DOIO_C) */
#if defined(PERL_IN_DOOP_C)
Ri |SV * |do_join_inner |NN SV *lhs \
|NN SV *rhs
RS |Size_t |do_trans_complex \
|NN SV * const sv \
|NN const OPtrans_map * const tbl
Expand Down
1 change: 1 addition & 0 deletions embed.h
Original file line number Diff line number Diff line change
Expand Up @@ -1160,6 +1160,7 @@
# define openn_setup(a,b,c,d,e,f) S_openn_setup(aTHX_ a,b,c,d,e,f)
# endif /* defined(PERL_IN_DOIO_C) */
# if defined(PERL_IN_DOOP_C)
# define do_join_inner(a,b) S_do_join_inner(aTHX_ a,b)
# define do_trans_complex(a,b) S_do_trans_complex(aTHX_ a,b)
# define do_trans_count(a,b) S_do_trans_count(aTHX_ a,b)
# define do_trans_count_invmap(a,b) S_do_trans_count_invmap(aTHX_ a,b)
Expand Down
6 changes: 6 additions & 0 deletions pod/perldelta.pod
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ here, but most should go in the L</Performance Enhancements> section.

[ List each enhancement as a =head2 entry ]

=head2 Support for overloaded objects in join()

As per Perl RFC 0013, join() now correctly handles arguments with concat
(C<.>) overload, meaning that C<join( $delim, @list )> behaves exactly
like C<reduce { $a . $delim . $b } @list>.

=head1 Security

XXX Any security-related notices go here. In particular, any security
Expand Down
9 changes: 9 additions & 0 deletions proto.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

173 changes: 173 additions & 0 deletions t/lib/overload_join.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
use v5.36;

BEGIN {
chdir 't' if -d 't';
@INC = '../lib';
require './test.pl';
}

# This class:
# - adds * around non-whitespace "words" when stringified
# - assimilates strings when concatenated (i.e. always return an object)

package BoldStars;

use overload
'""' => sub {
return ${ $_[0] } =~ s{(\S+)}{*$1*}gr;
},
'.' => sub {
my ( $left, $right, $swapped ) = @_;
if ( ref $right eq __PACKAGE__ ) {
return __PACKAGE__->new( $swapped ? "$$right$$left" : "$$left$$right" );
}
else {
return __PACKAGE__->new( $swapped ? "$right$$left" : "$$left$right" );
}
},
fallback => 1,
;

sub new {
my ( $class, $string ) = @_;
return bless \$string, $class;
}

package TiedCounter {
sub TIESCALAR { my $class = shift; bless [''], $class }
sub FETCH { $_[0][0] .= "-"; return BoldStars->new( $_[0][0] ) }
}


# test script starts here

package main;
use Data::Dumper;
use List::Util qw( reduce );

# test our test class behaves as expected
my $kayo = BoldStars->new('kayo');
my $biff = BoldStars->new('biff');
my $ouch = BoldStars->new('ouch');
my $sock = BoldStars->new('sock');

is( "$kayo", "*kayo*", 'BoldStars stringification' );
is( $kayo . " zamm", "*kayo* *zamm*", 'BoldStars concatenation' );
is( "zamm " . $kayo, "*zamm* *kayo*", 'BoldStars concatenation (swapped)' );
is( "$kayo + $biff", "*kayo* *+* *biff*", 'BoldStars stringification' );

my $obj = "$kayo ";
is( $obj, '*kayo* ', 'Contatenation with space (right)' );
isa_ok( $obj, 'BoldStars' );

$obj = " $kayo";
is( $obj, ' *kayo*', 'Contatenation with space (left)' );
isa_ok( $obj, 'BoldStars' );

$obj = $kayo . $biff;
is( "$obj", "*kayobiff*", "BoldStars concatenation" );
isa_ok( $obj, 'BoldStars' );

# test that join uses concat overload
{
my $expected = BoldStars->new('kayo biff');
my $expected_str = "$expected";

my $interpolated = "$kayo $biff";
is( $interpolated, $expected, 'interpolation (space)' );
is( $interpolated, $expected_str, 'interpolation str (space)' );
isa_ok( $interpolated, 'BoldStars' );

my $reduced = reduce { $a . ' ' . $b } $kayo, $biff;
is( $reduced, $expected, 'reduce (space)' );
is( $reduced, $expected_str, 'reduce str (space)' );
isa_ok( $reduced, 'BoldStars' );

my $joined = join( ' ', $kayo, $biff );
is( $joined, $expected, "join (space)" );
is( $joined, $expected_str, "join str (space)" );
isa_ok( $joined, 'BoldStars' );
}

{
my $expected = BoldStars->new('kayo + sock');
my $expected_str = "$expected";

my $interpolated = "$kayo + $sock";
is( $interpolated, $expected, 'interpolation (non-space)' );
is( $interpolated, $expected_str, 'interpolation str (non-space)' );
isa_ok( $interpolated, 'BoldStars' );

my $reduced = reduce { $a . ' + ' . $b } $kayo, $sock;
is( $reduced, $expected, 'reduce (non-space)' );
is( $reduced, $expected_str, 'reduce str (non-space)' );
isa_ok( $reduced, 'BoldStars' );

my $joined = join( ' + ', $kayo, $sock );
is( $joined, $expected, "join (non-space)" );
is( $joined, $expected_str, "join str (non-space)" );
isa_ok( $joined, 'BoldStars' );

}

# join with a overloaded delim
{
my $expected = BoldStars->new('ouch + sock');
my $expected_str = "$expected";

my $delim = BoldStars->new(' + ');
my $interpolated = "ouch${delim}sock";
is( $interpolated, $expected, 'interpolation (overload-delim)' );
is( $interpolated, $expected_str, 'interpolation str (overload-delim)' );
isa_ok( $interpolated, 'BoldStars' );

my $reduced = reduce { $a . $delim . $b } 'ouch', 'sock';
is( $reduced, $expected, 'reduce (overload-delim)' );
is( $reduced, $expected_str, 'reduce str (overload-delim)' );
isa_ok( $reduced, 'BoldStars' );

my $joined = join( $delim, 'ouch', 'sock' );
is( $joined, $expected, "join (overload-delim)" );
is( $joined, $expected_str, "join str (overload-delim)" );
isa_ok( $joined, 'BoldStars' );
}

# join with overloaded delim *and* list values
{
my $expected = BoldStars->new('kayo + sock');
my $expected_str = "$expected";

my $delim = BoldStars->new(' + ');
my $interpolated = "$kayo${delim}sock";
is( $interpolated, $expected, 'interpolation (delim and list)' );
is( $interpolated, $expected_str, 'interpolation str (delim and list)' );
isa_ok( $interpolated, 'BoldStars' );

my $reduced = reduce { $a . $delim . $b } $kayo, 'sock';
is( $reduced, $expected, 'reduce (delim and list)' );
is( $reduced, $expected_str, 'reduce str (delim and list)' );
isa_ok( $reduced, 'BoldStars' );

my $joined = join( $delim, $kayo, 'sock' );
is( $joined, $expected, "join (delim and list)" );
is( $joined, $expected_str, "join str (delim and list)" );
isa_ok( $joined, 'BoldStars' );
}

# tied overload as the delim should run FETCH only once
{
tie my $delim, "TiedCounter";
my $joined = join( $delim, 'whiz', 'bang', 'biff' );
is( $joined, "*whiz-bang-biff*", "joined tied" );
}

# tied overload as list item
{
tie my $dashes, "TiedCounter";
my $joined = join( '.', 'whiz', $dashes, 'bang' );
is( $joined, "*whiz.-.bang*", "joined tied in list" );
}

done_testing;

__END__