Skip to content

Commit

Permalink
Implement the sort pragma. Split sort code from pp_ctl.c
Browse files Browse the repository at this point in the history
to pp_sort.c.  Includes the quicksort stabilizing layer
from John P. Linderman.  -Msort=qsort or -Msort=fast is
faster than without (or with -Msort=mergesort or -Msort=safe)
for short random inputs, but for some reason not quite as fast
as 5.6.1 qsort.  More benchmarking, profiling, tuning, and
optimizing definitely needed.

p4raw-id: //depot/perl@13179
  • Loading branch information
jhi committed Nov 21, 2001
1 parent c857072 commit 84d4ea4
Show file tree
Hide file tree
Showing 18 changed files with 1,795 additions and 781 deletions.
3 changes: 3 additions & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -1193,6 +1193,8 @@ lib/Shell.t Tests for above
lib/shellwords.pl Perl library to split into words with shell quoting
lib/sigtrap.pm For trapping an abort and giving traceback
lib/sigtrap.t See if sigtrap works
lib/sort.pm For "use sort"
lib/sort.t See if "use sort" works
lib/stat.pl Perl library supporting stat function
lib/strict.pm For "use strict"
lib/strict.t See if strictures work
Expand Down Expand Up @@ -1963,6 +1965,7 @@ pp.sym Push/Pop code symbols
pp_ctl.c Push/Pop code for control flow
pp_hot.c Push/Pop code for heavily used opcodes
pp_pack.c Push/Pop code for pack/unpack
pp_sort.c Push/Pop code for sort
pp_proto.h C++ definitions for Push/Pop code
pp_sys.c Push/Pop code for system interaction
proto.h Prototypes
Expand Down
4 changes: 2 additions & 2 deletions Makefile.SH
Original file line number Diff line number Diff line change
Expand Up @@ -287,13 +287,13 @@ h = $(h1) $(h2) $(h3) $(h4) $(h5)
c1 = $(mallocsrc) av.c scope.c op.c doop.c doio.c dump.c hv.c mg.c
c2 = perl.c perly.c pp.c pp_hot.c pp_ctl.c pp_sys.c regcomp.c regexec.c utf8.c
c3 = gv.c sv.c taint.c toke.c util.c deb.c run.c universal.c xsutils.c
c4 = globals.c perlio.c perlapi.c numeric.c locale.c pp_pack.c sharedsv.c
c4 = globals.c perlio.c perlapi.c numeric.c locale.c pp_pack.c pp_sort.c sharedsv.c
c = $(c1) $(c2) $(c3) $(c4) miniperlmain.c perlmain.c
obj1 = $(mallocobj) gv$(OBJ_EXT) toke$(OBJ_EXT) perly$(OBJ_EXT) op$(OBJ_EXT) regcomp$(OBJ_EXT) dump$(OBJ_EXT) util$(OBJ_EXT) mg$(OBJ_EXT)
obj2 = hv$(OBJ_EXT) av$(OBJ_EXT) run$(OBJ_EXT) pp_hot$(OBJ_EXT) sv$(OBJ_EXT) pp$(OBJ_EXT) scope$(OBJ_EXT) pp_ctl$(OBJ_EXT) pp_sys$(OBJ_EXT)
obj3 = doop$(OBJ_EXT) doio$(OBJ_EXT) regexec$(OBJ_EXT) utf8$(OBJ_EXT) taint$(OBJ_EXT) deb$(OBJ_EXT) universal$(OBJ_EXT) xsutils$(OBJ_EXT) globals$(OBJ_EXT) perlio$(OBJ_EXT) perlapi$(OBJ_EXT) numeric$(OBJ_EXT) locale$(OBJ_EXT) pp_pack$(OBJ_EXT) sharedsv$(OBJ_EXT)
obj3 = doop$(OBJ_EXT) doio$(OBJ_EXT) regexec$(OBJ_EXT) utf8$(OBJ_EXT) taint$(OBJ_EXT) deb$(OBJ_EXT) universal$(OBJ_EXT) xsutils$(OBJ_EXT) globals$(OBJ_EXT) perlio$(OBJ_EXT) perlapi$(OBJ_EXT) numeric$(OBJ_EXT) locale$(OBJ_EXT) pp_pack$(OBJ_EXT) pp_sort$(OBJ_EXT) sharedsv$(OBJ_EXT)
obj = $(obj1) $(obj2) $(obj3) $(ARCHOBJS)
Expand Down
5 changes: 4 additions & 1 deletion Makefile.micro
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ O = uav$(_O) udeb$(_O) udoio$(_O) udoop$(_O) udump$(_O) \
uglobals$(_O) ugv$(_O) uhv$(_O) \
umg$(_O) uperlmain$(_O) uop$(_O) \
uperl$(_O) uperlio$(_O) uperly$(_O) upp$(_O) \
upp_ctl$(_O) upp_hot$(_O) upp_sys$(_O) upp_pack$(_O) \
upp_ctl$(_O) upp_hot$(_O) upp_sys$(_O) upp_pack$(_O) upp_sort$(_O) \
uregcomp$(_O) uregexec$(_O) urun$(_O) \
uscope$(_O) usv$(_O) utaint$(_O) utoke$(_O) \
unumeric$(_O) ulocale$(_O) \
Expand Down Expand Up @@ -96,6 +96,9 @@ upp_sys$(_O): $(HE) pp_sys.c
upp_pack$(_O): $(HE) pp_pack.c
$(CC) -c -o $@ $(CFLAGS) pp_pack.c

upp_sort$(_O): $(HE) pp_sort.c
$(CC) -c -o $@ $(CFLAGS) pp_sort.c

uregcomp$(_O): $(HE) regcomp.c regcomp.h regnodes.h INTERN.h
$(CC) -c -o $@ $(CFLAGS) regcomp.c

Expand Down
1 change: 1 addition & 0 deletions NetWare/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,7 @@ MICROCORE_SRC = \
..\pp_ctl.c \
..\pp_hot.c \
..\pp_pack.c \
..\pp_sort.c \
..\pp_sys.c \
..\regcomp.c \
..\regexec.c \
Expand Down
2 changes: 0 additions & 2 deletions embed.h
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,6 @@
#define mess Perl_mess
#define vmess Perl_vmess
#define qerror Perl_qerror
#define sortsv Perl_sortsv
#define mg_clear Perl_mg_clear
#define mg_copy Perl_mg_copy
#define mg_find Perl_mg_find
Expand Down Expand Up @@ -1916,7 +1915,6 @@
#endif
#define vmess(a,b) Perl_vmess(aTHX_ a,b)
#define qerror(a) Perl_qerror(aTHX_ a)
#define sortsv(a,b,c) Perl_sortsv(aTHX_ a,b,c)
#define mg_clear(a) Perl_mg_clear(aTHX_ a)
#define mg_copy(a,b,c,d) Perl_mg_copy(aTHX_ a,b,c,d)
#define mg_find(a,b) Perl_mg_find(aTHX_ a,b)
Expand Down
2 changes: 1 addition & 1 deletion embed.pl
Original file line number Diff line number Diff line change
Expand Up @@ -1471,7 +1471,7 @@ END
Afp |SV* |mess |const char* pat|...
Ap |SV* |vmess |const char* pat|va_list* args
p |void |qerror |SV* err
Apd |void |sortsv |SV ** array|size_t num_elts|SVCOMPARE_t f
Apd |void |sortsv |SV ** array|size_t num_elts|SVCOMPARE_t cmp
Apd |int |mg_clear |SV* sv
Apd |int |mg_copy |SV* sv|SV* nsv|const char* key|I32 klen
Apd |MAGIC* |mg_find |SV* sv|int type
Expand Down
1 change: 0 additions & 1 deletion global.sym
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,6 @@ Perl_grok_oct
Perl_markstack_grow
Perl_mess
Perl_vmess
Perl_sortsv
Perl_mg_clear
Perl_mg_copy
Perl_mg_find
Expand Down
111 changes: 111 additions & 0 deletions lib/sort.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package sort;

our $VERSION = '1.00';

$sort::hint_bits = 0x00020000; # HINT_LOCALIZE_HH, really...

$sort::quicksort_bit = 0x00000001;
$sort::mergesort_bit = 0x00000002;
$sort::sort_bits = 0x000000FF; # allow 256 different ones
$sort::stable_bit = 0x00000100;
$sort::insensitive_bit = 0x00000200;
$sort::safe_bits = 0x00000300;
$sort::fast_bit = 0x00000400;

use strict;

sub import {
shift;
if (@_ == 0) {
require Carp;
Carp::croak("sort pragma requires arguments");
}
$^H |= $sort::hint_bits;
local $_;
while ($_ = shift(@_)) {
if (/^q(?:uick)?sort$/) {
$^H{SORT} &= ~$sort::sort_bits;
$^H{SORT} |= $sort::quicksort_bit;
return;
} elsif ($_ eq 'mergesort') {
$^H{SORT} &= ~$sort::sort_bits;
$^H{SORT} |= $sort::mergesort_bit;
return;
} elsif ($_ eq 'safe') {
$^H{SORT} &= ~$sort::fast_bit;
$^H{SORT} |= $sort::safe_bits;
$_ = 'mergesort';
redo;
} elsif ($_ eq 'fast') {
$^H{SORT} &= ~$sort::safe_bits;
$^H{SORT} |= $sort::fast_bit;
$_ = 'quicksort';
redo;
} else {
require Carp;
Carp::croak("sort: unknown subpragma '@_'");
}
}
}

sub current {
my @sort;
if ($^H{SORT}) {
push @sort, 'quicksort' if $^H{SORT} & $sort::quicksort_bit;
push @sort, 'mergesort' if $^H{SORT} & $sort::mergesort_bit;
push @sort, 'safe' if $^H{SORT} & $sort::safe_bits;
push @sort, 'fast' if $^H{SORT} & $sort::fast_bit;
}
push @sort, 'mergesort' unless @sort;
join(' ', @sort);
}

1;
__END__
=head1 NAME
sort - perl pragma to control sort() behaviour
=head1 SYNOPSIS
use sort 'quicksort';
use sort 'mergesort';
use sort 'qsort'; # alias for quicksort
# alias for mergesort: insenstive and stable
use sort 'safe';
# alias for raw quicksort: sensitive and nonstable
use sort 'fast';
my $current = sort::current();
=head1 DESCRIPTION
With the sort pragma you can control the behaviour of the builtin
sort() function.
In Perl versions 5.6 and earlier the quicksort algorithm was used to
implement sort(), but in Perl 5.8 the algorithm was changed to mergesort,
mainly to guarantee insensitiveness to sort input: the worst case of
quicksort is O(N**2), while mergesort is always O(N log N).
On the other hand, for same cases (especially for shorter inputs)
quicksort is faster.
In Perl 5.8 and later by default quicksort is wrapped into a
stabilizing layer. A stable sort means that for records that compare
equal, the original input ordering is preserved. Mergesort is stable;
quicksort is not.
The metapragmas 'fast' and 'safe' select quicksort without the
stabilizing layer and mergesort, respectively. In other words,
'safe' is the default.
Finally, the sort performance is also dependent on the platform
(smaller CPU caches favour quicksort).
=cut
17 changes: 17 additions & 0 deletions lib/sort.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!./perl

BEGIN {
chdir 't' if -d 't';
@INC = '../lib';
}

use Test::More tests => 2;

BEGIN {
require "sort.pm"; # require sort; does not work
ok(sort::current() eq 'mergesort');
}

use sort 'fast';
ok(sort::current() eq 'quicksort fast');

8 changes: 8 additions & 0 deletions perl.h
Original file line number Diff line number Diff line change
Expand Up @@ -3100,6 +3100,14 @@ enum { /* pass one of these to get_vtbl */
#define HINT_FILETEST_ACCESS 0x00400000
#define HINT_UTF8 0x00800000

#define HINT_SORT_SORT_BITS 0x000000FF /* allow 256 different ones */
#define HINT_SORT_QUICKSORT 0x00000001
#define HINT_SORT_MERGESORT 0x00000002
#define HINT_SORT_STABLE 0x00000100 /* sort styles */
#define HINT_SORT_INSENSITIVE 0x00000200
#define HINT_SORT_SAFE 0x00000300 /* stable and insensitive */
#define HINT_SORT_FAST 0x00000400 /* damn the icebergs */

/* Various states of the input record separator SV (rs) */
#define RsSNARF(sv) (! SvOK(sv))
#define RsSIMPLE(sv) (SvOK(sv) && (! SvPOK(sv) || SvCUR(sv)))
Expand Down
Loading

0 comments on commit 84d4ea4

Please sign in to comment.