Skip to content

Commit

Permalink
[Buf] improvements to Str.encode
Browse files Browse the repository at this point in the history
This week saw the addition of ByteView, a stopgap PMC that allows us access
to the bytes in a Parrot string, pending the Parrot people's long-term
solution. Thanks to it, we now do string encoding of UTF-8 strings right.
  • Loading branch information
Carl Masak committed Jun 6, 2010
1 parent 841262f commit 359dbdf
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 14 deletions.
2 changes: 1 addition & 1 deletion build/Makefile.in
Expand Up @@ -226,7 +226,7 @@ PMC_SOURCES = \
$(PMC_DIR)/perl6scalar.pmc $(PMC_DIR)/mutablevar.pmc \
$(PMC_DIR)/perl6multisub.pmc $(PMC_DIR)/p6invocation.pmc \
$(PMC_DIR)/p6opaque.pmc $(PMC_DIR)/p6role.pmc \
$(PMC_DIR)/p6lowlevelsig.pmc
$(PMC_DIR)/p6lowlevelsig.pmc $(PMC_DIR)/byteview.pmc

OPS_SOURCE = perl6.ops

Expand Down
6 changes: 1 addition & 5 deletions src/core/Buf.pm
@@ -1,14 +1,10 @@
role Buf[::T = Int] does Stringy {
has @.contents;
has T @.contents;

multi method new(@contents) {
self.bless(*, :contents(@contents.list));
}

multi method new(*@contents) {
self.bless(*, :contents(@contents.list));
}

multi method decode($encoding = 'UTF-8') {
my @contents = @.contents;
my $str = ~Q:PIR {
Expand Down
14 changes: 7 additions & 7 deletions src/core/Str.pm
Expand Up @@ -23,19 +23,19 @@ augment class Str does Stringy {
# XXX: We have no $?ENC or $?NF compile-time constants yet.
multi method encode($encoding = 'UTF-8', $nf = '') {
my @bytes = Q:PIR {
.local int bin_coding, i, max, byte
.local string bin_string
.local pmc it, result
.local int i, max, byte
.local pmc byteview, it, result
$P0 = find_lex 'self'
$S0 = $P0
bin_coding = find_encoding 'fixed_8'
bin_string = trans_encoding $S0, bin_coding
byteview = new ['ByteView']
byteview = $S0
result = new ['Parcel']
i = 0
max = length bin_string
max = elements byteview
bytes_loop:
if i >= max goto bytes_done
byte = ord bin_string, i
byte = byteview[i]
push result, byte
inc i
goto bytes_loop
Expand Down
90 changes: 90 additions & 0 deletions src/pmc/byteview.pmc
@@ -0,0 +1,90 @@
/*
$Id$
Copyright (C) 2010, The Perl Foundation.

=head1 NAME

src/pmc/byteview.pmc - PMC providing a byte view of a string

=head1 DESCRIPTION

The ByteView PMC provides a byte view of a string. As long as it is only used
in a read-only way, it simply looks directly at the bytes of the string; if
anybody tries to set a value, however, it will make a mutable copy to work
with, thus not affecting the original Parrot STRING* (those are immutable, and
avoiding action at a distance is a Good Thing).

=head2 Methods

=cut

*/

#include "parrot/parrot.h"

pmclass ByteView dynpmc group perl6_group auto_attrs {
    /* String currently being viewed; NULL when no string is attached. */
    ATTR STRING *the_string;
    /* Private byte array consulted when the_string is NULL. Nothing in this
     * class assigns a non-NULL value to it yet. */
    ATTR unsigned char *bytes;
    /* Number of valid entries in bytes; only consulted when the_string is
     * NULL. */
    ATTR INTVAL length;

    /*
     * init
     *
     * Flags the PMC as needing a custom mark so that mark() below is called
     * by the garbage collector.
     */
    VTABLE void init() {
        /* Need custom mark. */
        PObj_custom_mark_SET(SELF);
    }

    /*
     * mark
     *
     * Marks the viewed string, if any, as alive.
     */
    VTABLE void mark() {
        STRING *the_string;
        GET_ATTR_the_string(INTERP, SELF, the_string);
        if (the_string)
            Parrot_gc_mark_STRING_alive(INTERP, the_string);
    }

    /*
     * elements
     *
     * Returns the number of bytes in the view: the string's bufused when a
     * string is attached, otherwise the stored length of the byte array.
     */
    VTABLE INTVAL elements() {
        STRING *the_string;
        GET_ATTR_the_string(INTERP, SELF, the_string);
        if (the_string) {
            return the_string->bufused;
        }
        else {
            INTVAL length;
            GET_ATTR_length(INTERP, SELF, length);
            return length;
        }
    }

    /*
     * set_string_native
     *
     * Attaches new_string as the string to view and resets the byte array
     * attributes (bytes to NULL, length to 0).
     */
    VTABLE void set_string_native(STRING *new_string) {
        /* Put the string in place and invalidate any byte array we may have. */
        SET_ATTR_the_string(INTERP, SELF, new_string);
        SET_ATTR_bytes(INTERP, SELF, NULL);
        SET_ATTR_length(INTERP, SELF, 0);
    }

    /*
     * get_integer_keyed_int
     *
     * Returns the byte at index key as an unsigned value. Returns 0 when key
     * is negative, when it is past the end of the data, or when there is no
     * data to look at.
     */
    VTABLE INTVAL get_integer_keyed_int(INTVAL key) {
        STRING *the_string;

        /* Reject negative indices up front: in the byte-array branch the
         * comparison against length is signed, so a negative key would
         * otherwise pass the bounds check and read before the buffer. */
        if (key < 0)
            return 0;

        GET_ATTR_the_string(INTERP, SELF, the_string);
        if (the_string) {
            /* We're looking into the string. key is known to be non-negative
             * here, so the cast only makes the comparison's signedness
             * explicit. */
            if ((UINTVAL)key < the_string->bufused)
                return (INTVAL)(unsigned char)the_string->strstart[key];
        }
        else {
            /* Need to look at our underlying byte array instead. */
            unsigned char *bytes;
            INTVAL length;
            GET_ATTR_bytes(INTERP, SELF, bytes);
            GET_ATTR_length(INTERP, SELF, length);
            if (bytes && key < length)
                return (INTVAL)bytes[key];
        }

        /* Out of range, or nothing to look at. */
        return 0;
    }
}


/*
* Local variables:
* c-file-style: "parrot"
* End:
* vim: expandtab shiftwidth=4:
*/

2 changes: 1 addition & 1 deletion t/spectest.data
Expand Up @@ -544,7 +544,7 @@ S32-str/capitalize.t # icu
S32-str/chomp.t
S32-str/chop.t
S32-str/comb.t # icu
# S32-str/encode.t
S32-str/encode.t
S32-str/flip.t
S32-str/index.t
S32-str/lcfirst.t # icu
Expand Down

0 comments on commit 359dbdf

Please sign in to comment.