Skip to content

Commit

Permalink
[Buf] worked on &pack, .unpack and .encode
Browse files Browse the repository at this point in the history
* &pack and .unpack now exist and do simple things
* The directives they know about: A H C S L n N v V x
* .encode now recognizes its $encoding parameter
* It can handle utf-8 (default), latin-1, and ascii
  • Loading branch information
Carl Masak committed Aug 15, 2010
1 parent c4b6df5 commit 83b2cdf
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 3 deletions.
121 changes: 120 additions & 1 deletion src/core/Buf.pm
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
role Buf[::T = Int] does Stringy does Positional {
has T @.contents;

multi method new(@contents) {
multi method new(*@contents) {
self.bless(*, :contents(@contents.list));
}

Expand Down Expand Up @@ -31,6 +31,72 @@ role Buf[::T = Int] does Stringy does Positional {
return $str;
}

multi method unpack($template) {
my @bytes = @.contents;
my @fields;
for $template.comb(/<[a..zA..Z]>[\d+|'*']?/) -> $unit {
my $directive = $unit.substr(0, 1);
my $amount = $unit.substr(1);

given $directive {
when 'A' {
my $asciistring;
if $amount eq '*' {
$amount = @bytes.elems;
}
for ^$amount {
$asciistring ~= chr(shift @bytes);
}
@fields.push($asciistring);
}
when 'H' {
my $hexstring;
while @bytes {
my $byte = shift @bytes;
$hexstring ~= ($byte +> 4).fmt('%x')
~ ($byte % 16).fmt('%x');
}
@fields.push($hexstring);
}
when 'x' {
if $amount eq '*' {
$amount = 0;
}
elsif $amount eq '' {
$amount = 1;
}
splice @bytes, 0, $amount;
}
when 'C' {
@fields.push: shift @bytes;
}
when 'S' | 'v' {
@fields.push: shift(@bytes)
+ (shift(@bytes) +< 0x08);
}
when 'L' | 'V' {
@fields.push: shift(@bytes)
+ (shift(@bytes) +< 0x08)
+ (shift(@bytes) +< 0x10)
+ (shift(@bytes) +< 0x18);
}
when 'n' {
@fields.push: (shift(@bytes) +< 0x08)
+ shift(@bytes);
}
when 'N' {
@fields.push: (shift(@bytes) +< 0x18)
+ (shift(@bytes) +< 0x10)
+ (shift(@bytes) +< 0x08)
+ shift(@bytes);
}
die "Unrecognized directive $directive";
}
}

return |@fields;
}

multi method elems() {
@.contents.elems;
}
Expand All @@ -43,3 +109,56 @@ role Buf[::T = Int] does Stringy does Positional {
our multi sub infix:<eqv>(Buf $a, Buf $b) {
return $a.contents eqv $b.contents;
}

our multi sub pack(Str $template, *@items) {
my @bytes;
for $template.comb(/<[a..zA..Z]>[\d+|'*']?/) -> $unit {
my $directive = $unit.substr(0, 1);
my $amount = $unit.substr(1);

given $directive {
when 'A' {
my $ascii = shift @items // '';
for $ascii.comb -> $char {
die "Non-ASCII character $char" if ord($char) > 0x7f;
@bytes.push: ord($char);
}
if $amount ne '*' {
@bytes.push: 0x20 xx ($amount - $ascii.chars);
}
}
when 'H' {
my $hexstring = shift @items // '';
if $hexstring % 2 {
$hexstring ~= '0';
}
@bytes.push: map { :16($_) }, $hexstring.comb(/../);
}
when 'C' {
my $number = shift(@items);
@bytes.push: $number % 0x100;
}
when 'S' | 'v' {
my $number = shift(@items);
@bytes.push: ($number, $number +> 0x08) >>%>> 0x100;
}
when 'L' | 'V' {
my $number = shift(@items);
@bytes.push: ($number, $number +> 0x08,
$number +> 0x10, $number +> 0x18) >>%>> 0x100;
}
when 'n' {
my $number = shift(@items);
@bytes.push: ($number +> 0x08, $number) >>%>> 0x100;
}
when 'N' {
my $number = shift(@items);
@bytes.push: ($number +> 0x18, $number +> 0x10,
$number +> 0x08, $number) >>%>> 0x100;
}
die "Unrecognized directive $directive";
}
}

return Buf.new(@bytes);
}
30 changes: 29 additions & 1 deletion src/core/Str.pm
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,41 @@ augment class Str does Stringy {
multi method Int { (+self).Int; }
multi method Num { (+self).Num; }

my @KNOWN_ENCODINGS = <utf-8 iso-8859-1 ascii>;

# XXX: We have no $?ENC or $?NF compile-time constants yet.
multi method encode($encoding = 'UTF-8', $nf = '') {
multi method encode($encoding is copy = 'utf-8', $nf = '') {
if $encoding eq 'latin-1' {
$encoding = 'iso-8859-1';
}
die "Unknown encoding $encoding"
unless $encoding.lc eq any @KNOWN_ENCODINGS;
$encoding .= lc;
my @bytes = Q:PIR {
.local int byte
.local pmc bytebuffer, it, result
$P0 = find_lex 'self'
$S0 = $P0
$P1 = find_lex '$encoding'
$S1 = $P1
if $S1 == 'ascii' goto transcode_ascii
if $S1 == 'iso-88591-1' goto transcode_iso_8859_1
# NOTE: There's an assumption here, that all strings coming in
# from the rest of Rakudo are always in UTF-8 form. Don't
# know if this assumption always holds; to be on the safe
# side, we might transcode even to UTF-8.
goto finished_transcoding
transcode_ascii:
$I0 = find_charset 'ascii'
$S0 = trans_charset $S0, $I0
$I0 = find_encoding 'fixed_8'
$S0 = trans_encoding $S0, $I0
transcode_iso_8859_1:
$I0 = find_charset 'iso-8859-1'
$S0 = trans_charset $S0, $I0
$I0 = find_encoding 'fixed_8'
$S0 = trans_encoding $S0, $I0
finished_transcoding:
bytebuffer = new ['ByteBuffer']
bytebuffer = $S0

Expand Down
3 changes: 2 additions & 1 deletion t/spectest.data
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,7 @@ S32-str/lcfirst.t # icu
S32-str/lc.t # icu
# S32-str/p5chomp.t
S32-str/p5chop.t
S32-str/pack.t
S32-str/pos.t
S32-str/rindex.t
S32-str/samecase.t # icu
Expand All @@ -586,7 +587,7 @@ S32-str/substr.t
S32-str/trim.t
S32-str/ucfirst.t # icu
S32-str/uc.t # icu
# S32-str/unpack.t
S32-str/unpack.t
S32-str/words.t # icu
S32-temporal/calendar.t
S32-temporal/Date.t
Expand Down

0 comments on commit 83b2cdf

Please sign in to comment.