Skip to content

Commit

Permalink
Data::Dumper: useqq implementation for xs
Browse files Browse the repository at this point in the history
Tests are mainly unchanged, just a "cheat" and a couple of TODOs were
removed.
  • Loading branch information
srezic authored and tonycoz committed Jul 17, 2013
1 parent 49fb45d commit 9baac1a
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 40 deletions.
1 change: 0 additions & 1 deletion dist/Data-Dumper/Dumper.pm
Expand Up @@ -221,7 +221,6 @@ sub DESTROY {}
sub Dump {
return &Dumpxs
unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl}) ||
$Data::Dumper::Useqq || (ref($_[0]) && $_[0]->{useqq}) ||
$Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse});
return &Dumpperl;
}
Expand Down
85 changes: 60 additions & 25 deletions dist/Data-Dumper/Dumper.xs
Expand Up @@ -18,15 +18,15 @@

static I32 num_q (const char *s, STRLEN slen);
static I32 esc_q (char *dest, const char *src, STRLEN slen);
static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen);
static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq);
static I32 needs_quote(const char *s, STRLEN len);
static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
HV *seenhv, AV *postav, I32 *levelp, I32 indent,
SV *pad, SV *xpad, SV *apad, SV *sep, SV *pair,
SV *freezer, SV *toaster,
I32 purity, I32 deepcopy, I32 quotekeys, SV *bless,
I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash);
I32 maxdepth, SV *sortkeys, int use_sparse_seen_hash, I32 useqq);

#ifndef HvNAME_get
#define HvNAME_get HvNAME
Expand Down Expand Up @@ -158,8 +158,9 @@ esc_q(char *d, const char *s, STRLEN slen)
return ret;
}

/* this function is also misused for implementing $Useqq */
static I32
esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
{
char *r, *rstart;
const char *s = src;
Expand All @@ -176,8 +177,8 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
int increment;

/* this will need EBCDICification */
for (s = src; s < send; s += increment) {
const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
for (s = src; s < send; do_utf8 ? s += increment : s++) {
const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;

/* check for invalid utf8 */
increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
Expand All @@ -195,6 +196,14 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
#endif
);
#ifndef EBCDIC
} else if (useqq && (k <= 10 || k == 12 || k == 13 || k == 27)) {
grow += 2;
} else if (useqq && k <= 31) {
grow += 3;
} else if (useqq && k >= 127) {
grow += 4;
#endif
} else if (k == '\\') {
backslashes++;
} else if (k == '\'') {
Expand All @@ -205,16 +214,17 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
normal++;
}
}
if (grow) {
if (grow || useqq) {
/* We have something needing hex. 3 is ""\0 */
sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
+ 2*qq_escapables + normal);
rstart = r = SvPVX(sv) + cur;

*r++ = '"';

for (s = src; s < send; s += UTF8SKIP(s)) {
const UV k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
for (s = src; s < send; do_utf8 ? s += UTF8SKIP(s) : s++) {
const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;


if (k == '"' || k == '\\' || k == '$' || k == '@') {
*r++ = '\\';
Expand All @@ -224,6 +234,33 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen)
#ifdef EBCDIC
if (isprint(k) && k < 256)
#else
if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) {
*r++ = '\\';
switch (k) {
case 7: *r++ = 'a'; break;
case 8: *r++ = 'b'; break;
case 9: *r++ = 't'; break;
case 10: *r++ = 'n'; break;
case 12: *r++ = 'f'; break;
case 13: *r++ = 'r'; break;
case 27: *r++ = 'e'; break;
default:
/* faster than
* r = r + my_sprintf(r, "%o", k);
*/
if (k <= 7) {
*r++ = (char)k + '0';
} else if (k <= 63) {
*r++ = (char)(k>>3) + '0';
*r++ = (char)(k&7) + '0';
} else {
*r++ = (char)(k>>6) + '0';
*r++ = (char)((k&63)>>3) + '0';
*r++ = (char)(k&7) + '0';
}
}
}
else
if (k < 0x80)
#endif
*r++ = (char)k;
Expand Down Expand Up @@ -298,7 +335,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
AV *postav, I32 *levelp, I32 indent, SV *pad, SV *xpad,
SV *apad, SV *sep, SV *pair, SV *freezer, SV *toaster, I32 purity,
I32 deepcopy, I32 quotekeys, SV *bless, I32 maxdepth, SV *sortkeys,
int use_sparse_seen_hash)
int use_sparse_seen_hash, I32 useqq)
{
char tmpbuf[128];
U32 i;
Expand Down Expand Up @@ -524,15 +561,15 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
postav, levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
maxdepth, sortkeys, use_sparse_seen_hash);
maxdepth, sortkeys, use_sparse_seen_hash, useqq);
sv_catpvn(retval, ")}", 2);
} /* plain */
else {
sv_catpvn(retval, "\\", 1);
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
postav, levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
maxdepth, sortkeys, use_sparse_seen_hash);
maxdepth, sortkeys, use_sparse_seen_hash, useqq);
}
SvREFCNT_dec(namesv);
}
Expand All @@ -544,7 +581,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ ival, SvPVX_const(namesv), SvCUR(namesv), retval, seenhv,
postav, levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
maxdepth, sortkeys, use_sparse_seen_hash);
maxdepth, sortkeys, use_sparse_seen_hash, useqq);
SvREFCNT_dec(namesv);
}
else if (realtype == SVt_PVAV) {
Expand Down Expand Up @@ -617,7 +654,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ elem, iname, ilen, retval, seenhv, postav,
levelp, indent, pad, xpad, apad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
maxdepth, sortkeys, use_sparse_seen_hash);
maxdepth, sortkeys, use_sparse_seen_hash, useqq);
if (ix < ixmax)
sv_catpvn(retval, ",", 1);
}
Expand Down Expand Up @@ -777,9 +814,9 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
The code is also smaller (22044 vs 22260) because I've been
able to pull the common logic out to both sides. */
if (quotekeys || needs_quote(key,keylen)) {
if (do_utf8) {
if (do_utf8 || useqq) {
STRLEN ocur = SvCUR(retval);
nlen = esc_q_utf8(aTHX_ retval, key, klen);
nlen = esc_q_utf8(aTHX_ retval, key, klen, do_utf8, useqq);
nkey = SvPVX(retval) + ocur;
}
else {
Expand Down Expand Up @@ -824,7 +861,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
DD_dump(aTHX_ hval, SvPVX_const(sname), SvCUR(sname), retval, seenhv,
postav, levelp, indent, pad, xpad, newapad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys, bless,
maxdepth, sortkeys, use_sparse_seen_hash);
maxdepth, sortkeys, use_sparse_seen_hash, useqq);
SvREFCNT_dec(sname);
Safefree(nkey_buffer);
if (indent >= 2)
Expand Down Expand Up @@ -973,7 +1010,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
r = SvPVX(retval)+SvCUR(retval);
r[0] = '*'; r[1] = '{';
SvCUR_set(retval, SvCUR(retval)+2);
esc_q_utf8(aTHX_ retval, c, i);
esc_q_utf8(aTHX_ retval, c, i, 1, useqq);
sv_grow(retval, SvCUR(retval)+2);
r = SvPVX(retval)+SvCUR(retval);
r[0] = '}'; r[1] = '\0';
Expand Down Expand Up @@ -1033,7 +1070,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
seenhv, postav, &nlevel, indent, pad, xpad,
newapad, sep, pair, freezer, toaster, purity,
deepcopy, quotekeys, bless, maxdepth,
sortkeys, use_sparse_seen_hash);
sortkeys, use_sparse_seen_hash, useqq);
SvREFCNT_dec(e);
}
}
Expand Down Expand Up @@ -1062,8 +1099,8 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
else {
integer_came_from_string:
c = SvPV(val, i);
if (DO_UTF8(val))
i += esc_q_utf8(aTHX_ retval, c, i);
if (DO_UTF8(val) || useqq)
i += esc_q_utf8(aTHX_ retval, c, i, DO_UTF8(val), useqq);
else {
sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */
r = SvPVX(retval) + SvCUR(retval);
Expand Down Expand Up @@ -1108,7 +1145,7 @@ Data_Dumper_Dumpxs(href, ...)
HV *seenhv = NULL;
AV *postav, *todumpav, *namesav;
I32 level = 0;
I32 indent, terse, i, imax, postlen;
I32 indent, terse, useqq, i, imax, postlen;
SV **svp;
SV *val, *name, *pad, *xpad, *apad, *sep, *pair, *varname;
SV *freezer, *toaster, *bless, *sortkeys;
Expand Down Expand Up @@ -1149,7 +1186,7 @@ Data_Dumper_Dumpxs(href, ...)
= freezer = toaster = bless = sortkeys = &PL_sv_undef;
name = sv_newmortal();
indent = 2;
terse = purity = deepcopy = 0;
terse = purity = deepcopy = useqq = 0;
quotekeys = 1;

retval = newSVpvn("", 0);
Expand All @@ -1173,10 +1210,8 @@ Data_Dumper_Dumpxs(href, ...)
purity = SvIV(*svp);
if ((svp = hv_fetch(hv, "terse", 5, FALSE)))
terse = SvTRUE(*svp);
#if 0 /* useqq currently unused */
if ((svp = hv_fetch(hv, "useqq", 5, FALSE)))
useqq = SvTRUE(*svp);
#endif
if ((svp = hv_fetch(hv, "pad", 3, FALSE)))
pad = *svp;
if ((svp = hv_fetch(hv, "xpad", 4, FALSE)))
Expand Down Expand Up @@ -1280,7 +1315,7 @@ Data_Dumper_Dumpxs(href, ...)
DD_dump(aTHX_ val, SvPVX_const(name), SvCUR(name), valstr, seenhv,
postav, &level, indent, pad, xpad, newapad, sep, pair,
freezer, toaster, purity, deepcopy, quotekeys,
bless, maxdepth, sortkeys, use_sparse_seen_hash);
bless, maxdepth, sortkeys, use_sparse_seen_hash, useqq);
SPAGAIN;

if (indent >= 2 && !terse)
Expand Down
17 changes: 3 additions & 14 deletions dist/Data-Dumper/t/dumper.t
Expand Up @@ -307,20 +307,9 @@ $foo = { "abc\000\'\efg" => "mno\000",
{
local $Data::Dumper::Useqq = 1;
TEST q(Dumper($foo));
TEST q(Data::Dumper::DumperX($foo)) if $XS;
}

$WANT = <<"EOT";
#\$VAR1 = {
# 'abc\0\\'\efg' => 'mno\0',
# 'reftest' => \\\\1
#};
EOT

{
local $Data::Dumper::Useqq = 1;
TEST q(Data::Dumper::DumperX($foo)) if $XS; # cheat
}



#############
Expand Down Expand Up @@ -1461,7 +1450,7 @@ EOT
$foo = [ join "", map chr, 0..255 ];
local $Data::Dumper::Useqq = 1;
TEST q(Dumper($foo)), 'All latin1 characters';
for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS;
TEST q(Data::Dumper::DumperX($foo)) if $XS;
}

############# 372
Expand All @@ -1481,7 +1470,7 @@ EOT
TEST q(Dumper($foo)),
'All latin1 characters with utf8 flag including a wide character';
}
for (1..3) { print "not ok " . (++$TNUM) . " # TODO NYI\n" if $XS } # TEST q(Data::Dumper::DumperX($foo)) if $XS;
TEST q(Data::Dumper::DumperX($foo)) if $XS;
}

############# 378
Expand Down

0 comments on commit 9baac1a

Please sign in to comment.