Skip to content

Commit

Permalink
Make ext/XS-APItest/t/cophh.t work on EBCDIC
Browse files Browse the repository at this point in the history
The new EBCDIC-only code will also work on ASCII platforms, but I left
the ASCII code as-is.
  • Loading branch information
khwilliamson committed Sep 25, 2015
1 parent 95c6a21 commit 7290064
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 2 deletions.
2 changes: 1 addition & 1 deletion ext/XS-APItest/APItest.pm
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use strict;
use warnings;
use Carp;

our $VERSION = '0.75';
our $VERSION = '0.76';

require XSLoader;

Expand Down
145 changes: 144 additions & 1 deletion ext/XS-APItest/APItest.xs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,61 @@ typedef PTR_TBL_t *XS__APItest__PtrTable;
#define croak_fail() croak("fail at " __FILE__ " line %d", __LINE__)
#define croak_fail_ne(h, w) croak("fail %p!=%p at " __FILE__ " line %d", (h), (w), __LINE__)

#ifdef EBCDIC

void
cat_utf8a2n(SV* sv, const char * const ascii_utf8, STRLEN len)
{
    /* Converts the variant UTF-8 text pointed to by 'ascii_utf8', of length
     * 'len' bytes, to UTF-EBCDIC, appending the result to the text already in
     * 'sv'.  Currently doesn't work on invariants, as that is unneeded here,
     * and we could get double translations if we did.
     *
     * The algorithm for strict UTF-8 is hard-coded here to find the code
     * point each sequence represents; uvchr_to_utf8() is then called to
     * convert that code point to UTF-EBCDIC.
     *
     * Note that this uses code points, not characters.  Thus if the input is
     * the UTF-8 for the code point 0xFF, the output will be the UTF-EBCDIC
     * for 0xFF, even though that code point represents different characters
     * on ASCII vs EBCDIC platforms.
     *
     * Croaks if a byte that should start a sequence is below 0xC2 (i.e. is an
     * invariant, a continuation byte, or an overlong start byte), or if a
     * sequence has fewer continuation bytes than its start byte calls for.
     * Surplus continuation bytes are not absorbed into the preceding
     * character; they fail the start-byte check on the next iteration. */

    dTHX;
    const char * p = ascii_utf8;    /* No need to cast away the const */
    const char * const e = p + len;

    while (p < e) {
        UV code_point;
        U8 native_utf8[UTF8_MAXBYTES + 1];
        U8 * char_end;
        const U8 start = (U8) *p;

        /* Start bytes are the same in both UTF-8 and I8, therefore we can
         * treat this ASCII UTF-8 byte as an I8 byte.  But PL_utf8skip[] is
         * indexed by NATIVE_UTF8 bytes, so transform to that */
        const STRLEN char_bytes_len = PL_utf8skip[I8_TO_NATIVE_UTF8(start)];

        /* Sequences of 7 or more bytes contribute no payload bits from the
         * start byte, and their total length may not agree between ASCII
         * UTF-8 and UTF-EBCDIC, so the table value is only used to police the
         * continuation count for the shorter forms. */
        const bool length_known = (char_bytes_len < 7);

        STRLEN bytes_seen = 1;      /* Counts the start byte */

        if (start < 0xc2) {
            croak("fail: Expecting start byte, instead got 0x%X at %s line %d",
                  start, __FILE__, __LINE__);
        }

        /* The payload bits in the start byte shrink by one for each extra
         * byte in the sequence: 5 bits for 2 bytes, 4 for 3, ... */
        code_point = start & (length_known
                              ? (0x1F >> (char_bytes_len - 2))
                              : 0x00);
        p++;

        /* Each continuation byte (10xxxxxx) supplies 6 more payload bits.
         * Stop once the start byte's quota is met so that stray continuation
         * bytes aren't silently folded into this code point. */
        while (   p < e
               && (((U8) *p) & 0xC0) == 0x80
               && (! length_known || bytes_seen < char_bytes_len))
        {
            code_point = (code_point << 6) | (((U8) *p) & 0x3F);
            p++;
            bytes_seen++;
        }

        if (length_known && bytes_seen < char_bytes_len) {
            croak("fail: Truncated sequence: start byte 0x%X needs %d bytes,"
                  " got %d at %s line %d",
                  start, (int) char_bytes_len, (int) bytes_seen,
                  __FILE__, __LINE__);
        }

        char_end = uvchr_to_utf8(native_utf8, code_point);
        sv_catpvn(sv, (char *) native_utf8, char_end - native_utf8);
    }
}

#endif

/* for my_cxt tests */

#define MY_CXT_KEY "XS::APItest::_guts" XS_VERSION
Expand Down Expand Up @@ -2907,6 +2962,11 @@ void
test_cophh()
PREINIT:
COPHH *a, *b;
#ifdef EBCDIC
SV* key_sv;
char * key_name;
STRLEN key_len;
#endif
CODE:
#define check_ph(EXPR) \
do { if((EXPR) != &PL_sv_placeholder) croak("fail"); } while(0)
Expand Down Expand Up @@ -2970,24 +3030,81 @@ test_cophh()
check_iv(cophh_fetch_pvs(a, "foo_3", 0), 333);
check_iv(cophh_fetch_pvs(a, "foo_4", 0), 444);
check_ph(cophh_fetch_pvs(a, "foo_5", 0));
a = cophh_store_pvs(a, "foo_1", msviv(11111), COPHH_KEY_UTF8);
a = cophh_store_pvs(a, "foo_1", msviv(11111), COPHH_KEY_UTF8);
a = cophh_store_pvs(a, "foo_\xaa", msviv(123), 0);
#ifndef EBCDIC
a = cophh_store_pvs(a, "foo_\xc2\xbb", msviv(456), COPHH_KEY_UTF8);
#else
/* On EBCDIC, we need to translate the UTF-8 in the ASCII test to the
* equivalent UTF-EBCDIC for the code page. This is done at runtime
* (with the helper function in this file). Therefore we can't use
* cophh_store_pvs(), as we don't have a literal string */
key_sv = sv_2mortal(newSVpvs("foo_"));
cat_utf8a2n(key_sv, STR_WITH_LEN("\xc2\xbb"));
key_name = SvPV(key_sv, key_len);
a = cophh_store_pvn(a, key_name, key_len, 0, msviv(456), COPHH_KEY_UTF8);
#endif
#ifndef EBCDIC
a = cophh_store_pvs(a, "foo_\xc3\x8c", msviv(789), COPHH_KEY_UTF8);
#else
sv_setpvs(key_sv, "foo_");
cat_utf8a2n(key_sv, STR_WITH_LEN("\xc3\x8c"));
key_name = SvPV(key_sv, key_len);
a = cophh_store_pvn(a, key_name, key_len, 0, msviv(789), COPHH_KEY_UTF8);
#endif
#ifndef EBCDIC
a = cophh_store_pvs(a, "foo_\xd9\xa6", msviv(666), COPHH_KEY_UTF8);
#else
sv_setpvs(key_sv, "foo_");
cat_utf8a2n(key_sv, STR_WITH_LEN("\xd9\xa6"));
key_name = SvPV(key_sv, key_len);
a = cophh_store_pvn(a, key_name, key_len, 0, msviv(666), COPHH_KEY_UTF8);
#endif
check_iv(cophh_fetch_pvs(a, "foo_1", 0), 11111);
check_iv(cophh_fetch_pvs(a, "foo_1", COPHH_KEY_UTF8), 11111);
check_iv(cophh_fetch_pvs(a, "foo_\xaa", 0), 123);
#ifndef EBCDIC
check_iv(cophh_fetch_pvs(a, "foo_\xc2\xaa", COPHH_KEY_UTF8), 123);
check_ph(cophh_fetch_pvs(a, "foo_\xc2\xaa", 0));
#else
sv_setpvs(key_sv, "foo_");
cat_utf8a2n(key_sv, STR_WITH_LEN("\xc2\xaa"));
key_name = SvPV(key_sv, key_len);
check_iv(cophh_fetch_pvn(a, key_name, key_len, 0, COPHH_KEY_UTF8), 123);
check_ph(cophh_fetch_pvn(a, key_name, key_len, 0, 0));
#endif
check_iv(cophh_fetch_pvs(a, "foo_\xbb", 0), 456);
#ifndef EBCDIC
check_iv(cophh_fetch_pvs(a, "foo_\xc2\xbb", COPHH_KEY_UTF8), 456);
check_ph(cophh_fetch_pvs(a, "foo_\xc2\xbb", 0));
#else
sv_setpvs(key_sv, "foo_");
cat_utf8a2n(key_sv, STR_WITH_LEN("\xc2\xbb"));
key_name = SvPV(key_sv, key_len);
check_iv(cophh_fetch_pvn(a, key_name, key_len, 0, COPHH_KEY_UTF8), 456);
check_ph(cophh_fetch_pvn(a, key_name, key_len, 0, 0));
#endif
check_iv(cophh_fetch_pvs(a, "foo_\xcc", 0), 789);
#ifndef EBCDIC
check_iv(cophh_fetch_pvs(a, "foo_\xc3\x8c", COPHH_KEY_UTF8), 789);
check_ph(cophh_fetch_pvs(a, "foo_\xc2\x8c", 0));
#else
sv_setpvs(key_sv, "foo_");
cat_utf8a2n(key_sv, STR_WITH_LEN("\xc3\x8c"));
key_name = SvPV(key_sv, key_len);
check_iv(cophh_fetch_pvn(a, key_name, key_len, 0, COPHH_KEY_UTF8), 789);
check_ph(cophh_fetch_pvn(a, key_name, key_len, 0, 0));
#endif
#ifndef EBCDIC
check_iv(cophh_fetch_pvs(a, "foo_\xd9\xa6", COPHH_KEY_UTF8), 666);
check_ph(cophh_fetch_pvs(a, "foo_\xd9\xa6", 0));
#else
sv_setpvs(key_sv, "foo_");
cat_utf8a2n(key_sv, STR_WITH_LEN("\xd9\xa6"));
key_name = SvPV(key_sv, key_len);
check_iv(cophh_fetch_pvn(a, key_name, key_len, 0, COPHH_KEY_UTF8), 666);
check_ph(cophh_fetch_pvn(a, key_name, key_len, 0, 0));
#endif
ENTER;
SAVEFREECOPHH(a);
LEAVE;
Expand Down Expand Up @@ -3022,15 +3139,41 @@ HV *
example_cophh_2hv()
PREINIT:
COPHH *a;
#ifdef EBCDIC
SV* key_sv;
char * key_name;
STRLEN key_len;
#endif
CODE:
#define msviv(VALUE) sv_2mortal(newSViv(VALUE))
a = cophh_new_empty();
a = cophh_store_pvs(a, "foo_0", msviv(999), 0);
a = cophh_store_pvs(a, "foo_1", msviv(111), 0);
a = cophh_store_pvs(a, "foo_\xaa", msviv(123), 0);
#ifndef EBCDIC
a = cophh_store_pvs(a, "foo_\xc2\xbb", msviv(456), COPHH_KEY_UTF8);
#else
key_sv = sv_2mortal(newSVpvs("foo_"));
cat_utf8a2n(key_sv, STR_WITH_LEN("\xc2\xbb"));
key_name = SvPV(key_sv, key_len);
a = cophh_store_pvn(a, key_name, key_len, 0, msviv(456), COPHH_KEY_UTF8);
#endif
#ifndef EBCDIC
a = cophh_store_pvs(a, "foo_\xc3\x8c", msviv(789), COPHH_KEY_UTF8);
#else
sv_setpvs(key_sv, "foo_");
cat_utf8a2n(key_sv, STR_WITH_LEN("\xc3\x8c"));
key_name = SvPV(key_sv, key_len);
a = cophh_store_pvn(a, key_name, key_len, 0, msviv(789), COPHH_KEY_UTF8);
#endif
#ifndef EBCDIC
a = cophh_store_pvs(a, "foo_\xd9\xa6", msviv(666), COPHH_KEY_UTF8);
#else
sv_setpvs(key_sv, "foo_");
cat_utf8a2n(key_sv, STR_WITH_LEN("\xd9\xa6"));
key_name = SvPV(key_sv, key_len);
a = cophh_store_pvn(a, key_name, key_len, 0, msviv(666), COPHH_KEY_UTF8);
#endif
a = cophh_delete_pvs(a, "foo_0", 0);
a = cophh_delete_pvs(a, "foo_2", 0);
RETVAL = cophh_2hv(a, 0);
Expand Down

0 comments on commit 7290064

Please sign in to comment.