Permalink
Browse files

removed unused files, fixed bugs, added the crappy ICU EUC-JP support

git-svn-id: http://svn.macosforge.org/repository/ruby/MacRuby/trunk@4053 23306eb0-4c56-4727-a40e-e92c0eb68959
  • Loading branch information...
1 parent d0ac593 commit e23dfced1b7672027ed053fdb979024a0cb76856 @vincentisambart vincentisambart committed May 9, 2010
Showing with 36 additions and 582 deletions.
  1. +1 −0 encoding.c
  2. +1 −1 encoding.h
  3. +0 −2 inits.c
  4. +1 −1 rakelib/builder/builder.rb
  5. +0 −8 spec/frozen/tags/macruby/core/string/encode_tags.txt
  6. +33 −10 string.c
  7. +0 −474 transcode.c
  8. +0 −86 transcode_data.h
View
@@ -268,6 +268,7 @@ Init_PreEncoding(void)
add_encoding(ENCODING_ISO8859_1, ENCODING_TYPE_UCNV, "ISO-8859-1", 1, true, true, "ISO8859-1", NULL);
add_encoding(ENCODING_MACROMAN, ENCODING_TYPE_UCNV, "macRoman", 1, true, true, NULL);
// FIXME: the ICU conversion tables do not seem to match Ruby's Japanese conversion tables
+ add_encoding(ENCODING_EUCJP, ENCODING_TYPE_UCNV, "EUC-JP", 1, false, true, "eucJP", NULL);
//add_encoding(ENCODING_EUCJP, ENCODING_TYPE_RUBY, "EUC-JP", 1, false, true, "eucJP", NULL);
//add_encoding(ENCODING_SJIS, ENCODING_TYPE_RUBY, "Shift_JIS", 1, false, true, "SJIS", NULL);
//add_encoding(ENCODING_CP932, ENCODING_TYPE_RUBY, "Windows-31J", 1, false, true, "CP932", "csWindows31J", NULL);
View
@@ -145,7 +145,7 @@ enum {
ENCODING_UTF32LE,
ENCODING_ISO8859_1,
ENCODING_MACROMAN,
- //ENCODING_EUCJP,
+ ENCODING_EUCJP,
//ENCODING_SJIS,
//ENCODING_CP932,
View
@@ -16,7 +16,6 @@ void Init_Bignum(void);
void Init_Binding(void);
void Init_Comparable(void);
void Init_Complex(void);
-void Init_transcode(void);
void Init_Dir(void);
void Init_Enumerable(void);
void Init_Enumerator(void);
@@ -87,7 +86,6 @@ rb_call_inits()
Init_Struct();
Init_Regexp();
Init_pack();
- Init_transcode();
Init_marshal();
Init_Range();
Init_IO();
@@ -3,7 +3,7 @@
OBJS = %w{
array bignum class compar complex enum enumerator error eval file load proc
gc hash env inits io math numeric object pack parse prec dir process
- random range rational re ruby signal sprintf st string struct time transcode
+ random range rational re ruby signal sprintf st string struct time
util variable version thread id objc bs ucnv encoding main dln dmyext marshal
gcd vm_eval prelude miniprelude gc-stub bridgesupport compiler dispatcher vm
symbol debugger MacRuby MacRubyDebuggerConnector NSArray NSDictionary
@@ -1,19 +1,11 @@
-fails:String#encode! transcodes to the default internal encoding with no argument
-fails:String#encode! transcodes self to the given encoding
fails:String#encode! can convert between encodings where a multi-stage conversion path is needed
-fails:String#encode! raises an Encoding::InvalidByteSequenceError for invalid byte sequences
fails:String#encode! raises UndefinedConversionError if the String contains characters invalid for the target encoding
fails:String#encode! raises Encoding::ConverterNotFoundError for invalid target encodings
-fails:String#encode transcodes to the default internal encoding with no argument
fails:String#encode returns self when called with only a target encoding
-fails:String#encode transcodes self to the given encoding
fails:String#encode can convert between encodings where a multi-stage conversion path is needed
-fails:String#encode raises an Encoding::InvalidByteSequenceError for invalid byte sequences
fails:String#encode raises UndefinedConversionError if the String contains characters invalid for the target encoding
fails:String#encode raises Encoding::ConverterNotFoundError for invalid target encodings
-fails:String#encode! replaces undefined characters
fails:String#encode! replaces xml characters
fails:String#encode! replaces xml characters and quotes the result
-fails:String#encode replaces undefined characters
fails:String#encode replaces xml characters
fails:String#encode replaces xml characters and quotes the result
View
@@ -1248,6 +1248,17 @@ enum {
TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_TEXT,
TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_ATTR
};
+
+
+static rb_str_t *
+str_transcode(rb_str_t *self, rb_encoding_t *src_encoding, rb_encoding_t *dst_encoding,
+ int behavior_for_invalid, int behavior_for_undefined, rb_str_t *replacement_str);
+static inline rb_str_t * // convenience wrapper: returns whatever str_transcode() returns
+str_simple_transcode(rb_str_t *self, rb_encoding_t *dst_encoding)
+{
+ return str_transcode(self, self->encoding, dst_encoding, // source encoding is the string's own encoding
+ TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, NULL); // raise for both invalid and undefined input; no replacement string is passed
+}
static rb_str_t *
str_transcode(rb_str_t *self, rb_encoding_t *src_encoding, rb_encoding_t *dst_encoding,
int behavior_for_invalid, int behavior_for_undefined, rb_str_t *replacement_str)
@@ -1256,7 +1267,7 @@ str_transcode(rb_str_t *self, rb_encoding_t *src_encoding, rb_encoding_t *dst_en
|| (behavior_for_undefined == TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING)) {
assert(replacement_str != NULL);
assert(replacement_str->encoding != NULL);
- assert(replacement_str->encoding == dst_encoding);
+ assert((replacement_str->length_in_bytes == 0) || (replacement_str->encoding == dst_encoding));
}
rb_str_t *dst_str = str_alloc(rb_cRubyString);
@@ -1326,10 +1337,24 @@ str_transcode(rb_str_t *self, rb_encoding_t *src_encoding, rb_encoding_t *dst_en
rb_raise(rb_eUndefinedConversionError, "U+%04X from %s to %s", c, src_encoding->public_name, dst_encoding->public_name);
break;
case TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING:
- str_concat_bytes(dst_str, replacement_str->data.bytes, replacement_str->length_in_bytes);
+ if (replacement_str->length_in_bytes > 0) {
+ str_concat_bytes(dst_str, replacement_str->data.bytes, replacement_str->length_in_bytes);
+ }
break;
case TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_TEXT:
case TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_ATTR:
+ {
+ // Emit an XML numeric character reference for the character that has
+ // no representation in the destination encoding.
+ // "&#x" + up to 8 hex digits (a 32-bit value) + ";" + NUL needs 13 bytes.
+ // A 10-byte buffer truncated the trailing ';' for U+100000..U+10FFFF.
+ char xml[16];
+ snprintf(xml, sizeof(xml), "&#x%X;", c);
+ if (dst_encoding->ascii_compatible) {
+ // the reference only contains ASCII characters, so its bytes are
+ // appended to the destination unchanged
+ str_concat_bytes(dst_str, xml, strlen(xml));
+ }
+ else {
+ // destination is not ASCII-compatible: transcode the reference
+ // to the destination encoding before appending it
+ rb_str_t *xml_str = RSTR(rb_str_new2(xml));
+ xml_str = str_simple_transcode(xml_str, dst_encoding);
+ str_concat_bytes(dst_str, xml_str->data.bytes, xml_str->length_in_bytes);
+ }
+ }
break;
default:
abort();
@@ -1360,7 +1385,9 @@ str_transcode(rb_str_t *self, rb_encoding_t *src_encoding, rb_encoding_t *dst_en
}
break;
case TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING:
- str_concat_bytes(dst_str, replacement_str->data.bytes, replacement_str->length_in_bytes);
+ if (replacement_str->length_in_bytes > 0) {
+ str_concat_bytes(dst_str, replacement_str->data.bytes, replacement_str->length_in_bytes);
+ }
break;
default:
abort();
@@ -1817,10 +1844,8 @@ rstr_encode(VALUE str, SEL sel, int argc, VALUE *argv)
VALUE replacement = rb_hash_aref(opt, replace_sym);
if (!NIL_P(replacement)) {
replacement_str = str_need_string(replacement);
- if (replacement_str->encoding != dst_encoding) {
- replacement_str = str_transcode(replacement_str, replacement_str->encoding,
- dst_encoding, TRANSCODE_BEHAVIOR_RAISE_EXCEPTION,
- TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, NULL);
+ if ((replacement_str->encoding != dst_encoding) && (replacement_str->length_in_bytes > 0)) {
+ replacement_str = str_simple_transcode(replacement_str, dst_encoding);
}
if ((behavior_for_invalid != TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING)
&& (behavior_for_undefined == TRANSCODE_BEHAVIOR_RAISE_EXCEPTION)) {
@@ -1849,9 +1874,7 @@ rstr_encode(VALUE str, SEL sel, int argc, VALUE *argv)
}
else {
replacement_str = RSTR(rb_enc_str_new("?", 1, rb_encodings[ENCODING_ASCII]));
- replacement_str = str_transcode(replacement_str, replacement_str->encoding,
- dst_encoding, TRANSCODE_BEHAVIOR_RAISE_EXCEPTION,
- TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, NULL);
+ replacement_str = str_simple_transcode(replacement_str, dst_encoding);
}
}
Oops, something went wrong.

0 comments on commit e23dfce

Please sign in to comment.