Permalink
Browse files

Add support for Encoding::Converter and move String#encode and String#encode! into the corresponding file.

git-svn-id: http://svn.macosforge.org/repository/ruby/MacRuby/trunk@4182 23306eb0-4c56-4727-a40e-e92c0eb68959
  • Loading branch information...
1 parent 70ea0b5 commit ffe45d2b9239322228346351c725e00894cf54c0 Patrick Thomson committed Jun 1, 2010
View
@@ -146,6 +146,32 @@ mr_enc_dummy_p(VALUE self, SEL sel)
return Qfalse;
}
+/* Builds the replacement string for the `destination` encoding
+ * (presumably the text substituted for characters that cannot be
+ * converted; callers are not visible here).
+ *  - UTF-16BE/LE, UTF-32BE/LE and UTF-8: U+FFFD (REPLACEMENT CHARACTER),
+ *    written out below as raw bytes in each encoding's form and byte order.
+ *  - any other encoding: an ASCII "?" transcoded into `destination`.
+ * Returns the newly created rb_str_t.
+ */
+rb_str_t *replacement_string_for_encoding(rb_encoding_t* destination)
+{
+ rb_str_t *replacement_str = NULL;
+ if (destination == rb_encodings[ENCODING_UTF16BE]) {
+ replacement_str = RSTR(rb_enc_str_new("\xFF\xFD", 2, destination)); // U+FFFD, 16-bit big-endian
+ }
+ else if (destination == rb_encodings[ENCODING_UTF32BE]) {
+ replacement_str = RSTR(rb_enc_str_new("\0\0\xFF\xFD", 4, destination)); // U+FFFD, 32-bit big-endian
+ }
+ else if (destination == rb_encodings[ENCODING_UTF16LE]) {
+ replacement_str = RSTR(rb_enc_str_new("\xFD\xFF", 2, destination)); // U+FFFD, 16-bit little-endian
+ }
+ else if (destination == rb_encodings[ENCODING_UTF32LE]) {
+ replacement_str = RSTR(rb_enc_str_new("\xFD\xFF\0\0", 4, destination)); // U+FFFD, 32-bit little-endian
+ }
+ else if (destination == rb_encodings[ENCODING_UTF8]) {
+ replacement_str = RSTR(rb_enc_str_new("\xEF\xBF\xBD", 3, destination)); // U+FFFD as three UTF-8 bytes
+ }
+ else {
+ replacement_str = RSTR(rb_enc_str_new("?", 1, rb_encodings[ENCODING_ASCII])); // start from a one-byte ASCII "?"
+ replacement_str = str_simple_transcode(replacement_str, destination); // then convert it into the destination encoding
+ }
+ return replacement_str;
+}
+
static void
define_encoding_constant(const char *name, rb_encoding_t *encoding)
{
@@ -291,6 +317,7 @@ Init_PreEncoding(void)
add_encoding(ENCODING_BIG5, ENCODING_TYPE_UCNV, "Big5", 1, false, true, "CP950", NULL);
// FIXME: the ICU conversion tables do not seem to match Ruby's Japanese conversion tables
add_encoding(ENCODING_EUCJP, ENCODING_TYPE_UCNV, "EUC-JP", 1, false, true, "eucJP", NULL);
+ add_encoding(ENCODING_SJIS, ENCODING_TYPE_UCNV, "Shift_JIS", 1, false, true, "SJIS", NULL);
//add_encoding(ENCODING_EUCJP, ENCODING_TYPE_RUBY, "EUC-JP", 1, false, true, "eucJP", NULL);
//add_encoding(ENCODING_SJIS, ENCODING_TYPE_RUBY, "Shift_JIS", 1, false, true, "SJIS", NULL);
//add_encoding(ENCODING_CP932, ENCODING_TYPE_RUBY, "Windows-31J", 1, false, true, "CP932", "csWindows31J", NULL);
View
@@ -150,7 +150,7 @@ enum {
ENCODING_MACCYRILLIC,
ENCODING_BIG5,
ENCODING_EUCJP,
- //ENCODING_SJIS,
+ ENCODING_SJIS,
//ENCODING_CP932,
ENCODINGS_COUNT
@@ -295,6 +295,40 @@ str_set_valid_encoding(rb_str_t *self, bool status)
STRING_VALID_ENCODING);
}
+typedef enum { /* Behavior selectors for a transcode operation.
+                * str_simple_transcode passes these as str_transcode's
+                * behavior_for_invalid / behavior_for_undefined arguments.
+                * NOTE(review): judging by the names, each value picks between
+                * raising and substituting a replacement; confirm against
+                * str_transcode, whose body is not visible here. */
+ TRANSCODE_BEHAVIOR_RAISE_EXCEPTION,
+ TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING,
+ TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_TEXT,
+ TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_ATTR
+} transcode_behavior_t;
+/* Transcoding option flags. Each constant occupies its own bit (bit 0
+ * through bit 12), so values can be combined with bitwise OR.
+ * NOTE(review): the names mirror the Encoding::Converter constants
+ * (INVALID_MASK, UNDEF_REPLACE, ...); presumably these values back those
+ * Ruby constants, but the code exposing them is not visible here.
+ * NOTE(review): the *_MASK entries are single bits here rather than
+ * multi-bit masks covering the related flags; confirm that is intended.
+ */
+typedef enum {
+ ECONV_INVALID_MASK = 1,
+ ECONV_INVALID_REPLACE = 1 << 1,
+ ECONV_UNDEF_MASK = 1 << 2,
+ ECONV_UNDEF_REPLACE = 1 << 3,
+ ECONV_UNDEF_HEX_CHARREF = 1 << 4,
+ ECONV_PARTIAL_INPUT = 1 << 5,
+ ECONV_AFTER_OUTPUT = 1 << 6,
+ ECONV_UNIVERSAL_NEWLINE_DECORATOR = 1 << 7,
+ ECONV_CRLF_NEWLINE_DECORATOR = 1 << 8,
+ ECONV_CR_NEWLINE_DECORATOR = 1 << 9,
+ ECONV_XML_TEXT_DECORATOR = 1 << 10,
+ ECONV_XML_ATTR_CONTENT_DECORATOR = 1 << 11,
+ ECONV_XML_ATTR_QUOTE_DECORATOR = 1 << 12
+} transcode_flags_t;
+
+rb_str_t *str_transcode(rb_str_t *self, rb_encoding_t *src_encoding, rb_encoding_t *dst_encoding,
+ int behavior_for_invalid, int behavior_for_undefined, rb_str_t *replacement_str);
+/* Convenience wrapper around str_transcode: converts `self` from its own
+ * encoding (self->encoding) to dst_encoding, passing
+ * TRANSCODE_BEHAVIOR_RAISE_EXCEPTION for both the invalid and the
+ * undefined case and no replacement string (NULL).
+ * Returns whatever str_transcode returns.
+ */
+static inline rb_str_t *
+str_simple_transcode(rb_str_t *self, rb_encoding_t *dst_encoding)
+{
+ return str_transcode(self, self->encoding, dst_encoding,
+ TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, NULL);
+}
+
+
void rb_str_NSCoder_encode(void *coder, VALUE str, const char *key);
VALUE rb_str_NSCoder_decode(void *coder, const char *key);
@@ -321,6 +355,10 @@ unsigned long rb_str_hash_uchars(const UChar *chars, long chars_len);
long rb_uchar_strtol(UniChar *chars, long chars_len, long pos,
long *end_offset);
void rb_str_force_encoding(VALUE str, rb_encoding_t *encoding);
+rb_str_t *str_need_string(VALUE str);
+rb_str_t *replacement_string_for_encoding(rb_encoding_t* enc);
+void str_replace_with_string(rb_str_t *self, rb_str_t *source);
+
#if defined(__cplusplus)
} // extern "C"
View
@@ -58,6 +58,7 @@ void Init_ObjC(void);
void Init_BridgeSupport(void);
void Init_FFI(void);
void Init_Dispatch(void);
+void Init_Transcode(void);
void Init_PostVM(void);
void
@@ -110,5 +111,6 @@ rb_call_inits()
Init_BridgeSupport();
Init_FFI();
Init_Dispatch();
+ Init_Transcode();
Init_PostVM();
}
@@ -6,7 +6,7 @@
random range rational re ruby signal sprintf st string struct time
util variable version thread id objc bs ucnv encoding main dln dmyext marshal
gcd vm_eval gc-stub bridgesupport compiler dispatcher vm symbol debugger MacRuby
- MacRubyDebuggerConnector NSArray NSDictionary NSString
+ MacRubyDebuggerConnector NSArray NSDictionary NSString transcode
}
EXTENSIONS = %w{
@@ -1,7 +1,4 @@
-fails:Encoding::Converter.asciicompat_encoding accepts an encoding name as a String argument
fails:Encoding::Converter.asciicompat_encoding coerces non-String/Encoding objects with #to_str
fails:Encoding::Converter.asciicompat_encoding accepts an Encoding object as an argument
fails:Encoding::Converter.asciicompat_encoding returns a corresponding ASCII compatible encoding for ASCII-incompatible encodings
-fails:Encoding::Converter.asciicompat_encoding returns nil when the given encoding is ASCII compatible
fails:Encoding::Converter.asciicompat_encoding handles encoding names who resolve to nil encodings
-fails:Encoding::Converter.asciicompat_encoding returns nil if called with an encoding it returned previously
@@ -1,26 +0,0 @@
-fails:Encoding::Converter::INVALID_MASK exists
-fails:Encoding::Converter::INVALID_MASK has a Fixnum value
-fails:Encoding::Converter::INVALID_REPLACE exists
-fails:Encoding::Converter::INVALID_REPLACE has a Fixnum value
-fails:Encoding::Converter::UNDEF_MASK exists
-fails:Encoding::Converter::UNDEF_MASK has a Fixnum value
-fails:Encoding::Converter::UNDEF_REPLACE exists
-fails:Encoding::Converter::UNDEF_REPLACE has a Fixnum value
-fails:Encoding::Converter::UNDEF_HEX_CHARREF exists
-fails:Encoding::Converter::UNDEF_HEX_CHARREF has a Fixnum value
-fails:Encoding::Converter::PARTIAL_INPUT exists
-fails:Encoding::Converter::PARTIAL_INPUT has a Fixnum value
-fails:Encoding::Converter::AFTER_OUTPUT exists
-fails:Encoding::Converter::AFTER_OUTPUT has a Fixnum value
-fails:Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR exists
-fails:Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR has a Fixnum value
-fails:Encoding::Converter::CRLF_NEWLINE_DECORATOR exists
-fails:Encoding::Converter::CRLF_NEWLINE_DECORATOR has a Fixnum value
-fails:Encoding::Converter::CR_NEWLINE_DECORATOR exists
-fails:Encoding::Converter::CR_NEWLINE_DECORATOR has a Fixnum value
-fails:Encoding::Converter::XML_TEXT_DECORATOR exists
-fails:Encoding::Converter::XML_TEXT_DECORATOR has a Fixnum value
-fails:Encoding::Converter::XML_ATTR_CONTENT_DECORATOR exists
-fails:Encoding::Converter::XML_ATTR_CONTENT_DECORATOR has a Fixnum value
-fails:Encoding::Converter::XML_ATTR_QUOTE_DECORATOR exists
-fails:Encoding::Converter::XML_ATTR_QUOTE_DECORATOR has a Fixnum value
@@ -1,7 +1,2 @@
-fails:Encoding::Converter#convert returns a String
-fails:Encoding::Converter#convert sets the encoding of the result to the target encoding
-fails:Encoding::Converter#convert transcodes the given String to the target encoding
fails:Encoding::Converter#convert allows Strings of different encodings to the source encoding
-fails:Encoding::Converter#convert reuses the given encoding pair if called multiple times
-fails:Encoding::Converter#convert raises UndefinedConversionError if the String contains characters invalid for the target encoding
-fails:Encoding::Converter#convert raises an ArgumentError if called on a finished stream
+
@@ -1,7 +1,2 @@
-fails:Encoding::Converter#convpath returns an Array
-fails:Encoding::Converter#convpath returns each encoding pair as a sub-Array
-fails:Encoding::Converter#convpath returns each encoding as an Encoding object
fails:Encoding::Converter#convpath returns multiple encoding pairs when direct conversion is impossible
-fails:Encoding::Converter#convpath sets the last element of each pair to the first element of the next
-fails:Encoding::Converter#convpath only lists a source encoding once
fails:Encoding::Converter#convpath indicates if crlf_newline conversion would occur
@@ -1 +1 @@
-fails:Encoding::Converter#destination_encoding returns the destination encoding as an Encoding object
+
@@ -1,8 +1,3 @@
fails:Encoding::Converter#replacement returns '?' in US-ASCII when the destination encoding is not UTF-8
-fails:Encoding::Converter#replacement returns � when the destination encoding is UTF-8
-fails:Encoding::Converter#replacement= accepts a String argument
-fails:Encoding::Converter#replacement= accepts a String argument of arbitrary length
-fails:Encoding::Converter#replacement= raises an TypeError if assigned a non-String argument
-fails:Encoding::Converter#replacement= sets #replacement
fails:Encoding::Converter#replacement= raises an UndefinedConversionError is the argument cannot be converted into the destination encoding
fails:Encoding::Converter#replacement= does not change the replacement character if the argument cannot be converted into the destination encoding
@@ -1,8 +1,3 @@
-fails:Encoding::Converter.search_convpath returns an Array
-fails:Encoding::Converter.search_convpath returns each encoding pair as a sub-Array
-fails:Encoding::Converter.search_convpath returns each encoding as an Encoding object
fails:Encoding::Converter.search_convpath returns multiple encoding pairs when direct conversion is impossible
-fails:Encoding::Converter.search_convpath sets the last element of each pair to the first element of the next
-fails:Encoding::Converter.search_convpath only lists a source encoding once
fails:Encoding::Converter.search_convpath indicates if crlf_newline conversion would occur
fails:Encoding::Converter.search_convpath raises an Encoding::ConverterNotFoundError if no conversion path exists
@@ -1 +1 @@
-fails:Encoding::Converter#source_encoding returns the source encoding as an Encoding object
+
Oops, something went wrong. Retry.

0 comments on commit ffe45d2

Please sign in to comment.