Permalink
Browse files

IO and Marshal now return UTF-8 strings

git-svn-id: http://svn.macosforge.org/repository/ruby/MacRuby/trunk@3782 23306eb0-4c56-4727-a40e-e92c0eb68959
  • Loading branch information...
1 parent 8eb1a6e commit 64b98245cd76b9d3ab747f5d253347afa396e815 @lrz lrz committed Mar 17, 2010
Showing with 33 additions and 16 deletions.
  1. +1 −0 encoding.h
  2. +11 −3 io.c
  3. +3 −1 marshal.c
  4. +18 −12 string.c
View
@@ -306,6 +306,7 @@ void rb_str_append_uchars(VALUE str, const UChar *chars, long len);
unsigned long rb_str_hash_uchars(const UChar *chars, long chars_len);
long rb_uchar_strtol(UniChar *chars, long chars_len, long pos,
long *end_offset);
+void rb_str_force_encoding(VALUE str, rb_encoding_t *encoding);
#if defined(__cplusplus)
} // extern "C"
View
14 io.c
@@ -1118,12 +1118,15 @@ io_read(VALUE io, SEL sel, int argc, VALUE *argv)
rb_io_t *io_struct = ExtractIOStruct(io);
rb_io_assert_readable(io_struct);
+ bool outbuf_created = false;
if (NIL_P(outbuf)) {
outbuf = rb_bstr_new();
+ outbuf_created = true;
}
if (NIL_P(len)) {
- return rb_io_read_all(io_struct, outbuf);
+ rb_io_read_all(io_struct, outbuf);
+ goto bail;
}
const long size = FIX2LONG(len);
@@ -1148,6 +1151,10 @@ io_read(VALUE io, SEL sel, int argc, VALUE *argv)
}
rb_bstr_set_length(outbuf, data_read);
+bail:
+ if (outbuf_created) {
+ rb_str_force_encoding(outbuf, rb_encodings[ENCODING_UTF8]);
+ }
return outbuf;
}
@@ -1365,6 +1372,7 @@ rb_io_gets_m(VALUE io, SEL sel, int argc, VALUE *argv)
OBJ_TAINT(bstr);
io_struct->lineno += 1;
ARGF.lineno = INT2FIX(io_struct->lineno);
+ rb_str_force_encoding(bstr, rb_encodings[ENCODING_UTF8]);
return bstr;
}
@@ -3346,7 +3354,7 @@ rb_f_backquote(VALUE obj, SEL sel, VALUE str)
VALUE outbuf = rb_bstr_new();
rb_io_read_all(ExtractIOStruct(io), outbuf);
rb_io_close(io);
-
+ rb_str_force_encoding(outbuf, rb_encodings[ENCODING_UTF8]);
return outbuf;
}
@@ -3852,7 +3860,7 @@ rb_io_s_readlines(VALUE recv, SEL sel, int argc, VALUE *argv)
void *ptr;
while ((ptr = memchr(&bytes[pos], byte, length - pos)) != NULL) {
const long s = (long)ptr - (long)&bytes[pos] + 1;
- rb_ary_push(ary, rb_bstr_new_with_data(&bytes[pos], s));
+ rb_ary_push(ary, rb_str_new((char *)&bytes[pos], s));
pos += s;
}
}
View
@@ -1023,7 +1023,9 @@ marshal_dump(VALUE self, SEL sel, int argc, VALUE *argv)
if (got_io) {
rb_io_write(port, 0, arg->str);
}
-
+ else {
+ rb_str_force_encoding(port, rb_encodings[ENCODING_UTF8]);
+ }
return port;
}
View
@@ -1533,22 +1533,28 @@ rstr_setbyte(VALUE self, SEL sel, VALUE index, VALUE value)
* Changes the encoding to +encoding+ and returns self.
*/
-static VALUE
-rstr_force_encoding(VALUE self, SEL sel, VALUE encoding)
+void
+rb_str_force_encoding(VALUE str, rb_encoding_t *enc)
{
- rstr_modify(self);
- rb_encoding_t *enc = rb_to_encoding(encoding);
- if (enc != RSTR(self)->encoding) {
- str_make_data_binary(RSTR(self));
- if (NATIVE_UTF16_ENC(RSTR(self)->encoding)) {
- str_set_stored_in_uchars(RSTR(self), false);
+ assert(IS_RSTR(str));
+ if (enc != RSTR(str)->encoding) {
+ str_make_data_binary(RSTR(str));
+ if (NATIVE_UTF16_ENC(RSTR(str)->encoding)) {
+ str_set_stored_in_uchars(RSTR(str), false);
}
- RSTR(self)->encoding = enc;
- str_unset_facultative_flags(RSTR(self));
- if (NATIVE_UTF16_ENC(RSTR(self)->encoding)) {
- str_set_stored_in_uchars(RSTR(self), true);
+ RSTR(str)->encoding = enc;
+ str_unset_facultative_flags(RSTR(str));
+ if (NATIVE_UTF16_ENC(RSTR(str)->encoding)) {
+ str_set_stored_in_uchars(RSTR(str), true);
}
}
+}
+
+static VALUE
+rstr_force_encoding(VALUE self, SEL sel, VALUE encoding)
+{
+ rstr_modify(self);
+ rb_str_force_encoding(self, rb_to_encoding(encoding));
return self;
}

0 comments on commit 64b9824

Please sign in to comment.