Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 487 lines (430 sloc) 15.233 kb
96ab900 more work
Laurent Sansonetti authored
1 /*
2 * MacRuby implementation of Ruby 1.9 String.
3 *
4 * This file is covered by the Ruby license. See COPYING for more details.
5 *
6 * Copyright (C) 2007-2010, Apple Inc. All rights reserved.
7 * Copyright (C) 1993-2007 Yukihiro Matsumoto
8 * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
9 * Copyright (C) 2000 Information-technology Promotion Agency, Japan
10 */
11
2b7d5d5 import vincent's work
Laurent Sansonetti authored
12 #include <string.h>
9c1d230 committing experimental branch content
Laurent Sansonetti authored
13
39b55f1 some work on string
Laurent Sansonetti authored
14 #include "ruby.h"
15 #include "ruby/encoding.h"
16 #include "encoding.h"
17
96ab900 more work
Laurent Sansonetti authored
18 VALUE rb_cEncoding;
8b9745b define Encoding::ASCII_8BIT as a shortcut to US_ASCII (for now)
Laurent Sansonetti authored
19
d0ac593 @vincentisambart an (incomplete) implementation of String#encode
vincentisambart authored
20 rb_encoding_t *default_internal = NULL;
96ab900 more work
Laurent Sansonetti authored
21 static rb_encoding_t *default_external = NULL;
22 rb_encoding_t *rb_encodings[ENCODINGS_COUNT];
9c1d230 committing experimental branch content
Laurent Sansonetti authored
23
96ab900 more work
Laurent Sansonetti authored
24 static void str_undefined_update_flags(rb_str_t *self) { abort(); }
25 static void str_undefined_make_data_binary(rb_str_t *self) { abort(); }
26 static bool str_undefined_try_making_data_uchars(rb_str_t *self) { abort(); }
27 static long str_undefined_length(rb_str_t *self, bool ucs2_mode) { abort(); }
28 static long str_undefined_bytesize(rb_str_t *self) { abort(); }
29 static character_boundaries_t str_undefined_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode) { abort(); }
30 static long str_undefined_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes, bool ucs2_mode) { abort(); }
d0ac593 @vincentisambart an (incomplete) implementation of String#encode
vincentisambart authored
31 static void str_undefined_transcode_to_utf16(struct rb_encoding *src_enc, rb_str_t *self, long *pos, UChar **utf16, long *utf16_length) { abort(); }
32 static void str_undefined_transcode_from_utf16(struct rb_encoding *dst_enc, UChar *utf16, long utf16_length, long *pos, char **bytes, long *bytes_length) { abort(); }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
33
34 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
35 mr_enc_s_list(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
36 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
37 VALUE ary = rb_ary_new2(ENCODINGS_COUNT);
38 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
39 rb_ary_push(ary, (VALUE)rb_encodings[i]);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
40 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
41 return ary;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
42 }
43
44 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
45 mr_enc_s_name_list(VALUE klass, SEL sel)
46 {
47 VALUE ary = rb_ary_new();
48 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
49 rb_encoding_t *encoding = RENC(rb_encodings[i]);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
50 // TODO: use US-ASCII strings
96ab900 more work
Laurent Sansonetti authored
51 rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
52 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
96ab900 more work
Laurent Sansonetti authored
53 rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[j]));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
54 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
55 }
56 return ary;
57 }
58
59 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
60 mr_enc_s_aliases(VALUE klass, SEL sel)
61 {
62 VALUE hash = rb_hash_new();
63 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
64 rb_encoding_t *encoding = RENC(rb_encodings[i]);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
65 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
96ab900 more work
Laurent Sansonetti authored
66 rb_hash_aset(hash, rb_usascii_str_new2(encoding->aliases[j]),
67 rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
68 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
69 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
70 return hash;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
71 }
72
73 static VALUE
4ede652 added #find
Laurent Sansonetti authored
74 mr_enc_s_find(VALUE klass, SEL sel, VALUE name)
75 {
76 StringValue(name);
77 rb_encoding_t *enc = rb_enc_find(RSTRING_PTR(name));
78 if (enc == NULL) {
79 rb_raise(rb_eArgError, "unknown encoding name - %s",
80 RSTRING_PTR(name));
81 }
82 return (VALUE)enc;
83 }
84
85 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
86 mr_enc_s_default_internal(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
87 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
88 return (VALUE)default_internal;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
89 }
90
91 static VALUE
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
92 mr_enc_set_default_internal(VALUE klass, SEL sel, VALUE enc)
93 {
94 default_internal = rb_to_encoding(enc);
95 return (VALUE)default_internal;
96 }
97
98 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
99 mr_enc_s_default_external(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
100 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
101 return (VALUE)default_external;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
102 }
103
104 static VALUE
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
105 mr_enc_set_default_external(VALUE klass, SEL sel, VALUE enc)
106 {
107 default_external = rb_to_encoding(enc);
108 return (VALUE)default_external;
109 }
110
111 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
112 mr_enc_name(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
113 {
96ab900 more work
Laurent Sansonetti authored
114 return rb_usascii_str_new2(RENC(self)->public_name);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
115 }
116
117 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
118 mr_enc_inspect(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
119 {
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
120 return rb_sprintf("#<%s:%s>", rb_obj_classname(self),
96ab900 more work
Laurent Sansonetti authored
121 RENC(self)->public_name);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
122 }
123
124 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
125 mr_enc_names(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
126 {
96ab900 more work
Laurent Sansonetti authored
127 rb_encoding_t *encoding = RENC(self);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
128
2b7d5d5 import vincent's work
Laurent Sansonetti authored
129 VALUE ary = rb_ary_new2(encoding->aliases_count + 1);
96ab900 more work
Laurent Sansonetti authored
130 rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
131 for (unsigned int i = 0; i < encoding->aliases_count; ++i) {
96ab900 more work
Laurent Sansonetti authored
132 rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[i]));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
133 }
134 return ary;
1623532 added Encoding#default_external= and Encoding#default_internal= which…
Laurent Sansonetti authored
135 }
136
2b7d5d5 import vincent's work
Laurent Sansonetti authored
137 static VALUE
138 mr_enc_ascii_compatible_p(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
139 {
96ab900 more work
Laurent Sansonetti authored
140 return RENC(self)->ascii_compatible ? Qtrue : Qfalse;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
141 }
142
143 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
144 mr_enc_dummy_p(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
145 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
146 return Qfalse;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
147 }
148
ffe45d2 Add support for Encoding::Converter and move String#encode and String…
Patrick Thomson authored
149 // For UTF-[8, 16, 32] it's /uFFFD, and for others it's '?'
150 rb_str_t *replacement_string_for_encoding(rb_encoding_t* destination)
151 {
152 rb_str_t *replacement_str = NULL;
153 if (destination == rb_encodings[ENCODING_UTF16BE]) {
154 replacement_str = RSTR(rb_enc_str_new("\xFF\xFD", 2, destination));
155 }
156 else if (destination == rb_encodings[ENCODING_UTF32BE]) {
157 replacement_str = RSTR(rb_enc_str_new("\0\0\xFF\xFD", 4, destination));
158 }
159 else if (destination == rb_encodings[ENCODING_UTF16LE]) {
160 replacement_str = RSTR(rb_enc_str_new("\xFD\xFF", 2, destination));
161 }
162 else if (destination == rb_encodings[ENCODING_UTF32LE]) {
163 replacement_str = RSTR(rb_enc_str_new("\xFD\xFF\0\0", 4, destination));
164 }
165 else if (destination == rb_encodings[ENCODING_UTF8]) {
166 replacement_str = RSTR(rb_enc_str_new("\xEF\xBF\xBD", 3, destination));
167 }
168 else {
169 replacement_str = RSTR(rb_enc_str_new("?", 1, rb_encodings[ENCODING_ASCII]));
170 replacement_str = str_simple_transcode(replacement_str, destination);
171 }
172 return replacement_str;
173 }
174
2b7d5d5 import vincent's work
Laurent Sansonetti authored
175 static void
96ab900 more work
Laurent Sansonetti authored
176 define_encoding_constant(const char *name, rb_encoding_t *encoding)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
177 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
178 char c = name[0];
179 if ((c >= '0') && (c <= '9')) {
180 // constants can't start with a number
181 return;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
182 }
183
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
184 if (strcmp(name, "locale") == 0) {
185 // there is no constant for locale
186 return;
187 }
188
2b7d5d5 import vincent's work
Laurent Sansonetti authored
189 char *name_copy = strdup(name);
190 if ((c >= 'a') && (c <= 'z')) {
191 // the first character must be upper case
192 name_copy[0] = c - ('a' - 'A');
193 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
194
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
195 bool has_lower_case = false;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
196 // '.' and '-' must be transformed into '_'
197 for (int i = 0; name_copy[i]; ++i) {
198 if ((name_copy[i] == '.') || (name_copy[i] == '-')) {
199 name_copy[i] = '_';
023dd4d fixed Encoding#name for 10.6
Laurent Sansonetti authored
200 }
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
201 else if ((name_copy[i] >= 'a') && (name_copy[i] <= 'z')) {
202 has_lower_case = true;
203 }
023dd4d fixed Encoding#name for 10.6
Laurent Sansonetti authored
204 }
b881853 s/MR//
Laurent Sansonetti authored
205 rb_define_const(rb_cEncoding, name_copy, (VALUE)encoding);
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
206 // if the encoding name has lower case characters,
207 // also define it in upper case
208 if (has_lower_case) {
209 for (int i = 0; name_copy[i]; ++i) {
210 if ((name_copy[i] >= 'a') && (name_copy[i] <= 'z')) {
211 name_copy[i] = name_copy[i] - 'a' + 'A';
212 }
213 }
214 rb_define_const(rb_cEncoding, name_copy, (VALUE)encoding);
215 }
216
2b7d5d5 import vincent's work
Laurent Sansonetti authored
217 free(name_copy);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
218 }
219
96ab900 more work
Laurent Sansonetti authored
220 extern void enc_init_ucnv_encoding(rb_encoding_t *encoding);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
221
2b7d5d5 import vincent's work
Laurent Sansonetti authored
222 enum {
223 ENCODING_TYPE_SPECIAL = 0,
224 ENCODING_TYPE_UCNV
225 };
9c1d230 committing experimental branch content
Laurent Sansonetti authored
226
2b7d5d5 import vincent's work
Laurent Sansonetti authored
227 static void
228 add_encoding(
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
229 unsigned int encoding_index, // index of the encoding in the encodings
230 // array
96ab900 more work
Laurent Sansonetti authored
231 unsigned int rb_encoding_type,
2b7d5d5 import vincent's work
Laurent Sansonetti authored
232 const char *public_name, // public name for the encoding
233 unsigned char min_char_size,
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
234 bool single_byte_encoding, // in the encoding a character takes only
235 // one byte
2b7d5d5 import vincent's work
Laurent Sansonetti authored
236 bool ascii_compatible, // is the encoding ASCII compatible or not
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
237 ... // aliases for the encoding (should no include the public name)
238 // - must end with a NULL
2b7d5d5 import vincent's work
Laurent Sansonetti authored
239 )
240 {
241 assert(encoding_index < ENCODINGS_COUNT);
242
243 // create an array for the aliases
244 unsigned int aliases_count = 0;
245 va_list va_aliases;
246 va_start(va_aliases, ascii_compatible);
247 while (va_arg(va_aliases, const char *) != NULL) {
248 ++aliases_count;
249 }
250 va_end(va_aliases);
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
251 const char **aliases = (const char **)
252 malloc(sizeof(const char *) * aliases_count);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
253 va_start(va_aliases, ascii_compatible);
254 for (unsigned int i = 0; i < aliases_count; ++i) {
255 aliases[i] = va_arg(va_aliases, const char *);
256 }
257 va_end(va_aliases);
258
259 // create the MacRuby object
96ab900 more work
Laurent Sansonetti authored
260 NEWOBJ(encoding, rb_encoding_t);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
261 encoding->basic.flags = 0;
b881853 s/MR//
Laurent Sansonetti authored
262 encoding->basic.klass = rb_cEncoding;
96ab900 more work
Laurent Sansonetti authored
263 rb_encodings[encoding_index] = encoding;
264 GC_RETAIN(encoding); // it should never be deallocated
2b7d5d5 import vincent's work
Laurent Sansonetti authored
265
266 // fill the fields
267 encoding->index = encoding_index;
268 encoding->public_name = public_name;
269 encoding->min_char_size = min_char_size;
270 encoding->single_byte_encoding = single_byte_encoding;
271 encoding->ascii_compatible = ascii_compatible;
272 encoding->aliases_count = aliases_count;
273 encoding->aliases = aliases;
274
275 // fill the default implementations with aborts
276 encoding->methods.update_flags = str_undefined_update_flags;
277 encoding->methods.make_data_binary = str_undefined_make_data_binary;
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
278 encoding->methods.try_making_data_uchars =
279 str_undefined_try_making_data_uchars;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
280 encoding->methods.length = str_undefined_length;
281 encoding->methods.bytesize = str_undefined_bytesize;
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
282 encoding->methods.get_character_boundaries =
283 str_undefined_get_character_boundaries;
284 encoding->methods.offset_in_bytes_to_index =
285 str_undefined_offset_in_bytes_to_index;
d0ac593 @vincentisambart an (incomplete) implementation of String#encode
vincentisambart authored
286 encoding->methods.transcode_to_utf16 =
287 str_undefined_transcode_to_utf16;
288 encoding->methods.transcode_from_utf16 =
289 str_undefined_transcode_from_utf16;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
290
96ab900 more work
Laurent Sansonetti authored
291 switch (rb_encoding_type) {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
292 case ENCODING_TYPE_SPECIAL:
022cd7c fixed ByteString#encoding to always return US_ASCII (for now)
Laurent Sansonetti authored
293 break;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
294 case ENCODING_TYPE_UCNV:
295 enc_init_ucnv_encoding(encoding);
296 break;
297 default:
298 abort();
9c1d230 committing experimental branch content
Laurent Sansonetti authored
299 }
300 }
301
ae4da82 more work
Laurent Sansonetti authored
302 // This Init function is called very early. Do not use any runtime method
303 // because things may not be initialized properly yet.
304 void
305 Init_PreEncoding(void)
2b7d5d5 import vincent's work
Laurent Sansonetti authored
306 {
0f75583 @vincentisambart added the encodings used in the specs to be able to remove tags
vincentisambart authored
307 add_encoding(ENCODING_BINARY, ENCODING_TYPE_SPECIAL, "ASCII-8BIT", 1, true, true, "BINARY", NULL);
308 add_encoding(ENCODING_ASCII, ENCODING_TYPE_UCNV, "US-ASCII", 1, true, true, "ASCII", "ANSI_X3.4-1968", "646", NULL);
4e2db64 Improves core/env pass rate: add 'locale' as an alias of UTF-8
Thibault Martin-Lagardette authored
309 add_encoding(ENCODING_UTF8, ENCODING_TYPE_UCNV, "UTF-8", 1, false, true, "CP65001", "locale", NULL);
0f75583 @vincentisambart added the encodings used in the specs to be able to remove tags
vincentisambart authored
310 add_encoding(ENCODING_UTF16BE, ENCODING_TYPE_UCNV, "UTF-16BE", 2, false, false, NULL);
311 add_encoding(ENCODING_UTF16LE, ENCODING_TYPE_UCNV, "UTF-16LE", 2, false, false, NULL);
312 add_encoding(ENCODING_UTF32BE, ENCODING_TYPE_UCNV, "UTF-32BE", 4, false, false, "UCS-4BE", NULL);
313 add_encoding(ENCODING_UTF32LE, ENCODING_TYPE_UCNV, "UTF-32LE", 4, false, false, "UCS-4LE", NULL);
314 add_encoding(ENCODING_ISO8859_1, ENCODING_TYPE_UCNV, "ISO-8859-1", 1, true, true, "ISO8859-1", NULL);
315 add_encoding(ENCODING_MACROMAN, ENCODING_TYPE_UCNV, "macRoman", 1, true, true, NULL);
316 add_encoding(ENCODING_MACCYRILLIC, ENCODING_TYPE_UCNV, "macCyrillic", 1, true, true, NULL);
317 add_encoding(ENCODING_BIG5, ENCODING_TYPE_UCNV, "Big5", 1, false, true, "CP950", NULL);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
318 // FIXME: the ICU conversion tables do not seem to match Ruby's Japanese conversion tables
0f75583 @vincentisambart added the encodings used in the specs to be able to remove tags
vincentisambart authored
319 add_encoding(ENCODING_EUCJP, ENCODING_TYPE_UCNV, "EUC-JP", 1, false, true, "eucJP", NULL);
ffe45d2 Add support for Encoding::Converter and move String#encode and String…
Patrick Thomson authored
320 add_encoding(ENCODING_SJIS, ENCODING_TYPE_UCNV, "Shift_JIS", 1, false, true, "SJIS", NULL);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
321 //add_encoding(ENCODING_EUCJP, ENCODING_TYPE_RUBY, "EUC-JP", 1, false, true, "eucJP", NULL);
322 //add_encoding(ENCODING_SJIS, ENCODING_TYPE_RUBY, "Shift_JIS", 1, false, true, "SJIS", NULL);
323 //add_encoding(ENCODING_CP932, ENCODING_TYPE_RUBY, "Windows-31J", 1, false, true, "CP932", "csWindows31J", NULL);
324
96ab900 more work
Laurent Sansonetti authored
325 default_external = rb_encodings[ENCODING_UTF8];
326 default_internal = rb_encodings[ENCODING_UTF8];
9c1d230 committing experimental branch content
Laurent Sansonetti authored
327 }
328
2b7d5d5 import vincent's work
Laurent Sansonetti authored
329 void
b881853 s/MR//
Laurent Sansonetti authored
330 Init_Encoding(void)
2b7d5d5 import vincent's work
Laurent Sansonetti authored
331 {
ae4da82 more work
Laurent Sansonetti authored
332 // rb_cEncoding is defined earlier in Init_PreVM().
333 rb_set_class_path(rb_cEncoding, rb_cObject, "Encoding");
334 rb_const_set(rb_cObject, rb_intern("Encoding"), rb_cEncoding);
335
b881853 s/MR//
Laurent Sansonetti authored
336 rb_undef_alloc_func(rb_cEncoding);
337
338 rb_objc_define_method(rb_cEncoding, "to_s", mr_enc_name, 0);
339 rb_objc_define_method(rb_cEncoding, "inspect", mr_enc_inspect, 0);
340 rb_objc_define_method(rb_cEncoding, "name", mr_enc_name, 0);
341 rb_objc_define_method(rb_cEncoding, "names", mr_enc_names, 0);
342 rb_objc_define_method(rb_cEncoding, "dummy?", mr_enc_dummy_p, 0);
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
343 rb_objc_define_method(rb_cEncoding, "ascii_compatible?",
344 mr_enc_ascii_compatible_p, 0);
4ede652 added #find
Laurent Sansonetti authored
345 rb_objc_define_method(*(VALUE *)rb_cEncoding, "list", mr_enc_s_list, 0);
346 rb_objc_define_method(*(VALUE *)rb_cEncoding, "name_list",
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
347 mr_enc_s_name_list, 0);
4ede652 added #find
Laurent Sansonetti authored
348 rb_objc_define_method(*(VALUE *)rb_cEncoding, "aliases",
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
349 mr_enc_s_aliases, 0);
4ede652 added #find
Laurent Sansonetti authored
350 rb_objc_define_method(*(VALUE *)rb_cEncoding, "find", mr_enc_s_find, 1);
351 rb_objc_define_method(*(VALUE *)rb_cEncoding, "compatible?",
39b55f1 some work on string
Laurent Sansonetti authored
352 mr_enc_s_is_compatible, 2); // in string.c
2b7d5d5 import vincent's work
Laurent Sansonetti authored
353
354 //rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
355 //rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
356
4ede652 added #find
Laurent Sansonetti authored
357 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external",
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
358 mr_enc_s_default_external, 0);
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
359 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external=",
360 mr_enc_set_default_external, 1);
4ede652 added #find
Laurent Sansonetti authored
361 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal",
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
362 mr_enc_s_default_internal, 0);
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
363 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal=",
364 mr_enc_set_default_internal, 1);
b881853 s/MR//
Laurent Sansonetti authored
365 //rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
366
ae4da82 more work
Laurent Sansonetti authored
367 // Create constants.
368 for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
369 rb_encoding_t *enc = rb_encodings[i];
370 define_encoding_constant(enc->public_name, enc);
371 for (unsigned int j = 0; j < enc->aliases_count; j++) {
372 define_encoding_constant(enc->aliases[j], enc);
373 }
374 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
375 }
96ab900 more work
Laurent Sansonetti authored
376
377 // MRI C-API compatibility.
378
379 rb_encoding_t *
380 rb_enc_find(const char *name)
381 {
382 for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
383 rb_encoding_t *enc = rb_encodings[i];
384 if (strcasecmp(enc->public_name, name) == 0) {
385 return enc;
386 }
387 for (unsigned int j = 0; j < enc->aliases_count; j++) {
388 const char *alias = enc->aliases[j];
389 if (strcasecmp(alias, name) == 0) {
390 return enc;
391 }
392 }
393 }
394 return NULL;
395 }
396
397 VALUE
398 rb_enc_from_encoding(rb_encoding_t *enc)
399 {
400 return (VALUE)enc;
401 }
402
403 rb_encoding_t *
404 rb_enc_get(VALUE obj)
405 {
406 if (IS_RSTR(obj)) {
407 return RSTR(obj)->encoding;
408 }
409 // TODO support symbols
410 return NULL;
411 }
412
413 rb_encoding_t *
414 rb_to_encoding(VALUE obj)
415 {
416 rb_encoding_t *enc;
417 if (CLASS_OF(obj) == rb_cEncoding) {
418 enc = RENC(obj);
419 }
420 else {
421 StringValue(obj);
422 enc = rb_enc_find(RSTRING_PTR(obj));
423 if (enc == NULL) {
424 rb_raise(rb_eArgError, "unknown encoding name - %s",
425 RSTRING_PTR(obj));
426 }
427 }
428 return enc;
429 }
430
431 const char *
432 rb_enc_name(rb_encoding_t *enc)
433 {
434 return RENC(enc)->public_name;
435 }
436
437 VALUE
438 rb_enc_name2(rb_encoding_t *enc)
439 {
440 return rb_usascii_str_new2(rb_enc_name(enc));
441 }
442
443 long
444 rb_enc_mbminlen(rb_encoding_t *enc)
445 {
446 return enc->min_char_size;
447 }
448
449 long
450 rb_enc_mbmaxlen(rb_encoding_t *enc)
451 {
452 return enc->single_byte_encoding ? 1 : 10; // XXX 10?
453 }
454
4cd5f5e added missing MRI methods
Laurent Sansonetti authored
455 rb_encoding *
456 rb_ascii8bit_encoding(void)
457 {
458 return rb_encodings[ENCODING_BINARY];
459 }
460
461 rb_encoding *
462 rb_utf8_encoding(void)
463 {
464 return rb_encodings[ENCODING_UTF8];
465 }
466
467 rb_encoding *
468 rb_usascii_encoding(void)
469 {
470 return rb_encodings[ENCODING_ASCII];
471 }
472
96ab900 more work
Laurent Sansonetti authored
473 rb_encoding_t *
474 rb_locale_encoding(void)
475 {
476 // XXX
477 return rb_encodings[ENCODING_UTF8];
478 }
479
480 void
481 rb_enc_set_default_external(VALUE encoding)
482 {
483 assert(CLASS_OF(encoding) == rb_cEncoding);
484 default_external = RENC(encoding);
485 }
486
Something went wrong with that request. Please try again.