Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 563 lines (498 sloc) 16.616 kB
96ab900 more work
Laurent Sansonetti authored
1 /*
2 * MacRuby implementation of Ruby 1.9 String.
3 *
4 * This file is covered by the Ruby license. See COPYING for more details.
5 *
6 * Copyright (C) 2007-2010, Apple Inc. All rights reserved.
7 * Copyright (C) 1993-2007 Yukihiro Matsumoto
8 * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
9 * Copyright (C) 2000 Information-technology Promotion Agency, Japan
10 */
11
2b7d5d5 import vincent's work
Laurent Sansonetti authored
12 #include <string.h>
9c1d230 committing experimental branch content
Laurent Sansonetti authored
13
468a2ea Move Obj-C related headers around.
Thibault Martin-Lagardette authored
14 #include "ruby/macruby.h"
39b55f1 some work on string
Laurent Sansonetti authored
15 #include "ruby/encoding.h"
16 #include "encoding.h"
17
96ab900 more work
Laurent Sansonetti authored
18 VALUE rb_cEncoding;
8b9745b define Encoding::ASCII_8BIT as a shortcut to US_ASCII (for now)
Laurent Sansonetti authored
19
d0ac593 @vincentisambart an (incomplete) implementation of String#encode
vincentisambart authored
20 rb_encoding_t *default_internal = NULL;
96ab900 more work
Laurent Sansonetti authored
21 static rb_encoding_t *default_external = NULL;
22 rb_encoding_t *rb_encodings[ENCODINGS_COUNT];
9c1d230 committing experimental branch content
Laurent Sansonetti authored
23
96ab900 more work
Laurent Sansonetti authored
24 static void str_undefined_update_flags(rb_str_t *self) { abort(); }
25 static void str_undefined_make_data_binary(rb_str_t *self) { abort(); }
26 static bool str_undefined_try_making_data_uchars(rb_str_t *self) { abort(); }
27 static long str_undefined_length(rb_str_t *self, bool ucs2_mode) { abort(); }
28 static long str_undefined_bytesize(rb_str_t *self) { abort(); }
29 static character_boundaries_t str_undefined_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode) { abort(); }
30 static long str_undefined_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes, bool ucs2_mode) { abort(); }
d0ac593 @vincentisambart an (incomplete) implementation of String#encode
vincentisambart authored
31 static void str_undefined_transcode_to_utf16(struct rb_encoding *src_enc, rb_str_t *self, long *pos, UChar **utf16, long *utf16_length) { abort(); }
32 static void str_undefined_transcode_from_utf16(struct rb_encoding *dst_enc, UChar *utf16, long utf16_length, long *pos, char **bytes, long *bytes_length) { abort(); }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
33
34 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
35 mr_enc_s_list(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
36 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
37 VALUE ary = rb_ary_new2(ENCODINGS_COUNT);
38 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
39 rb_ary_push(ary, (VALUE)rb_encodings[i]);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
40 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
41 return ary;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
42 }
43
44 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
45 mr_enc_s_name_list(VALUE klass, SEL sel)
46 {
47 VALUE ary = rb_ary_new();
48 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
49 rb_encoding_t *encoding = RENC(rb_encodings[i]);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
50 // TODO: use US-ASCII strings
96ab900 more work
Laurent Sansonetti authored
51 rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
52 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
96ab900 more work
Laurent Sansonetti authored
53 rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[j]));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
54 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
55 }
56 return ary;
57 }
58
59 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
60 mr_enc_s_aliases(VALUE klass, SEL sel)
61 {
62 VALUE hash = rb_hash_new();
63 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
64 rb_encoding_t *encoding = RENC(rb_encodings[i]);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
65 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
96ab900 more work
Laurent Sansonetti authored
66 rb_hash_aset(hash, rb_usascii_str_new2(encoding->aliases[j]),
67 rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
68 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
69 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
70 return hash;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
71 }
72
73 static VALUE
4ede652 added #find
Laurent Sansonetti authored
74 mr_enc_s_find(VALUE klass, SEL sel, VALUE name)
75 {
76 StringValue(name);
77 rb_encoding_t *enc = rb_enc_find(RSTRING_PTR(name));
78 if (enc == NULL) {
79 rb_raise(rb_eArgError, "unknown encoding name - %s",
80 RSTRING_PTR(name));
81 }
82 return (VALUE)enc;
83 }
84
85 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
86 mr_enc_s_default_internal(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
87 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
88 return (VALUE)default_internal;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
89 }
90
91 static VALUE
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
92 mr_enc_set_default_internal(VALUE klass, SEL sel, VALUE enc)
93 {
94 default_internal = rb_to_encoding(enc);
95 return (VALUE)default_internal;
96 }
97
98 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
99 mr_enc_s_default_external(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
100 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
101 return (VALUE)default_external;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
102 }
103
104 static VALUE
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
105 mr_enc_set_default_external(VALUE klass, SEL sel, VALUE enc)
106 {
107 default_external = rb_to_encoding(enc);
108 return (VALUE)default_external;
109 }
110
111 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
112 mr_enc_name(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
113 {
96ab900 more work
Laurent Sansonetti authored
114 return rb_usascii_str_new2(RENC(self)->public_name);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
115 }
116
117 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
118 mr_enc_inspect(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
119 {
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
120 return rb_sprintf("#<%s:%s>", rb_obj_classname(self),
96ab900 more work
Laurent Sansonetti authored
121 RENC(self)->public_name);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
122 }
123
124 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
125 mr_enc_names(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
126 {
96ab900 more work
Laurent Sansonetti authored
127 rb_encoding_t *encoding = RENC(self);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
128
2b7d5d5 import vincent's work
Laurent Sansonetti authored
129 VALUE ary = rb_ary_new2(encoding->aliases_count + 1);
96ab900 more work
Laurent Sansonetti authored
130 rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
131 for (unsigned int i = 0; i < encoding->aliases_count; ++i) {
96ab900 more work
Laurent Sansonetti authored
132 rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[i]));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
133 }
134 return ary;
1623532 added Encoding#default_external= and Encoding#default_internal= which…
Laurent Sansonetti authored
135 }
136
2b7d5d5 import vincent's work
Laurent Sansonetti authored
137 static VALUE
138 mr_enc_ascii_compatible_p(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
139 {
96ab900 more work
Laurent Sansonetti authored
140 return RENC(self)->ascii_compatible ? Qtrue : Qfalse;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
141 }
142
143 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
144 mr_enc_dummy_p(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
145 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
146 return Qfalse;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
147 }
148
ffe45d2 Add support for Encoding::Converter and move String#encode and String…
Patrick Thomson authored
149 // For UTF-[8, 16, 32] it's /uFFFD, and for others it's '?'
150 rb_str_t *replacement_string_for_encoding(rb_encoding_t* destination)
151 {
152 rb_str_t *replacement_str = NULL;
153 if (destination == rb_encodings[ENCODING_UTF16BE]) {
154 replacement_str = RSTR(rb_enc_str_new("\xFF\xFD", 2, destination));
155 }
156 else if (destination == rb_encodings[ENCODING_UTF32BE]) {
157 replacement_str = RSTR(rb_enc_str_new("\0\0\xFF\xFD", 4, destination));
158 }
159 else if (destination == rb_encodings[ENCODING_UTF16LE]) {
160 replacement_str = RSTR(rb_enc_str_new("\xFD\xFF", 2, destination));
161 }
162 else if (destination == rb_encodings[ENCODING_UTF32LE]) {
163 replacement_str = RSTR(rb_enc_str_new("\xFD\xFF\0\0", 4, destination));
164 }
165 else if (destination == rb_encodings[ENCODING_UTF8]) {
166 replacement_str = RSTR(rb_enc_str_new("\xEF\xBF\xBD", 3, destination));
167 }
168 else {
169 replacement_str = RSTR(rb_enc_str_new("?", 1, rb_encodings[ENCODING_ASCII]));
170 replacement_str = str_simple_transcode(replacement_str, destination);
171 }
172 return replacement_str;
173 }
174
2b7d5d5 import vincent's work
Laurent Sansonetti authored
175 static void
96ab900 more work
Laurent Sansonetti authored
176 define_encoding_constant(const char *name, rb_encoding_t *encoding)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
177 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
178 char c = name[0];
179 if ((c >= '0') && (c <= '9')) {
180 // constants can't start with a number
181 return;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
182 }
183
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
184 if (strcmp(name, "locale") == 0) {
185 // there is no constant for locale
186 return;
187 }
188
2b7d5d5 import vincent's work
Laurent Sansonetti authored
189 char *name_copy = strdup(name);
190 if ((c >= 'a') && (c <= 'z')) {
191 // the first character must be upper case
192 name_copy[0] = c - ('a' - 'A');
193 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
194
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
195 bool has_lower_case = false;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
196 // '.' and '-' must be transformed into '_'
197 for (int i = 0; name_copy[i]; ++i) {
198 if ((name_copy[i] == '.') || (name_copy[i] == '-')) {
199 name_copy[i] = '_';
023dd4d fixed Encoding#name for 10.6
Laurent Sansonetti authored
200 }
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
201 else if ((name_copy[i] >= 'a') && (name_copy[i] <= 'z')) {
202 has_lower_case = true;
203 }
023dd4d fixed Encoding#name for 10.6
Laurent Sansonetti authored
204 }
b881853 s/MR//
Laurent Sansonetti authored
205 rb_define_const(rb_cEncoding, name_copy, (VALUE)encoding);
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
206 // if the encoding name has lower case characters,
207 // also define it in upper case
208 if (has_lower_case) {
209 for (int i = 0; name_copy[i]; ++i) {
210 if ((name_copy[i] >= 'a') && (name_copy[i] <= 'z')) {
211 name_copy[i] = name_copy[i] - 'a' + 'A';
212 }
213 }
214 rb_define_const(rb_cEncoding, name_copy, (VALUE)encoding);
215 }
216
2b7d5d5 import vincent's work
Laurent Sansonetti authored
217 free(name_copy);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
218 }
219
96ab900 more work
Laurent Sansonetti authored
220 extern void enc_init_ucnv_encoding(rb_encoding_t *encoding);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
221
2b7d5d5 import vincent's work
Laurent Sansonetti authored
// Kinds of encoding accepted by add_encoding().
enum {
    ENCODING_TYPE_SPECIAL = 0,  // keeps the aborting placeholder methods
    ENCODING_TYPE_UCNV = 1      // set up by enc_init_ucnv_encoding()
};
9c1d230 committing experimental branch content
Laurent Sansonetti authored
226
2b7d5d5 import vincent's work
Laurent Sansonetti authored
227 static void
228 add_encoding(
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
229 unsigned int encoding_index, // index of the encoding in the encodings
230 // array
96ab900 more work
Laurent Sansonetti authored
231 unsigned int rb_encoding_type,
2b7d5d5 import vincent's work
Laurent Sansonetti authored
232 const char *public_name, // public name for the encoding
233 unsigned char min_char_size,
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
234 bool single_byte_encoding, // in the encoding a character takes only
235 // one byte
2b7d5d5 import vincent's work
Laurent Sansonetti authored
236 bool ascii_compatible, // is the encoding ASCII compatible or not
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
237 ... // aliases for the encoding (should no include the public name)
238 // - must end with a NULL
2b7d5d5 import vincent's work
Laurent Sansonetti authored
239 )
240 {
241 assert(encoding_index < ENCODINGS_COUNT);
242
243 // create an array for the aliases
244 unsigned int aliases_count = 0;
245 va_list va_aliases;
246 va_start(va_aliases, ascii_compatible);
247 while (va_arg(va_aliases, const char *) != NULL) {
248 ++aliases_count;
249 }
250 va_end(va_aliases);
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
251 const char **aliases = (const char **)
252 malloc(sizeof(const char *) * aliases_count);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
253 va_start(va_aliases, ascii_compatible);
254 for (unsigned int i = 0; i < aliases_count; ++i) {
255 aliases[i] = va_arg(va_aliases, const char *);
256 }
257 va_end(va_aliases);
258
259 // create the MacRuby object
96ab900 more work
Laurent Sansonetti authored
260 NEWOBJ(encoding, rb_encoding_t);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
261 encoding->basic.flags = 0;
b881853 s/MR//
Laurent Sansonetti authored
262 encoding->basic.klass = rb_cEncoding;
96ab900 more work
Laurent Sansonetti authored
263 rb_encodings[encoding_index] = encoding;
264 GC_RETAIN(encoding); // it should never be deallocated
2b7d5d5 import vincent's work
Laurent Sansonetti authored
265
266 // fill the fields
267 encoding->index = encoding_index;
268 encoding->public_name = public_name;
269 encoding->min_char_size = min_char_size;
270 encoding->single_byte_encoding = single_byte_encoding;
271 encoding->ascii_compatible = ascii_compatible;
272 encoding->aliases_count = aliases_count;
273 encoding->aliases = aliases;
274
275 // fill the default implementations with aborts
276 encoding->methods.update_flags = str_undefined_update_flags;
277 encoding->methods.make_data_binary = str_undefined_make_data_binary;
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
278 encoding->methods.try_making_data_uchars =
279 str_undefined_try_making_data_uchars;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
280 encoding->methods.length = str_undefined_length;
281 encoding->methods.bytesize = str_undefined_bytesize;
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
282 encoding->methods.get_character_boundaries =
283 str_undefined_get_character_boundaries;
284 encoding->methods.offset_in_bytes_to_index =
285 str_undefined_offset_in_bytes_to_index;
d0ac593 @vincentisambart an (incomplete) implementation of String#encode
vincentisambart authored
286 encoding->methods.transcode_to_utf16 =
287 str_undefined_transcode_to_utf16;
288 encoding->methods.transcode_from_utf16 =
289 str_undefined_transcode_from_utf16;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
290
96ab900 more work
Laurent Sansonetti authored
291 switch (rb_encoding_type) {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
292 case ENCODING_TYPE_SPECIAL:
022cd7c fixed ByteString#encoding to always return US_ASCII (for now)
Laurent Sansonetti authored
293 break;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
294 case ENCODING_TYPE_UCNV:
295 enc_init_ucnv_encoding(encoding);
296 break;
297 default:
298 abort();
9c1d230 committing experimental branch content
Laurent Sansonetti authored
299 }
300 }
301
ae4da82 more work
Laurent Sansonetti authored
302 // This Init function is called very early. Do not use any runtime method
303 // because things may not be initialized properly yet.
304 void
305 Init_PreEncoding(void)
2b7d5d5 import vincent's work
Laurent Sansonetti authored
306 {
0f75583 @vincentisambart added the encodings used in the specs to be able to remove tags
vincentisambart authored
307 add_encoding(ENCODING_BINARY, ENCODING_TYPE_SPECIAL, "ASCII-8BIT", 1, true, true, "BINARY", NULL);
308 add_encoding(ENCODING_ASCII, ENCODING_TYPE_UCNV, "US-ASCII", 1, true, true, "ASCII", "ANSI_X3.4-1968", "646", NULL);
4e2db64 Improves core/env pass rate: add 'locale' as an alias of UTF-8
Thibault Martin-Lagardette authored
309 add_encoding(ENCODING_UTF8, ENCODING_TYPE_UCNV, "UTF-8", 1, false, true, "CP65001", "locale", NULL);
0f75583 @vincentisambart added the encodings used in the specs to be able to remove tags
vincentisambart authored
310 add_encoding(ENCODING_UTF16BE, ENCODING_TYPE_UCNV, "UTF-16BE", 2, false, false, NULL);
311 add_encoding(ENCODING_UTF16LE, ENCODING_TYPE_UCNV, "UTF-16LE", 2, false, false, NULL);
312 add_encoding(ENCODING_UTF32BE, ENCODING_TYPE_UCNV, "UTF-32BE", 4, false, false, "UCS-4BE", NULL);
313 add_encoding(ENCODING_UTF32LE, ENCODING_TYPE_UCNV, "UTF-32LE", 4, false, false, "UCS-4LE", NULL);
314 add_encoding(ENCODING_ISO8859_1, ENCODING_TYPE_UCNV, "ISO-8859-1", 1, true, true, "ISO8859-1", NULL);
315 add_encoding(ENCODING_MACROMAN, ENCODING_TYPE_UCNV, "macRoman", 1, true, true, NULL);
316 add_encoding(ENCODING_MACCYRILLIC, ENCODING_TYPE_UCNV, "macCyrillic", 1, true, true, NULL);
317 add_encoding(ENCODING_BIG5, ENCODING_TYPE_UCNV, "Big5", 1, false, true, "CP950", NULL);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
318 // FIXME: the ICU conversion tables do not seem to match Ruby's Japanese conversion tables
0f75583 @vincentisambart added the encodings used in the specs to be able to remove tags
vincentisambart authored
319 add_encoding(ENCODING_EUCJP, ENCODING_TYPE_UCNV, "EUC-JP", 1, false, true, "eucJP", NULL);
ffe45d2 Add support for Encoding::Converter and move String#encode and String…
Patrick Thomson authored
320 add_encoding(ENCODING_SJIS, ENCODING_TYPE_UCNV, "Shift_JIS", 1, false, true, "SJIS", NULL);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
321 //add_encoding(ENCODING_EUCJP, ENCODING_TYPE_RUBY, "EUC-JP", 1, false, true, "eucJP", NULL);
322 //add_encoding(ENCODING_SJIS, ENCODING_TYPE_RUBY, "Shift_JIS", 1, false, true, "SJIS", NULL);
323 //add_encoding(ENCODING_CP932, ENCODING_TYPE_RUBY, "Windows-31J", 1, false, true, "CP932", "csWindows31J", NULL);
324
96ab900 more work
Laurent Sansonetti authored
325 default_external = rb_encodings[ENCODING_UTF8];
326 default_internal = rb_encodings[ENCODING_UTF8];
9c1d230 committing experimental branch content
Laurent Sansonetti authored
327 }
328
2b7d5d5 import vincent's work
Laurent Sansonetti authored
329 void
b881853 s/MR//
Laurent Sansonetti authored
330 Init_Encoding(void)
2b7d5d5 import vincent's work
Laurent Sansonetti authored
331 {
ae4da82 more work
Laurent Sansonetti authored
332 // rb_cEncoding is defined earlier in Init_PreVM().
333 rb_set_class_path(rb_cEncoding, rb_cObject, "Encoding");
334 rb_const_set(rb_cObject, rb_intern("Encoding"), rb_cEncoding);
335
b881853 s/MR//
Laurent Sansonetti authored
336 rb_undef_alloc_func(rb_cEncoding);
337
338 rb_objc_define_method(rb_cEncoding, "to_s", mr_enc_name, 0);
339 rb_objc_define_method(rb_cEncoding, "inspect", mr_enc_inspect, 0);
340 rb_objc_define_method(rb_cEncoding, "name", mr_enc_name, 0);
341 rb_objc_define_method(rb_cEncoding, "names", mr_enc_names, 0);
342 rb_objc_define_method(rb_cEncoding, "dummy?", mr_enc_dummy_p, 0);
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
343 rb_objc_define_method(rb_cEncoding, "ascii_compatible?",
344 mr_enc_ascii_compatible_p, 0);
4ede652 added #find
Laurent Sansonetti authored
345 rb_objc_define_method(*(VALUE *)rb_cEncoding, "list", mr_enc_s_list, 0);
346 rb_objc_define_method(*(VALUE *)rb_cEncoding, "name_list",
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
347 mr_enc_s_name_list, 0);
4ede652 added #find
Laurent Sansonetti authored
348 rb_objc_define_method(*(VALUE *)rb_cEncoding, "aliases",
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
349 mr_enc_s_aliases, 0);
4ede652 added #find
Laurent Sansonetti authored
350 rb_objc_define_method(*(VALUE *)rb_cEncoding, "find", mr_enc_s_find, 1);
351 rb_objc_define_method(*(VALUE *)rb_cEncoding, "compatible?",
39b55f1 some work on string
Laurent Sansonetti authored
352 mr_enc_s_is_compatible, 2); // in string.c
2b7d5d5 import vincent's work
Laurent Sansonetti authored
353
354 //rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
355 //rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
356
4ede652 added #find
Laurent Sansonetti authored
357 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external",
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
358 mr_enc_s_default_external, 0);
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
359 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external=",
360 mr_enc_set_default_external, 1);
4ede652 added #find
Laurent Sansonetti authored
361 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal",
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
362 mr_enc_s_default_internal, 0);
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
363 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal=",
364 mr_enc_set_default_internal, 1);
b881853 s/MR//
Laurent Sansonetti authored
365 //rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
366
ae4da82 more work
Laurent Sansonetti authored
367 // Create constants.
368 for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
369 rb_encoding_t *enc = rb_encodings[i];
370 define_encoding_constant(enc->public_name, enc);
371 for (unsigned int j = 0; j < enc->aliases_count; j++) {
372 define_encoding_constant(enc->aliases[j], enc);
373 }
374 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
375 }
96ab900 more work
Laurent Sansonetti authored
376
377 // MRI C-API compatibility.
378
379 rb_encoding_t *
380 rb_enc_find(const char *name)
381 {
382 for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
383 rb_encoding_t *enc = rb_encodings[i];
384 if (strcasecmp(enc->public_name, name) == 0) {
385 return enc;
386 }
387 for (unsigned int j = 0; j < enc->aliases_count; j++) {
388 const char *alias = enc->aliases[j];
389 if (strcasecmp(alias, name) == 0) {
390 return enc;
391 }
392 }
393 }
394 return NULL;
395 }
396
397 VALUE
398 rb_enc_from_encoding(rb_encoding_t *enc)
399 {
400 return (VALUE)enc;
401 }
402
403 rb_encoding_t *
404 rb_enc_get(VALUE obj)
405 {
406 if (IS_RSTR(obj)) {
407 return RSTR(obj)->encoding;
408 }
409 // TODO support symbols
410 return NULL;
411 }
412
413 rb_encoding_t *
414 rb_to_encoding(VALUE obj)
415 {
416 rb_encoding_t *enc;
417 if (CLASS_OF(obj) == rb_cEncoding) {
418 enc = RENC(obj);
419 }
420 else {
421 StringValue(obj);
422 enc = rb_enc_find(RSTRING_PTR(obj));
423 if (enc == NULL) {
424 rb_raise(rb_eArgError, "unknown encoding name - %s",
425 RSTRING_PTR(obj));
426 }
427 }
428 return enc;
429 }
430
431 const char *
432 rb_enc_name(rb_encoding_t *enc)
433 {
434 return RENC(enc)->public_name;
435 }
436
437 VALUE
438 rb_enc_name2(rb_encoding_t *enc)
439 {
440 return rb_usascii_str_new2(rb_enc_name(enc));
441 }
442
443 long
444 rb_enc_mbminlen(rb_encoding_t *enc)
445 {
446 return enc->min_char_size;
447 }
448
449 long
450 rb_enc_mbmaxlen(rb_encoding_t *enc)
451 {
452 return enc->single_byte_encoding ? 1 : 10; // XXX 10?
453 }
454
4cd5f5e added missing MRI methods
Laurent Sansonetti authored
455 rb_encoding *
456 rb_ascii8bit_encoding(void)
457 {
458 return rb_encodings[ENCODING_BINARY];
459 }
460
461 rb_encoding *
462 rb_utf8_encoding(void)
463 {
464 return rb_encodings[ENCODING_UTF8];
465 }
466
467 rb_encoding *
468 rb_usascii_encoding(void)
469 {
470 return rb_encodings[ENCODING_ASCII];
471 }
472
96ab900 more work
Laurent Sansonetti authored
473 rb_encoding_t *
474 rb_locale_encoding(void)
475 {
476 // XXX
477 return rb_encodings[ENCODING_UTF8];
478 }
479
480 void
481 rb_enc_set_default_external(VALUE encoding)
482 {
483 assert(CLASS_OF(encoding) == rb_cEncoding);
484 default_external = RENC(encoding);
485 }
486
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
487 rb_encoding *
488 rb_default_internal_encoding(void)
489 {
490 return (rb_encoding *)default_internal;
491 }
492
0202977 implement some of the MRI encoding index APIs
Laurent Sansonetti authored
493 static int
494 index_of_encoding(rb_encoding_t *enc)
495 {
496 if (enc != NULL) {
497 for (int i = 0; i <ENCODINGS_COUNT; i++) {
498 if (rb_encodings[i] == enc) {
499 return i;
500 }
501 }
502 }
503 return -1;
504 }
505
506 int
507 rb_enc_get_index(VALUE obj)
508 {
509 return index_of_encoding(rb_enc_get(obj));
510 }
511
512 int
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
513 rb_to_encoding_index(VALUE enc)
514 {
515 if (CLASS_OF(enc) != rb_cEncoding && TYPE(enc) != T_STRING) {
516 return -1;
517 }
518 else {
519 int idx = index_of_encoding(rb_enc_get(enc));
520 if (idx >= 0) {
521 return idx;
522 }
523 else if (NIL_P(enc = rb_check_string_type(enc))) {
524 return -1;
525 }
526 if (!rb_enc_asciicompat(rb_enc_get(enc))) {
527 return -1;
528 }
529 return rb_enc_find_index(StringValueCStr(enc));
530 }
531 }
532
533 int
0202977 implement some of the MRI encoding index APIs
Laurent Sansonetti authored
534 rb_enc_find_index(const char *name)
535 {
536 return index_of_encoding(rb_enc_find(name));
537 }
538
539 int
540 rb_ascii8bit_encindex(void)
541 {
542 return index_of_encoding(rb_encodings[ENCODING_BINARY]);
543 }
544
545 int
546 rb_utf8_encindex(void)
547 {
548 return index_of_encoding(rb_encodings[ENCODING_UTF8]);
549 }
550
551 int
552 rb_usascii_encindex(void)
553 {
554 return index_of_encoding(rb_encodings[ENCODING_ASCII]);
555 }
556
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
557 rb_encoding *
558 rb_enc_from_index(int idx)
559 {
560 assert(idx >= 0 && idx < ENCODINGS_COUNT);
561 return rb_encodings[idx];
562 }
Something went wrong with that request. Please try again.