Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 590 lines (522 sloc) 17.194 kb
96ab900 more work
Laurent Sansonetti authored
1 /*
2 * MacRuby implementation of Ruby 1.9 String.
3 *
4 * This file is covered by the Ruby license. See COPYING for more details.
5 *
6 * Copyright (C) 2007-2010, Apple Inc. All rights reserved.
7 * Copyright (C) 1993-2007 Yukihiro Matsumoto
8 * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
9 * Copyright (C) 2000 Information-technology Promotion Agency, Japan
10 */
11
2b7d5d5 import vincent's work
Laurent Sansonetti authored
12 #include <string.h>
9c1d230 committing experimental branch content
Laurent Sansonetti authored
13
468a2ea Move Obj-C related headers around.
Thibault Martin-Lagardette authored
14 #include "ruby/macruby.h"
39b55f1 some work on string
Laurent Sansonetti authored
15 #include "ruby/encoding.h"
16 #include "encoding.h"
f738483 honor the original string encoding when generating substrings out of a r...
Laurent Sansonetti authored
17 #include "symbol.h"
39b55f1 some work on string
Laurent Sansonetti authored
18
96ab900 more work
Laurent Sansonetti authored
19 VALUE rb_cEncoding;
8b9745b define Encoding::ASCII_8BIT as a shortcut to US_ASCII (for now)
Laurent Sansonetti authored
20
d0ac593 @vincentisambart an (incomplete) implementation of String#encode
vincentisambart authored
21 rb_encoding_t *default_internal = NULL;
96ab900 more work
Laurent Sansonetti authored
22 static rb_encoding_t *default_external = NULL;
23 rb_encoding_t *rb_encodings[ENCODINGS_COUNT];
9c1d230 committing experimental branch content
Laurent Sansonetti authored
24
96ab900 more work
Laurent Sansonetti authored
25 static void str_undefined_update_flags(rb_str_t *self) { abort(); }
26 static void str_undefined_make_data_binary(rb_str_t *self) { abort(); }
27 static bool str_undefined_try_making_data_uchars(rb_str_t *self) { abort(); }
28 static long str_undefined_length(rb_str_t *self, bool ucs2_mode) { abort(); }
29 static long str_undefined_bytesize(rb_str_t *self) { abort(); }
30 static character_boundaries_t str_undefined_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode) { abort(); }
31 static long str_undefined_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes, bool ucs2_mode) { abort(); }
d0ac593 @vincentisambart an (incomplete) implementation of String#encode
vincentisambart authored
32 static void str_undefined_transcode_to_utf16(struct rb_encoding *src_enc, rb_str_t *self, long *pos, UChar **utf16, long *utf16_length) { abort(); }
33 static void str_undefined_transcode_from_utf16(struct rb_encoding *dst_enc, UChar *utf16, long utf16_length, long *pos, char **bytes, long *bytes_length) { abort(); }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
34
35 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
36 mr_enc_s_list(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
37 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
38 VALUE ary = rb_ary_new2(ENCODINGS_COUNT);
39 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
40 rb_ary_push(ary, (VALUE)rb_encodings[i]);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
41 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
42 return ary;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
43 }
44
45 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
46 mr_enc_s_name_list(VALUE klass, SEL sel)
47 {
48 VALUE ary = rb_ary_new();
49 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
50 rb_encoding_t *encoding = RENC(rb_encodings[i]);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
51 // TODO: use US-ASCII strings
96ab900 more work
Laurent Sansonetti authored
52 rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
53 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
96ab900 more work
Laurent Sansonetti authored
54 rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[j]));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
55 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
56 }
57 return ary;
58 }
59
60 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
61 mr_enc_s_aliases(VALUE klass, SEL sel)
62 {
63 VALUE hash = rb_hash_new();
64 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
65 rb_encoding_t *encoding = RENC(rb_encodings[i]);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
66 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
96ab900 more work
Laurent Sansonetti authored
67 rb_hash_aset(hash, rb_usascii_str_new2(encoding->aliases[j]),
68 rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
69 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
70 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
71 return hash;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
72 }
73
74 static VALUE
4ede652 added #find
Laurent Sansonetti authored
75 mr_enc_s_find(VALUE klass, SEL sel, VALUE name)
76 {
77 StringValue(name);
78 rb_encoding_t *enc = rb_enc_find(RSTRING_PTR(name));
79 if (enc == NULL) {
80 rb_raise(rb_eArgError, "unknown encoding name - %s",
81 RSTRING_PTR(name));
82 }
83 return (VALUE)enc;
84 }
85
86 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
87 mr_enc_s_default_internal(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
88 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
89 return (VALUE)default_internal;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
90 }
91
92 static VALUE
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
93 mr_enc_set_default_internal(VALUE klass, SEL sel, VALUE enc)
94 {
95 default_internal = rb_to_encoding(enc);
96 return (VALUE)default_internal;
97 }
98
99 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
100 mr_enc_s_default_external(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
101 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
102 return (VALUE)default_external;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
103 }
104
105 static VALUE
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
106 mr_enc_set_default_external(VALUE klass, SEL sel, VALUE enc)
107 {
108 default_external = rb_to_encoding(enc);
109 return (VALUE)default_external;
110 }
111
112 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
113 mr_enc_name(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
114 {
96ab900 more work
Laurent Sansonetti authored
115 return rb_usascii_str_new2(RENC(self)->public_name);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
116 }
117
118 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
119 mr_enc_inspect(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
120 {
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
121 return rb_sprintf("#<%s:%s>", rb_obj_classname(self),
96ab900 more work
Laurent Sansonetti authored
122 RENC(self)->public_name);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
123 }
124
125 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
126 mr_enc_names(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
127 {
96ab900 more work
Laurent Sansonetti authored
128 rb_encoding_t *encoding = RENC(self);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
129
2b7d5d5 import vincent's work
Laurent Sansonetti authored
130 VALUE ary = rb_ary_new2(encoding->aliases_count + 1);
96ab900 more work
Laurent Sansonetti authored
131 rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
132 for (unsigned int i = 0; i < encoding->aliases_count; ++i) {
96ab900 more work
Laurent Sansonetti authored
133 rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[i]));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
134 }
135 return ary;
1623532 added Encoding#default_external= and Encoding#default_internal= which do...
Laurent Sansonetti authored
136 }
137
2b7d5d5 import vincent's work
Laurent Sansonetti authored
138 static VALUE
139 mr_enc_ascii_compatible_p(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
140 {
96ab900 more work
Laurent Sansonetti authored
141 return RENC(self)->ascii_compatible ? Qtrue : Qfalse;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
142 }
143
144 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
145 mr_enc_dummy_p(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
146 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
147 return Qfalse;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
148 }
149
ffe45d2 Add support for Encoding::Converter and move String#encode and String#en...
Patrick Thomson authored
150 // For UTF-[8, 16, 32] it's /uFFFD, and for others it's '?'
151 rb_str_t *replacement_string_for_encoding(rb_encoding_t* destination)
152 {
153 rb_str_t *replacement_str = NULL;
154 if (destination == rb_encodings[ENCODING_UTF16BE]) {
155 replacement_str = RSTR(rb_enc_str_new("\xFF\xFD", 2, destination));
156 }
157 else if (destination == rb_encodings[ENCODING_UTF32BE]) {
158 replacement_str = RSTR(rb_enc_str_new("\0\0\xFF\xFD", 4, destination));
159 }
160 else if (destination == rb_encodings[ENCODING_UTF16LE]) {
161 replacement_str = RSTR(rb_enc_str_new("\xFD\xFF", 2, destination));
162 }
163 else if (destination == rb_encodings[ENCODING_UTF32LE]) {
164 replacement_str = RSTR(rb_enc_str_new("\xFD\xFF\0\0", 4, destination));
165 }
166 else if (destination == rb_encodings[ENCODING_UTF8]) {
167 replacement_str = RSTR(rb_enc_str_new("\xEF\xBF\xBD", 3, destination));
168 }
169 else {
170 replacement_str = RSTR(rb_enc_str_new("?", 1, rb_encodings[ENCODING_ASCII]));
171 replacement_str = str_simple_transcode(replacement_str, destination);
172 }
173 return replacement_str;
174 }
175
2b7d5d5 import vincent's work
Laurent Sansonetti authored
176 static void
96ab900 more work
Laurent Sansonetti authored
177 define_encoding_constant(const char *name, rb_encoding_t *encoding)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
178 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
179 char c = name[0];
180 if ((c >= '0') && (c <= '9')) {
181 // constants can't start with a number
182 return;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
183 }
184
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
185 if (strcmp(name, "locale") == 0) {
186 // there is no constant for locale
187 return;
188 }
189
2b7d5d5 import vincent's work
Laurent Sansonetti authored
190 char *name_copy = strdup(name);
191 if ((c >= 'a') && (c <= 'z')) {
192 // the first character must be upper case
193 name_copy[0] = c - ('a' - 'A');
194 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
195
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
196 bool has_lower_case = false;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
197 // '.' and '-' must be transformed into '_'
198 for (int i = 0; name_copy[i]; ++i) {
199 if ((name_copy[i] == '.') || (name_copy[i] == '-')) {
200 name_copy[i] = '_';
023dd4d fixed Encoding#name for 10.6
Laurent Sansonetti authored
201 }
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
202 else if ((name_copy[i] >= 'a') && (name_copy[i] <= 'z')) {
203 has_lower_case = true;
204 }
023dd4d fixed Encoding#name for 10.6
Laurent Sansonetti authored
205 }
b881853 s/MR//
Laurent Sansonetti authored
206 rb_define_const(rb_cEncoding, name_copy, (VALUE)encoding);
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
207 // if the encoding name has lower case characters,
208 // also define it in upper case
209 if (has_lower_case) {
210 for (int i = 0; name_copy[i]; ++i) {
211 if ((name_copy[i] >= 'a') && (name_copy[i] <= 'z')) {
212 name_copy[i] = name_copy[i] - 'a' + 'A';
213 }
214 }
215 rb_define_const(rb_cEncoding, name_copy, (VALUE)encoding);
216 }
217
2b7d5d5 import vincent's work
Laurent Sansonetti authored
218 free(name_copy);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
219 }
220
96ab900 more work
Laurent Sansonetti authored
221 extern void enc_init_ucnv_encoding(rb_encoding_t *encoding);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
222
2b7d5d5 import vincent's work
Laurent Sansonetti authored
// Kind of encoding passed to add_encoding(); it selects the extra
// initialization step performed after the default method table is filled.
enum {
    ENCODING_TYPE_SPECIAL = 0, // no additional initialization
    ENCODING_TYPE_UCNV         // initialized by enc_init_ucnv_encoding()
};
9c1d230 committing experimental branch content
Laurent Sansonetti authored
227
2b7d5d5 import vincent's work
Laurent Sansonetti authored
228 static void
229 add_encoding(
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
230 unsigned int encoding_index, // index of the encoding in the encodings
231 // array
96ab900 more work
Laurent Sansonetti authored
232 unsigned int rb_encoding_type,
2b7d5d5 import vincent's work
Laurent Sansonetti authored
233 const char *public_name, // public name for the encoding
234 unsigned char min_char_size,
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
235 bool single_byte_encoding, // in the encoding a character takes only
236 // one byte
2b7d5d5 import vincent's work
Laurent Sansonetti authored
237 bool ascii_compatible, // is the encoding ASCII compatible or not
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
238 ... // aliases for the encoding (should no include the public name)
239 // - must end with a NULL
2b7d5d5 import vincent's work
Laurent Sansonetti authored
240 )
241 {
242 assert(encoding_index < ENCODINGS_COUNT);
243
244 // create an array for the aliases
245 unsigned int aliases_count = 0;
246 va_list va_aliases;
247 va_start(va_aliases, ascii_compatible);
248 while (va_arg(va_aliases, const char *) != NULL) {
249 ++aliases_count;
250 }
251 va_end(va_aliases);
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
252 const char **aliases = (const char **)
253 malloc(sizeof(const char *) * aliases_count);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
254 va_start(va_aliases, ascii_compatible);
255 for (unsigned int i = 0; i < aliases_count; ++i) {
256 aliases[i] = va_arg(va_aliases, const char *);
257 }
258 va_end(va_aliases);
259
260 // create the MacRuby object
96ab900 more work
Laurent Sansonetti authored
261 NEWOBJ(encoding, rb_encoding_t);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
262 encoding->basic.flags = 0;
b881853 s/MR//
Laurent Sansonetti authored
263 encoding->basic.klass = rb_cEncoding;
96ab900 more work
Laurent Sansonetti authored
264 rb_encodings[encoding_index] = encoding;
265 GC_RETAIN(encoding); // it should never be deallocated
2b7d5d5 import vincent's work
Laurent Sansonetti authored
266
267 // fill the fields
268 encoding->index = encoding_index;
269 encoding->public_name = public_name;
270 encoding->min_char_size = min_char_size;
271 encoding->single_byte_encoding = single_byte_encoding;
272 encoding->ascii_compatible = ascii_compatible;
273 encoding->aliases_count = aliases_count;
274 encoding->aliases = aliases;
275
276 // fill the default implementations with aborts
277 encoding->methods.update_flags = str_undefined_update_flags;
278 encoding->methods.make_data_binary = str_undefined_make_data_binary;
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
279 encoding->methods.try_making_data_uchars =
280 str_undefined_try_making_data_uchars;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
281 encoding->methods.length = str_undefined_length;
282 encoding->methods.bytesize = str_undefined_bytesize;
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
283 encoding->methods.get_character_boundaries =
284 str_undefined_get_character_boundaries;
285 encoding->methods.offset_in_bytes_to_index =
286 str_undefined_offset_in_bytes_to_index;
d0ac593 @vincentisambart an (incomplete) implementation of String#encode
vincentisambart authored
287 encoding->methods.transcode_to_utf16 =
288 str_undefined_transcode_to_utf16;
289 encoding->methods.transcode_from_utf16 =
290 str_undefined_transcode_from_utf16;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
291
96ab900 more work
Laurent Sansonetti authored
292 switch (rb_encoding_type) {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
293 case ENCODING_TYPE_SPECIAL:
022cd7c fixed ByteString#encoding to always return US_ASCII (for now)
Laurent Sansonetti authored
294 break;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
295 case ENCODING_TYPE_UCNV:
296 enc_init_ucnv_encoding(encoding);
297 break;
298 default:
299 abort();
9c1d230 committing experimental branch content
Laurent Sansonetti authored
300 }
301 }
302
ae4da82 more work
Laurent Sansonetti authored
303 // This Init function is called very early. Do not use any runtime method
304 // because things may not be initialized properly yet.
305 void
306 Init_PreEncoding(void)
2b7d5d5 import vincent's work
Laurent Sansonetti authored
307 {
0f75583 @vincentisambart added the encodings used in the specs to be able to remove tags
vincentisambart authored
308 add_encoding(ENCODING_BINARY, ENCODING_TYPE_SPECIAL, "ASCII-8BIT", 1, true, true, "BINARY", NULL);
309 add_encoding(ENCODING_ASCII, ENCODING_TYPE_UCNV, "US-ASCII", 1, true, true, "ASCII", "ANSI_X3.4-1968", "646", NULL);
4e2db64 Improves core/env pass rate: add 'locale' as an alias of UTF-8
Thibault Martin-Lagardette authored
310 add_encoding(ENCODING_UTF8, ENCODING_TYPE_UCNV, "UTF-8", 1, false, true, "CP65001", "locale", NULL);
0f75583 @vincentisambart added the encodings used in the specs to be able to remove tags
vincentisambart authored
311 add_encoding(ENCODING_UTF16BE, ENCODING_TYPE_UCNV, "UTF-16BE", 2, false, false, NULL);
312 add_encoding(ENCODING_UTF16LE, ENCODING_TYPE_UCNV, "UTF-16LE", 2, false, false, NULL);
313 add_encoding(ENCODING_UTF32BE, ENCODING_TYPE_UCNV, "UTF-32BE", 4, false, false, "UCS-4BE", NULL);
314 add_encoding(ENCODING_UTF32LE, ENCODING_TYPE_UCNV, "UTF-32LE", 4, false, false, "UCS-4LE", NULL);
315 add_encoding(ENCODING_ISO8859_1, ENCODING_TYPE_UCNV, "ISO-8859-1", 1, true, true, "ISO8859-1", NULL);
316 add_encoding(ENCODING_MACROMAN, ENCODING_TYPE_UCNV, "macRoman", 1, true, true, NULL);
317 add_encoding(ENCODING_MACCYRILLIC, ENCODING_TYPE_UCNV, "macCyrillic", 1, true, true, NULL);
318 add_encoding(ENCODING_BIG5, ENCODING_TYPE_UCNV, "Big5", 1, false, true, "CP950", NULL);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
319 // FIXME: the ICU conversion tables do not seem to match Ruby's Japanese conversion tables
0f75583 @vincentisambart added the encodings used in the specs to be able to remove tags
vincentisambart authored
320 add_encoding(ENCODING_EUCJP, ENCODING_TYPE_UCNV, "EUC-JP", 1, false, true, "eucJP", NULL);
ffe45d2 Add support for Encoding::Converter and move String#encode and String#en...
Patrick Thomson authored
321 add_encoding(ENCODING_SJIS, ENCODING_TYPE_UCNV, "Shift_JIS", 1, false, true, "SJIS", NULL);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
322 //add_encoding(ENCODING_EUCJP, ENCODING_TYPE_RUBY, "EUC-JP", 1, false, true, "eucJP", NULL);
323 //add_encoding(ENCODING_SJIS, ENCODING_TYPE_RUBY, "Shift_JIS", 1, false, true, "SJIS", NULL);
324 //add_encoding(ENCODING_CP932, ENCODING_TYPE_RUBY, "Windows-31J", 1, false, true, "CP932", "csWindows31J", NULL);
325
96ab900 more work
Laurent Sansonetti authored
326 default_external = rb_encodings[ENCODING_UTF8];
327 default_internal = rb_encodings[ENCODING_UTF8];
9c1d230 committing experimental branch content
Laurent Sansonetti authored
328 }
329
2b7d5d5 import vincent's work
Laurent Sansonetti authored
330 void
b881853 s/MR//
Laurent Sansonetti authored
331 Init_Encoding(void)
2b7d5d5 import vincent's work
Laurent Sansonetti authored
332 {
ae4da82 more work
Laurent Sansonetti authored
333 // rb_cEncoding is defined earlier in Init_PreVM().
334 rb_set_class_path(rb_cEncoding, rb_cObject, "Encoding");
335 rb_const_set(rb_cObject, rb_intern("Encoding"), rb_cEncoding);
336
b881853 s/MR//
Laurent Sansonetti authored
337 rb_undef_alloc_func(rb_cEncoding);
338
339 rb_objc_define_method(rb_cEncoding, "to_s", mr_enc_name, 0);
340 rb_objc_define_method(rb_cEncoding, "inspect", mr_enc_inspect, 0);
341 rb_objc_define_method(rb_cEncoding, "name", mr_enc_name, 0);
342 rb_objc_define_method(rb_cEncoding, "names", mr_enc_names, 0);
343 rb_objc_define_method(rb_cEncoding, "dummy?", mr_enc_dummy_p, 0);
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
344 rb_objc_define_method(rb_cEncoding, "ascii_compatible?",
345 mr_enc_ascii_compatible_p, 0);
4ede652 added #find
Laurent Sansonetti authored
346 rb_objc_define_method(*(VALUE *)rb_cEncoding, "list", mr_enc_s_list, 0);
347 rb_objc_define_method(*(VALUE *)rb_cEncoding, "name_list",
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
348 mr_enc_s_name_list, 0);
4ede652 added #find
Laurent Sansonetti authored
349 rb_objc_define_method(*(VALUE *)rb_cEncoding, "aliases",
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
350 mr_enc_s_aliases, 0);
4ede652 added #find
Laurent Sansonetti authored
351 rb_objc_define_method(*(VALUE *)rb_cEncoding, "find", mr_enc_s_find, 1);
352 rb_objc_define_method(*(VALUE *)rb_cEncoding, "compatible?",
39b55f1 some work on string
Laurent Sansonetti authored
353 mr_enc_s_is_compatible, 2); // in string.c
2b7d5d5 import vincent's work
Laurent Sansonetti authored
354
355 //rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
356 //rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
357
4ede652 added #find
Laurent Sansonetti authored
358 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external",
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
359 mr_enc_s_default_external, 0);
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
360 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external=",
361 mr_enc_set_default_external, 1);
4ede652 added #find
Laurent Sansonetti authored
362 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal",
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
363 mr_enc_s_default_internal, 0);
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
364 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal=",
365 mr_enc_set_default_internal, 1);
b881853 s/MR//
Laurent Sansonetti authored
366 //rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
367
ae4da82 more work
Laurent Sansonetti authored
368 // Create constants.
369 for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
370 rb_encoding_t *enc = rb_encodings[i];
371 define_encoding_constant(enc->public_name, enc);
372 for (unsigned int j = 0; j < enc->aliases_count; j++) {
373 define_encoding_constant(enc->aliases[j], enc);
374 }
375 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
376 }
96ab900 more work
Laurent Sansonetti authored
377
378 // MRI C-API compatibility.
379
380 rb_encoding_t *
381 rb_enc_find(const char *name)
382 {
383 for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
384 rb_encoding_t *enc = rb_encodings[i];
385 if (strcasecmp(enc->public_name, name) == 0) {
386 return enc;
387 }
388 for (unsigned int j = 0; j < enc->aliases_count; j++) {
389 const char *alias = enc->aliases[j];
390 if (strcasecmp(alias, name) == 0) {
391 return enc;
392 }
393 }
394 }
395 return NULL;
396 }
397
398 VALUE
399 rb_enc_from_encoding(rb_encoding_t *enc)
400 {
401 return (VALUE)enc;
402 }
403
404 rb_encoding_t *
405 rb_enc_get(VALUE obj)
406 {
f738483 honor the original string encoding when generating substrings out of a r...
Laurent Sansonetti authored
407 switch (TYPE(obj)) {
408 case T_STRING:
409 if (IS_RSTR(obj)) {
410 return RSTR(obj)->encoding;
411 }
412 return rb_encodings[ENCODING_UTF8];
413
414 case T_SYMBOL:
415 return rb_enc_get(rb_sym_str(obj));
96ab900 more work
Laurent Sansonetti authored
416 }
417 return NULL;
418 }
419
420 rb_encoding_t *
421 rb_to_encoding(VALUE obj)
422 {
423 rb_encoding_t *enc;
424 if (CLASS_OF(obj) == rb_cEncoding) {
425 enc = RENC(obj);
426 }
427 else {
428 StringValue(obj);
429 enc = rb_enc_find(RSTRING_PTR(obj));
430 if (enc == NULL) {
431 rb_raise(rb_eArgError, "unknown encoding name - %s",
432 RSTRING_PTR(obj));
433 }
434 }
435 return enc;
436 }
437
438 const char *
439 rb_enc_name(rb_encoding_t *enc)
440 {
441 return RENC(enc)->public_name;
442 }
443
444 VALUE
445 rb_enc_name2(rb_encoding_t *enc)
446 {
447 return rb_usascii_str_new2(rb_enc_name(enc));
448 }
449
450 long
451 rb_enc_mbminlen(rb_encoding_t *enc)
452 {
453 return enc->min_char_size;
454 }
455
456 long
457 rb_enc_mbmaxlen(rb_encoding_t *enc)
458 {
459 return enc->single_byte_encoding ? 1 : 10; // XXX 10?
460 }
461
4cd5f5e added missing MRI methods
Laurent Sansonetti authored
462 rb_encoding *
463 rb_ascii8bit_encoding(void)
464 {
465 return rb_encodings[ENCODING_BINARY];
466 }
467
468 rb_encoding *
469 rb_utf8_encoding(void)
470 {
471 return rb_encodings[ENCODING_UTF8];
472 }
473
474 rb_encoding *
475 rb_usascii_encoding(void)
476 {
477 return rb_encodings[ENCODING_ASCII];
478 }
479
96ab900 more work
Laurent Sansonetti authored
480 rb_encoding_t *
481 rb_locale_encoding(void)
482 {
483 // XXX
484 return rb_encodings[ENCODING_UTF8];
485 }
486
487 void
488 rb_enc_set_default_external(VALUE encoding)
489 {
490 assert(CLASS_OF(encoding) == rb_cEncoding);
491 default_external = RENC(encoding);
492 }
493
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
494 rb_encoding *
495 rb_default_internal_encoding(void)
496 {
497 return (rb_encoding *)default_internal;
498 }
499
0202977 implement some of the MRI encoding index APIs
Laurent Sansonetti authored
500 static int
501 index_of_encoding(rb_encoding_t *enc)
502 {
503 if (enc != NULL) {
504 for (int i = 0; i <ENCODINGS_COUNT; i++) {
505 if (rb_encodings[i] == enc) {
506 return i;
507 }
508 }
509 }
510 return -1;
511 }
512
513 int
514 rb_enc_get_index(VALUE obj)
515 {
516 return index_of_encoding(rb_enc_get(obj));
517 }
518
ee2152e Better C implementation for Iconv
Thibault Martin-Lagardette authored
519 void
520 rb_enc_set_index(VALUE obj, int encindex)
521 {
522 if (encindex < ENCODINGS_COUNT) {
523 return ;
524 }
525 rb_str_force_encoding(obj, rb_encodings[encindex]);
526 }
527
0202977 implement some of the MRI encoding index APIs
Laurent Sansonetti authored
528 int
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
529 rb_to_encoding_index(VALUE enc)
530 {
531 if (CLASS_OF(enc) != rb_cEncoding && TYPE(enc) != T_STRING) {
532 return -1;
533 }
534 else {
a0faa60 @jballanc Cast, don't get; would be good to export this too
jballanc authored
535 int idx = index_of_encoding((rb_encoding_t *)enc);
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
536 if (idx >= 0) {
537 return idx;
538 }
539 else if (NIL_P(enc = rb_check_string_type(enc))) {
540 return -1;
541 }
542 if (!rb_enc_asciicompat(rb_enc_get(enc))) {
543 return -1;
544 }
545 return rb_enc_find_index(StringValueCStr(enc));
546 }
547 }
548
549 int
0202977 implement some of the MRI encoding index APIs
Laurent Sansonetti authored
550 rb_enc_find_index(const char *name)
551 {
552 return index_of_encoding(rb_enc_find(name));
553 }
554
555 int
556 rb_ascii8bit_encindex(void)
557 {
558 return index_of_encoding(rb_encodings[ENCODING_BINARY]);
559 }
560
561 int
562 rb_utf8_encindex(void)
563 {
564 return index_of_encoding(rb_encodings[ENCODING_UTF8]);
565 }
566
567 int
568 rb_usascii_encindex(void)
569 {
570 return index_of_encoding(rb_encodings[ENCODING_ASCII]);
571 }
572
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
573 rb_encoding *
574 rb_enc_from_index(int idx)
575 {
576 assert(idx >= 0 && idx < ENCODINGS_COUNT);
577 return rb_encodings[idx];
578 }
583e433 adding 2 more frightening MRI methods
Laurent Sansonetti authored
579
580 VALUE
581 rb_enc_associate_index(VALUE obj, int idx)
582 {
583 if (TYPE(obj) == T_STRING) {
584 assert(idx >= 0 && idx < ENCODINGS_COUNT);
585 rb_str_force_encoding(obj, rb_encodings[idx]);
586 return obj;
587 }
588 rb_raise(rb_eArgError, "cannot set encoding on non-string object");
589 }
Something went wrong with that request. Please try again.