Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 580 lines (514 sloc) 17.452 kb
96ab900 more work
Laurent Sansonetti authored
1 /*
2 * MacRuby implementation of Ruby 1.9 String.
3 *
4 * This file is covered by the Ruby license. See COPYING for more details.
5 *
9595725 update copyrights to 2011
Laurent Sansonetti authored
6 * Copyright (C) 2007-2011, Apple Inc. All rights reserved.
96ab900 more work
Laurent Sansonetti authored
7 * Copyright (C) 1993-2007 Yukihiro Matsumoto
8 * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
9 * Copyright (C) 2000 Information-technology Promotion Agency, Japan
10 */
11
2b7d5d5 import vincent's work
Laurent Sansonetti authored
12 #include <string.h>
9c1d230 committing experimental branch content
Laurent Sansonetti authored
13
d0898dd include/ruby/macruby.h -> macruby_internal.h
Laurent Sansonetti authored
14 #include "macruby_internal.h"
39b55f1 some work on string
Laurent Sansonetti authored
15 #include "ruby/encoding.h"
16 #include "encoding.h"
f738483 honor the original string encoding when generating substrings out of …
Laurent Sansonetti authored
17 #include "symbol.h"
39b55f1 some work on string
Laurent Sansonetti authored
18
96ab900 more work
Laurent Sansonetti authored
19 VALUE rb_cEncoding;
8b9745b define Encoding::ASCII_8BIT as a shortcut to US_ASCII (for now)
Laurent Sansonetti authored
20
d0ac593 @vincentisambart an (incomplete) implementation of String#encode
vincentisambart authored
21 rb_encoding_t *default_internal = NULL;
96ab900 more work
Laurent Sansonetti authored
22 static rb_encoding_t *default_external = NULL;
23 rb_encoding_t *rb_encodings[ENCODINGS_COUNT];
9c1d230 committing experimental branch content
Laurent Sansonetti authored
24
25 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
26 mr_enc_s_list(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
27 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
28 VALUE ary = rb_ary_new2(ENCODINGS_COUNT);
29 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
30 rb_ary_push(ary, (VALUE)rb_encodings[i]);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
31 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
32 return ary;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
33 }
34
35 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
36 mr_enc_s_name_list(VALUE klass, SEL sel)
37 {
38 VALUE ary = rb_ary_new();
39 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
40 rb_encoding_t *encoding = RENC(rb_encodings[i]);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
41 // TODO: use US-ASCII strings
96ab900 more work
Laurent Sansonetti authored
42 rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
43 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
96ab900 more work
Laurent Sansonetti authored
44 rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[j]));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
45 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
46 }
47 return ary;
48 }
49
50 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
51 mr_enc_s_aliases(VALUE klass, SEL sel)
52 {
53 VALUE hash = rb_hash_new();
54 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
55 rb_encoding_t *encoding = RENC(rb_encodings[i]);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
56 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
96ab900 more work
Laurent Sansonetti authored
57 rb_hash_aset(hash, rb_usascii_str_new2(encoding->aliases[j]),
58 rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
59 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
60 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
61 return hash;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
62 }
63
64 static VALUE
4ede652 added #find
Laurent Sansonetti authored
65 mr_enc_s_find(VALUE klass, SEL sel, VALUE name)
66 {
67 StringValue(name);
68 rb_encoding_t *enc = rb_enc_find(RSTRING_PTR(name));
69 if (enc == NULL) {
70 rb_raise(rb_eArgError, "unknown encoding name - %s",
71 RSTRING_PTR(name));
72 }
73 return (VALUE)enc;
74 }
75
76 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
77 mr_enc_s_default_internal(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
78 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
79 return (VALUE)default_internal;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
80 }
81
82 static VALUE
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
83 mr_enc_set_default_internal(VALUE klass, SEL sel, VALUE enc)
84 {
85 default_internal = rb_to_encoding(enc);
86 return (VALUE)default_internal;
87 }
88
89 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
90 mr_enc_s_default_external(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
91 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
92 return (VALUE)default_external;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
93 }
94
95 static VALUE
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
96 mr_enc_set_default_external(VALUE klass, SEL sel, VALUE enc)
97 {
98 default_external = rb_to_encoding(enc);
99 return (VALUE)default_external;
100 }
101
102 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
103 mr_enc_name(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
104 {
96ab900 more work
Laurent Sansonetti authored
105 return rb_usascii_str_new2(RENC(self)->public_name);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
106 }
107
108 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
109 mr_enc_inspect(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
110 {
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
111 return rb_sprintf("#<%s:%s>", rb_obj_classname(self),
96ab900 more work
Laurent Sansonetti authored
112 RENC(self)->public_name);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
113 }
114
115 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
116 mr_enc_names(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
117 {
96ab900 more work
Laurent Sansonetti authored
118 rb_encoding_t *encoding = RENC(self);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
119
2b7d5d5 import vincent's work
Laurent Sansonetti authored
120 VALUE ary = rb_ary_new2(encoding->aliases_count + 1);
96ab900 more work
Laurent Sansonetti authored
121 rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
122 for (unsigned int i = 0; i < encoding->aliases_count; ++i) {
96ab900 more work
Laurent Sansonetti authored
123 rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[i]));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
124 }
125 return ary;
1623532 added Encoding#default_external= and Encoding#default_internal= which…
Laurent Sansonetti authored
126 }
127
2b7d5d5 import vincent's work
Laurent Sansonetti authored
128 static VALUE
129 mr_enc_ascii_compatible_p(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
130 {
96ab900 more work
Laurent Sansonetti authored
131 return RENC(self)->ascii_compatible ? Qtrue : Qfalse;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
132 }
133
134 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
135 mr_enc_dummy_p(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
136 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
137 return Qfalse;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
138 }
139
ffe45d2 Add support for Encoding::Converter and move String#encode and String…
Patrick Thomson authored
140 // For UTF-[8, 16, 32] it's /uFFFD, and for others it's '?'
141 rb_str_t *replacement_string_for_encoding(rb_encoding_t* destination)
142 {
143 rb_str_t *replacement_str = NULL;
144 if (destination == rb_encodings[ENCODING_UTF16BE]) {
145 replacement_str = RSTR(rb_enc_str_new("\xFF\xFD", 2, destination));
146 }
147 else if (destination == rb_encodings[ENCODING_UTF32BE]) {
148 replacement_str = RSTR(rb_enc_str_new("\0\0\xFF\xFD", 4, destination));
149 }
150 else if (destination == rb_encodings[ENCODING_UTF16LE]) {
151 replacement_str = RSTR(rb_enc_str_new("\xFD\xFF", 2, destination));
152 }
153 else if (destination == rb_encodings[ENCODING_UTF32LE]) {
154 replacement_str = RSTR(rb_enc_str_new("\xFD\xFF\0\0", 4, destination));
155 }
156 else if (destination == rb_encodings[ENCODING_UTF8]) {
157 replacement_str = RSTR(rb_enc_str_new("\xEF\xBF\xBD", 3, destination));
158 }
159 else {
160 replacement_str = RSTR(rb_enc_str_new("?", 1, rb_encodings[ENCODING_ASCII]));
161 replacement_str = str_simple_transcode(replacement_str, destination);
162 }
163 return replacement_str;
164 }
165
2b7d5d5 import vincent's work
Laurent Sansonetti authored
166 static void
96ab900 more work
Laurent Sansonetti authored
167 define_encoding_constant(const char *name, rb_encoding_t *encoding)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
168 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
169 char c = name[0];
170 if ((c >= '0') && (c <= '9')) {
171 // constants can't start with a number
172 return;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
173 }
174
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
175 if (strcmp(name, "locale") == 0) {
176 // there is no constant for locale
177 return;
178 }
179
2b7d5d5 import vincent's work
Laurent Sansonetti authored
180 char *name_copy = strdup(name);
181 if ((c >= 'a') && (c <= 'z')) {
182 // the first character must be upper case
183 name_copy[0] = c - ('a' - 'A');
184 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
185
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
186 bool has_lower_case = false;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
187 // '.' and '-' must be transformed into '_'
188 for (int i = 0; name_copy[i]; ++i) {
189 if ((name_copy[i] == '.') || (name_copy[i] == '-')) {
190 name_copy[i] = '_';
023dd4d fixed Encoding#name for 10.6
Laurent Sansonetti authored
191 }
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
192 else if ((name_copy[i] >= 'a') && (name_copy[i] <= 'z')) {
193 has_lower_case = true;
194 }
023dd4d fixed Encoding#name for 10.6
Laurent Sansonetti authored
195 }
b881853 s/MR//
Laurent Sansonetti authored
196 rb_define_const(rb_cEncoding, name_copy, (VALUE)encoding);
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
197 // if the encoding name has lower case characters,
198 // also define it in upper case
199 if (has_lower_case) {
200 for (int i = 0; name_copy[i]; ++i) {
201 if ((name_copy[i] >= 'a') && (name_copy[i] <= 'z')) {
202 name_copy[i] = name_copy[i] - 'a' + 'A';
203 }
204 }
205 rb_define_const(rb_cEncoding, name_copy, (VALUE)encoding);
206 }
207
2b7d5d5 import vincent's work
Laurent Sansonetti authored
208 free(name_copy);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
209 }
210
96ab900 more work
Laurent Sansonetti authored
211 extern void enc_init_ucnv_encoding(rb_encoding_t *encoding);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
212
2b7d5d5 import vincent's work
Laurent Sansonetti authored
// Values accepted for the rb_encoding_type parameter of add_encoding().
enum {
    ENCODING_TYPE_SPECIAL = 0, // no extra initialization is performed
    ENCODING_TYPE_UCNV = 1     // initialized by enc_init_ucnv_encoding()
};
9c1d230 committing experimental branch content
Laurent Sansonetti authored
217
2b7d5d5 import vincent's work
Laurent Sansonetti authored
218 static void
219 add_encoding(
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
220 unsigned int encoding_index, // index of the encoding in the encodings
221 // array
96ab900 more work
Laurent Sansonetti authored
222 unsigned int rb_encoding_type,
2b7d5d5 import vincent's work
Laurent Sansonetti authored
223 const char *public_name, // public name for the encoding
224 unsigned char min_char_size,
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
225 bool single_byte_encoding, // in the encoding a character takes only
226 // one byte
2b7d5d5 import vincent's work
Laurent Sansonetti authored
227 bool ascii_compatible, // is the encoding ASCII compatible or not
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
228 bool little_endian, // for UTF-16/32, if the encoding is little endian
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
229 ... // aliases for the encoding (should no include the public name)
230 // - must end with a NULL
2b7d5d5 import vincent's work
Laurent Sansonetti authored
231 )
232 {
233 assert(encoding_index < ENCODINGS_COUNT);
234
235 // create an array for the aliases
236 unsigned int aliases_count = 0;
237 va_list va_aliases;
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
238 va_start(va_aliases, little_endian);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
239 while (va_arg(va_aliases, const char *) != NULL) {
240 ++aliases_count;
241 }
242 va_end(va_aliases);
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
243 const char **aliases = (const char **)
244 malloc(sizeof(const char *) * aliases_count);
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
245 va_start(va_aliases, little_endian);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
246 for (unsigned int i = 0; i < aliases_count; ++i) {
247 aliases[i] = va_arg(va_aliases, const char *);
248 }
249 va_end(va_aliases);
250
251 // create the MacRuby object
96ab900 more work
Laurent Sansonetti authored
252 NEWOBJ(encoding, rb_encoding_t);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
253 encoding->basic.flags = 0;
b881853 s/MR//
Laurent Sansonetti authored
254 encoding->basic.klass = rb_cEncoding;
96ab900 more work
Laurent Sansonetti authored
255 rb_encodings[encoding_index] = encoding;
256 GC_RETAIN(encoding); // it should never be deallocated
2b7d5d5 import vincent's work
Laurent Sansonetti authored
257
258 // fill the fields
259 encoding->index = encoding_index;
260 encoding->public_name = public_name;
261 encoding->min_char_size = min_char_size;
262 encoding->single_byte_encoding = single_byte_encoding;
263 encoding->ascii_compatible = ascii_compatible;
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
264 encoding->little_endian = little_endian;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
265 encoding->aliases_count = aliases_count;
266 encoding->aliases = aliases;
267
96ab900 more work
Laurent Sansonetti authored
268 switch (rb_encoding_type) {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
269 case ENCODING_TYPE_SPECIAL:
022cd7c fixed ByteString#encoding to always return US_ASCII (for now)
Laurent Sansonetti authored
270 break;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
271 case ENCODING_TYPE_UCNV:
272 enc_init_ucnv_encoding(encoding);
273 break;
274 default:
275 abort();
9c1d230 committing experimental branch content
Laurent Sansonetti authored
276 }
277 }
278
ae4da82 more work
Laurent Sansonetti authored
279 // This Init function is called very early. Do not use any runtime method
280 // because things may not be initialized properly yet.
281 void
282 Init_PreEncoding(void)
2b7d5d5 import vincent's work
Laurent Sansonetti authored
283 {
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
284 add_encoding(ENCODING_BINARY, ENCODING_TYPE_SPECIAL, "ASCII-8BIT", 1, true, true, false, "BINARY", NULL);
285 add_encoding(ENCODING_ASCII, ENCODING_TYPE_UCNV, "US-ASCII", 1, true, true, false, "ASCII", "ANSI_X3.4-1968", "646", NULL);
286 add_encoding(ENCODING_UTF8, ENCODING_TYPE_UCNV, "UTF-8", 1, false, true, false, "CP65001", "locale", NULL);
287 add_encoding(ENCODING_UTF16BE, ENCODING_TYPE_UCNV, "UTF-16BE", 2, false, false, false, NULL);
288 add_encoding(ENCODING_UTF16LE, ENCODING_TYPE_UCNV, "UTF-16LE", 2, false, false, true, NULL);
289 add_encoding(ENCODING_UTF32BE, ENCODING_TYPE_UCNV, "UTF-32BE", 4, false, false, false, "UCS-4BE", NULL);
290 add_encoding(ENCODING_UTF32LE, ENCODING_TYPE_UCNV, "UTF-32LE", 4, false, false, true, "UCS-4LE", NULL);
291 add_encoding(ENCODING_ISO8859_1, ENCODING_TYPE_UCNV, "ISO-8859-1", 1, true, true, false, "ISO8859-1", NULL);
311371a @vincentisambart added all the ISO-8859 encodings left as some of them are used in rub…
vincentisambart authored
292 add_encoding(ENCODING_ISO8859_2, ENCODING_TYPE_UCNV, "ISO-8859-2", 1, true, true, false, "ISO8859-2", NULL);
293 add_encoding(ENCODING_ISO8859_3, ENCODING_TYPE_UCNV, "ISO-8859-3", 1, true, true, false, "ISO8859-3", NULL);
294 add_encoding(ENCODING_ISO8859_4, ENCODING_TYPE_UCNV, "ISO-8859-4", 1, true, true, false, "ISO8859-4", NULL);
295 add_encoding(ENCODING_ISO8859_5, ENCODING_TYPE_UCNV, "ISO-8859-5", 1, true, true, false, "ISO8859-5", NULL);
296 add_encoding(ENCODING_ISO8859_6, ENCODING_TYPE_UCNV, "ISO-8859-6", 1, true, true, false, "ISO8859-6", NULL);
297 add_encoding(ENCODING_ISO8859_7, ENCODING_TYPE_UCNV, "ISO-8859-7", 1, true, true, false, "ISO8859-7", NULL);
298 add_encoding(ENCODING_ISO8859_8, ENCODING_TYPE_UCNV, "ISO-8859-8", 1, true, true, false, "ISO8859-8", NULL);
299 add_encoding(ENCODING_ISO8859_9, ENCODING_TYPE_UCNV, "ISO-8859-9", 1, true, true, false, "ISO8859-9", NULL);
300 add_encoding(ENCODING_ISO8859_10, ENCODING_TYPE_UCNV, "ISO-8859-10", 1, true, true, false, "ISO8859-10", NULL);
301 add_encoding(ENCODING_ISO8859_11, ENCODING_TYPE_UCNV, "ISO-8859-11", 1, true, true, false, "ISO8859-11", NULL);
302 add_encoding(ENCODING_ISO8859_13, ENCODING_TYPE_UCNV, "ISO-8859-13", 1, true, true, false, "ISO8859-13", NULL);
303 add_encoding(ENCODING_ISO8859_14, ENCODING_TYPE_UCNV, "ISO-8859-14", 1, true, true, false, "ISO8859-14", NULL);
304 add_encoding(ENCODING_ISO8859_15, ENCODING_TYPE_UCNV, "ISO-8859-15", 1, true, true, false, "ISO8859-15", NULL);
305 add_encoding(ENCODING_ISO8859_16, ENCODING_TYPE_UCNV, "ISO-8859-16", 1, true, true, false, "ISO8859-16", NULL);
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
306 add_encoding(ENCODING_MACROMAN, ENCODING_TYPE_UCNV, "macRoman", 1, true, true, false, NULL);
307 add_encoding(ENCODING_MACCYRILLIC, ENCODING_TYPE_UCNV, "macCyrillic", 1, true, true, false, NULL);
308 add_encoding(ENCODING_BIG5, ENCODING_TYPE_UCNV, "Big5", 1, false, true, false, "CP950", NULL);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
309 // FIXME: the ICU conversion tables do not seem to match Ruby's Japanese conversion tables
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
310 add_encoding(ENCODING_EUCJP, ENCODING_TYPE_UCNV, "EUC-JP", 1, false, true, false, "eucJP", NULL);
311 add_encoding(ENCODING_SJIS, ENCODING_TYPE_UCNV, "Shift_JIS", 1, false, true, false, "SJIS", NULL);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
312 //add_encoding(ENCODING_EUCJP, ENCODING_TYPE_RUBY, "EUC-JP", 1, false, true, "eucJP", NULL);
313 //add_encoding(ENCODING_SJIS, ENCODING_TYPE_RUBY, "Shift_JIS", 1, false, true, "SJIS", NULL);
314 //add_encoding(ENCODING_CP932, ENCODING_TYPE_RUBY, "Windows-31J", 1, false, true, "CP932", "csWindows31J", NULL);
315
96ab900 more work
Laurent Sansonetti authored
316 default_external = rb_encodings[ENCODING_UTF8];
317 default_internal = rb_encodings[ENCODING_UTF8];
9c1d230 committing experimental branch content
Laurent Sansonetti authored
318 }
319
2b7d5d5 import vincent's work
Laurent Sansonetti authored
320 void
b881853 s/MR//
Laurent Sansonetti authored
321 Init_Encoding(void)
2b7d5d5 import vincent's work
Laurent Sansonetti authored
322 {
ae4da82 more work
Laurent Sansonetti authored
323 // rb_cEncoding is defined earlier in Init_PreVM().
324 rb_set_class_path(rb_cEncoding, rb_cObject, "Encoding");
325 rb_const_set(rb_cObject, rb_intern("Encoding"), rb_cEncoding);
326
b881853 s/MR//
Laurent Sansonetti authored
327 rb_undef_alloc_func(rb_cEncoding);
328
329 rb_objc_define_method(rb_cEncoding, "to_s", mr_enc_name, 0);
330 rb_objc_define_method(rb_cEncoding, "inspect", mr_enc_inspect, 0);
331 rb_objc_define_method(rb_cEncoding, "name", mr_enc_name, 0);
332 rb_objc_define_method(rb_cEncoding, "names", mr_enc_names, 0);
333 rb_objc_define_method(rb_cEncoding, "dummy?", mr_enc_dummy_p, 0);
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
334 rb_objc_define_method(rb_cEncoding, "ascii_compatible?",
335 mr_enc_ascii_compatible_p, 0);
4ede652 added #find
Laurent Sansonetti authored
336 rb_objc_define_method(*(VALUE *)rb_cEncoding, "list", mr_enc_s_list, 0);
337 rb_objc_define_method(*(VALUE *)rb_cEncoding, "name_list",
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
338 mr_enc_s_name_list, 0);
4ede652 added #find
Laurent Sansonetti authored
339 rb_objc_define_method(*(VALUE *)rb_cEncoding, "aliases",
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
340 mr_enc_s_aliases, 0);
4ede652 added #find
Laurent Sansonetti authored
341 rb_objc_define_method(*(VALUE *)rb_cEncoding, "find", mr_enc_s_find, 1);
342 rb_objc_define_method(*(VALUE *)rb_cEncoding, "compatible?",
39b55f1 some work on string
Laurent Sansonetti authored
343 mr_enc_s_is_compatible, 2); // in string.c
2b7d5d5 import vincent's work
Laurent Sansonetti authored
344
345 //rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
346 //rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
347
4ede652 added #find
Laurent Sansonetti authored
348 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external",
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
349 mr_enc_s_default_external, 0);
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
350 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external=",
351 mr_enc_set_default_external, 1);
4ede652 added #find
Laurent Sansonetti authored
352 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal",
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
353 mr_enc_s_default_internal, 0);
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
354 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal=",
355 mr_enc_set_default_internal, 1);
b881853 s/MR//
Laurent Sansonetti authored
356 //rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
357
ae4da82 more work
Laurent Sansonetti authored
358 // Create constants.
359 for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
360 rb_encoding_t *enc = rb_encodings[i];
361 define_encoding_constant(enc->public_name, enc);
362 for (unsigned int j = 0; j < enc->aliases_count; j++) {
363 define_encoding_constant(enc->aliases[j], enc);
364 }
365 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
366 }
96ab900 more work
Laurent Sansonetti authored
367
368 // MRI C-API compatibility.
369
370 rb_encoding_t *
371 rb_enc_find(const char *name)
372 {
373 for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
374 rb_encoding_t *enc = rb_encodings[i];
375 if (strcasecmp(enc->public_name, name) == 0) {
376 return enc;
377 }
378 for (unsigned int j = 0; j < enc->aliases_count; j++) {
379 const char *alias = enc->aliases[j];
380 if (strcasecmp(alias, name) == 0) {
381 return enc;
382 }
383 }
384 }
385 return NULL;
386 }
387
388 VALUE
389 rb_enc_from_encoding(rb_encoding_t *enc)
390 {
391 return (VALUE)enc;
392 }
393
394 rb_encoding_t *
395 rb_enc_get(VALUE obj)
396 {
f738483 honor the original string encoding when generating substrings out of …
Laurent Sansonetti authored
397 switch (TYPE(obj)) {
398 case T_STRING:
399 if (IS_RSTR(obj)) {
400 return RSTR(obj)->encoding;
401 }
402 return rb_encodings[ENCODING_UTF8];
403
404 case T_SYMBOL:
405 return rb_enc_get(rb_sym_str(obj));
96ab900 more work
Laurent Sansonetti authored
406 }
407 return NULL;
408 }
409
410 rb_encoding_t *
411 rb_to_encoding(VALUE obj)
412 {
413 rb_encoding_t *enc;
414 if (CLASS_OF(obj) == rb_cEncoding) {
415 enc = RENC(obj);
416 }
417 else {
418 StringValue(obj);
419 enc = rb_enc_find(RSTRING_PTR(obj));
420 if (enc == NULL) {
421 rb_raise(rb_eArgError, "unknown encoding name - %s",
422 RSTRING_PTR(obj));
423 }
424 }
425 return enc;
426 }
427
428 const char *
429 rb_enc_name(rb_encoding_t *enc)
430 {
431 return RENC(enc)->public_name;
432 }
433
434 VALUE
435 rb_enc_name2(rb_encoding_t *enc)
436 {
437 return rb_usascii_str_new2(rb_enc_name(enc));
438 }
439
440 long
441 rb_enc_mbminlen(rb_encoding_t *enc)
442 {
443 return enc->min_char_size;
444 }
445
446 long
447 rb_enc_mbmaxlen(rb_encoding_t *enc)
448 {
449 return enc->single_byte_encoding ? 1 : 10; // XXX 10?
450 }
451
4cd5f5e added missing MRI methods
Laurent Sansonetti authored
452 rb_encoding *
453 rb_ascii8bit_encoding(void)
454 {
455 return rb_encodings[ENCODING_BINARY];
456 }
457
458 rb_encoding *
459 rb_utf8_encoding(void)
460 {
461 return rb_encodings[ENCODING_UTF8];
462 }
463
464 rb_encoding *
465 rb_usascii_encoding(void)
466 {
467 return rb_encodings[ENCODING_ASCII];
468 }
469
96ab900 more work
Laurent Sansonetti authored
470 rb_encoding_t *
471 rb_locale_encoding(void)
472 {
473 // XXX
474 return rb_encodings[ENCODING_UTF8];
475 }
476
477 void
478 rb_enc_set_default_external(VALUE encoding)
479 {
480 assert(CLASS_OF(encoding) == rb_cEncoding);
481 default_external = RENC(encoding);
482 }
483
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
484 rb_encoding *
485 rb_default_internal_encoding(void)
486 {
487 return (rb_encoding *)default_internal;
488 }
489
0202977 implement some of the MRI encoding index APIs
Laurent Sansonetti authored
490 static int
491 index_of_encoding(rb_encoding_t *enc)
492 {
493 if (enc != NULL) {
494 for (int i = 0; i <ENCODINGS_COUNT; i++) {
495 if (rb_encodings[i] == enc) {
496 return i;
497 }
498 }
499 }
500 return -1;
501 }
502
503 int
504 rb_enc_get_index(VALUE obj)
505 {
506 return index_of_encoding(rb_enc_get(obj));
507 }
508
ee2152e Better C implementation for Iconv
Thibault Martin-Lagardette authored
509 void
510 rb_enc_set_index(VALUE obj, int encindex)
511 {
512 if (encindex < ENCODINGS_COUNT) {
513 return ;
514 }
515 rb_str_force_encoding(obj, rb_encodings[encindex]);
516 }
517
0202977 implement some of the MRI encoding index APIs
Laurent Sansonetti authored
518 int
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
519 rb_to_encoding_index(VALUE enc)
520 {
521 if (CLASS_OF(enc) != rb_cEncoding && TYPE(enc) != T_STRING) {
522 return -1;
523 }
524 else {
a0faa60 @jballanc Cast, don't get; would be good to export this too
jballanc authored
525 int idx = index_of_encoding((rb_encoding_t *)enc);
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
526 if (idx >= 0) {
527 return idx;
528 }
529 else if (NIL_P(enc = rb_check_string_type(enc))) {
530 return -1;
531 }
532 if (!rb_enc_asciicompat(rb_enc_get(enc))) {
533 return -1;
534 }
535 return rb_enc_find_index(StringValueCStr(enc));
536 }
537 }
538
539 int
0202977 implement some of the MRI encoding index APIs
Laurent Sansonetti authored
540 rb_enc_find_index(const char *name)
541 {
542 return index_of_encoding(rb_enc_find(name));
543 }
544
545 int
546 rb_ascii8bit_encindex(void)
547 {
548 return index_of_encoding(rb_encodings[ENCODING_BINARY]);
549 }
550
551 int
552 rb_utf8_encindex(void)
553 {
554 return index_of_encoding(rb_encodings[ENCODING_UTF8]);
555 }
556
557 int
558 rb_usascii_encindex(void)
559 {
560 return index_of_encoding(rb_encodings[ENCODING_ASCII]);
561 }
562
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
563 rb_encoding *
564 rb_enc_from_index(int idx)
565 {
566 assert(idx >= 0 && idx < ENCODINGS_COUNT);
567 return rb_encodings[idx];
568 }
583e433 adding 2 more frightening MRI methods
Laurent Sansonetti authored
569
570 VALUE
571 rb_enc_associate_index(VALUE obj, int idx)
572 {
573 if (TYPE(obj) == T_STRING) {
574 assert(idx >= 0 && idx < ENCODINGS_COUNT);
575 rb_str_force_encoding(obj, rb_encodings[idx]);
576 return obj;
577 }
578 rb_raise(rb_eArgError, "cannot set encoding on non-string object");
579 }
Something went wrong with that request. Please try again.