Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 596 lines (528 sloc) 17.715 kb
96ab900 more work
Laurent Sansonetti authored
1 /*
2 * MacRuby implementation of Ruby 1.9 String.
3 *
4 * This file is covered by the Ruby license. See COPYING for more details.
5 *
9595725 update copyrights to 2011
Laurent Sansonetti authored
6 * Copyright (C) 2007-2011, Apple Inc. All rights reserved.
96ab900 more work
Laurent Sansonetti authored
7 * Copyright (C) 1993-2007 Yukihiro Matsumoto
8 * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
9 * Copyright (C) 2000 Information-technology Promotion Agency, Japan
10 */
11
2b7d5d5 import vincent's work
Laurent Sansonetti authored
12 #include <string.h>
9c1d230 committing experimental branch content
Laurent Sansonetti authored
13
d0898dd include/ruby/macruby.h -> macruby_internal.h
Laurent Sansonetti authored
14 #include "macruby_internal.h"
39b55f1 some work on string
Laurent Sansonetti authored
15 #include "ruby/encoding.h"
16 #include "encoding.h"
f738483 honor the original string encoding when generating substrings out of a r...
Laurent Sansonetti authored
17 #include "symbol.h"
39b55f1 some work on string
Laurent Sansonetti authored
18
96ab900 more work
Laurent Sansonetti authored
19 VALUE rb_cEncoding;
8b9745b define Encoding::ASCII_8BIT as a shortcut to US_ASCII (for now)
Laurent Sansonetti authored
20
d0ac593 @vincentisambart an (incomplete) implementation of String#encode
vincentisambart authored
21 rb_encoding_t *default_internal = NULL;
96ab900 more work
Laurent Sansonetti authored
22 static rb_encoding_t *default_external = NULL;
23 rb_encoding_t *rb_encodings[ENCODINGS_COUNT];
9c1d230 committing experimental branch content
Laurent Sansonetti authored
24
25 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
26 mr_enc_s_list(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
27 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
28 VALUE ary = rb_ary_new2(ENCODINGS_COUNT);
29 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
30 rb_ary_push(ary, (VALUE)rb_encodings[i]);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
31 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
32 return ary;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
33 }
34
35 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
36 mr_enc_s_name_list(VALUE klass, SEL sel)
37 {
38 VALUE ary = rb_ary_new();
39 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
40 rb_encoding_t *encoding = RENC(rb_encodings[i]);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
41 // TODO: use US-ASCII strings
96ab900 more work
Laurent Sansonetti authored
42 rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
43 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
96ab900 more work
Laurent Sansonetti authored
44 rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[j]));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
45 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
46 }
47 return ary;
48 }
49
50 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
51 mr_enc_s_aliases(VALUE klass, SEL sel)
52 {
53 VALUE hash = rb_hash_new();
54 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
55 rb_encoding_t *encoding = RENC(rb_encodings[i]);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
56 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
96ab900 more work
Laurent Sansonetti authored
57 rb_hash_aset(hash, rb_usascii_str_new2(encoding->aliases[j]),
58 rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
59 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
60 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
61 return hash;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
62 }
63
64 static VALUE
4ede652 added #find
Laurent Sansonetti authored
65 mr_enc_s_find(VALUE klass, SEL sel, VALUE name)
66 {
67 StringValue(name);
68 rb_encoding_t *enc = rb_enc_find(RSTRING_PTR(name));
69 if (enc == NULL) {
70 rb_raise(rb_eArgError, "unknown encoding name - %s",
71 RSTRING_PTR(name));
72 }
73 return (VALUE)enc;
74 }
75
76 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
77 mr_enc_s_default_internal(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
78 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
79 return (VALUE)default_internal;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
80 }
81
82 static VALUE
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
83 mr_enc_set_default_internal(VALUE klass, SEL sel, VALUE enc)
84 {
85 default_internal = rb_to_encoding(enc);
86 return (VALUE)default_internal;
87 }
88
89 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
90 mr_enc_s_default_external(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
91 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
92 return (VALUE)default_external;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
93 }
94
95 static VALUE
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
96 mr_enc_set_default_external(VALUE klass, SEL sel, VALUE enc)
97 {
98 default_external = rb_to_encoding(enc);
99 return (VALUE)default_external;
100 }
101
102 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
103 mr_enc_name(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
104 {
96ab900 more work
Laurent Sansonetti authored
105 return rb_usascii_str_new2(RENC(self)->public_name);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
106 }
107
108 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
109 mr_enc_inspect(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
110 {
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
111 return rb_sprintf("#<%s:%s>", rb_obj_classname(self),
96ab900 more work
Laurent Sansonetti authored
112 RENC(self)->public_name);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
113 }
114
115 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
116 mr_enc_names(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
117 {
96ab900 more work
Laurent Sansonetti authored
118 rb_encoding_t *encoding = RENC(self);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
119
2b7d5d5 import vincent's work
Laurent Sansonetti authored
120 VALUE ary = rb_ary_new2(encoding->aliases_count + 1);
96ab900 more work
Laurent Sansonetti authored
121 rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
122 for (unsigned int i = 0; i < encoding->aliases_count; ++i) {
96ab900 more work
Laurent Sansonetti authored
123 rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[i]));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
124 }
125 return ary;
1623532 added Encoding#default_external= and Encoding#default_internal= which do...
Laurent Sansonetti authored
126 }
127
2b7d5d5 import vincent's work
Laurent Sansonetti authored
128 static VALUE
129 mr_enc_ascii_compatible_p(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
130 {
96ab900 more work
Laurent Sansonetti authored
131 return RENC(self)->ascii_compatible ? Qtrue : Qfalse;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
132 }
133
134 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
135 mr_enc_dummy_p(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
136 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
137 return Qfalse;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
138 }
139
ffe45d2 Add support for Encoding::Converter and move String#encode and String#en...
Patrick Thomson authored
140 // For UTF-[8, 16, 32] it's /uFFFD, and for others it's '?'
141 rb_str_t *replacement_string_for_encoding(rb_encoding_t* destination)
142 {
143 rb_str_t *replacement_str = NULL;
144 if (destination == rb_encodings[ENCODING_UTF16BE]) {
145 replacement_str = RSTR(rb_enc_str_new("\xFF\xFD", 2, destination));
146 }
147 else if (destination == rb_encodings[ENCODING_UTF32BE]) {
148 replacement_str = RSTR(rb_enc_str_new("\0\0\xFF\xFD", 4, destination));
149 }
150 else if (destination == rb_encodings[ENCODING_UTF16LE]) {
151 replacement_str = RSTR(rb_enc_str_new("\xFD\xFF", 2, destination));
152 }
153 else if (destination == rb_encodings[ENCODING_UTF32LE]) {
154 replacement_str = RSTR(rb_enc_str_new("\xFD\xFF\0\0", 4, destination));
155 }
156 else if (destination == rb_encodings[ENCODING_UTF8]) {
157 replacement_str = RSTR(rb_enc_str_new("\xEF\xBF\xBD", 3, destination));
158 }
159 else {
160 replacement_str = RSTR(rb_enc_str_new("?", 1, rb_encodings[ENCODING_ASCII]));
161 replacement_str = str_simple_transcode(replacement_str, destination);
162 }
163 return replacement_str;
164 }
165
2b7d5d5 import vincent's work
Laurent Sansonetti authored
166 static void
96ab900 more work
Laurent Sansonetti authored
167 define_encoding_constant(const char *name, rb_encoding_t *encoding)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
168 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
169 char c = name[0];
170 if ((c >= '0') && (c <= '9')) {
171 // constants can't start with a number
172 return;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
173 }
174
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
175 if (strcmp(name, "locale") == 0) {
176 // there is no constant for locale
177 return;
178 }
179
2b7d5d5 import vincent's work
Laurent Sansonetti authored
180 char *name_copy = strdup(name);
181 if ((c >= 'a') && (c <= 'z')) {
182 // the first character must be upper case
183 name_copy[0] = c - ('a' - 'A');
184 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
185
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
186 bool has_lower_case = false;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
187 // '.' and '-' must be transformed into '_'
188 for (int i = 0; name_copy[i]; ++i) {
189 if ((name_copy[i] == '.') || (name_copy[i] == '-')) {
190 name_copy[i] = '_';
023dd4d fixed Encoding#name for 10.6
Laurent Sansonetti authored
191 }
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
192 else if ((name_copy[i] >= 'a') && (name_copy[i] <= 'z')) {
193 has_lower_case = true;
194 }
023dd4d fixed Encoding#name for 10.6
Laurent Sansonetti authored
195 }
b881853 s/MR//
Laurent Sansonetti authored
196 rb_define_const(rb_cEncoding, name_copy, (VALUE)encoding);
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
197 // if the encoding name has lower case characters,
198 // also define it in upper case
199 if (has_lower_case) {
200 for (int i = 0; name_copy[i]; ++i) {
201 if ((name_copy[i] >= 'a') && (name_copy[i] <= 'z')) {
202 name_copy[i] = name_copy[i] - 'a' + 'A';
203 }
204 }
205 rb_define_const(rb_cEncoding, name_copy, (VALUE)encoding);
206 }
207
2b7d5d5 import vincent's work
Laurent Sansonetti authored
208 free(name_copy);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
209 }
210
96ab900 more work
Laurent Sansonetti authored
211 extern void enc_init_ucnv_encoding(rb_encoding_t *encoding);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
212
2b7d5d5 import vincent's work
Laurent Sansonetti authored
// Kinds of encodings accepted by add_encoding() as its rb_encoding_type
// argument.
enum {
    // no extra initialization is performed by add_encoding()
    ENCODING_TYPE_SPECIAL = 0,
    // add_encoding() hands the encoding to enc_init_ucnv_encoding()
    ENCODING_TYPE_UCNV = 1
};
9c1d230 committing experimental branch content
Laurent Sansonetti authored
217
2b7d5d5 import vincent's work
Laurent Sansonetti authored
218 static void
219 add_encoding(
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
220 unsigned int encoding_index, // index of the encoding in the encodings
221 // array
96ab900 more work
Laurent Sansonetti authored
222 unsigned int rb_encoding_type,
2b7d5d5 import vincent's work
Laurent Sansonetti authored
223 const char *public_name, // public name for the encoding
224 unsigned char min_char_size,
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
225 bool single_byte_encoding, // in the encoding a character takes only
226 // one byte
2b7d5d5 import vincent's work
Laurent Sansonetti authored
227 bool ascii_compatible, // is the encoding ASCII compatible or not
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
228 bool little_endian, // for UTF-16/32, if the encoding is little endian
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
229 ... // aliases for the encoding (should no include the public name)
230 // - must end with a NULL
2b7d5d5 import vincent's work
Laurent Sansonetti authored
231 )
232 {
233 assert(encoding_index < ENCODINGS_COUNT);
234
235 // create an array for the aliases
236 unsigned int aliases_count = 0;
237 va_list va_aliases;
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
238 va_start(va_aliases, little_endian);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
239 while (va_arg(va_aliases, const char *) != NULL) {
240 ++aliases_count;
241 }
242 va_end(va_aliases);
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
243 const char **aliases = (const char **)
244 malloc(sizeof(const char *) * aliases_count);
56236a3 encoding.c
Steven Canfield authored
245 assert(aliases != NULL);
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
246 va_start(va_aliases, little_endian);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
247 for (unsigned int i = 0; i < aliases_count; ++i) {
248 aliases[i] = va_arg(va_aliases, const char *);
249 }
250 va_end(va_aliases);
251
252 // create the MacRuby object
96ab900 more work
Laurent Sansonetti authored
253 NEWOBJ(encoding, rb_encoding_t);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
254 encoding->basic.flags = 0;
b881853 s/MR//
Laurent Sansonetti authored
255 encoding->basic.klass = rb_cEncoding;
96ab900 more work
Laurent Sansonetti authored
256 rb_encodings[encoding_index] = encoding;
257 GC_RETAIN(encoding); // it should never be deallocated
2b7d5d5 import vincent's work
Laurent Sansonetti authored
258
259 // fill the fields
260 encoding->index = encoding_index;
261 encoding->public_name = public_name;
262 encoding->min_char_size = min_char_size;
263 encoding->single_byte_encoding = single_byte_encoding;
264 encoding->ascii_compatible = ascii_compatible;
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
265 encoding->little_endian = little_endian;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
266 encoding->aliases_count = aliases_count;
267 encoding->aliases = aliases;
268
96ab900 more work
Laurent Sansonetti authored
269 switch (rb_encoding_type) {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
270 case ENCODING_TYPE_SPECIAL:
022cd7c fixed ByteString#encoding to always return US_ASCII (for now)
Laurent Sansonetti authored
271 break;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
272 case ENCODING_TYPE_UCNV:
273 enc_init_ucnv_encoding(encoding);
274 break;
275 default:
276 abort();
9c1d230 committing experimental branch content
Laurent Sansonetti authored
277 }
278 }
279
ae4da82 more work
Laurent Sansonetti authored
280 // This Init function is called very early. Do not use any runtime method
281 // because things may not be initialized properly yet.
282 void
283 Init_PreEncoding(void)
2b7d5d5 import vincent's work
Laurent Sansonetti authored
284 {
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
285 add_encoding(ENCODING_BINARY, ENCODING_TYPE_SPECIAL, "ASCII-8BIT", 1, true, true, false, "BINARY", NULL);
286 add_encoding(ENCODING_ASCII, ENCODING_TYPE_UCNV, "US-ASCII", 1, true, true, false, "ASCII", "ANSI_X3.4-1968", "646", NULL);
287 add_encoding(ENCODING_UTF8, ENCODING_TYPE_UCNV, "UTF-8", 1, false, true, false, "CP65001", "locale", NULL);
288 add_encoding(ENCODING_UTF16BE, ENCODING_TYPE_UCNV, "UTF-16BE", 2, false, false, false, NULL);
289 add_encoding(ENCODING_UTF16LE, ENCODING_TYPE_UCNV, "UTF-16LE", 2, false, false, true, NULL);
290 add_encoding(ENCODING_UTF32BE, ENCODING_TYPE_UCNV, "UTF-32BE", 4, false, false, false, "UCS-4BE", NULL);
291 add_encoding(ENCODING_UTF32LE, ENCODING_TYPE_UCNV, "UTF-32LE", 4, false, false, true, "UCS-4LE", NULL);
292 add_encoding(ENCODING_ISO8859_1, ENCODING_TYPE_UCNV, "ISO-8859-1", 1, true, true, false, "ISO8859-1", NULL);
311371a @vincentisambart added all the ISO-8859 encodings left as some of them are used in rubysp...
vincentisambart authored
293 add_encoding(ENCODING_ISO8859_2, ENCODING_TYPE_UCNV, "ISO-8859-2", 1, true, true, false, "ISO8859-2", NULL);
294 add_encoding(ENCODING_ISO8859_3, ENCODING_TYPE_UCNV, "ISO-8859-3", 1, true, true, false, "ISO8859-3", NULL);
295 add_encoding(ENCODING_ISO8859_4, ENCODING_TYPE_UCNV, "ISO-8859-4", 1, true, true, false, "ISO8859-4", NULL);
296 add_encoding(ENCODING_ISO8859_5, ENCODING_TYPE_UCNV, "ISO-8859-5", 1, true, true, false, "ISO8859-5", NULL);
297 add_encoding(ENCODING_ISO8859_6, ENCODING_TYPE_UCNV, "ISO-8859-6", 1, true, true, false, "ISO8859-6", NULL);
298 add_encoding(ENCODING_ISO8859_7, ENCODING_TYPE_UCNV, "ISO-8859-7", 1, true, true, false, "ISO8859-7", NULL);
299 add_encoding(ENCODING_ISO8859_8, ENCODING_TYPE_UCNV, "ISO-8859-8", 1, true, true, false, "ISO8859-8", NULL);
300 add_encoding(ENCODING_ISO8859_9, ENCODING_TYPE_UCNV, "ISO-8859-9", 1, true, true, false, "ISO8859-9", NULL);
301 add_encoding(ENCODING_ISO8859_10, ENCODING_TYPE_UCNV, "ISO-8859-10", 1, true, true, false, "ISO8859-10", NULL);
302 add_encoding(ENCODING_ISO8859_11, ENCODING_TYPE_UCNV, "ISO-8859-11", 1, true, true, false, "ISO8859-11", NULL);
303 add_encoding(ENCODING_ISO8859_13, ENCODING_TYPE_UCNV, "ISO-8859-13", 1, true, true, false, "ISO8859-13", NULL);
304 add_encoding(ENCODING_ISO8859_14, ENCODING_TYPE_UCNV, "ISO-8859-14", 1, true, true, false, "ISO8859-14", NULL);
305 add_encoding(ENCODING_ISO8859_15, ENCODING_TYPE_UCNV, "ISO-8859-15", 1, true, true, false, "ISO8859-15", NULL);
306 add_encoding(ENCODING_ISO8859_16, ENCODING_TYPE_UCNV, "ISO-8859-16", 1, true, true, false, "ISO8859-16", NULL);
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
307 add_encoding(ENCODING_MACROMAN, ENCODING_TYPE_UCNV, "macRoman", 1, true, true, false, NULL);
308 add_encoding(ENCODING_MACCYRILLIC, ENCODING_TYPE_UCNV, "macCyrillic", 1, true, true, false, NULL);
309 add_encoding(ENCODING_BIG5, ENCODING_TYPE_UCNV, "Big5", 1, false, true, false, "CP950", NULL);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
310 // FIXME: the ICU conversion tables do not seem to match Ruby's Japanese conversion tables
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
311 add_encoding(ENCODING_EUCJP, ENCODING_TYPE_UCNV, "EUC-JP", 1, false, true, false, "eucJP", NULL);
312 add_encoding(ENCODING_SJIS, ENCODING_TYPE_UCNV, "Shift_JIS", 1, false, true, false, "SJIS", NULL);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
313 //add_encoding(ENCODING_EUCJP, ENCODING_TYPE_RUBY, "EUC-JP", 1, false, true, "eucJP", NULL);
314 //add_encoding(ENCODING_SJIS, ENCODING_TYPE_RUBY, "Shift_JIS", 1, false, true, "SJIS", NULL);
315 //add_encoding(ENCODING_CP932, ENCODING_TYPE_RUBY, "Windows-31J", 1, false, true, "CP932", "csWindows31J", NULL);
316
96ab900 more work
Laurent Sansonetti authored
317 default_external = rb_encodings[ENCODING_UTF8];
318 default_internal = rb_encodings[ENCODING_UTF8];
9c1d230 committing experimental branch content
Laurent Sansonetti authored
319 }
320
2b7d5d5 import vincent's work
Laurent Sansonetti authored
321 void
b881853 s/MR//
Laurent Sansonetti authored
322 Init_Encoding(void)
2b7d5d5 import vincent's work
Laurent Sansonetti authored
323 {
ae4da82 more work
Laurent Sansonetti authored
324 // rb_cEncoding is defined earlier in Init_PreVM().
325 rb_set_class_path(rb_cEncoding, rb_cObject, "Encoding");
326 rb_const_set(rb_cObject, rb_intern("Encoding"), rb_cEncoding);
327
b881853 s/MR//
Laurent Sansonetti authored
328 rb_undef_alloc_func(rb_cEncoding);
329
330 rb_objc_define_method(rb_cEncoding, "to_s", mr_enc_name, 0);
331 rb_objc_define_method(rb_cEncoding, "inspect", mr_enc_inspect, 0);
332 rb_objc_define_method(rb_cEncoding, "name", mr_enc_name, 0);
333 rb_objc_define_method(rb_cEncoding, "names", mr_enc_names, 0);
334 rb_objc_define_method(rb_cEncoding, "dummy?", mr_enc_dummy_p, 0);
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
335 rb_objc_define_method(rb_cEncoding, "ascii_compatible?",
336 mr_enc_ascii_compatible_p, 0);
4ede652 added #find
Laurent Sansonetti authored
337 rb_objc_define_method(*(VALUE *)rb_cEncoding, "list", mr_enc_s_list, 0);
338 rb_objc_define_method(*(VALUE *)rb_cEncoding, "name_list",
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
339 mr_enc_s_name_list, 0);
4ede652 added #find
Laurent Sansonetti authored
340 rb_objc_define_method(*(VALUE *)rb_cEncoding, "aliases",
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
341 mr_enc_s_aliases, 0);
4ede652 added #find
Laurent Sansonetti authored
342 rb_objc_define_method(*(VALUE *)rb_cEncoding, "find", mr_enc_s_find, 1);
343 rb_objc_define_method(*(VALUE *)rb_cEncoding, "compatible?",
39b55f1 some work on string
Laurent Sansonetti authored
344 mr_enc_s_is_compatible, 2); // in string.c
2b7d5d5 import vincent's work
Laurent Sansonetti authored
345
346 //rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
347 //rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
348
4ede652 added #find
Laurent Sansonetti authored
349 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external",
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
350 mr_enc_s_default_external, 0);
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
351 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external=",
352 mr_enc_set_default_external, 1);
4ede652 added #find
Laurent Sansonetti authored
353 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal",
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
354 mr_enc_s_default_internal, 0);
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
355 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal=",
356 mr_enc_set_default_internal, 1);
b881853 s/MR//
Laurent Sansonetti authored
357 //rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
358
ae4da82 more work
Laurent Sansonetti authored
359 // Create constants.
360 for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
361 rb_encoding_t *enc = rb_encodings[i];
362 define_encoding_constant(enc->public_name, enc);
363 for (unsigned int j = 0; j < enc->aliases_count; j++) {
364 define_encoding_constant(enc->aliases[j], enc);
365 }
366 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
367 }
96ab900 more work
Laurent Sansonetti authored
368
369 // MRI C-API compatibility.
370
371 rb_encoding_t *
372 rb_enc_find(const char *name)
373 {
374 for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
375 rb_encoding_t *enc = rb_encodings[i];
376 if (strcasecmp(enc->public_name, name) == 0) {
377 return enc;
378 }
379 for (unsigned int j = 0; j < enc->aliases_count; j++) {
380 const char *alias = enc->aliases[j];
381 if (strcasecmp(alias, name) == 0) {
382 return enc;
383 }
384 }
385 }
386 return NULL;
387 }
388
389 VALUE
390 rb_enc_from_encoding(rb_encoding_t *enc)
391 {
392 return (VALUE)enc;
393 }
394
395 rb_encoding_t *
396 rb_enc_get(VALUE obj)
397 {
f738483 honor the original string encoding when generating substrings out of a r...
Laurent Sansonetti authored
398 switch (TYPE(obj)) {
399 case T_STRING:
400 if (IS_RSTR(obj)) {
401 return RSTR(obj)->encoding;
402 }
403 return rb_encodings[ENCODING_UTF8];
404
405 case T_SYMBOL:
406 return rb_enc_get(rb_sym_str(obj));
96ab900 more work
Laurent Sansonetti authored
407 }
408 return NULL;
409 }
410
411 rb_encoding_t *
412 rb_to_encoding(VALUE obj)
413 {
414 rb_encoding_t *enc;
415 if (CLASS_OF(obj) == rb_cEncoding) {
416 enc = RENC(obj);
417 }
418 else {
419 StringValue(obj);
420 enc = rb_enc_find(RSTRING_PTR(obj));
421 if (enc == NULL) {
422 rb_raise(rb_eArgError, "unknown encoding name - %s",
423 RSTRING_PTR(obj));
424 }
425 }
426 return enc;
427 }
428
429 const char *
430 rb_enc_name(rb_encoding_t *enc)
431 {
432 return RENC(enc)->public_name;
433 }
434
435 VALUE
436 rb_enc_name2(rb_encoding_t *enc)
437 {
438 return rb_usascii_str_new2(rb_enc_name(enc));
439 }
440
441 long
442 rb_enc_mbminlen(rb_encoding_t *enc)
443 {
444 return enc->min_char_size;
445 }
446
447 long
448 rb_enc_mbmaxlen(rb_encoding_t *enc)
449 {
450 return enc->single_byte_encoding ? 1 : 10; // XXX 10?
451 }
452
4cd5f5e added missing MRI methods
Laurent Sansonetti authored
453 rb_encoding *
454 rb_ascii8bit_encoding(void)
455 {
456 return rb_encodings[ENCODING_BINARY];
457 }
458
459 rb_encoding *
460 rb_utf8_encoding(void)
461 {
462 return rb_encodings[ENCODING_UTF8];
463 }
464
465 rb_encoding *
466 rb_usascii_encoding(void)
467 {
468 return rb_encodings[ENCODING_ASCII];
469 }
470
96ab900 more work
Laurent Sansonetti authored
471 rb_encoding_t *
472 rb_locale_encoding(void)
473 {
474 // XXX
475 return rb_encodings[ENCODING_UTF8];
476 }
477
478 void
479 rb_enc_set_default_external(VALUE encoding)
480 {
481 assert(CLASS_OF(encoding) == rb_cEncoding);
482 default_external = RENC(encoding);
483 }
484
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
485 rb_encoding *
486 rb_default_internal_encoding(void)
487 {
488 return (rb_encoding *)default_internal;
489 }
490
5e9b743 @lrz add missing CRuby encoding APIs
lrz authored
491 rb_encoding *
492 rb_default_external_encoding(void)
493 {
494 return (rb_encoding *)default_external;
495 }
496
0202977 implement some of the MRI encoding index APIs
Laurent Sansonetti authored
497 static int
498 index_of_encoding(rb_encoding_t *enc)
499 {
500 if (enc != NULL) {
501 for (int i = 0; i <ENCODINGS_COUNT; i++) {
502 if (rb_encodings[i] == enc) {
503 return i;
504 }
505 }
506 }
507 return -1;
508 }
509
510 int
511 rb_enc_get_index(VALUE obj)
512 {
513 return index_of_encoding(rb_enc_get(obj));
514 }
515
5e9b743 @lrz add missing CRuby encoding APIs
lrz authored
516 int
517 rb_enc_to_index(VALUE enc)
518 {
519 if (CLASS_OF(enc) == rb_cEncoding) {
520 return index_of_encoding(RENC(enc));
521 }
522 return -1;
523 }
524
ee2152e Better C implementation for Iconv
Thibault Martin-Lagardette authored
525 void
526 rb_enc_set_index(VALUE obj, int encindex)
527 {
528 if (encindex < ENCODINGS_COUNT) {
529 return ;
530 }
531 rb_str_force_encoding(obj, rb_encodings[encindex]);
532 }
533
0202977 implement some of the MRI encoding index APIs
Laurent Sansonetti authored
534 int
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
535 rb_to_encoding_index(VALUE enc)
536 {
537 if (CLASS_OF(enc) != rb_cEncoding && TYPE(enc) != T_STRING) {
538 return -1;
539 }
540 else {
a0faa60 @jballanc Cast, don't get; would be good to export this too
jballanc authored
541 int idx = index_of_encoding((rb_encoding_t *)enc);
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
542 if (idx >= 0) {
543 return idx;
544 }
545 else if (NIL_P(enc = rb_check_string_type(enc))) {
546 return -1;
547 }
548 if (!rb_enc_asciicompat(rb_enc_get(enc))) {
549 return -1;
550 }
551 return rb_enc_find_index(StringValueCStr(enc));
552 }
553 }
554
555 int
0202977 implement some of the MRI encoding index APIs
Laurent Sansonetti authored
556 rb_enc_find_index(const char *name)
557 {
558 return index_of_encoding(rb_enc_find(name));
559 }
560
561 int
562 rb_ascii8bit_encindex(void)
563 {
564 return index_of_encoding(rb_encodings[ENCODING_BINARY]);
565 }
566
567 int
568 rb_utf8_encindex(void)
569 {
570 return index_of_encoding(rb_encodings[ENCODING_UTF8]);
571 }
572
573 int
574 rb_usascii_encindex(void)
575 {
576 return index_of_encoding(rb_encodings[ENCODING_ASCII]);
577 }
578
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
579 rb_encoding *
580 rb_enc_from_index(int idx)
581 {
582 assert(idx >= 0 && idx < ENCODINGS_COUNT);
583 return rb_encodings[idx];
584 }
583e433 adding 2 more frightening MRI methods
Laurent Sansonetti authored
585
586 VALUE
587 rb_enc_associate_index(VALUE obj, int idx)
588 {
589 if (TYPE(obj) == T_STRING) {
590 assert(idx >= 0 && idx < ENCODINGS_COUNT);
591 rb_str_force_encoding(obj, rb_encodings[idx]);
592 return obj;
593 }
594 rb_raise(rb_eArgError, "cannot set encoding on non-string object");
595 }
Something went wrong with that request. Please try again.