Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 603 lines (534 sloc) 17.918 kb
7d7d3e8 @ferrous26 Change ownership to The MacRuby Team and update copyrights
ferrous26 authored
1 /*
96ab900 more work
Laurent Sansonetti authored
2 * MacRuby implementation of Ruby 1.9 String.
3 *
4 * This file is covered by the Ruby license. See COPYING for more details.
7d7d3e8 @ferrous26 Change ownership to The MacRuby Team and update copyrights
ferrous26 authored
5 *
6 * Copyright (C) 2012, The MacRuby Team. All rights reserved.
9595725 update copyrights to 2011
Laurent Sansonetti authored
7 * Copyright (C) 2007-2011, Apple Inc. All rights reserved.
96ab900 more work
Laurent Sansonetti authored
8 * Copyright (C) 1993-2007 Yukihiro Matsumoto
9 * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
10 * Copyright (C) 2000 Information-technology Promotion Agency, Japan
11 */
12
2b7d5d5 import vincent's work
Laurent Sansonetti authored
13 #include <string.h>
9c1d230 committing experimental branch content
Laurent Sansonetti authored
14
d0898dd include/ruby/macruby.h -> macruby_internal.h
Laurent Sansonetti authored
15 #include "macruby_internal.h"
39b55f1 some work on string
Laurent Sansonetti authored
16 #include "ruby/encoding.h"
17 #include "encoding.h"
f738483 honor the original string encoding when generating substrings out of a r...
Laurent Sansonetti authored
18 #include "symbol.h"
39b55f1 some work on string
Laurent Sansonetti authored
19
96ab900 more work
Laurent Sansonetti authored
20 VALUE rb_cEncoding;
8b9745b define Encoding::ASCII_8BIT as a shortcut to US_ASCII (for now)
Laurent Sansonetti authored
21
d0ac593 @vincentisambart an (incomplete) implementation of String#encode
vincentisambart authored
22 rb_encoding_t *default_internal = NULL;
96ab900 more work
Laurent Sansonetti authored
23 static rb_encoding_t *default_external = NULL;
24 rb_encoding_t *rb_encodings[ENCODINGS_COUNT];
9c1d230 committing experimental branch content
Laurent Sansonetti authored
25
26 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
27 mr_enc_s_list(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
28 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
29 VALUE ary = rb_ary_new2(ENCODINGS_COUNT);
30 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
31 rb_ary_push(ary, (VALUE)rb_encodings[i]);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
32 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
33 return ary;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
34 }
35
36 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
37 mr_enc_s_name_list(VALUE klass, SEL sel)
38 {
39 VALUE ary = rb_ary_new();
40 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
41 rb_encoding_t *encoding = RENC(rb_encodings[i]);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
42 // TODO: use US-ASCII strings
96ab900 more work
Laurent Sansonetti authored
43 rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
44 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
96ab900 more work
Laurent Sansonetti authored
45 rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[j]));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
46 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
47 }
48 return ary;
49 }
50
51 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
52 mr_enc_s_aliases(VALUE klass, SEL sel)
53 {
54 VALUE hash = rb_hash_new();
55 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
56 rb_encoding_t *encoding = RENC(rb_encodings[i]);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
57 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
96ab900 more work
Laurent Sansonetti authored
58 rb_hash_aset(hash, rb_usascii_str_new2(encoding->aliases[j]),
59 rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
60 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
61 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
62 return hash;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
63 }
64
65 static VALUE
4ede652 added #find
Laurent Sansonetti authored
66 mr_enc_s_find(VALUE klass, SEL sel, VALUE name)
67 {
68 StringValue(name);
69 rb_encoding_t *enc = rb_enc_find(RSTRING_PTR(name));
70 if (enc == NULL) {
71 rb_raise(rb_eArgError, "unknown encoding name - %s",
72 RSTRING_PTR(name));
73 }
74 return (VALUE)enc;
75 }
76
77 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
78 mr_enc_s_default_internal(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
79 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
80 return (VALUE)default_internal;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
81 }
82
83 static VALUE
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
84 mr_enc_set_default_internal(VALUE klass, SEL sel, VALUE enc)
85 {
86 default_internal = rb_to_encoding(enc);
87 return (VALUE)default_internal;
88 }
89
90 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
91 mr_enc_s_default_external(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
92 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
93 return (VALUE)default_external;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
94 }
95
96 static VALUE
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
97 mr_enc_set_default_external(VALUE klass, SEL sel, VALUE enc)
98 {
99 default_external = rb_to_encoding(enc);
100 return (VALUE)default_external;
101 }
102
103 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
104 mr_enc_name(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
105 {
96ab900 more work
Laurent Sansonetti authored
106 return rb_usascii_str_new2(RENC(self)->public_name);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
107 }
108
109 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
110 mr_enc_inspect(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
111 {
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
112 return rb_sprintf("#<%s:%s>", rb_obj_classname(self),
96ab900 more work
Laurent Sansonetti authored
113 RENC(self)->public_name);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
114 }
115
116 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
117 mr_enc_names(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
118 {
96ab900 more work
Laurent Sansonetti authored
119 rb_encoding_t *encoding = RENC(self);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
120
2b7d5d5 import vincent's work
Laurent Sansonetti authored
121 VALUE ary = rb_ary_new2(encoding->aliases_count + 1);
96ab900 more work
Laurent Sansonetti authored
122 rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
123 for (unsigned int i = 0; i < encoding->aliases_count; ++i) {
96ab900 more work
Laurent Sansonetti authored
124 rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[i]));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
125 }
126 return ary;
1623532 added Encoding#default_external= and Encoding#default_internal= which do...
Laurent Sansonetti authored
127 }
128
2b7d5d5 import vincent's work
Laurent Sansonetti authored
129 static VALUE
130 mr_enc_ascii_compatible_p(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
131 {
96ab900 more work
Laurent Sansonetti authored
132 return RENC(self)->ascii_compatible ? Qtrue : Qfalse;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
133 }
134
135 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
136 mr_enc_dummy_p(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
137 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
138 return Qfalse;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
139 }
140
ffe45d2 Add support for Encoding::Converter and move String#encode and String#en...
Patrick Thomson authored
141 // For UTF-[8, 16, 32] it's /uFFFD, and for others it's '?'
142 rb_str_t *replacement_string_for_encoding(rb_encoding_t* destination)
143 {
144 rb_str_t *replacement_str = NULL;
145 if (destination == rb_encodings[ENCODING_UTF16BE]) {
146 replacement_str = RSTR(rb_enc_str_new("\xFF\xFD", 2, destination));
147 }
148 else if (destination == rb_encodings[ENCODING_UTF32BE]) {
149 replacement_str = RSTR(rb_enc_str_new("\0\0\xFF\xFD", 4, destination));
150 }
151 else if (destination == rb_encodings[ENCODING_UTF16LE]) {
152 replacement_str = RSTR(rb_enc_str_new("\xFD\xFF", 2, destination));
153 }
154 else if (destination == rb_encodings[ENCODING_UTF32LE]) {
155 replacement_str = RSTR(rb_enc_str_new("\xFD\xFF\0\0", 4, destination));
156 }
157 else if (destination == rb_encodings[ENCODING_UTF8]) {
158 replacement_str = RSTR(rb_enc_str_new("\xEF\xBF\xBD", 3, destination));
159 }
160 else {
161 replacement_str = RSTR(rb_enc_str_new("?", 1, rb_encodings[ENCODING_ASCII]));
162 replacement_str = str_simple_transcode(replacement_str, destination);
163 }
164 return replacement_str;
165 }
166
2b7d5d5 import vincent's work
Laurent Sansonetti authored
167 static void
96ab900 more work
Laurent Sansonetti authored
168 define_encoding_constant(const char *name, rb_encoding_t *encoding)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
169 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
170 char c = name[0];
171 if ((c >= '0') && (c <= '9')) {
172 // constants can't start with a number
173 return;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
174 }
175
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
176 if (strcmp(name, "locale") == 0) {
177 // there is no constant for locale
178 return;
179 }
180
2b7d5d5 import vincent's work
Laurent Sansonetti authored
181 char *name_copy = strdup(name);
182 if ((c >= 'a') && (c <= 'z')) {
183 // the first character must be upper case
184 name_copy[0] = c - ('a' - 'A');
185 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
186
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
187 bool has_lower_case = false;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
188 // '.' and '-' must be transformed into '_'
189 for (int i = 0; name_copy[i]; ++i) {
190 if ((name_copy[i] == '.') || (name_copy[i] == '-')) {
191 name_copy[i] = '_';
023dd4d fixed Encoding#name for 10.6
Laurent Sansonetti authored
192 }
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
193 else if ((name_copy[i] >= 'a') && (name_copy[i] <= 'z')) {
194 has_lower_case = true;
195 }
023dd4d fixed Encoding#name for 10.6
Laurent Sansonetti authored
196 }
b881853 s/MR//
Laurent Sansonetti authored
197 rb_define_const(rb_cEncoding, name_copy, (VALUE)encoding);
325c032 @vincentisambart also define the encoding constants in upper case
vincentisambart authored
198 // if the encoding name has lower case characters,
199 // also define it in upper case
200 if (has_lower_case) {
201 for (int i = 0; name_copy[i]; ++i) {
202 if ((name_copy[i] >= 'a') && (name_copy[i] <= 'z')) {
203 name_copy[i] = name_copy[i] - 'a' + 'A';
204 }
205 }
206 rb_define_const(rb_cEncoding, name_copy, (VALUE)encoding);
207 }
208
2b7d5d5 import vincent's work
Laurent Sansonetti authored
209 free(name_copy);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
210 }
211
96ab900 more work
Laurent Sansonetti authored
212 extern void enc_init_ucnv_encoding(rb_encoding_t *encoding);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
213
2b7d5d5 import vincent's work
Laurent Sansonetti authored
// Kinds of encoding accepted by add_encoding() as its rb_encoding_type
// argument.
enum {
    ENCODING_TYPE_SPECIAL = 0, // no extra initialization is performed
    ENCODING_TYPE_UCNV = 1     // initialized by enc_init_ucnv_encoding()
};
9c1d230 committing experimental branch content
Laurent Sansonetti authored
218
2b7d5d5 import vincent's work
Laurent Sansonetti authored
219 static void
220 add_encoding(
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
221 unsigned int encoding_index, // index of the encoding in the encodings
222 // array
96ab900 more work
Laurent Sansonetti authored
223 unsigned int rb_encoding_type,
2b7d5d5 import vincent's work
Laurent Sansonetti authored
224 const char *public_name, // public name for the encoding
225 unsigned char min_char_size,
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
226 bool single_byte_encoding, // in the encoding a character takes only
227 // one byte
2b7d5d5 import vincent's work
Laurent Sansonetti authored
228 bool ascii_compatible, // is the encoding ASCII compatible or not
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
229 bool little_endian, // for UTF-16/32, if the encoding is little endian
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
230 ... // aliases for the encoding (should no include the public name)
231 // - must end with a NULL
2b7d5d5 import vincent's work
Laurent Sansonetti authored
232 )
233 {
234 assert(encoding_index < ENCODINGS_COUNT);
235
236 // create an array for the aliases
237 unsigned int aliases_count = 0;
238 va_list va_aliases;
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
239 va_start(va_aliases, little_endian);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
240 while (va_arg(va_aliases, const char *) != NULL) {
241 ++aliases_count;
242 }
243 va_end(va_aliases);
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
244 const char **aliases = (const char **)
245 malloc(sizeof(const char *) * aliases_count);
56236a3 encoding.c
Steven Canfield authored
246 assert(aliases != NULL);
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
247 va_start(va_aliases, little_endian);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
248 for (unsigned int i = 0; i < aliases_count; ++i) {
249 aliases[i] = va_arg(va_aliases, const char *);
250 }
251 va_end(va_aliases);
252
253 // create the MacRuby object
96ab900 more work
Laurent Sansonetti authored
254 NEWOBJ(encoding, rb_encoding_t);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
255 encoding->basic.flags = 0;
b881853 s/MR//
Laurent Sansonetti authored
256 encoding->basic.klass = rb_cEncoding;
96ab900 more work
Laurent Sansonetti authored
257 rb_encodings[encoding_index] = encoding;
258 GC_RETAIN(encoding); // it should never be deallocated
2b7d5d5 import vincent's work
Laurent Sansonetti authored
259
260 // fill the fields
261 encoding->index = encoding_index;
262 encoding->public_name = public_name;
263 encoding->min_char_size = min_char_size;
264 encoding->single_byte_encoding = single_byte_encoding;
265 encoding->ascii_compatible = ascii_compatible;
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
266 encoding->little_endian = little_endian;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
267 encoding->aliases_count = aliases_count;
268 encoding->aliases = aliases;
269
96ab900 more work
Laurent Sansonetti authored
270 switch (rb_encoding_type) {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
271 case ENCODING_TYPE_SPECIAL:
022cd7c fixed ByteString#encoding to always return US_ASCII (for now)
Laurent Sansonetti authored
272 break;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
273 case ENCODING_TYPE_UCNV:
274 enc_init_ucnv_encoding(encoding);
275 break;
276 default:
277 abort();
9c1d230 committing experimental branch content
Laurent Sansonetti authored
278 }
279 }
280
ae4da82 more work
Laurent Sansonetti authored
281 // This Init function is called very early. Do not use any runtime method
282 // because things may not be initialized properly yet.
283 void
284 Init_PreEncoding(void)
2b7d5d5 import vincent's work
Laurent Sansonetti authored
285 {
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
286 add_encoding(ENCODING_BINARY, ENCODING_TYPE_SPECIAL, "ASCII-8BIT", 1, true, true, false, "BINARY", NULL);
287 add_encoding(ENCODING_ASCII, ENCODING_TYPE_UCNV, "US-ASCII", 1, true, true, false, "ASCII", "ANSI_X3.4-1968", "646", NULL);
288 add_encoding(ENCODING_UTF8, ENCODING_TYPE_UCNV, "UTF-8", 1, false, true, false, "CP65001", "locale", NULL);
289 add_encoding(ENCODING_UTF16BE, ENCODING_TYPE_UCNV, "UTF-16BE", 2, false, false, false, NULL);
290 add_encoding(ENCODING_UTF16LE, ENCODING_TYPE_UCNV, "UTF-16LE", 2, false, false, true, NULL);
291 add_encoding(ENCODING_UTF32BE, ENCODING_TYPE_UCNV, "UTF-32BE", 4, false, false, false, "UCS-4BE", NULL);
292 add_encoding(ENCODING_UTF32LE, ENCODING_TYPE_UCNV, "UTF-32LE", 4, false, false, true, "UCS-4LE", NULL);
293 add_encoding(ENCODING_ISO8859_1, ENCODING_TYPE_UCNV, "ISO-8859-1", 1, true, true, false, "ISO8859-1", NULL);
311371a @vincentisambart added all the ISO-8859 encodings left as some of them are used in rubysp...
vincentisambart authored
294 add_encoding(ENCODING_ISO8859_2, ENCODING_TYPE_UCNV, "ISO-8859-2", 1, true, true, false, "ISO8859-2", NULL);
295 add_encoding(ENCODING_ISO8859_3, ENCODING_TYPE_UCNV, "ISO-8859-3", 1, true, true, false, "ISO8859-3", NULL);
296 add_encoding(ENCODING_ISO8859_4, ENCODING_TYPE_UCNV, "ISO-8859-4", 1, true, true, false, "ISO8859-4", NULL);
297 add_encoding(ENCODING_ISO8859_5, ENCODING_TYPE_UCNV, "ISO-8859-5", 1, true, true, false, "ISO8859-5", NULL);
298 add_encoding(ENCODING_ISO8859_6, ENCODING_TYPE_UCNV, "ISO-8859-6", 1, true, true, false, "ISO8859-6", NULL);
299 add_encoding(ENCODING_ISO8859_7, ENCODING_TYPE_UCNV, "ISO-8859-7", 1, true, true, false, "ISO8859-7", NULL);
300 add_encoding(ENCODING_ISO8859_8, ENCODING_TYPE_UCNV, "ISO-8859-8", 1, true, true, false, "ISO8859-8", NULL);
301 add_encoding(ENCODING_ISO8859_9, ENCODING_TYPE_UCNV, "ISO-8859-9", 1, true, true, false, "ISO8859-9", NULL);
302 add_encoding(ENCODING_ISO8859_10, ENCODING_TYPE_UCNV, "ISO-8859-10", 1, true, true, false, "ISO8859-10", NULL);
303 add_encoding(ENCODING_ISO8859_11, ENCODING_TYPE_UCNV, "ISO-8859-11", 1, true, true, false, "ISO8859-11", NULL);
304 add_encoding(ENCODING_ISO8859_13, ENCODING_TYPE_UCNV, "ISO-8859-13", 1, true, true, false, "ISO8859-13", NULL);
305 add_encoding(ENCODING_ISO8859_14, ENCODING_TYPE_UCNV, "ISO-8859-14", 1, true, true, false, "ISO8859-14", NULL);
306 add_encoding(ENCODING_ISO8859_15, ENCODING_TYPE_UCNV, "ISO-8859-15", 1, true, true, false, "ISO8859-15", NULL);
307 add_encoding(ENCODING_ISO8859_16, ENCODING_TYPE_UCNV, "ISO-8859-16", 1, true, true, false, "ISO8859-16", NULL);
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
308 add_encoding(ENCODING_MACROMAN, ENCODING_TYPE_UCNV, "macRoman", 1, true, true, false, NULL);
309 add_encoding(ENCODING_MACCYRILLIC, ENCODING_TYPE_UCNV, "macCyrillic", 1, true, true, false, NULL);
310 add_encoding(ENCODING_BIG5, ENCODING_TYPE_UCNV, "Big5", 1, false, true, false, "CP950", NULL);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
311 // FIXME: the ICU conversion tables do not seem to match Ruby's Japanese conversion tables
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored
312 add_encoding(ENCODING_EUCJP, ENCODING_TYPE_UCNV, "EUC-JP", 1, false, true, false, "eucJP", NULL);
313 add_encoding(ENCODING_SJIS, ENCODING_TYPE_UCNV, "Shift_JIS", 1, false, true, false, "SJIS", NULL);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
314 //add_encoding(ENCODING_EUCJP, ENCODING_TYPE_RUBY, "EUC-JP", 1, false, true, "eucJP", NULL);
315 //add_encoding(ENCODING_SJIS, ENCODING_TYPE_RUBY, "Shift_JIS", 1, false, true, "SJIS", NULL);
316 //add_encoding(ENCODING_CP932, ENCODING_TYPE_RUBY, "Windows-31J", 1, false, true, "CP932", "csWindows31J", NULL);
317
96ab900 more work
Laurent Sansonetti authored
318 default_external = rb_encodings[ENCODING_UTF8];
319 default_internal = rb_encodings[ENCODING_UTF8];
9c1d230 committing experimental branch content
Laurent Sansonetti authored
320 }
321
2b7d5d5 import vincent's work
Laurent Sansonetti authored
322 void
b881853 s/MR//
Laurent Sansonetti authored
323 Init_Encoding(void)
2b7d5d5 import vincent's work
Laurent Sansonetti authored
324 {
ae4da82 more work
Laurent Sansonetti authored
325 // rb_cEncoding is defined earlier in Init_PreVM().
326 rb_set_class_path(rb_cEncoding, rb_cObject, "Encoding");
327 rb_const_set(rb_cObject, rb_intern("Encoding"), rb_cEncoding);
328
b881853 s/MR//
Laurent Sansonetti authored
329 rb_undef_alloc_func(rb_cEncoding);
330
331 rb_objc_define_method(rb_cEncoding, "to_s", mr_enc_name, 0);
332 rb_objc_define_method(rb_cEncoding, "inspect", mr_enc_inspect, 0);
333 rb_objc_define_method(rb_cEncoding, "name", mr_enc_name, 0);
334 rb_objc_define_method(rb_cEncoding, "names", mr_enc_names, 0);
335 rb_objc_define_method(rb_cEncoding, "dummy?", mr_enc_dummy_p, 0);
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
336 rb_objc_define_method(rb_cEncoding, "ascii_compatible?",
337 mr_enc_ascii_compatible_p, 0);
4ede652 added #find
Laurent Sansonetti authored
338 rb_objc_define_method(*(VALUE *)rb_cEncoding, "list", mr_enc_s_list, 0);
339 rb_objc_define_method(*(VALUE *)rb_cEncoding, "name_list",
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
340 mr_enc_s_name_list, 0);
4ede652 added #find
Laurent Sansonetti authored
341 rb_objc_define_method(*(VALUE *)rb_cEncoding, "aliases",
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
342 mr_enc_s_aliases, 0);
4ede652 added #find
Laurent Sansonetti authored
343 rb_objc_define_method(*(VALUE *)rb_cEncoding, "find", mr_enc_s_find, 1);
344 rb_objc_define_method(*(VALUE *)rb_cEncoding, "compatible?",
39b55f1 some work on string
Laurent Sansonetti authored
345 mr_enc_s_is_compatible, 2); // in string.c
2b7d5d5 import vincent's work
Laurent Sansonetti authored
346
347 //rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
348 //rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
349
4ede652 added #find
Laurent Sansonetti authored
350 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external",
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
351 mr_enc_s_default_external, 0);
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
352 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_external=",
353 mr_enc_set_default_external, 1);
4ede652 added #find
Laurent Sansonetti authored
354 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal",
0382b34 indented code, better type checking, removed rb_cCFString, started addin...
Laurent Sansonetti authored
355 mr_enc_s_default_internal, 0);
5051413 added #default_external=, #default_internal=
Laurent Sansonetti authored
356 rb_objc_define_method(*(VALUE *)rb_cEncoding, "default_internal=",
357 mr_enc_set_default_internal, 1);
b881853 s/MR//
Laurent Sansonetti authored
358 //rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
359
ae4da82 more work
Laurent Sansonetti authored
360 // Create constants.
361 for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
362 rb_encoding_t *enc = rb_encodings[i];
363 define_encoding_constant(enc->public_name, enc);
364 for (unsigned int j = 0; j < enc->aliases_count; j++) {
365 define_encoding_constant(enc->aliases[j], enc);
366 }
367 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
368 }
96ab900 more work
Laurent Sansonetti authored
369
370 // MRI C-API compatibility.
371
372 rb_encoding_t *
373 rb_enc_find(const char *name)
374 {
375 for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
376 rb_encoding_t *enc = rb_encodings[i];
377 if (strcasecmp(enc->public_name, name) == 0) {
378 return enc;
379 }
380 for (unsigned int j = 0; j < enc->aliases_count; j++) {
381 const char *alias = enc->aliases[j];
382 if (strcasecmp(alias, name) == 0) {
383 return enc;
384 }
385 }
386 }
387 return NULL;
388 }
389
390 VALUE
391 rb_enc_from_encoding(rb_encoding_t *enc)
392 {
393 return (VALUE)enc;
394 }
395
396 rb_encoding_t *
397 rb_enc_get(VALUE obj)
398 {
f738483 honor the original string encoding when generating substrings out of a r...
Laurent Sansonetti authored
399 switch (TYPE(obj)) {
400 case T_STRING:
401 if (IS_RSTR(obj)) {
402 return RSTR(obj)->encoding;
403 }
404 return rb_encodings[ENCODING_UTF8];
405
406 case T_SYMBOL:
407 return rb_enc_get(rb_sym_str(obj));
96ab900 more work
Laurent Sansonetti authored
408 }
409 return NULL;
410 }
411
412 rb_encoding_t *
413 rb_to_encoding(VALUE obj)
414 {
415 rb_encoding_t *enc;
416 if (CLASS_OF(obj) == rb_cEncoding) {
417 enc = RENC(obj);
418 }
419 else {
420 StringValue(obj);
421 enc = rb_enc_find(RSTRING_PTR(obj));
422 if (enc == NULL) {
423 rb_raise(rb_eArgError, "unknown encoding name - %s",
424 RSTRING_PTR(obj));
425 }
426 }
427 return enc;
428 }
429
430 const char *
431 rb_enc_name(rb_encoding_t *enc)
432 {
433 return RENC(enc)->public_name;
434 }
435
436 VALUE
437 rb_enc_name2(rb_encoding_t *enc)
438 {
439 return rb_usascii_str_new2(rb_enc_name(enc));
440 }
441
442 long
443 rb_enc_mbminlen(rb_encoding_t *enc)
444 {
445 return enc->min_char_size;
446 }
447
448 long
449 rb_enc_mbmaxlen(rb_encoding_t *enc)
450 {
451 return enc->single_byte_encoding ? 1 : 10; // XXX 10?
452 }
453
4cd5f5e added missing MRI methods
Laurent Sansonetti authored
454 rb_encoding *
455 rb_ascii8bit_encoding(void)
456 {
457 return rb_encodings[ENCODING_BINARY];
458 }
459
460 rb_encoding *
461 rb_utf8_encoding(void)
462 {
463 return rb_encodings[ENCODING_UTF8];
464 }
465
466 rb_encoding *
467 rb_usascii_encoding(void)
468 {
469 return rb_encodings[ENCODING_ASCII];
470 }
471
96ab900 more work
Laurent Sansonetti authored
472 rb_encoding_t *
473 rb_locale_encoding(void)
474 {
475 // XXX
476 return rb_encodings[ENCODING_UTF8];
477 }
478
479 void
480 rb_enc_set_default_external(VALUE encoding)
481 {
482 assert(CLASS_OF(encoding) == rb_cEncoding);
483 default_external = RENC(encoding);
484 }
485
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
486 rb_encoding *
487 rb_default_internal_encoding(void)
488 {
489 return (rb_encoding *)default_internal;
490 }
491
5e9b743 @lrz add missing CRuby encoding APIs
lrz authored
492 rb_encoding *
493 rb_default_external_encoding(void)
494 {
495 return (rb_encoding *)default_external;
496 }
497
0202977 implement some of the MRI encoding index APIs
Laurent Sansonetti authored
498 static int
499 index_of_encoding(rb_encoding_t *enc)
500 {
501 if (enc != NULL) {
502 for (int i = 0; i <ENCODINGS_COUNT; i++) {
503 if (rb_encodings[i] == enc) {
504 return i;
505 }
506 }
507 }
508 return -1;
509 }
510
511 int
512 rb_enc_get_index(VALUE obj)
513 {
514 return index_of_encoding(rb_enc_get(obj));
515 }
516
5e9b743 @lrz add missing CRuby encoding APIs
lrz authored
517 int
518 rb_enc_to_index(VALUE enc)
519 {
520 if (CLASS_OF(enc) == rb_cEncoding) {
521 return index_of_encoding(RENC(enc));
522 }
523 return -1;
524 }
525
ee2152e Better C implementation for Iconv
Thibault Martin-Lagardette authored
526 void
527 rb_enc_set_index(VALUE obj, int encindex)
528 {
3fce966 @lrz implement ENCODING_SET() CRuby API
lrz authored
529 assert(encindex >= 0 && encindex < ENCODINGS_COUNT);
530 if (TYPE(obj) == T_STRING) {
531 rb_str_force_encoding(obj, rb_encodings[encindex]);
ee2152e Better C implementation for Iconv
Thibault Martin-Lagardette authored
532 }
533 }
534
0202977 implement some of the MRI encoding index APIs
Laurent Sansonetti authored
535 int
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
536 rb_to_encoding_index(VALUE enc)
537 {
538 if (CLASS_OF(enc) != rb_cEncoding && TYPE(enc) != T_STRING) {
539 return -1;
540 }
541 else {
a0faa60 @jballanc Cast, don't get; would be good to export this too
jballanc authored
542 int idx = index_of_encoding((rb_encoding_t *)enc);
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
543 if (idx >= 0) {
544 return idx;
545 }
546 else if (NIL_P(enc = rb_check_string_type(enc))) {
547 return -1;
548 }
549 if (!rb_enc_asciicompat(rb_enc_get(enc))) {
550 return -1;
551 }
552 return rb_enc_find_index(StringValueCStr(enc));
553 }
554 }
555
556 int
0202977 implement some of the MRI encoding index APIs
Laurent Sansonetti authored
557 rb_enc_find_index(const char *name)
558 {
559 return index_of_encoding(rb_enc_find(name));
560 }
561
562 int
563 rb_ascii8bit_encindex(void)
564 {
565 return index_of_encoding(rb_encodings[ENCODING_BINARY]);
566 }
567
568 int
569 rb_utf8_encindex(void)
570 {
571 return index_of_encoding(rb_encodings[ENCODING_UTF8]);
572 }
573
574 int
575 rb_usascii_encindex(void)
576 {
577 return index_of_encoding(rb_encodings[ENCODING_ASCII]);
578 }
579
508b43f @jballanc Implement a few more of the MRI encoding APIs
jballanc authored
580 rb_encoding *
581 rb_enc_from_index(int idx)
582 {
583 assert(idx >= 0 && idx < ENCODINGS_COUNT);
584 return rb_encodings[idx];
585 }
583e433 adding 2 more frightening MRI methods
Laurent Sansonetti authored
586
587 VALUE
588 rb_enc_associate_index(VALUE obj, int idx)
589 {
590 if (TYPE(obj) == T_STRING) {
591 assert(idx >= 0 && idx < ENCODINGS_COUNT);
592 rb_str_force_encoding(obj, rb_encodings[idx]);
593 return obj;
594 }
595 rb_raise(rb_eArgError, "cannot set encoding on non-string object");
596 }
ad3b421 @Watson1978 copy encoding
Watson1978 authored
597
598 void
599 rb_enc_copy(VALUE obj1, VALUE obj2)
600 {
601 rb_enc_associate_index(obj1, rb_enc_get_index(obj2));
602 }
Something went wrong with that request. Please try again.