Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 381 lines (333 sloc) 11.558 kB
96ab900 more work
Laurent Sansonetti authored
1 /*
2 * MacRuby implementation of Ruby 1.9 String.
3 *
4 * This file is covered by the Ruby license. See COPYING for more details.
5 *
6 * Copyright (C) 2007-2010, Apple Inc. All rights reserved.
7 * Copyright (C) 1993-2007 Yukihiro Matsumoto
8 * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
9 * Copyright (C) 2000 Information-technology Promotion Agency, Japan
10 */
11
2b7d5d5 import vincent's work
Laurent Sansonetti authored
12 #include "encoding.h"
13 #include <string.h>
9c1d230 committing experimental branch content
Laurent Sansonetti authored
14
96ab900 more work
Laurent Sansonetti authored
15 VALUE rb_cEncoding;
8b9745b define Encoding::ASCII_8BIT as a shortcut to US_ASCII (for now)
Laurent Sansonetti authored
16
96ab900 more work
Laurent Sansonetti authored
17 static rb_encoding_t *default_internal = NULL;
18 static rb_encoding_t *default_external = NULL;
19 rb_encoding_t *rb_encodings[ENCODINGS_COUNT];
9c1d230 committing experimental branch content
Laurent Sansonetti authored
20
96ab900 more work
Laurent Sansonetti authored
21 static void str_undefined_update_flags(rb_str_t *self) { abort(); }
22 static void str_undefined_make_data_binary(rb_str_t *self) { abort(); }
23 static bool str_undefined_try_making_data_uchars(rb_str_t *self) { abort(); }
24 static long str_undefined_length(rb_str_t *self, bool ucs2_mode) { abort(); }
25 static long str_undefined_bytesize(rb_str_t *self) { abort(); }
26 static character_boundaries_t str_undefined_get_character_boundaries(rb_str_t *self, long index, bool ucs2_mode) { abort(); }
27 static long str_undefined_offset_in_bytes_to_index(rb_str_t *self, long offset_in_bytes, bool ucs2_mode) { abort(); }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
28
29 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
30 mr_enc_s_list(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
31 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
32 VALUE ary = rb_ary_new2(ENCODINGS_COUNT);
33 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
34 rb_ary_push(ary, (VALUE)rb_encodings[i]);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
35 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
36 return ary;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
37 }
38
39 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
40 mr_enc_s_name_list(VALUE klass, SEL sel)
41 {
42 VALUE ary = rb_ary_new();
43 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
44 rb_encoding_t *encoding = RENC(rb_encodings[i]);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
45 // TODO: use US-ASCII strings
96ab900 more work
Laurent Sansonetti authored
46 rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
47 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
96ab900 more work
Laurent Sansonetti authored
48 rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[j]));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
49 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
50 }
51 return ary;
52 }
53
54 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
55 mr_enc_s_aliases(VALUE klass, SEL sel)
56 {
57 VALUE hash = rb_hash_new();
58 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
96ab900 more work
Laurent Sansonetti authored
59 rb_encoding_t *encoding = RENC(rb_encodings[i]);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
60 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
96ab900 more work
Laurent Sansonetti authored
61 rb_hash_aset(hash, rb_usascii_str_new2(encoding->aliases[j]),
62 rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
63 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
64 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
65 return hash;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
66 }
67
68 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
69 mr_enc_s_default_internal(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
70 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
71 return (VALUE)default_internal;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
72 }
73
74 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
75 mr_enc_s_default_external(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
76 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
77 return (VALUE)default_external;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
78 }
79
80 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
81 mr_enc_name(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
82 {
96ab900 more work
Laurent Sansonetti authored
83 return rb_usascii_str_new2(RENC(self)->public_name);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
84 }
85
86 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
87 mr_enc_inspect(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
88 {
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
89 return rb_sprintf("#<%s:%s>", rb_obj_classname(self),
96ab900 more work
Laurent Sansonetti authored
90 RENC(self)->public_name);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
91 }
92
93 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
94 mr_enc_names(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
95 {
96ab900 more work
Laurent Sansonetti authored
96 rb_encoding_t *encoding = RENC(self);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
97
2b7d5d5 import vincent's work
Laurent Sansonetti authored
98 VALUE ary = rb_ary_new2(encoding->aliases_count + 1);
96ab900 more work
Laurent Sansonetti authored
99 rb_ary_push(ary, rb_usascii_str_new2(encoding->public_name));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
100 for (unsigned int i = 0; i < encoding->aliases_count; ++i) {
96ab900 more work
Laurent Sansonetti authored
101 rb_ary_push(ary, rb_usascii_str_new2(encoding->aliases[i]));
2b7d5d5 import vincent's work
Laurent Sansonetti authored
102 }
103 return ary;
1623532 added Encoding#default_external= and Encoding#default_internal= which…
Laurent Sansonetti authored
104 }
105
2b7d5d5 import vincent's work
Laurent Sansonetti authored
106 static VALUE
107 mr_enc_ascii_compatible_p(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
108 {
96ab900 more work
Laurent Sansonetti authored
109 return RENC(self)->ascii_compatible ? Qtrue : Qfalse;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
110 }
111
112 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
113 mr_enc_dummy_p(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
114 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
115 return Qfalse;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
116 }
117
2b7d5d5 import vincent's work
Laurent Sansonetti authored
118 static void
96ab900 more work
Laurent Sansonetti authored
119 define_encoding_constant(const char *name, rb_encoding_t *encoding)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
120 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
121 char c = name[0];
122 if ((c >= '0') && (c <= '9')) {
123 // constants can't start with a number
124 return;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
125 }
126
2b7d5d5 import vincent's work
Laurent Sansonetti authored
127 char *name_copy = strdup(name);
128 if ((c >= 'a') && (c <= 'z')) {
129 // the first character must be upper case
130 name_copy[0] = c - ('a' - 'A');
131 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
132
2b7d5d5 import vincent's work
Laurent Sansonetti authored
133 // '.' and '-' must be transformed into '_'
134 for (int i = 0; name_copy[i]; ++i) {
135 if ((name_copy[i] == '.') || (name_copy[i] == '-')) {
136 name_copy[i] = '_';
023dd4d fixed Encoding#name for 10.6
Laurent Sansonetti authored
137 }
138 }
b881853 s/MR//
Laurent Sansonetti authored
139 rb_define_const(rb_cEncoding, name_copy, (VALUE)encoding);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
140 free(name_copy);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
141 }
142
96ab900 more work
Laurent Sansonetti authored
143 extern void enc_init_ucnv_encoding(rb_encoding_t *encoding);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
144
2b7d5d5 import vincent's work
Laurent Sansonetti authored
// Values accepted by add_encoding() for its rb_encoding_type argument.
enum {
    // no type-specific initialisation is performed
    ENCODING_TYPE_SPECIAL = 0,
    // the encoding is handed to enc_init_ucnv_encoding()
    ENCODING_TYPE_UCNV = 1
};
9c1d230 committing experimental branch content
Laurent Sansonetti authored
149
2b7d5d5 import vincent's work
Laurent Sansonetti authored
150 static void
151 add_encoding(
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
152 unsigned int encoding_index, // index of the encoding in the encodings
153 // array
96ab900 more work
Laurent Sansonetti authored
154 unsigned int rb_encoding_type,
2b7d5d5 import vincent's work
Laurent Sansonetti authored
155 const char *public_name, // public name for the encoding
156 unsigned char min_char_size,
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
157 bool single_byte_encoding, // in the encoding a character takes only
158 // one byte
2b7d5d5 import vincent's work
Laurent Sansonetti authored
159 bool ascii_compatible, // is the encoding ASCII compatible or not
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
160 ... // aliases for the encoding (should no include the public name)
161 // - must end with a NULL
2b7d5d5 import vincent's work
Laurent Sansonetti authored
162 )
163 {
164 assert(encoding_index < ENCODINGS_COUNT);
165
166 // create an array for the aliases
167 unsigned int aliases_count = 0;
168 va_list va_aliases;
169 va_start(va_aliases, ascii_compatible);
170 while (va_arg(va_aliases, const char *) != NULL) {
171 ++aliases_count;
172 }
173 va_end(va_aliases);
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
174 const char **aliases = (const char **)
175 malloc(sizeof(const char *) * aliases_count);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
176 va_start(va_aliases, ascii_compatible);
177 for (unsigned int i = 0; i < aliases_count; ++i) {
178 aliases[i] = va_arg(va_aliases, const char *);
179 }
180 va_end(va_aliases);
181
182 // create the MacRuby object
96ab900 more work
Laurent Sansonetti authored
183 NEWOBJ(encoding, rb_encoding_t);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
184 encoding->basic.flags = 0;
b881853 s/MR//
Laurent Sansonetti authored
185 encoding->basic.klass = rb_cEncoding;
96ab900 more work
Laurent Sansonetti authored
186 rb_encodings[encoding_index] = encoding;
187 GC_RETAIN(encoding); // it should never be deallocated
2b7d5d5 import vincent's work
Laurent Sansonetti authored
188
189 // fill the fields
190 encoding->index = encoding_index;
191 encoding->public_name = public_name;
192 encoding->min_char_size = min_char_size;
193 encoding->single_byte_encoding = single_byte_encoding;
194 encoding->ascii_compatible = ascii_compatible;
195 encoding->aliases_count = aliases_count;
196 encoding->aliases = aliases;
197
198 // fill the default implementations with aborts
199 encoding->methods.update_flags = str_undefined_update_flags;
200 encoding->methods.make_data_binary = str_undefined_make_data_binary;
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
201 encoding->methods.try_making_data_uchars =
202 str_undefined_try_making_data_uchars;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
203 encoding->methods.length = str_undefined_length;
204 encoding->methods.bytesize = str_undefined_bytesize;
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
205 encoding->methods.get_character_boundaries =
206 str_undefined_get_character_boundaries;
207 encoding->methods.offset_in_bytes_to_index =
208 str_undefined_offset_in_bytes_to_index;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
209
96ab900 more work
Laurent Sansonetti authored
210 switch (rb_encoding_type) {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
211 case ENCODING_TYPE_SPECIAL:
022cd7c fixed ByteString#encoding to always return US_ASCII (for now)
Laurent Sansonetti authored
212 break;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
213 case ENCODING_TYPE_UCNV:
214 enc_init_ucnv_encoding(encoding);
215 break;
216 default:
217 abort();
9c1d230 committing experimental branch content
Laurent Sansonetti authored
218 }
022cd7c fixed ByteString#encoding to always return US_ASCII (for now)
Laurent Sansonetti authored
219
2b7d5d5 import vincent's work
Laurent Sansonetti authored
220 // create constants
221 define_encoding_constant(public_name, encoding);
222 for (unsigned int i = 0; i < aliases_count; ++i) {
223 define_encoding_constant(aliases[i], encoding);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
224 }
225 }
226
2b7d5d5 import vincent's work
Laurent Sansonetti authored
227 static void
228 create_encodings(void)
229 {
230 add_encoding(ENCODING_BINARY, ENCODING_TYPE_SPECIAL, "ASCII-8BIT", 1, true, true, "BINARY", NULL);
231 add_encoding(ENCODING_ASCII, ENCODING_TYPE_UCNV, "US-ASCII", 1, true, true, "ASCII", "ANSI_X3.4-1968", "646", NULL);
232 add_encoding(ENCODING_UTF8, ENCODING_TYPE_UCNV, "UTF-8", 1, false, true, "CP65001", NULL);
233 add_encoding(ENCODING_UTF16BE, ENCODING_TYPE_UCNV, "UTF-16BE", 2, false, false, NULL);
234 add_encoding(ENCODING_UTF16LE, ENCODING_TYPE_UCNV, "UTF-16LE", 2, false, false, NULL);
235 add_encoding(ENCODING_UTF32BE, ENCODING_TYPE_UCNV, "UTF-32BE", 4, false, false, "UCS-4BE", NULL);
236 add_encoding(ENCODING_UTF32LE, ENCODING_TYPE_UCNV, "UTF-32LE", 4, false, false, "UCS-4LE", NULL);
237 add_encoding(ENCODING_ISO8859_1, ENCODING_TYPE_UCNV, "ISO-8859-1", 1, true, true, "ISO8859-1", NULL);
238 add_encoding(ENCODING_MACROMAN, ENCODING_TYPE_UCNV, "macRoman", 1, true, true, NULL);
239 // FIXME: the ICU conversion tables do not seem to match Ruby's Japanese conversion tables
240 //add_encoding(ENCODING_EUCJP, ENCODING_TYPE_RUBY, "EUC-JP", 1, false, true, "eucJP", NULL);
241 //add_encoding(ENCODING_SJIS, ENCODING_TYPE_RUBY, "Shift_JIS", 1, false, true, "SJIS", NULL);
242 //add_encoding(ENCODING_CP932, ENCODING_TYPE_RUBY, "Windows-31J", 1, false, true, "CP932", "csWindows31J", NULL);
243
96ab900 more work
Laurent Sansonetti authored
244 default_external = rb_encodings[ENCODING_UTF8];
245 default_internal = rb_encodings[ENCODING_UTF8];
9c1d230 committing experimental branch content
Laurent Sansonetti authored
246 }
247
2b7d5d5 import vincent's work
Laurent Sansonetti authored
248 VALUE
249 mr_enc_s_is_compatible(VALUE klass, SEL sel, VALUE str1, VALUE str2);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
250
2b7d5d5 import vincent's work
Laurent Sansonetti authored
251 void
b881853 s/MR//
Laurent Sansonetti authored
252 Init_Encoding(void)
2b7d5d5 import vincent's work
Laurent Sansonetti authored
253 {
b881853 s/MR//
Laurent Sansonetti authored
254 rb_cEncoding = rb_define_class("Encoding", rb_cObject);
255 rb_undef_alloc_func(rb_cEncoding);
256
257 rb_objc_define_method(rb_cEncoding, "to_s", mr_enc_name, 0);
258 rb_objc_define_method(rb_cEncoding, "inspect", mr_enc_inspect, 0);
259 rb_objc_define_method(rb_cEncoding, "name", mr_enc_name, 0);
260 rb_objc_define_method(rb_cEncoding, "names", mr_enc_names, 0);
261 rb_objc_define_method(rb_cEncoding, "dummy?", mr_enc_dummy_p, 0);
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
262 rb_objc_define_method(rb_cEncoding, "ascii_compatible?",
263 mr_enc_ascii_compatible_p, 0);
264 rb_objc_define_method(CLASS_OF(rb_cEncoding), "list", mr_enc_s_list, 0);
265 rb_objc_define_method(CLASS_OF(rb_cEncoding), "name_list",
266 mr_enc_s_name_list, 0);
267 rb_objc_define_method(CLASS_OF(rb_cEncoding), "aliases",
268 mr_enc_s_aliases, 0);
b881853 s/MR//
Laurent Sansonetti authored
269 //rb_define_singleton_method(rb_cEncoding, "find", enc_find, 1);
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
270 // it's defined on Encoding, but it requires String's internals so it's
271 // defined with String
272 rb_objc_define_method(CLASS_OF(rb_cEncoding), "compatible?",
273 mr_enc_s_is_compatible, 2);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
274
275 //rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
276 //rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
277
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
278 rb_objc_define_method(CLASS_OF(rb_cEncoding), "default_external",
279 mr_enc_s_default_external, 0);
b881853 s/MR//
Laurent Sansonetti authored
280 //rb_define_singleton_method(rb_cEncoding, "default_external=", set_default_external, 1);
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored
281 rb_objc_define_method(CLASS_OF(rb_cEncoding), "default_internal",
282 mr_enc_s_default_internal, 0);
b881853 s/MR//
Laurent Sansonetti authored
283 //rb_define_singleton_method(rb_cEncoding, "default_internal=", set_default_internal, 1);
284 //rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0);
2b7d5d5 import vincent's work
Laurent Sansonetti authored
285
286 create_encodings();
9c1d230 committing experimental branch content
Laurent Sansonetti authored
287 }
96ab900 more work
Laurent Sansonetti authored
288
289 // MRI C-API compatibility.
290
291 rb_encoding_t *
292 rb_enc_find(const char *name)
293 {
294 for (unsigned int i = 0; i < ENCODINGS_COUNT; i++) {
295 rb_encoding_t *enc = rb_encodings[i];
296 if (strcasecmp(enc->public_name, name) == 0) {
297 return enc;
298 }
299 for (unsigned int j = 0; j < enc->aliases_count; j++) {
300 const char *alias = enc->aliases[j];
301 if (strcasecmp(alias, name) == 0) {
302 return enc;
303 }
304 }
305 }
306 return NULL;
307 }
308
309 VALUE
310 rb_enc_from_encoding(rb_encoding_t *enc)
311 {
312 return (VALUE)enc;
313 }
314
315 rb_encoding_t *
316 rb_enc_get(VALUE obj)
317 {
318 if (IS_RSTR(obj)) {
319 return RSTR(obj)->encoding;
320 }
321 // TODO support symbols
322 return NULL;
323 }
324
325 rb_encoding_t *
326 rb_to_encoding(VALUE obj)
327 {
328 rb_encoding_t *enc;
329 if (CLASS_OF(obj) == rb_cEncoding) {
330 enc = RENC(obj);
331 }
332 else {
333 StringValue(obj);
334 enc = rb_enc_find(RSTRING_PTR(obj));
335 if (enc == NULL) {
336 rb_raise(rb_eArgError, "unknown encoding name - %s",
337 RSTRING_PTR(obj));
338 }
339 }
340 return enc;
341 }
342
343 const char *
344 rb_enc_name(rb_encoding_t *enc)
345 {
346 return RENC(enc)->public_name;
347 }
348
349 VALUE
350 rb_enc_name2(rb_encoding_t *enc)
351 {
352 return rb_usascii_str_new2(rb_enc_name(enc));
353 }
354
355 long
356 rb_enc_mbminlen(rb_encoding_t *enc)
357 {
358 return enc->min_char_size;
359 }
360
361 long
362 rb_enc_mbmaxlen(rb_encoding_t *enc)
363 {
364 return enc->single_byte_encoding ? 1 : 10; // XXX 10?
365 }
366
367 rb_encoding_t *
368 rb_locale_encoding(void)
369 {
370 // XXX
371 return rb_encodings[ENCODING_UTF8];
372 }
373
374 void
375 rb_enc_set_default_external(VALUE encoding)
376 {
377 assert(CLASS_OF(encoding) == rb_cEncoding);
378 default_external = RENC(encoding);
379 }
380
Something went wrong with that request. Please try again.