Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 271 lines (233 sloc) 9.623 kb
2b7d5d5 import vincent's work
Laurent Sansonetti authored
1 #include "encoding.h"
2 #include <string.h>
9c1d230 committing experimental branch content
Laurent Sansonetti authored
3
2b7d5d5 import vincent's work
Laurent Sansonetti authored
4 // TODO:
5 // - use rb_usascii_str_new_cstr instead of rb_str_new2
9c1d230 committing experimental branch content
Laurent Sansonetti authored
6
2b7d5d5 import vincent's work
Laurent Sansonetti authored
7 VALUE rb_cMREncoding = 0;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
8
2b7d5d5 import vincent's work
Laurent Sansonetti authored
9 #define ENC(x) ((encoding_t *)(x))
10 #define OBJC_CLASS(x) (*(VALUE *)(x))
8b9745b define Encoding::ASCII_8BIT as a shortcut to US_ASCII (for now)
Laurent Sansonetti authored
11
2b7d5d5 import vincent's work
Laurent Sansonetti authored
12 encoding_t *default_internal = NULL;
13 encoding_t *default_external = NULL;
14 encoding_t *encodings[ENCODINGS_COUNT];
9c1d230 committing experimental branch content
Laurent Sansonetti authored
15
2b7d5d5 import vincent's work
Laurent Sansonetti authored
16 static void str_undefined_update_flags(string_t *self) { abort(); }
17 static void str_undefined_make_data_binary(string_t *self) { abort(); }
18 static bool str_undefined_try_making_data_uchars(string_t *self) { abort(); }
19 static long str_undefined_length(string_t *self, bool ucs2_mode) { abort(); }
20 static long str_undefined_bytesize(string_t *self) { abort(); }
21 static character_boundaries_t str_undefined_get_character_boundaries(string_t *self, long index, bool ucs2_mode) { abort(); }
22 static long str_undefined_offset_in_bytes_to_index(string_t *self, long offset_in_bytes, bool ucs2_mode) { abort(); }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
23
24 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
25 mr_enc_s_list(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
26 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
27 VALUE ary = rb_ary_new2(ENCODINGS_COUNT);
28 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
29 rb_ary_push(ary, (VALUE)encodings[i]);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
30 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
31 return ary;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
32 }
33
34 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
35 mr_enc_s_name_list(VALUE klass, SEL sel)
36 {
37 VALUE ary = rb_ary_new();
38 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
39 encoding_t *encoding = ENC(encodings[i]);
40 // TODO: use US-ASCII strings
41 rb_ary_push(ary, rb_str_new2(encoding->public_name));
42 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
43 rb_ary_push(ary, rb_str_new2(encoding->aliases[j]));
44 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
45 }
46 return ary;
47 }
48
49 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
50 mr_enc_s_aliases(VALUE klass, SEL sel)
51 {
52 VALUE hash = rb_hash_new();
53 for (unsigned int i = 0; i < ENCODINGS_COUNT; ++i) {
54 encoding_t *encoding = ENC(encodings[i]);
55 for (unsigned int j = 0; j < encoding->aliases_count; ++j) {
56 rb_hash_aset(hash,
57 rb_str_new2(encoding->aliases[j]),
58 rb_str_new2(encoding->public_name));
59 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
60 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
61 return hash;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
62 }
63
64 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
65 mr_enc_s_default_internal(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
66 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
67 return (VALUE)default_internal;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
68 }
69
70 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
71 mr_enc_s_default_external(VALUE klass, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
72 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
73 return (VALUE)default_external;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
74 }
75
76 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
77 mr_enc_name(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
78 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
79 return rb_str_new2(ENC(self)->public_name);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
80 }
81
82 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
83 mr_enc_inspect(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
84 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
85 return rb_sprintf("#<%s:%s>", rb_obj_classname(self), ENC(self)->public_name);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
86 }
87
88 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
89 mr_enc_names(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
90 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
91 encoding_t *encoding = ENC(self);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
92
2b7d5d5 import vincent's work
Laurent Sansonetti authored
93 VALUE ary = rb_ary_new2(encoding->aliases_count + 1);
94 rb_ary_push(ary, rb_str_new2(encoding->public_name));
95 for (unsigned int i = 0; i < encoding->aliases_count; ++i) {
96 rb_ary_push(ary, rb_str_new2(encoding->aliases[i]));
97 }
98 return ary;
1623532 added Encoding#default_external= and Encoding#default_internal= which…
Laurent Sansonetti authored
99 }
100
2b7d5d5 import vincent's work
Laurent Sansonetti authored
101 static VALUE
102 mr_enc_ascii_compatible_p(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
103 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
104 return ENC(self)->ascii_compatible ? Qtrue : Qfalse;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
105 }
106
107 static VALUE
2b7d5d5 import vincent's work
Laurent Sansonetti authored
108 mr_enc_dummy_p(VALUE self, SEL sel)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
109 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
110 return Qfalse;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
111 }
112
2b7d5d5 import vincent's work
Laurent Sansonetti authored
113 static void
114 define_encoding_constant(const char *name, encoding_t *encoding)
9c1d230 committing experimental branch content
Laurent Sansonetti authored
115 {
2b7d5d5 import vincent's work
Laurent Sansonetti authored
116 char c = name[0];
117 if ((c >= '0') && (c <= '9')) {
118 // constants can't start with a number
119 return;
9c1d230 committing experimental branch content
Laurent Sansonetti authored
120 }
121
2b7d5d5 import vincent's work
Laurent Sansonetti authored
122 char *name_copy = strdup(name);
123 if ((c >= 'a') && (c <= 'z')) {
124 // the first character must be upper case
125 name_copy[0] = c - ('a' - 'A');
126 }
9c1d230 committing experimental branch content
Laurent Sansonetti authored
127
2b7d5d5 import vincent's work
Laurent Sansonetti authored
128 // '.' and '-' must be transformed into '_'
129 for (int i = 0; name_copy[i]; ++i) {
130 if ((name_copy[i] == '.') || (name_copy[i] == '-')) {
131 name_copy[i] = '_';
023dd4d fixed Encoding#name for 10.6
Laurent Sansonetti authored
132 }
133 }
2b7d5d5 import vincent's work
Laurent Sansonetti authored
134 rb_define_const(rb_cMREncoding, name_copy, (VALUE)encoding);
135 free(name_copy);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
136 }
137
2b7d5d5 import vincent's work
Laurent Sansonetti authored
138 extern void enc_init_ucnv_encoding(encoding_t *encoding);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
139
2b7d5d5 import vincent's work
Laurent Sansonetti authored
// Values accepted by add_encoding() for its encoding_type argument.
enum {
    // no type-specific initialization is performed
    ENCODING_TYPE_SPECIAL = 0,
    // the encoding is initialized through enc_init_ucnv_encoding()
    ENCODING_TYPE_UCNV = 1
};
9c1d230 committing experimental branch content
Laurent Sansonetti authored
144
2b7d5d5 import vincent's work
Laurent Sansonetti authored
145 static void
146 add_encoding(
147 unsigned int encoding_index, // index of the encoding in the encodings array
148 unsigned int encoding_type,
149 const char *public_name, // public name for the encoding
150 unsigned char min_char_size,
151 bool single_byte_encoding, // in the encoding a character takes only one byte
152 bool ascii_compatible, // is the encoding ASCII compatible or not
153 ... // aliases for the encoding (should no include the public name) - must end with a NULL
154 )
155 {
156 assert(encoding_index < ENCODINGS_COUNT);
157
158 // create an array for the aliases
159 unsigned int aliases_count = 0;
160 va_list va_aliases;
161 va_start(va_aliases, ascii_compatible);
162 while (va_arg(va_aliases, const char *) != NULL) {
163 ++aliases_count;
164 }
165 va_end(va_aliases);
166 const char **aliases = (const char **) malloc(sizeof(const char *) * aliases_count);
167 va_start(va_aliases, ascii_compatible);
168 for (unsigned int i = 0; i < aliases_count; ++i) {
169 aliases[i] = va_arg(va_aliases, const char *);
170 }
171 va_end(va_aliases);
172
173 // create the MacRuby object
174 NEWOBJ(encoding, encoding_t);
175 encoding->basic.flags = 0;
176 encoding->basic.klass = rb_cMREncoding;
177 encodings[encoding_index] = encoding;
178 rb_objc_retain(encoding); // it should never be deallocated
179
180 // fill the fields
181 encoding->index = encoding_index;
182 encoding->public_name = public_name;
183 encoding->min_char_size = min_char_size;
184 encoding->single_byte_encoding = single_byte_encoding;
185 encoding->ascii_compatible = ascii_compatible;
186 encoding->aliases_count = aliases_count;
187 encoding->aliases = aliases;
188
189 // fill the default implementations with aborts
190 encoding->methods.update_flags = str_undefined_update_flags;
191 encoding->methods.make_data_binary = str_undefined_make_data_binary;
192 encoding->methods.try_making_data_uchars = str_undefined_try_making_data_uchars;
193 encoding->methods.length = str_undefined_length;
194 encoding->methods.bytesize = str_undefined_bytesize;
195 encoding->methods.get_character_boundaries = str_undefined_get_character_boundaries;
196 encoding->methods.offset_in_bytes_to_index = str_undefined_offset_in_bytes_to_index;
197
198 switch (encoding_type) {
199 case ENCODING_TYPE_SPECIAL:
022cd7c fixed ByteString#encoding to always return US_ASCII (for now)
Laurent Sansonetti authored
200 break;
2b7d5d5 import vincent's work
Laurent Sansonetti authored
201 case ENCODING_TYPE_UCNV:
202 enc_init_ucnv_encoding(encoding);
203 break;
204 default:
205 abort();
9c1d230 committing experimental branch content
Laurent Sansonetti authored
206 }
022cd7c fixed ByteString#encoding to always return US_ASCII (for now)
Laurent Sansonetti authored
207
2b7d5d5 import vincent's work
Laurent Sansonetti authored
208 // create constants
209 define_encoding_constant(public_name, encoding);
210 for (unsigned int i = 0; i < aliases_count; ++i) {
211 define_encoding_constant(aliases[i], encoding);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
212 }
213
2b7d5d5 import vincent's work
Laurent Sansonetti authored
214 free(aliases);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
215 }
216
2b7d5d5 import vincent's work
Laurent Sansonetti authored
217 static void
218 create_encodings(void)
219 {
220 add_encoding(ENCODING_BINARY, ENCODING_TYPE_SPECIAL, "ASCII-8BIT", 1, true, true, "BINARY", NULL);
221 add_encoding(ENCODING_ASCII, ENCODING_TYPE_UCNV, "US-ASCII", 1, true, true, "ASCII", "ANSI_X3.4-1968", "646", NULL);
222 add_encoding(ENCODING_UTF8, ENCODING_TYPE_UCNV, "UTF-8", 1, false, true, "CP65001", NULL);
223 add_encoding(ENCODING_UTF16BE, ENCODING_TYPE_UCNV, "UTF-16BE", 2, false, false, NULL);
224 add_encoding(ENCODING_UTF16LE, ENCODING_TYPE_UCNV, "UTF-16LE", 2, false, false, NULL);
225 add_encoding(ENCODING_UTF32BE, ENCODING_TYPE_UCNV, "UTF-32BE", 4, false, false, "UCS-4BE", NULL);
226 add_encoding(ENCODING_UTF32LE, ENCODING_TYPE_UCNV, "UTF-32LE", 4, false, false, "UCS-4LE", NULL);
227 add_encoding(ENCODING_ISO8859_1, ENCODING_TYPE_UCNV, "ISO-8859-1", 1, true, true, "ISO8859-1", NULL);
228 add_encoding(ENCODING_MACROMAN, ENCODING_TYPE_UCNV, "macRoman", 1, true, true, NULL);
229 // FIXME: the ICU conversion tables do not seem to match Ruby's Japanese conversion tables
230 //add_encoding(ENCODING_EUCJP, ENCODING_TYPE_RUBY, "EUC-JP", 1, false, true, "eucJP", NULL);
231 //add_encoding(ENCODING_SJIS, ENCODING_TYPE_RUBY, "Shift_JIS", 1, false, true, "SJIS", NULL);
232 //add_encoding(ENCODING_CP932, ENCODING_TYPE_RUBY, "Windows-31J", 1, false, true, "CP932", "csWindows31J", NULL);
233
234 default_external = encodings[ENCODING_UTF8];
235 default_internal = encodings[ENCODING_UTF16_NATIVE];
9c1d230 committing experimental branch content
Laurent Sansonetti authored
236 }
237
2b7d5d5 import vincent's work
Laurent Sansonetti authored
238 VALUE
239 mr_enc_s_is_compatible(VALUE klass, SEL sel, VALUE str1, VALUE str2);
9c1d230 committing experimental branch content
Laurent Sansonetti authored
240
2b7d5d5 import vincent's work
Laurent Sansonetti authored
241 void
242 Init_MREncoding(void)
243 {
244 rb_cMREncoding = rb_define_class("MREncoding", rb_cObject);
245 rb_undef_alloc_func(rb_cMREncoding);
246
247 rb_objc_define_method(rb_cMREncoding, "to_s", mr_enc_name, 0);
248 rb_objc_define_method(rb_cMREncoding, "inspect", mr_enc_inspect, 0);
249 rb_objc_define_method(rb_cMREncoding, "name", mr_enc_name, 0);
250 rb_objc_define_method(rb_cMREncoding, "names", mr_enc_names, 0);
251 rb_objc_define_method(rb_cMREncoding, "dummy?", mr_enc_dummy_p, 0);
252 rb_objc_define_method(rb_cMREncoding, "ascii_compatible?", mr_enc_ascii_compatible_p, 0);
253 rb_objc_define_method(OBJC_CLASS(rb_cMREncoding), "list", mr_enc_s_list, 0);
254 rb_objc_define_method(OBJC_CLASS(rb_cMREncoding), "name_list", mr_enc_s_name_list, 0);
255 rb_objc_define_method(OBJC_CLASS(rb_cMREncoding), "aliases", mr_enc_s_aliases, 0);
256 //rb_define_singleton_method(rb_cMREncoding, "find", enc_find, 1);
257 // it's defined on Encoding, but it requires String's internals so it's defined with String
258 rb_objc_define_method(OBJC_CLASS(rb_cMREncoding), "compatible?", mr_enc_s_is_compatible, 2);
259
260 //rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
261 //rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
262
263 rb_objc_define_method(OBJC_CLASS(rb_cMREncoding), "default_external", mr_enc_s_default_external, 0);
264 //rb_define_singleton_method(rb_cMREncoding, "default_external=", set_default_external, 1);
265 rb_objc_define_method(OBJC_CLASS(rb_cMREncoding), "default_internal", mr_enc_s_default_internal, 0);
266 //rb_define_singleton_method(rb_cMREncoding, "default_internal=", set_default_internal, 1);
267 //rb_define_singleton_method(rb_cMREncoding, "locale_charmap", rb_locale_charmap, 0);
268
269 create_encodings();
9c1d230 committing experimental branch content
Laurent Sansonetti authored
270 }
Something went wrong with that request. Please try again.