Skip to content
Newer
Older
100644 408 lines (350 sloc) 11 KB
7d7d3e8 @ferrous26 Change ownership to The MacRuby Team and update copyrights
ferrous26 authored Apr 22, 2012
1 /*
96ab900 more work
Laurent Sansonetti authored Feb 16, 2010
2 * MacRuby implementation of Ruby 1.9 String.
3 *
4 * This file is covered by the Ruby license. See COPYING for more details.
7d7d3e8 @ferrous26 Change ownership to The MacRuby Team and update copyrights
ferrous26 authored Apr 22, 2012
5 *
6 * Copyright (C) 2012, The MacRuby Team. All rights reserved.
9595725 update copyrights to 2011
Laurent Sansonetti authored Jan 15, 2011
7 * Copyright (C) 2007-2011, Apple Inc. All rights reserved.
96ab900 more work
Laurent Sansonetti authored Feb 16, 2010
8 * Copyright (C) 1993-2007 Yukihiro Matsumoto
9 * Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
10 * Copyright (C) 2000 Information-technology Promotion Agency, Japan
11 */
12
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
13 #ifndef __ENCODING_H_
14 #define __ENCODING_H_
15
16 #if defined(__cplusplus)
17 extern "C" {
18 #endif
19
9dc1afe bye bye oniguruma, started ICU regexps
Laurent Sansonetti authored Feb 18, 2010
20 #if defined(__cplusplus)
21 # include "unicode/unistr.h"
22 #else
23 # include "unicode/ustring.h"
24 #endif
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
25
// Aliases mapping "native" / "non-native" byte order onto the concrete
// UTF-16 and UTF-32 encoding indexes, chosen from the compiler-provided
// __LITTLE_ENDIAN__ macro.
#if __LITTLE_ENDIAN__
# define ENCODING_UTF16_NATIVE ENCODING_UTF16LE
# define ENCODING_UTF32_NATIVE ENCODING_UTF32LE
# define ENCODING_UTF16_NON_NATIVE ENCODING_UTF16BE
# define ENCODING_UTF32_NON_NATIVE ENCODING_UTF32BE
#else
# define ENCODING_UTF16_NATIVE ENCODING_UTF16BE
# define ENCODING_UTF32_NATIVE ENCODING_UTF32BE
# define ENCODING_UTF16_NON_NATIVE ENCODING_UTF16LE
# define ENCODING_UTF32_NON_NATIVE ENCODING_UTF32LE
#endif
37
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored Dec 19, 2010
// Encoding predicates. Each one compares an encoding pointer against the
// matching entry of the rb_encodings table. The combined IS_UTF16_ENC and
// IS_UTF32_ENC forms expand their argument twice, so do not pass an
// expression with side effects.
#define IS_NATIVE_UTF16_ENC(encoding) \
    ((encoding) == rb_encodings[ENCODING_UTF16_NATIVE])
#define IS_NON_NATIVE_UTF16_ENC(encoding) \
    ((encoding) == rb_encodings[ENCODING_UTF16_NON_NATIVE])
#define IS_UTF16_ENC(encoding) \
    (IS_NATIVE_UTF16_ENC(encoding) || IS_NON_NATIVE_UTF16_ENC(encoding))

#define IS_NATIVE_UTF32_ENC(encoding) \
    ((encoding) == rb_encodings[ENCODING_UTF32_NATIVE])
#define IS_NON_NATIVE_UTF32_ENC(encoding) \
    ((encoding) == rb_encodings[ENCODING_UTF32_NON_NATIVE])
#define IS_UTF32_ENC(encoding) \
    (IS_NATIVE_UTF32_ENC(encoding) || IS_NON_NATIVE_UTF32_ENC(encoding))

#define IS_UTF8_ENC(encoding) \
    ((encoding) == rb_encodings[ENCODING_UTF8])
#define IS_ASCII_ENC(encoding) \
    ((encoding) == rb_encodings[ENCODING_ASCII])
#define IS_BINARY_ENC(encoding) \
    ((encoding) == rb_encodings[ENCODING_BINARY])
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
53
54 typedef uint8_t str_flag_t;
55
548a941 @Watson1978 fix the struct declarations
Watson1978 authored Jun 20, 2012
56 typedef struct RString {
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
57 struct RBasic basic;
96ab900 more work
Laurent Sansonetti authored Feb 16, 2010
58 struct rb_encoding *encoding;
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
59 long capacity_in_bytes;
60 long length_in_bytes;
67e47b9 @Watson1978 improve the performance of String#length with multi-byte character st…
Watson1978 authored Jun 7, 2012
61 long cached_length;
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored Dec 19, 2010
62 char *bytes;
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
63 str_flag_t flags;
96ab900 more work
Laurent Sansonetti authored Feb 16, 2010
64 } rb_str_t;
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
65
96ab900 more work
Laurent Sansonetti authored Feb 16, 2010
// Casts `x' to a rb_str_t pointer. The argument is parenthesized so that
// the cast applies to the whole expression passed in (for example
// `base + offset'), not only to its first operand.
#define RSTR(x) ((rb_str_t *)(x))
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored Feb 16, 2010
67
68 static inline bool
69 rb_klass_is_rstr(VALUE klass)
70 {
71 do {
72 if (klass == rb_cRubyString) {
73 return true;
74 }
75 if (klass == rb_cNSString) {
76 return false;
77 }
78 klass = RCLASS_SUPER(klass);
79 }
80 while (klass != 0);
81 return false;
82 }
83
// True when the object `x' points at is a RubyString. The first VALUE-sized
// word of the object is read and handed to rb_klass_is_rstr() as its class.
// The argument is parenthesized so the cast and dereference apply to the
// whole expression passed in.
#define IS_RSTR(x) (rb_klass_is_rstr(*(VALUE *)(x)))
85
39b55f1 some work on string
Laurent Sansonetti authored Feb 20, 2010
86 static inline void
87 rstr_modify(VALUE str)
88 {
89 const long mask = RBASIC(str)->flags;
90 if ((mask & FL_FREEZE) == FL_FREEZE) {
91 rb_raise(rb_eRuntimeError, "can't modify frozen/immutable string");
92 }
f39c792 r{ary,str,hash}_modify: check for the unstrust flag properly
Laurent Sansonetti authored Jan 28, 2011
93 if ((mask & FL_UNTRUSTED) != FL_UNTRUSTED) {
a58377f faster r{ary,hash,str}_modify() functions
Laurent Sansonetti authored May 26, 2010
94 if (rb_safe_level() >= 4) {
95 rb_raise(rb_eSecurityError, "Insecure: can't modify string");
96 }
39b55f1 some work on string
Laurent Sansonetti authored Feb 20, 2010
97 }
98 }
99
4a8ff28 more string work
Laurent Sansonetti authored Feb 23, 2010
100 static inline void
101 rstr_frozen_check(VALUE str)
102 {
103 const long mask = RBASIC(str)->flags;
104 if ((mask & FL_FREEZE) == FL_FREEZE) {
105 rb_raise(rb_eRuntimeError, "string frozen");
106 }
107 }
108
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
// Byte range occupied by one character inside a string's byte buffer.
typedef struct {
    long start_offset_in_bytes;
    long end_offset_in_bytes;
} character_boundaries_t;
113
254d7f6 @vincentisambart String#scan should now be much faster for non-ASCII strings
vincentisambart authored Jan 7, 2011
114 typedef struct {
115 character_boundaries_t cached_boundaries;
116 long cached_boundaries_index;
117 long cached_length;
118 } character_boundaries_cache_t;
119
96ab900 more work
Laurent Sansonetti authored Feb 16, 2010
120 typedef struct rb_encoding {
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
121 struct RBasic basic;
122 unsigned int index;
123 const char *public_name;
124 const char **aliases;
125 unsigned int aliases_count;
126 unsigned char min_char_size;
127 bool single_byte_encoding : 1;
128 bool ascii_compatible : 1;
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored Dec 19, 2010
129 bool little_endian : 1; // only meaningful for UTF-16 or UTF-32
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
130 void *private_data;
96ab900 more work
Laurent Sansonetti authored Feb 16, 2010
131 } rb_encoding_t;
132
133 #define RENC(x) ((rb_encoding_t *)(x))
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
134
// Indexes into the rb_encodings table. The numeric values follow from the
// declaration order, so new entries must be added just before
// ENCODINGS_COUNT, which doubles as the table size.
enum {
    ENCODING_BINARY = 0,
    ENCODING_ASCII,

    // Unicode encodings
    ENCODING_UTF8,
    ENCODING_UTF16BE,
    ENCODING_UTF16LE,
    ENCODING_UTF32BE,
    ENCODING_UTF32LE,

    // ISO-8859 family (there is no ENCODING_ISO8859_12 entry)
    ENCODING_ISO8859_1,
    ENCODING_ISO8859_2,
    ENCODING_ISO8859_3,
    ENCODING_ISO8859_4,
    ENCODING_ISO8859_5,
    ENCODING_ISO8859_6,
    ENCODING_ISO8859_7,
    ENCODING_ISO8859_8,
    ENCODING_ISO8859_9,
    ENCODING_ISO8859_10,
    ENCODING_ISO8859_11,
    ENCODING_ISO8859_13,
    ENCODING_ISO8859_14,
    ENCODING_ISO8859_15,
    ENCODING_ISO8859_16,

    // Other encodings
    ENCODING_MACROMAN,
    ENCODING_MACCYRILLIC,
    ENCODING_BIG5,
    ENCODING_EUCJP,
    ENCODING_SJIS,
    //ENCODING_CP932,

    ENCODINGS_COUNT
};
167
96ab900 more work
Laurent Sansonetti authored Feb 16, 2010
168 extern rb_encoding_t *rb_encodings[ENCODINGS_COUNT];
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
169
// Bits stored in rb_str_t.flags. Each property uses two bits: the *_SET bit
// records that the property has been computed (it is what
// str_check_flag_and_update_if_needed() tests before recomputing), and the
// companion bit holds the computed value.
#define STRING_VALID_ENCODING       0x002
#define STRING_VALID_ENCODING_SET   0x004
#define STRING_ASCII_ONLY           0x008
#define STRING_ASCII_ONLY_SET       0x010

// Conversions between a byte count and a UChar count.
#define BYTES_TO_UCHARS(len)    ((len) / sizeof(UChar))
#define UCHARS_TO_BYTES(len)    ((len) * sizeof(UChar))

// Non-zero when the lowest bit of `x' is set.
#define ODD_NUMBER(x)   ((x) & 0x1)
179
254d7f6 @vincentisambart String#scan should now be much faster for non-ASCII strings
vincentisambart authored Jan 7, 2011
180 static inline void
181 reset_character_boundaries_cache(character_boundaries_cache_t *cache)
182 {
183 assert(cache != NULL);
184 cache->cached_boundaries_index = -1;
185 cache->cached_length = -1;
186 }
187
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
// Divides `a' by `b' and rounds the quotient up, i.e. the value of
// (a + b - 1) / b.
//
// When both operands are positive the result is computed as
// (a - 1) / b + 1, which gives the same value without forming a + (b - 1);
// that sum overflows `long' (undefined behaviour) for large `a'.
// Other operand combinations keep the original expression unchanged.
// `b' must not be 0.
static inline long
div_round_up(long a, long b)
{
    if (a > 0 && b > 0) {
        return (a - 1) / b + 1;
    }
    return (a + (b - 1)) / b;
}
193
96ab900 more work
Laurent Sansonetti authored Feb 16, 2010
194 void str_update_flags(rb_str_t *self);
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
195
196 static inline void
67e47b9 @Watson1978 improve the performance of String#length with multi-byte character st…
Watson1978 authored Jun 7, 2012
197 str_reset_cache(rb_str_t *self)
198 {
199 self->cached_length = 0;
200 }
201
202 static inline void
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored Dec 19, 2010
203 str_reset_flags(rb_str_t *self)
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
204 {
67e47b9 @Watson1978 improve the performance of String#length with multi-byte character st…
Watson1978 authored Jun 7, 2012
205 str_reset_cache(self);
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored Dec 19, 2010
206 self->flags = 0;
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
207 }
208
209 static inline bool
96ab900 more work
Laurent Sansonetti authored Feb 16, 2010
210 str_check_flag_and_update_if_needed(rb_str_t *self, str_flag_t flag_set,
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored Feb 16, 2010
211 str_flag_t flag)
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
212 {
213 if (!(self->flags & flag_set)) {
214 str_update_flags(self);
215 assert(self->flags & flag_set);
216 }
217 return self->flags & flag;
218 }
219
220 static inline bool
96ab900 more work
Laurent Sansonetti authored Feb 16, 2010
221 str_is_valid_encoding(rb_str_t *self)
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
222 {
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored Feb 16, 2010
223 return str_check_flag_and_update_if_needed(self, STRING_VALID_ENCODING_SET,
224 STRING_VALID_ENCODING);
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
225 }
226
227 static inline bool
96ab900 more work
Laurent Sansonetti authored Feb 16, 2010
228 str_is_ascii_only(rb_str_t *self)
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
229 {
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored Feb 16, 2010
230 return str_check_flag_and_update_if_needed(self, STRING_ASCII_ONLY_SET,
231 STRING_ASCII_ONLY);
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
232 }
233
234 static inline bool
96ab900 more work
Laurent Sansonetti authored Feb 16, 2010
235 str_is_ruby_ascii_only(rb_str_t *self)
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
236 {
237 // for MRI, a string in a non-ASCII-compatible encoding (like UTF-16)
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored Feb 16, 2010
238 // containing only ASCII characters is not "ASCII only" though for us it
239 // is internally
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
240 if (!self->encoding->ascii_compatible) {
241 return false;
242 }
243 return str_is_ascii_only(self);
244 }
245
246 static inline void
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored Dec 19, 2010
247 str_set_flag(rb_str_t *self, bool status, str_flag_t flag_set,
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored Feb 16, 2010
248 str_flag_t flag)
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
249 {
250 if (status) {
251 self->flags = self->flags | flag_set | flag;
252 }
253 else {
254 self->flags = (self->flags | flag_set) & ~flag;
255 }
256 }
257
258 static inline void
96ab900 more work
Laurent Sansonetti authored Feb 16, 2010
259 str_set_ascii_only(rb_str_t *self, bool status)
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
260 {
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored Dec 19, 2010
261 str_set_flag(self, status, STRING_ASCII_ONLY_SET, STRING_ASCII_ONLY);
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
262 }
263
264 static inline void
96ab900 more work
Laurent Sansonetti authored Feb 16, 2010
265 str_set_valid_encoding(rb_str_t *self, bool status)
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
266 {
e22f26d @vincentisambart changed the internal representation of strings
vincentisambart authored Dec 19, 2010
267 str_set_flag(self, status, STRING_VALID_ENCODING_SET,
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored Feb 16, 2010
268 STRING_VALID_ENCODING);
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
269 }
270
ffe45d2 Add support for Encoding::Converter and move String#encode and String…
Patrick Thomson authored Jun 1, 2010
// Behaviors accepted by str_transcode() for its behavior_for_invalid and
// behavior_for_undefined arguments.
typedef enum {
    TRANSCODE_BEHAVIOR_RAISE_EXCEPTION = 0,
    TRANSCODE_BEHAVIOR_REPLACE_WITH_STRING = 1,
    TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_TEXT = 2,
    TRANSCODE_BEHAVIOR_REPLACE_WITH_XML_ATTR = 3
} transcode_behavior_t;
277
// Encoding-converter option bits. Every constant is a distinct single bit,
// so values can be OR-ed together.
typedef enum {
    ECONV_INVALID_MASK                = 0x0001,
    ECONV_INVALID_REPLACE             = 0x0002,
    ECONV_UNDEF_MASK                  = 0x0004,
    ECONV_UNDEF_REPLACE               = 0x0008,
    ECONV_UNDEF_HEX_CHARREF           = 0x0010,
    ECONV_PARTIAL_INPUT               = 0x0020,
    ECONV_AFTER_OUTPUT                = 0x0040,
    ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x0080,
    ECONV_CRLF_NEWLINE_DECORATOR      = 0x0100,
    ECONV_CR_NEWLINE_DECORATOR        = 0x0200,
    ECONV_XML_TEXT_DECORATOR          = 0x0400,
    ECONV_XML_ATTR_CONTENT_DECORATOR  = 0x0800,
    ECONV_XML_ATTR_QUOTE_DECORATOR    = 0x1000
} transcode_flags_t;
293
294 rb_str_t *str_transcode(rb_str_t *self, rb_encoding_t *src_encoding, rb_encoding_t *dst_encoding,
295 int behavior_for_invalid, int behavior_for_undefined, rb_str_t *replacement_str);
296
297 static inline rb_str_t *
298 str_simple_transcode(rb_str_t *self, rb_encoding_t *dst_encoding)
299 {
300 return str_transcode(self, self->encoding, dst_encoding,
301 TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, TRANSCODE_BEHAVIOR_RAISE_EXCEPTION, NULL);
302 }
303
254d7f6 @vincentisambart String#scan should now be much faster for non-ASCII strings
vincentisambart authored Jan 7, 2011
304 VALUE rb_str_substr_with_cache(VALUE str, long beg, long len,
305 character_boundaries_cache_t *cache);
306 VALUE rb_reg_nth_match_with_cache(int nth, VALUE match,
307 character_boundaries_cache_t *cache);
308
a4c25b8 improved rb_eql() for performance, makes faster hash lookup / keys co…
Laurent Sansonetti authored Dec 17, 2010
309 int rstr_compare(rb_str_t *str1, rb_str_t *str2);
ffe45d2 Add support for Encoding::Converter and move String#encode and String…
Patrick Thomson authored Jun 1, 2010
310
a8bd277 Make Symbol NSCoding compliant
Thibault Martin-Lagardette authored Mar 25, 2010
311 void rb_str_NSCoder_encode(void *coder, VALUE str, const char *key);
312 VALUE rb_str_NSCoder_decode(void *coder, const char *key);
313
4ba7de8 better string/symbol interaction
Laurent Sansonetti authored Feb 26, 2010
314 VALUE mr_enc_s_is_compatible(VALUE klass, SEL sel, VALUE str1, VALUE str2);
315 VALUE rb_str_intern_fast(VALUE str);
d13c044 added Symbol#[]
Laurent Sansonetti authored Mar 12, 2010
316 VALUE rstr_aref(VALUE str, SEL sel, int argc, VALUE *argv);
d77b7be ditto
Laurent Sansonetti authored Mar 20, 2010
317 VALUE rstr_swapcase(VALUE str, SEL sel);
318 VALUE rstr_capitalize(VALUE str, SEL sel);
319 VALUE rstr_upcase(VALUE str, SEL sel);
320 VALUE rstr_downcase(VALUE str, SEL sel);
57e1af9 faster #<<, #[] and #[]=
Laurent Sansonetti authored May 14, 2010
321 VALUE rstr_concat(VALUE self, SEL sel, VALUE other);
19f18d3 a new Symbol class, unicode-aware + refactored/cleaned symbol generation
Laurent Sansonetti authored Feb 25, 2010
322
d4e1f7a more string work
Laurent Sansonetti authored Feb 23, 2010
323 // The following functions should always be preferred over anything else,
324 // especially if this "else" is RSTRING_PTR and RSTRING_LEN.
325 // They also work on CFStrings.
d1673a2 introduce a better unichar API, which should be as fast as before the…
Laurent Sansonetti authored Jan 5, 2011
326
9dc1afe bye bye oniguruma, started ICU regexps
Laurent Sansonetti authored Feb 18, 2010
327 VALUE rb_unicode_str_new(const UniChar *ptr, const size_t len);
d1673a2 introduce a better unichar API, which should be as fast as before the…
Laurent Sansonetti authored Jan 5, 2011
328
12bd89d let's use 64 unichars like CF
Laurent Sansonetti authored Jan 5, 2011
329 #define STR_UCHARS_STATIC_BUFSIZE 64
d1673a2 introduce a better unichar API, which should be as fast as before the…
Laurent Sansonetti authored Jan 5, 2011
330
331 typedef struct {
332 UChar static_buf[STR_UCHARS_STATIC_BUFSIZE];
333 UChar *chars;
334 long len;
335 } rb_str_uchars_buf_t;
336
337 void rb_str_get_uchars_always(VALUE str, rb_str_uchars_buf_t *buf);
338
339 static inline void
340 rb_str_get_uchars(VALUE str, rb_str_uchars_buf_t *buf)
341 {
342 if (IS_RSTR(str)) {
343 rb_str_t *rstr = RSTR(str);
344 if (rstr->encoding->ascii_compatible && str_is_ascii_only(rstr)
345 && rstr->length_in_bytes < STR_UCHARS_STATIC_BUFSIZE) {
346 // Fast path.
1893ab0 no c99 in headers used by C extensions
Laurent Sansonetti authored Jan 5, 2011
347 long i;
348 for (i = 0; i < rstr->length_in_bytes; i++) {
d1673a2 introduce a better unichar API, which should be as fast as before the…
Laurent Sansonetti authored Jan 5, 2011
349 buf->static_buf[i] = rstr->bytes[i];
350 }
351 buf->chars = buf->static_buf;
352 buf->len = rstr->length_in_bytes;
353 return;
354 }
355 }
356 rb_str_get_uchars_always(str, buf);
357 }
358
359 UChar *rb_str_xcopy_uchars(VALUE str, long *len_p);
360
// Declares, in the enclosing scope, a UChar pointer named `_chars' and a
// long named `_len' holding the UChars of `str'.
// The expansion is a sequence of declarations: it also declares a buffer
// named `__buf', so it can appear only once per scope and cannot be used as
// the sole body of an unbraced if/else/loop.
#define RB_STR_GET_UCHARS(str, _chars, _len)    \
    rb_str_uchars_buf_t __buf;                  \
    rb_str_get_uchars(str, &__buf);             \
    UChar *_chars = __buf.chars;                \
    long _len = __buf.len
366
6707044 some work on regexp matching
Laurent Sansonetti authored Feb 19, 2010
367 long rb_str_chars_len(VALUE str);
d4e1f7a more string work
Laurent Sansonetti authored Feb 23, 2010
368 UChar rb_str_get_uchar(VALUE str, long pos);
17a100f more string work...
Laurent Sansonetti authored Feb 24, 2010
369 void rb_str_append_uchar(VALUE str, UChar c);
02d14a8 misc bugfix
Laurent Sansonetti authored Mar 10, 2010
370 void rb_str_append_uchars(VALUE str, const UChar *chars, long len);
f045214 regexps are now AOT compilable
Laurent Sansonetti authored Feb 25, 2010
371 unsigned long rb_str_hash_uchars(const UChar *chars, long chars_len);
4cecb13 unicode string formats (a work in progress)
Laurent Sansonetti authored Mar 2, 2010
372 long rb_uchar_strtol(UniChar *chars, long chars_len, long pos,
373 long *end_offset);
64b9824 IO and Marshal now returning UTF8 strings
Laurent Sansonetti authored Mar 17, 2010
374 void rb_str_force_encoding(VALUE str, rb_encoding_t *encoding);
ffe45d2 Add support for Encoding::Converter and move String#encode and String…
Patrick Thomson authored Jun 1, 2010
375 rb_str_t *str_need_string(VALUE str);
3e200e8 make NSString#dup return a RubyString
Laurent Sansonetti authored Jun 6, 2011
376 rb_str_t *str_new_from_cfstring(CFStringRef source);
ffe45d2 Add support for Encoding::Converter and move String#encode and String…
Patrick Thomson authored Jun 1, 2010
377 rb_str_t *replacement_string_for_encoding(rb_encoding_t* enc);
378 void str_replace_with_string(rb_str_t *self, rb_str_t *source);
379
51c47d9 when trying to convert a string into a numeric type, raise an excepti…
Laurent Sansonetti authored Aug 11, 2010
380 static inline void
381 str_check_ascii_compatible(VALUE str)
382 {
383 if (IS_RSTR(str) && !RSTR(str)->encoding->ascii_compatible) {
384 rb_raise(rb_eEncCompatError, "ASCII incompatible encoding: %s",
385 RSTR(str)->encoding->public_name);
386 }
387 }
9dc1afe bye bye oniguruma, started ICU regexps
Laurent Sansonetti authored Feb 18, 2010
388
f03ba94 String() and Array() will respectively convert NSStrings and NSArrays…
Laurent Sansonetti authored May 27, 2011
389 VALUE rb_f_string(VALUE, SEL, VALUE);
5640f80 Dir.glob: convert paths to unicode normalization form C
Laurent Sansonetti authored Dec 7, 2010
390 VALUE rstr_new_path(const char *path);
391
5819dc0 better RSTRING_{PTR,LEN] for pure NSStrings
Laurent Sansonetti authored Feb 9, 2011
392 const char *nsstr_cstr(VALUE str);
393 long nsstr_clen(VALUE str);
394
d178850 add NSString#encode which returns a properly encoded RubyString, and …
Laurent Sansonetti authored Jun 6, 2011
395 static inline id
396 rstr_only(id rcv, SEL sel)
397 {
398 rb_raise(rb_eArgError, "method `%s' does not work on NSStrings",
399 sel_getName(sel));
400 return rcv; // never reached
401 }
402
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
403 #if defined(__cplusplus)
0382b34 indented code, better type checking, removed rb_cCFString, started ad…
Laurent Sansonetti authored Feb 16, 2010
404 } // extern "C"
2b7d5d5 import vincent's work
Laurent Sansonetti authored Feb 16, 2010
405 #endif
406
407 #endif /* __ENCODING_H_ */
Something went wrong with that request. Please try again.