Permalink
Browse files

import vincent's work

git-svn-id: http://svn.macosforge.org/repository/ruby/MacRuby/branches/icu@3534 23306eb0-4c56-4727-a40e-e92c0eb68959
  • Loading branch information...
1 parent 40d5fd3 commit 2b7d5d54c9736ff5447f5b0e2f0c60ff55805e72 @lrz lrz committed Feb 16, 2010
Showing with 39,388 additions and 6,075 deletions.
  1. +201 −533 encoding.c
  2. +230 −0 encoding.h
  3. +1 −1 rakelib/builder/builder.rb
  4. +2 −1 rakelib/builder/options.rb
  5. +1,004 −5,540 string.c
  6. +410 −0 ucnv.c
  7. +557 −0 unicode/brkiter.h
  8. +201 −0 unicode/caniter.h
  9. +716 −0 unicode/chariter.h
  10. +41 −0 unicode/dbbi.h
  11. +202 −0 unicode/docmain.h
  12. +765 −0 unicode/locid.h
  13. +823 −0 unicode/normlzr.h
  14. +92 −0 unicode/parseerr.h
  15. +230 −0 unicode/parsepos.h
  16. +290 −0 unicode/platform.h
  17. +273 −0 unicode/ppalmos.h
  18. +184 −0 unicode/putil.h
  19. +298 −0 unicode/pwin32.h
  20. +700 −0 unicode/rbbi.h
  21. +259 −0 unicode/rep.h
  22. +485 −0 unicode/resbund.h
  23. +187 −0 unicode/schriter.h
  24. +271 −0 unicode/strenum.h
  25. +112 −0 unicode/symtable.h
  26. +1,917 −0 unicode/ubidi.h
  27. +478 −0 unicode/ubrk.h
  28. +180 −0 unicode/ucasemap.h
  29. +158 −0 unicode/ucat.h
  30. +3,026 −0 unicode/uchar.h
  31. +381 −0 unicode/uchriter.h
  32. +267 −0 unicode/uclean.h
  33. +1,938 −0 unicode/ucnv.h
  34. +162 −0 unicode/ucnv_cb.h
  35. +456 −0 unicode/ucnv_err.h
  36. +215 −0 unicode/uconfig.h
  37. +389 −0 unicode/udata.h
  38. +50 −0 unicode/udeprctd.h
  39. +262 −0 unicode/udraft.h
  40. +134 −0 unicode/uenum.h
  41. +308 −0 unicode/uidna.h
  42. +68 −0 unicode/uintrnal.h
  43. +707 −0 unicode/uiter.h
  44. +931 −0 unicode/uloc.h
  45. +381 −0 unicode/umachine.h
  46. +60 −0 unicode/umisc.h
  47. +127 −0 unicode/unifilt.h
  48. +125 −0 unicode/unifunct.h
  49. +163 −0 unicode/unimatch.h
  50. +1,337 −0 unicode/uniset.h
  51. +4,120 −0 unicode/unistr.h
  52. +575 −0 unicode/unorm.h
  53. +310 −0 unicode/uobject.h
  54. +32 −0 unicode/uobslete.h
  55. +106 −0 unicode/urbtok.h
  56. +1,605 −0 unicode/urename.h
  57. +155 −0 unicode/urep.h
  58. +871 −0 unicode/ures.h
  59. +224 −0 unicode/uscript.h
  60. +782 −0 unicode/uset.h
  61. +318 −0 unicode/usetiter.h
  62. +234 −0 unicode/ushape.h
  63. +156 −0 unicode/usprep.h
  64. +1,474 −0 unicode/ustring.h
  65. +46 −0 unicode/usystem.h
  66. +1,569 −0 unicode/utext.h
  67. +227 −0 unicode/utf.h
  68. +605 −0 unicode/utf16.h
  69. +23 −0 unicode/utf32.h
  70. +652 −0 unicode/utf8.h
  71. +1,171 −0 unicode/utf_old.h
  72. +352 −0 unicode/utrace.h
  73. +786 −0 unicode/utypes.h
  74. +241 −0 unicode/uversion.h
View

Large diffs are not rendered by default.

Oops, something went wrong.
View
@@ -0,0 +1,230 @@
+#ifndef __ENCODING_H_
+#define __ENCODING_H_
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include "ruby.h"
+#include <stdbool.h>
+#include "unicode/ustring.h"
+
+#if __LITTLE_ENDIAN__ // an undefined macro evaluates to 0 inside #if, which selects the big-endian branch below
+#define ENCODING_UTF16_NATIVE ENCODING_UTF16LE // "native" = the byte order selected by this #if
+#define ENCODING_UTF32_NATIVE ENCODING_UTF32LE
+#define ENCODING_UTF16_NON_NATIVE ENCODING_UTF16BE // "non native" = the opposite byte order
+#define ENCODING_UTF32_NON_NATIVE ENCODING_UTF32BE
+#else
+#define ENCODING_UTF16_NATIVE ENCODING_UTF16BE
+#define ENCODING_UTF32_NATIVE ENCODING_UTF32BE
+#define ENCODING_UTF16_NON_NATIVE ENCODING_UTF16LE
+#define ENCODING_UTF32_NON_NATIVE ENCODING_UTF32LE
+#endif
+
+#define NATIVE_UTF16_ENC(encoding) ((encoding) == encodings[ENCODING_UTF16_NATIVE]) // pointer comparison against an entry of the global encodings[] table
+#define NON_NATIVE_UTF16_ENC(encoding) ((encoding) == encodings[ENCODING_UTF16_NON_NATIVE])
+#define UTF16_ENC(encoding) (NATIVE_UTF16_ENC(encoding) || NON_NATIVE_UTF16_ENC(encoding)) // may expand its argument twice: avoid side effects in it
+#define NATIVE_UTF32_ENC(encoding) ((encoding) == encodings[ENCODING_UTF32_NATIVE])
+#define NON_NATIVE_UTF32_ENC(encoding) ((encoding) == encodings[ENCODING_UTF32_NON_NATIVE])
+#define UTF32_ENC(encoding) (NATIVE_UTF32_ENC(encoding) || NON_NATIVE_UTF32_ENC(encoding)) // may expand its argument twice: avoid side effects in it
+#define BINARY_ENC(encoding) ((encoding) == encodings[ENCODING_BINARY])
+
+typedef uint8_t str_flag_t; // type of string_t.flags and of the flag arguments of the str_* helpers; 8 bits wide
+
+typedef struct { // string object manipulated by the str_* inline helpers declared in this header
+ struct RBasic basic;
+ struct encoding_s *encoding; // encoding descriptor (struct encoding_s is typedef'd as encoding_t)
+ long capacity_in_bytes; // NOTE(review): presumably the allocated size of data, in bytes -- confirm in string.c
+ long length_in_bytes; // NOTE(review): presumably the used size of data, in bytes -- confirm in string.c
+ union { // both members are views of the same pointer storage
+ char *bytes;
+ UChar *uchars; // NOTE(review): presumably the member in use when STRING_STORED_IN_UCHARS is set -- confirm
+ } data;
+ str_flag_t flags; // combination of the STRING_* bit masks
+} string_t;
+
+typedef struct { // pair of byte offsets; returned by value from encoding_methods_t.get_character_boundaries
+ long start_offset_in_bytes;
+ long end_offset_in_bytes; // NOTE(review): whether the end offset is inclusive or exclusive is not visible here -- confirm
+} character_boundaries_t;
+
+typedef struct { // table of per-encoding function pointers; embedded by value in encoding_t as `methods`
+ void (*update_flags)(string_t *);
+ void (*make_data_binary)(string_t *);
+ bool (*try_making_data_uchars)(string_t *);
+ long (*length)(string_t *, bool); // NOTE(review): meaning of the unnamed bool parameter is not visible here -- confirm
+ long (*bytesize)(string_t *);
+ character_boundaries_t (*get_character_boundaries)(string_t *, long, bool); // NOTE(review): meaning of the long/bool parameters is not visible here -- confirm
+ long (*offset_in_bytes_to_index)(string_t *, long, bool); // NOTE(review): meaning of the bool parameter is not visible here -- confirm
+} encoding_methods_t;
+
+typedef struct encoding_s { // encoding descriptor; string_t.encoding points to one of these
+ struct RBasic basic;
+ unsigned int index; // NOTE(review): presumably the ENCODING_* position in encodings[] -- confirm
+ const char *public_name;
+ const char **aliases;
+ unsigned int aliases_count; // NOTE(review): presumably the number of entries in aliases -- confirm
+ unsigned char min_char_size;
+ bool single_byte_encoding : 1; // 1-bit bit-field
+ bool ascii_compatible : 1; // 1-bit bit-field; when false, str_is_ruby_ascii_only() returns false
+ encoding_methods_t methods; // function-pointer table stored by value
+ void *private_data;
+} encoding_t;
+
+enum { // indices used to look up entries of the global encodings[] array
+ ENCODING_BINARY = 0, // explicit 0; the following enumerators count up from here
+ ENCODING_ASCII,
+ ENCODING_UTF8,
+ ENCODING_UTF16BE,
+ ENCODING_UTF16LE,
+ ENCODING_UTF32BE,
+ ENCODING_UTF32LE,
+ ENCODING_ISO8859_1,
+ ENCODING_MACROMAN,
+ //ENCODING_EUCJP,
+ //ENCODING_SJIS,
+ //ENCODING_CP932,
+
+ ENCODINGS_COUNT // must stay last: its value is the number of enumerators above and sizes encodings[]
+};
+
+extern encoding_t *encodings[ENCODINGS_COUNT]; // one pointer per ENCODING_* enumerator; defined in another translation unit
+
+extern VALUE rb_cMREncoding; // NOTE(review): presumably the Ruby class object for encodings -- confirm in encoding.c
+
+// Bit masks stored in string_t.flags.  str_flag_t is uint8_t, so every mask
+// must fit in 8 bits.  Each facultative property uses a pair of bits: the
+// *_SET bit records that the property has been computed, and the companion
+// bit holds its value.
+//
+// All masks must be distinct.  STRING_ASCII_ONLY_SET used to share 0x010
+// with STRING_HAS_SUPPLEMENTARY_SET, so caching one property also marked the
+// other as computed and str_is_ascii_only() could return a stale value.
+// STRING_ASCII_ONLY was also defined twice; the duplicate is removed.
+#define STRING_HAS_SUPPLEMENTARY 0x040
+#define STRING_HAS_SUPPLEMENTARY_SET 0x020
+#define STRING_ASCII_ONLY_SET 0x010
+#define STRING_ASCII_ONLY 0x008
+#define STRING_VALID_ENCODING_SET 0x004
+#define STRING_VALID_ENCODING 0x002
+#define STRING_STORED_IN_UCHARS 0x001
+
+#define STRING_REQUIRED_FLAGS STRING_STORED_IN_UCHARS // the only flag not cleared by str_unset_facultative_flags()
+
+#define STR(x) ((string_t *)(x)) // unchecked cast to string_t *
+
+#define BYTES_TO_UCHARS(len) ((len) / sizeof(UChar)) // sizeof yields size_t, so the result is unsigned; the division truncates
+#define UCHARS_TO_BYTES(len) ((len) * sizeof(UChar)) // sizeof yields size_t, so the result is unsigned
+
+#define ODD_NUMBER(x) ((x) & 0x1) // non-zero when the lowest bit of x is set
+
+// Divides a by b after adding (b - 1) to the dividend, so that a positive
+// quotient with a remainder is rounded up (C integer division truncates).
+static inline long
+div_round_up(long a, long b)
+{
+    const long biased_dividend = a + (b - 1);
+    return biased_dividend / b;
+}
+
+void // defined in another translation unit
+str_update_flags(string_t *self); // str_check_flag_and_update_if_needed() calls this, then asserts that the requested *_SET bit is set
+
+// Clears the three *_SET bits of self->flags, so that each facultative
+// property is treated as not yet computed.  All other bits are left untouched.
+static inline void
+str_unset_facultative_flags(string_t *self)
+{
+    const str_flag_t computed_markers = STRING_HAS_SUPPLEMENTARY_SET
+        | STRING_ASCII_ONLY_SET
+        | STRING_VALID_ENCODING_SET;
+    self->flags &= ~computed_markers;
+}
+
+// Returns true only when validity has already been computed
+// (STRING_VALID_ENCODING_SET is set) and the result was negative
+// (STRING_VALID_ENCODING is clear).  Never triggers a flag update.
+static inline bool
+str_known_to_have_an_invalid_encoding(string_t *self)
+{
+    const bool computed = (self->flags & STRING_VALID_ENCODING_SET) != 0;
+    const bool valid = (self->flags & STRING_VALID_ENCODING) != 0;
+    return computed && !valid;
+}
+
+// Returns true only when the supplementary property has already been computed
+// (STRING_HAS_SUPPLEMENTARY_SET is set) and the result was negative
+// (STRING_HAS_SUPPLEMENTARY is clear).  Never triggers a flag update.
+static inline bool
+str_known_not_to_have_any_supplementary(string_t *self)
+{
+    const bool computed = (self->flags & STRING_HAS_SUPPLEMENTARY_SET) != 0;
+    const bool has_supplementary = (self->flags & STRING_HAS_SUPPLEMENTARY) != 0;
+    return computed && !has_supplementary;
+}
+
+// Reads one facultative flag, computing it first when necessary.
+//   flag_set: the *_SET mask telling whether the property has been computed
+//   flag:     the mask holding the property's value
+// When flag_set is not yet present in self->flags, str_update_flags() is
+// called and is expected (asserted) to set it.  Returns whether flag is set.
+static inline bool
+str_check_flag_and_update_if_needed(string_t *self, str_flag_t flag_set, str_flag_t flag)
+{
+    const bool already_computed = (self->flags & flag_set) != 0;
+    if (!already_computed) {
+        str_update_flags(self);
+        assert(self->flags & flag_set);
+    }
+    return (self->flags & flag) != 0;
+}
+
+// Returns the STRING_VALID_ENCODING property of self, updating the flags
+// first if STRING_VALID_ENCODING_SET is not set yet.
+static inline bool
+str_is_valid_encoding(string_t *self)
+{
+    const bool valid = str_check_flag_and_update_if_needed(self,
+            STRING_VALID_ENCODING_SET, STRING_VALID_ENCODING);
+    return valid;
+}
+
+// Returns the STRING_ASCII_ONLY property of self, updating the flags first
+// if STRING_ASCII_ONLY_SET is not set yet.
+static inline bool
+str_is_ascii_only(string_t *self)
+{
+    const bool ascii_only = str_check_flag_and_update_if_needed(self,
+            STRING_ASCII_ONLY_SET, STRING_ASCII_ONLY);
+    return ascii_only;
+}
+
+// "ASCII only" as MRI defines it.  Internally a string in a
+// non-ASCII-compatible encoding (UTF-16 for instance) that holds only ASCII
+// characters counts as ASCII only, but MRI does not consider it so.  The
+// answer is therefore false whenever the encoding is not ASCII compatible.
+static inline bool
+str_is_ruby_ascii_only(string_t *self)
+{
+    // && short-circuits: the flags are only consulted (and possibly updated)
+    // for ASCII-compatible encodings
+    return self->encoding->ascii_compatible && str_is_ascii_only(self);
+}
+
+// Tells whether the STRING_STORED_IN_UCHARS bit of self->flags is set.
+static inline bool
+str_is_stored_in_uchars(string_t *self)
+{
+    return (self->flags & STRING_STORED_IN_UCHARS) != 0;
+}
+
+// Toggles the STRING_STORED_IN_UCHARS bit of self->flags; every other bit is
+// preserved.
+static inline void
+str_negate_stored_in_uchars(string_t *self)
+{
+    self->flags = self->flags ^ STRING_STORED_IN_UCHARS;
+}
+
+// Sets (status == true) or clears (status == false) the
+// STRING_STORED_IN_UCHARS bit of self->flags; every other bit is preserved.
+static inline void
+str_set_stored_in_uchars(string_t *self, bool status)
+{
+    const str_flag_t without_bit = self->flags & ~STRING_STORED_IN_UCHARS;
+    self->flags = status ? (without_bit | STRING_STORED_IN_UCHARS) : without_bit;
+}
+
+// Records the value of one facultative property.
+//   flag_set: the *_SET mask; always turned on, marking the property as computed
+//   flag:     the value mask; turned on when status is true, off otherwise
+// Bits outside flag_set and flag are preserved.
+static inline void
+str_set_facultative_flag(string_t *self, bool status, str_flag_t flag_set, str_flag_t flag)
+{
+    str_flag_t updated = self->flags | flag_set;
+    if (status) {
+        updated |= flag;
+    }
+    else {
+        updated &= ~flag;
+    }
+    self->flags = updated;
+}
+
+// Stores status in the STRING_HAS_SUPPLEMENTARY bit and marks the property as
+// computed by setting STRING_HAS_SUPPLEMENTARY_SET.
+static inline void
+str_set_has_supplementary(string_t *self, bool status)
+{
+    str_set_facultative_flag(self, status,
+            STRING_HAS_SUPPLEMENTARY_SET,
+            STRING_HAS_SUPPLEMENTARY);
+}
+
+// Stores status in the STRING_ASCII_ONLY bit and marks the property as
+// computed by setting STRING_ASCII_ONLY_SET.
+static inline void
+str_set_ascii_only(string_t *self, bool status)
+{
+    str_set_facultative_flag(self, status,
+            STRING_ASCII_ONLY_SET,
+            STRING_ASCII_ONLY);
+}
+
+// Stores status in the STRING_VALID_ENCODING bit and marks the property as
+// computed by setting STRING_VALID_ENCODING_SET.
+static inline void
+str_set_valid_encoding(string_t *self, bool status)
+{
+    str_set_facultative_flag(self, status,
+            STRING_VALID_ENCODING_SET,
+            STRING_VALID_ENCODING);
+}
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* __ENCODING_H_ */
@@ -9,7 +9,7 @@
onig/enc/utf8 onig/enc/euc_jp onig/enc/sjis onig/enc/iso8859_1
onig/enc/utf16_be onig/enc/utf16_le onig/enc/utf32_be onig/enc/utf32_le
ruby signal sprintf st string struct time transcode util variable version
- thread id objc bs encoding main dln dmyext marshal gcd
+ thread id objc bs ucnv encoding main dln dmyext marshal gcd
vm_eval prelude miniprelude gc-stub bridgesupport compiler dispatcher vm
debugger MacRuby MacRubyDebuggerConnector NSArray NSDictionary
}
@@ -102,14 +102,15 @@ def self.option(name, default)
CC = '/usr/bin/gcc'
CXX = '/usr/bin/g++'
CFLAGS = "-I. -I./include -I./onig -I/usr/include/libxml2 #{ARCHFLAGS} -fno-common -pipe -O3 -g -Wall -fexceptions"
+CFLAGS << " -I./unicode" # TODO use /usr/local/include/unicode on FNI installs...
CFLAGS << " -Wno-parentheses -Wno-deprecated-declarations -Werror" if NO_WARN_BUILD
OBJC_CFLAGS = CFLAGS + " -fobjc-gc-only"
CXXFLAGS = `#{LLVM_CONFIG} --cxxflags #{LLVM_MODULES}`.sub(/-DNDEBUG/, '').strip
CXXFLAGS << " -I. -I./include -g -Wall #{ARCHFLAGS}"
CXXFLAGS << " -Wno-parentheses -Wno-deprecated-declarations -Werror" if NO_WARN_BUILD
CXXFLAGS << " -DLLVM_TOT" if ENV['LLVM_TOT']
LDFLAGS = `#{LLVM_CONFIG} --ldflags --libs #{LLVM_MODULES}`.strip.gsub(/\n/, '')
-LDFLAGS << " -lpthread -ldl -lxml2 -lobjc -lauto -framework Foundation"
+LDFLAGS << " -lpthread -ldl -lxml2 -lobjc -lauto -licucore -framework Foundation"
DLDFLAGS = "-dynamiclib -undefined suppress -flat_namespace -install_name #{INSTALL_NAME} -current_version #{MACRUBY_VERSION} -compatibility_version #{MACRUBY_VERSION}"
DLDFLAGS << " -unexported_symbols_list #{UNEXPORTED_SYMBOLS_LIST}" if UNEXPORTED_SYMBOLS_LIST
CFLAGS << " -std=c99" # we add this one later to not conflict with C++ flags
Oops, something went wrong.

0 comments on commit 2b7d5d5

Please sign in to comment.