Permalink
Browse files

bye bye oniguruma, started ICU regexps

git-svn-id: http://svn.macosforge.org/repository/ruby/MacRuby/branches/icu@3573 23306eb0-4c56-4727-a40e-e92c0eb68959
  • Loading branch information...
lrz committed Feb 18, 2010
1 parent fbc3da8 commit 9dc1afe8d035eff1f5103017a99c5dc798b15f19
Showing with 41,273 additions and 47,304 deletions.
  1. +0 −1 complex.c
  2. +11 −2 encoding.h
  3. +0 −1 gc.c
  4. +210 −0 icu-1060/unicode/basictz.h
  5. +7 −7 { → icu-1060}/unicode/brkiter.h
  6. +2,170 −0 icu-1060/unicode/calendar.h
  7. 0 { → icu-1060}/unicode/caniter.h
  8. 0 { → icu-1060}/unicode/chariter.h
  9. +746 −0 icu-1060/unicode/choicfmt.h
  10. +400 −0 icu-1060/unicode/coleitr.h
  11. +1,035 −0 icu-1060/unicode/coll.h
  12. +130 −0 icu-1060/unicode/curramt.h
  13. +117 −0 icu-1060/unicode/currunit.h
  14. +700 −0 icu-1060/unicode/datefmt.h
  15. 0 { → icu-1060}/unicode/dbbi.h
  16. +370 −0 icu-1060/unicode/dcfmtsym.h
  17. +1,901 −0 icu-1060/unicode/decimfmt.h
  18. +8 −8 { → icu-1060}/unicode/docmain.h
  19. +721 −0 icu-1060/unicode/dtfmtsym.h
  20. +159 −0 icu-1060/unicode/dtintrv.h
  21. +949 −0 icu-1060/unicode/dtitvfmt.h
  22. +528 −0 icu-1060/unicode/dtitvinf.h
  23. +423 −0 icu-1060/unicode/dtptngen.h
  24. +250 −0 icu-1060/unicode/dtrule.h
  25. +291 −0 icu-1060/unicode/fieldpos.h
  26. +591 −0 icu-1060/unicode/fmtable.h
  27. +296 −0 icu-1060/unicode/format.h
  28. +823 −0 icu-1060/unicode/gregocal.h
  29. 0 { → icu-1060}/unicode/locid.h
  30. +77 −0 icu-1060/unicode/measfmt.h
  31. +71 −0 icu-1060/unicode/measunit.h
  32. +137 −0 icu-1060/unicode/measure.h
  33. +940 −0 icu-1060/unicode/msgfmt.h
  34. 0 { → icu-1060}/unicode/normlzr.h
  35. +886 −0 icu-1060/unicode/numfmt.h
  36. 0 { → icu-1060}/unicode/parseerr.h
  37. 0 { → icu-1060}/unicode/parsepos.h
  38. +35 −9 { → icu-1060}/unicode/platform.h
  39. +541 −0 icu-1060/unicode/plurfmt.h
  40. +291 −0 icu-1060/unicode/plurrule.h
  41. 0 { → icu-1060}/unicode/ppalmos.h
  42. +2 −2 { → icu-1060}/unicode/putil.h
  43. +30 −17 { → icu-1060}/unicode/pwin32.h
  44. +25 −3 { → icu-1060}/unicode/rbbi.h
  45. +1,049 −0 icu-1060/unicode/rbnf.h
  46. +361 −0 icu-1060/unicode/rbtz.h
  47. +1,232 −0 icu-1060/unicode/regex.h
  48. 0 { → icu-1060}/unicode/rep.h
  49. +3 −3 { → icu-1060}/unicode/resbund.h
  50. 0 { → icu-1060}/unicode/schriter.h
  51. +569 −0 icu-1060/unicode/search.h
  52. +927 −0 icu-1060/unicode/simpletz.h
  53. +975 −0 icu-1060/unicode/smpdtfmt.h
  54. +324 −0 icu-1060/unicode/sortkey.h
  55. +3 −3 { → icu-1060}/unicode/strenum.h
  56. +518 −0 icu-1060/unicode/stsearch.h
  57. 0 { → icu-1060}/unicode/symtable.h
  58. +926 −0 icu-1060/unicode/tblcoll.h
  59. +803 −0 icu-1060/unicode/timezone.h
  60. +1,323 −0 icu-1060/unicode/translit.h
  61. +828 −0 icu-1060/unicode/tzrule.h
  62. +195 −0 icu-1060/unicode/tztrans.h
  63. +256 −160 { → icu-1060}/unicode/ubidi.h
  64. +19 −15 { → icu-1060}/unicode/ubrk.h
  65. +1,161 −0 icu-1060/unicode/ucal.h
  66. +391 −0 icu-1060/unicode/ucasemap.h
  67. 0 { → icu-1060}/unicode/ucat.h
  68. +104 −68 { → icu-1060}/unicode/uchar.h
  69. 0 { → icu-1060}/unicode/uchriter.h
  70. 0 { → icu-1060}/unicode/uclean.h
  71. +54 −25 { → icu-1060}/unicode/ucnv.h
  72. 0 { → icu-1060}/unicode/ucnv_cb.h
  73. +9 −2 { → icu-1060}/unicode/ucnv_err.h
  74. +1,132 −0 icu-1060/unicode/ucol.h
  75. +315 −0 icu-1060/unicode/ucoleitr.h
  76. +16 −3 { → icu-1060}/unicode/uconfig.h
  77. +349 −0 icu-1060/unicode/ucsdet.h
  78. +270 −0 icu-1060/unicode/ucurr.h
  79. +962 −0 icu-1060/unicode/udat.h
  80. +9 −9 { → icu-1060}/unicode/udata.h
  81. +471 −0 icu-1060/unicode/udatpg.h
  82. +10 −10 { → icu-1060}/unicode/udeprctd.h
  83. +166 −0 icu-1060/unicode/udraft.h
  84. 0 { → icu-1060}/unicode/uenum.h
  85. +9 −5 { → icu-1060}/unicode/uidna.h
  86. +180 −0 icu-1060/unicode/uintrnal.h
  87. +3 −3 { → icu-1060}/unicode/uiter.h
  88. +119 −4 { → icu-1060}/unicode/uloc.h
  89. +192 −0 icu-1060/unicode/ulocdata.h
  90. +0 −43 { → icu-1060}/unicode/umachine.h
  91. 0 { → icu-1060}/unicode/umisc.h
  92. +647 −0 icu-1060/unicode/umsg.h
  93. 0 { → icu-1060}/unicode/unifilt.h
  94. 0 { → icu-1060}/unicode/unifunct.h
  95. 0 { → icu-1060}/unicode/unimatch.h
  96. +97 −0 icu-1060/unicode/unirepl.h
  97. +245 −16 { → icu-1060}/unicode/uniset.h
  98. +243 −133 { → icu-1060}/unicode/unistr.h
  99. +4 −3 { → icu-1060}/unicode/unorm.h
  100. +869 −0 icu-1060/unicode/unum.h
  101. +2 −4 { → icu-1060}/unicode/uobject.h
  102. +1 −1 { → icu-1060}/unicode/uobslete.h
  103. +23 −3 { → icu-1060}/unicode/urbtok.h
  104. +989 −0 icu-1060/unicode/uregex.h
  105. +1,775 −0 icu-1060/unicode/urename.h
  106. 0 { → icu-1060}/unicode/urep.h
  107. +9 −9 { → icu-1060}/unicode/ures.h
  108. +48 −18 { → icu-1060}/unicode/uscript.h
  109. +766 −0 icu-1060/unicode/usearch.h
  110. +277 −7 { → icu-1060}/unicode/uset.h
  111. 0 { → icu-1060}/unicode/usetiter.h
  112. +30 −1 { → icu-1060}/unicode/ushape.h
  113. 0 { → icu-1060}/unicode/usprep.h
  114. +904 −0 icu-1060/unicode/ustdio.h
  115. +67 −0 icu-1060/unicode/ustream.h
  116. +18 −13 { → icu-1060}/unicode/ustring.h
  117. +8 −8 { → icu-1060}/unicode/usystem.h
  118. +119 −126 { → icu-1060}/unicode/utext.h
  119. +2 −2 { → icu-1060}/unicode/utf.h
  120. +2 −2 { → icu-1060}/unicode/utf16.h
  121. 0 { → icu-1060}/unicode/utf32.h
  122. +2 −2 { → icu-1060}/unicode/utf8.h
  123. 0 { → icu-1060}/unicode/utf_old.h
  124. +481 −0 icu-1060/unicode/utmscale.h
  125. +6 −0 { → icu-1060}/unicode/utrace.h
  126. +583 −0 icu-1060/unicode/utrans.h
  127. +21 −6 { → icu-1060}/unicode/utypes.h
  128. +66 −32 { → icu-1060}/unicode/uversion.h
  129. +443 −0 icu-1060/unicode/vtzone.h
  130. +3 −0 include/ruby/intern.h
  131. +0 −785 include/ruby/oniguruma.h
  132. +0 −67 include/ruby/re.h
  133. +0 −38 include/ruby/regex.h
  134. +0 −9 include/ruby/ruby.h
  135. +3 −2 marshal.c
  136. +0 −1 onig/AUTHORS
  137. +0 −28 onig/COPYING
  138. +0 −58 onig/enc/ascii.c
  139. +0 −162 onig/enc/big5.c
  140. +0 −200 onig/enc/cp1251.c
  141. +0 −285 onig/enc/euc_jp.c
  142. +0 −158 onig/enc/euc_kr.c
  143. +0 −138 onig/enc/euc_tw.c
  144. +0 −495 onig/enc/gb18030.c
  145. +0 −272 onig/enc/iso8859_1.c
  146. +0 −239 onig/enc/iso8859_10.c
  147. +0 −96 onig/enc/iso8859_11.c
  148. +0 −228 onig/enc/iso8859_13.c
  149. +0 −241 onig/enc/iso8859_14.c
  150. +0 −235 onig/enc/iso8859_15.c
  151. +0 −237 onig/enc/iso8859_16.c
  152. +0 −235 onig/enc/iso8859_2.c
  153. +0 −235 onig/enc/iso8859_3.c
  154. +0 −237 onig/enc/iso8859_4.c
  155. +0 −226 onig/enc/iso8859_5.c
  156. +0 −96 onig/enc/iso8859_6.c
  157. +0 −222 onig/enc/iso8859_7.c
  158. +0 −96 onig/enc/iso8859_8.c
  159. +0 −228 onig/enc/iso8859_9.c
  160. +0 −249 onig/enc/koi8.c
  161. +0 −212 onig/enc/koi8_r.c
  162. +0 −1,162 onig/enc/mktable.c
  163. +0 −318 onig/enc/sjis.c
  164. +0 −11,356 onig/enc/unicode.c
  165. +0 −228 onig/enc/utf16_be.c
  166. +0 −229 onig/enc/utf16_le.c
  167. +0 −184 onig/enc/utf32_be.c
  168. +0 −184 onig/enc/utf32_le.c
  169. +0 −305 onig/enc/utf8.c
  170. +0 −85 onig/oniggnu.h
  171. +0 −169 onig/onigposix.h
  172. +0 −817 onig/oniguruma.h
  173. +0 −6,311 onig/regcomp.c
  174. +0 −902 onig/regenc.c
  175. +0 −186 onig/regenc.h
  176. +0 −387 onig/regerror.c
  177. +0 −3,805 onig/regexec.c
  178. +0 −215 onig/regext.c
  179. +0 −164 onig/reggnu.c
  180. +0 −829 onig/regint.h
  181. +0 −5,534 onig/regparse.c
  182. +0 −351 onig/regparse.h
  183. +0 −98 onig/regposerr.c
  184. +0 −303 onig/regposix.c
  185. +0 −315 onig/regsyntax.c
  186. +0 −76 onig/regtrav.c
  187. +0 −56 onig/regversion.c
  188. +9 −4 parse.y
  189. +3 −8 rakelib/builder/builder.rb
  190. +10 −3 rakelib/builder/options.rb
  191. +0 −1 rational.c
  192. +0 −3,786 re.c
  193. +335 −0 re.cpp
  194. +59 −19 string.c
  195. +0 −180 unicode/ucasemap.h
  196. +0 −262 unicode/udraft.h
  197. +0 −68 unicode/uintrnal.h
  198. +0 −1,605 unicode/urename.h
View
@@ -7,7 +7,6 @@
#include "ruby.h"
#include <math.h>
-#include "ruby/re.h"
#include "ruby/node.h"
#include "vm.h"
#include "id.h"
View
@@ -17,8 +17,12 @@ extern "C" {
#endif
#include "ruby.h"
-#include <stdbool.h>
-#include "unicode/ustring.h"
+
+#if defined(__cplusplus)
+# include "unicode/unistr.h"
+#else
+# include "unicode/ustring.h"
+#endif
#if __LITTLE_ENDIAN__
#define ENCODING_UTF16_NATIVE ENCODING_UTF16LE
@@ -267,6 +271,11 @@ str_set_valid_encoding(rb_str_t *self, bool status)
STRING_VALID_ENCODING);
}
+VALUE rb_unicode_str_new(const UniChar *ptr, const size_t len);
+
+void str_get_uchars(VALUE str, UChar **chars_p, long *chars_len_p,
+ bool *need_free_p);
+
// Return a string object appropriate for bstr_ calls. This does nothing for
// data/binary RubyStrings.
VALUE rb_str_bstr(VALUE str);
View
1 gc.c
@@ -21,7 +21,6 @@
#include "ruby/signal.h"
#include "ruby/st.h"
#include "ruby/node.h"
-#include "ruby/re.h"
#include "ruby/io.h"
#include "ruby/util.h"
#include "objc.h"
View
@@ -0,0 +1,210 @@
+/*
+*******************************************************************************
+* Copyright (C) 2007-2008, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+#ifndef BASICTZ_H
+#define BASICTZ_H
+
+/**
+ * \file
+ * \brief C++ API: ICU TimeZone base class
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/timezone.h"
+#include "unicode/tzrule.h"
+#include "unicode/tztrans.h"
+
+U_NAMESPACE_BEGIN
+
+// forward declarations
+class UVector;
+
+/**
+ * <code>BasicTimeZone</code> is an abstract class extending <code>TimeZone</code>.
+ * This class provides some additional methods to access time zone transitions and rules.
+ * All ICU <code>TimeZone</code> concrete subclasses extend this class.
+ * @stable ICU 4.0
+ */
+class U_I18N_API BasicTimeZone: public TimeZone {
+public:
+ /**
+ * Destructor.
+ * @stable ICU 4.0
+ */
+ virtual ~BasicTimeZone();
+
+ /**
+ * Gets the first time zone transition after the base time.
+ * @param base The base time.
+ * @param inclusive Whether the base time is inclusive or not.
+ * @param result Receives the first transition after the base time.
+ * @return TRUE if the transition is found.
+ * @stable ICU 4.0
+ */
+ virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/ = 0;
+
+ /**
+ * Gets the most recent time zone transition before the base time.
+ * @param base The base time.
+ * @param inclusive Whether the base time is inclusive or not.
+ * @param result Receives the most recent transition before the base time.
+ * @return TRUE if the transition is found.
+ * @stable ICU 4.0
+ */
+ virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/ = 0;
+
+ /**
+ * Checks if the time zone has equivalent transitions in the time range.
+ * This method returns true when all of transition times, from/to standard
+ * offsets and DST savings used by this time zone match the other in the
+ * time range.
+ * @param tz The <code>BasicTimeZone</code> object to be compared with.
+ * @param start The start time of the evaluated time range (inclusive)
+ * @param end The end time of the evaluated time range (inclusive)
+ * @param ignoreDstAmount
+ * When true, any transitions with only daylight saving amount
+ * changes will be ignored, except when either of them is zero.
+ * For example, a transition from rawoffset 3:00/dstsavings 1:00
+ * to rawoffset 2:00/dstsavings 2:00 is excluded from the comparison,
+ * but a transition from rawoffset 2:00/dstsavings 1:00 to
+ * rawoffset 3:00/dstsavings 0:00 is included.
+ * @param ec Output param to be filled in with a success or an error.
+ * @return true if the other time zone has the equivalent transitions in the
+ * time range.
+ * @stable ICU 4.0
+ */
+ virtual UBool hasEquivalentTransitions(/*const*/ BasicTimeZone& tz, UDate start, UDate end,
+ UBool ignoreDstAmount, UErrorCode& ec) /*const*/;
+
+ /**
+ * Returns the number of <code>TimeZoneRule</code>s which represents time transitions,
+ * for this time zone, that is, all <code>TimeZoneRule</code>s for this time zone except
+ * <code>InitialTimeZoneRule</code>. The return value range is 0 or any positive value.
+ * @param status Receives error status code.
+ * @return The number of <code>TimeZoneRule</code>s representing time transitions.
+ * @stable ICU 4.0
+ */
+ virtual int32_t countTransitionRules(UErrorCode& status) /*const*/ = 0;
+
+ /**
+ * Gets the <code>InitialTimeZoneRule</code> and the set of <code>TimeZoneRule</code>
+ * which represent time transitions for this time zone. On successful return,
+ * the argument initial points to non-NULL <code>InitialTimeZoneRule</code> and
+ * the array trsrules is filled with 0 or multiple <code>TimeZoneRule</code>
+ * instances up to the size specified by trscount. The results are referencing the
+ * rule instance held by this time zone instance. Therefore, after this time zone
+ * is destructed, they are no longer available.
+ * @param initial Receives the initial timezone rule
+ * @param trsrules Receives the timezone transition rules
+ * @param trscount On input, specify the size of the array 'trsrules' receiving
+ * the timezone transition rules. On output, actual number of
+ * rules filled in the array will be set.
+ * @param status Receives error status code.
+ * @stable ICU 4.0
+ */
+ virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial,
+ const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) /*const*/ = 0;
+
+ /**
+ * Gets the set of time zone rules valid at the specified time. Some known external time zone
+ * implementations are not capable of handling historic time zone rule changes. Also some
+ * implementations can only handle certain types of rule definitions.
+ * If this time zone does not use any daylight saving time within about 1 year from the specified
+ * time, only the <code>InitialTimeZoneRule</code> is returned. Otherwise, the rule for standard
+ * time and daylight saving time transitions are returned in addition to the
+ * <code>InitialTimeZoneRule</code>. The standard and daylight saving time transition rules are
+ * represented by <code>AnnualTimeZoneRule</code> with <code>DateTimeRule::DOW</code> for its date
+ * rule and <code>DateTimeRule::WALL_TIME</code> for its time rule. Because daylight saving time
+ * rules change from time to time in many time zones and also mapping a transition time rule to
+ * a different type is a lossy transformation, the set of rules returned by this method may be
+ * valid for a short period of time.
+ * The time zone rule objects returned by this method are owned by the caller, so the caller is
+ * responsible for deleting them after use.
+ * @param date The date used for extracting time zone rules.
+ * @param initial Receives the <code>InitialTimeZoneRule</code>, always not NULL.
+ * @param std Receives the <code>AnnualTimeZoneRule</code> for standard time transitions.
+ * When this time zone does not observe daylight saving times around the
+ * specified date, NULL is set.
+ * @param dst Receives the <code>AnnualTimeZoneRule</code> for daylight saving time
+ * transitions. When this time zone does not observe daylight saving times
+ * around the specified date, NULL is set.
+ * @param status Receives error status code.
+ * @stable ICU 4.0
+ */
+ virtual void getSimpleRulesNear(UDate date, InitialTimeZoneRule*& initial,
+ AnnualTimeZoneRule*& std, AnnualTimeZoneRule*& dst, UErrorCode& status) /*const*/;
+
+
+ /**
+ * The time type option bit flags used by getOffsetFromLocal
+ * @internal
+ */
+ enum {
+ kStandard = 0x01,
+ kDaylight = 0x03,
+ kFormer = 0x04,
+ kLatter = 0x0C
+ };
+
+ /**
+ * Get time zone offsets from local wall time.
+ * NOTE(review): nonExistingTimeOpt and duplicatedTimeOpt presumably take the
+ * time type option bit flags declared above -- confirm against the implementation.
+ * @internal
+ */
+ virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt,
+ int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) /*const*/;
+
+protected:
+
+ /**
+ * The time type option bit masks used by getOffsetFromLocal.
+ * kStdDstMask is kDaylight (0x03), which spans the kStandard and kDaylight bits;
+ * kFormerLatterMask is kLatter (0x0C), which spans the kFormer and kLatter bits.
+ * @internal
+ */
+ enum {
+ kStdDstMask = kDaylight,
+ kFormerLatterMask = kLatter
+ };
+
+ /**
+ * Default constructor.
+ * @stable ICU 4.0
+ */
+ BasicTimeZone();
+
+ /**
+ * Construct a timezone with a given ID.
+ * @param id a system time zone ID
+ * @stable ICU 4.0
+ */
+ BasicTimeZone(const UnicodeString &id);
+
+ /**
+ * Copy constructor.
+ * @param source the object to be copied.
+ * @stable ICU 4.0
+ */
+ BasicTimeZone(const BasicTimeZone& source);
+
+ /**
+ * Gets the set of TimeZoneRule instances applicable to the specified time and after.
+ * @param start The start date used for extracting time zone rules
+ * @param initial Receives the InitialTimeZoneRule, always not NULL
+ * @param transitionRules Receives the transition rules, could be NULL
+ * @param status Receives error status code
+ */
+ void getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, UVector*& transitionRules,
+ UErrorCode& status) /*const*/;
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // BASICTZ_H
+
+//eof
@@ -1,6 +1,6 @@
/*
********************************************************************************
-* Copyright (C) 1997-2006, International Business Machines
+* Copyright (C) 1997-2007, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************************
*
@@ -26,7 +26,7 @@
* \file
* \brief C++ API: Break Iterator.
*/
-
+
#if UCONFIG_NO_BREAK_ITERATION
U_NAMESPACE_BEGIN
@@ -92,8 +92,8 @@ U_NAMESPACE_BEGIN
* file ubrk.h
* <p>
* Code snippets illustrating the use of the Break Iterator APIs
- * are available in the ICU User Guide,
- * http://icu.sourceforge.net/userguide/boundaryAnalysis.html
+ * are available in the ICU User Guide,
+ * http://icu-project.org/userguide/boundaryAnalysis.html
* and in the sample program icu/source/samples/break/break.cpp"
*
*/
@@ -161,7 +161,7 @@ class U_COMMON_API BreakIterator : public UObject {
* @param status receives any error codes.
* @return The current UText for this break iterator. If an input
* UText was provided, it will always be returned.
- * @draft ICU 3.4
+ * @stable ICU 3.4
*/
virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
@@ -174,7 +174,7 @@ class U_COMMON_API BreakIterator : public UObject {
virtual void setText(const UnicodeString &text) = 0;
/**
- * Reset the break iterator to operate over the text represented by
+ * Reset the break iterator to operate over the text represented by
* the UText. The iterator position is reset to the start.
*
* This function makes a shallow clone of the supplied UText. This means
@@ -184,7 +184,7 @@ class U_COMMON_API BreakIterator : public UObject {
*
* @param text The UText used to change the text.
* @param status receives any error codes.
- * @draft ICU 3.4
+ * @stable ICU 3.4
*/
virtual void setText(UText *text, UErrorCode &status) = 0;
Oops, something went wrong.

0 comments on commit 9dc1afe

Please sign in to comment.