Skip to content
Browse files

bye bye oniguruma, started ICU regexps

git-svn-id: http://svn.macosforge.org/repository/ruby/MacRuby/branches/icu@3573 23306eb0-4c56-4727-a40e-e92c0eb68959
  • Loading branch information...
1 parent fbc3da8 commit 9dc1afe8d035eff1f5103017a99c5dc798b15f19 Laurent Sansonetti committed Feb 18, 2010
Sorry, we could not display the entire diff because it was too big.
View
1 complex.c
@@ -7,7 +7,6 @@
#include "ruby.h"
#include <math.h>
-#include "ruby/re.h"
#include "ruby/node.h"
#include "vm.h"
#include "id.h"
View
13 encoding.h
@@ -17,8 +17,12 @@ extern "C" {
#endif
#include "ruby.h"
-#include <stdbool.h>
-#include "unicode/ustring.h"
+
+#if defined(__cplusplus)
+# include "unicode/unistr.h"
+#else
+# include "unicode/ustring.h"
+#endif
#if __LITTLE_ENDIAN__
#define ENCODING_UTF16_NATIVE ENCODING_UTF16LE
@@ -267,6 +271,11 @@ str_set_valid_encoding(rb_str_t *self, bool status)
STRING_VALID_ENCODING);
}
+VALUE rb_unicode_str_new(const UniChar *ptr, const size_t len);
+
+void str_get_uchars(VALUE str, UChar **chars_p, long *chars_len_p,
+ bool *need_free_p);
+
// Return a string object appropriate for bstr_ calls. This does nothing for
// data/binary RubyStrings.
VALUE rb_str_bstr(VALUE str);
View
1 gc.c
@@ -21,7 +21,6 @@
#include "ruby/signal.h"
#include "ruby/st.h"
#include "ruby/node.h"
-#include "ruby/re.h"
#include "ruby/io.h"
#include "ruby/util.h"
#include "objc.h"
View
210 icu-1060/unicode/basictz.h
@@ -0,0 +1,210 @@
+/*
+*******************************************************************************
+* Copyright (C) 2007-2008, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+*/
+#ifndef BASICTZ_H
+#define BASICTZ_H
+
+/**
+ * \file
+ * \brief C++ API: ICU TimeZone base class
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/timezone.h"
+#include "unicode/tzrule.h"
+#include "unicode/tztrans.h"
+
+U_NAMESPACE_BEGIN
+
+// forward declarations
+class UVector;
+
+/**
+ * <code>BasicTimeZone</code> is an abstract class extending <code>TimeZone</code>.
+ * This class provides some additional methods to access time zone transitions and rules.
+ * All ICU <code>TimeZone</code> concrete subclasses extend this class.
+ * @stable ICU 4.0
+ */
+class U_I18N_API BasicTimeZone: public TimeZone {
+public:
+ /**
+ * Destructor.
+ * @stable ICU 4.0
+ */
+ virtual ~BasicTimeZone();
+
+ /**
+ * Gets the first time zone transition after the base time.
+ * @param base The base time.
+ * @param inclusive Whether the base time is inclusive or not.
+ * @param result Receives the first transition after the base time.
+ * @return TRUE if the transition is found.
+ * @stable ICU 4.0
+ */
+ virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/ = 0;
+
+ /**
+ * Gets the most recent time zone transition before the base time.
+ * @param base The base time.
+ * @param inclusive Whether the base time is inclusive or not.
+ * @param result Receives the most recent transition before the base time.
+ * @return TRUE if the transition is found.
+ * @stable ICU 4.0
+ */
+ virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/ = 0;
+
+ /**
+ * Checks if the time zone has equivalent transitions in the time range.
+ * This method returns true when all of the transition times, from/to standard
+ * offsets and DST savings used by this time zone match the other in the
+ * time range.
+ * @param tz The <code>BasicTimeZone</code> object to be compared with.
+ * @param start The start time of the evaluated time range (inclusive)
+ * @param end The end time of the evaluated time range (inclusive)
+ * @param ignoreDstAmount
+ * When true, any transitions with only daylight saving amount
+ * changes will be ignored, except when either of them is zero.
+ * For example, a transition from rawoffset 3:00/dstsavings 1:00
+ * to rawoffset 2:00/dstsavings 2:00 is excluded from the comparison,
+ * but a transition from rawoffset 2:00/dstsavings 1:00 to
+ * rawoffset 3:00/dstsavings 0:00 is included.
+ * @param ec Output param to be filled in with a success or an error code.
+ * @return true if the other time zone has the equivalent transitions in the
+ * time range.
+ * @stable ICU 4.0
+ */
+ virtual UBool hasEquivalentTransitions(/*const*/ BasicTimeZone& tz, UDate start, UDate end,
+ UBool ignoreDstAmount, UErrorCode& ec) /*const*/;
+
+ /**
+ * Returns the number of <code>TimeZoneRule</code>s which represent time transitions,
+ * for this time zone, that is, all <code>TimeZoneRule</code>s for this time zone except
+ * <code>InitialTimeZoneRule</code>. The return value range is 0 or any positive value.
+ * @param status Receives error status code.
+ * @return The number of <code>TimeZoneRule</code>s representing time transitions.
+ * @stable ICU 4.0
+ */
+ virtual int32_t countTransitionRules(UErrorCode& status) /*const*/ = 0;
+
+ /**
+ * Gets the <code>InitialTimeZoneRule</code> and the set of <code>TimeZoneRule</code>
+ * which represent time transitions for this time zone. On successful return,
+ * the argument initial points to non-NULL <code>InitialTimeZoneRule</code> and
+ * the array trsrules is filled with 0 or multiple <code>TimeZoneRule</code>
+ * instances up to the size specified by trscount. The results are referencing the
+ * rule instance held by this time zone instance. Therefore, after this time zone
+ * is destructed, they are no longer available.
+ * @param initial Receives the initial timezone rule
+ * @param trsrules Receives the timezone transition rules
+ * @param trscount On input, specify the size of the array 'trsrules' receiving
+ * the timezone transition rules. On output, actual number of
+ * rules filled in the array will be set.
+ * @param status Receives error status code.
+ * @stable ICU 4.0
+ */
+ virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial,
+ const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) /*const*/ = 0;
+
+ /**
+ * Gets the set of time zone rules valid at the specified time. Some known external time zone
+ * implementations are not capable of handling historic time zone rule changes. Also some
+ * implementations can only handle certain types of rule definitions.
+ * If this time zone does not use any daylight saving time within about 1 year from the specified
+ * time, only the <code>InitialTimeZoneRule</code> is returned. Otherwise, the rules for standard
+ * time and daylight saving time transitions are returned in addition to the
+ * <code>InitialTimeZoneRule</code>. The standard and daylight saving time transition rules are
+ * represented by <code>AnnualTimeZoneRule</code> with <code>DateTimeRule::DOW</code> for its date
+ * rule and <code>DateTimeRule::WALL_TIME</code> for its time rule. Because the daylight saving time
+ * rule changes from time to time in many time zones and also mapping a transition time rule to
+ * a different type is a lossy transformation, the set of rules returned by this method may be valid
+ * only for a short period of time.
+ * The time zone rule objects returned by this method are owned by the caller, so the caller is
+ * responsible for deleting them after use.
+ * @param date The date used for extracting time zone rules.
+ * @param initial Receives the <code>InitialTimeZoneRule</code>, always not NULL.
+ * @param std Receives the <code>AnnualTimeZoneRule</code> for standard time transitions.
+ * When this time zone does not observe daylight saving times around the
+ * specified date, NULL is set.
+ * @param dst Receives the <code>AnnualTimeZoneRule</code> for daylight saving time
+ * transitions. When this time zone does not observe daylight saving times
+ * around the specified date, NULL is set.
+ * @param status Receives error status code.
+ * @stable ICU 4.0
+ */
+ virtual void getSimpleRulesNear(UDate date, InitialTimeZoneRule*& initial,
+ AnnualTimeZoneRule*& std, AnnualTimeZoneRule*& dst, UErrorCode& status) /*const*/;
+
+
+ /**
+ * The time type option bit flags used by getOffsetFromLocal.
+ * Note that the values overlap: kDaylight (0x03) includes the kStandard
+ * bit (0x01) and kLatter (0x0C) includes the kFormer bit (0x04). The
+ * protected enum further down reuses kDaylight and kLatter as the masks
+ * kStdDstMask and kFormerLatterMask.
+ * @internal
+ */
+ enum {
+ kStandard = 0x01,
+ kDaylight = 0x03,
+ kFormer = 0x04,
+ kLatter = 0x0C
+ };
+
+ /**
+ * Get time zone offsets from local wall time.
+ * @param date The local wall time to look up.
+ * @param nonExistingTimeOpt Option applied to a non-existing local time
+ * (presumably built from the time type option bit flags
+ * above; confirm against the implementation).
+ * @param duplicatedTimeOpt Option applied to a duplicated local time
+ * (presumably built from the same bit flags; confirm
+ * against the implementation).
+ * @param rawOffset Receives the raw offset.
+ * @param dstOffset Receives the daylight saving offset.
+ * @param status Receives error status code.
+ * @internal
+ */
+ virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt,
+ int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) /*const*/;
+
+protected:
+
+ /**
+ * The time type option bit masks used by getOffsetFromLocal.
+ * Each mask is defined as the widest flag of its group in the public
+ * enum (kDaylight = 0x03, kLatter = 0x0C).
+ * @internal
+ */
+ enum {
+ kStdDstMask = kDaylight,
+ kFormerLatterMask = kLatter
+ };
+
+ /**
+ * Default constructor.
+ * @stable ICU 4.0
+ */
+ BasicTimeZone();
+
+ /**
+ * Construct a timezone with a given ID.
+ * @param id a system time zone ID
+ * @stable ICU 4.0
+ */
+ BasicTimeZone(const UnicodeString &id);
+
+ /**
+ * Copy constructor.
+ * @param source the object to be copied.
+ * @stable ICU 4.0
+ */
+ BasicTimeZone(const BasicTimeZone& source);
+
+ /**
+ * Gets the set of TimeZoneRule instances applicable to the specified time and after.
+ * @param start The start date used for extracting time zone rules
+ * @param initial Receives the InitialTimeZoneRule, always not NULL
+ * @param transitionRules Receives the transition rules, could be NULL
+ * @param status Receives error status code
+ */
+ void getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, UVector*& transitionRules,
+ UErrorCode& status) /*const*/;
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // BASICTZ_H
+
+//eof
View
14 unicode/brkiter.h → icu-1060/unicode/brkiter.h
@@ -1,6 +1,6 @@
/*
********************************************************************************
-* Copyright (C) 1997-2006, International Business Machines
+* Copyright (C) 1997-2007, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************************
*
@@ -26,7 +26,7 @@
* \file
* \brief C++ API: Break Iterator.
*/
-
+
#if UCONFIG_NO_BREAK_ITERATION
U_NAMESPACE_BEGIN
@@ -92,8 +92,8 @@ U_NAMESPACE_BEGIN
* file ubrk.h
* <p>
* Code snippets illustrating the use of the Break Iterator APIs
- * are available in the ICU User Guide,
- * http://icu.sourceforge.net/userguide/boundaryAnalysis.html
+ * are available in the ICU User Guide,
+ * http://icu-project.org/userguide/boundaryAnalysis.html
* and in the sample program icu/source/samples/break/break.cpp"
*
*/
@@ -161,7 +161,7 @@ class U_COMMON_API BreakIterator : public UObject {
* @param status receives any error codes.
* @return The current UText for this break iterator. If an input
* UText was provided, it will always be returned.
- * @draft ICU 3.4
+ * @stable ICU 3.4
*/
virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
@@ -174,7 +174,7 @@ class U_COMMON_API BreakIterator : public UObject {
virtual void setText(const UnicodeString &text) = 0;
/**
- * Reset the break iterator to operate over the text represented by
+ * Reset the break iterator to operate over the text represented by
* the UText. The iterator position is reset to the start.
*
* This function makes a shallow clone of the supplied UText. This means
@@ -184,7 +184,7 @@ class U_COMMON_API BreakIterator : public UObject {
*
* @param text The UText used to change the text.
* @param status receives any error codes.
- * @draft ICU 3.4
+ * @stable ICU 3.4
*/
virtual void setText(UText *text, UErrorCode &status) = 0;
View
2,170 icu-1060/unicode/calendar.h
2,170 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
View
0 unicode/caniter.h → icu-1060/unicode/caniter.h
File renamed without changes.
View
0 unicode/chariter.h → icu-1060/unicode/chariter.h
File renamed without changes.
View
746 icu-1060/unicode/choicfmt.h
@@ -0,0 +1,746 @@
+/*
+********************************************************************************
+* Copyright (C) 1997-2008, International Business Machines
+* Corporation and others. All Rights Reserved.
+********************************************************************************
+*
+* File CHOICFMT.H
+*
+* Modification History:
+*
+* Date Name Description
+* 02/19/97 aliu Converted from java.
+* 03/20/97 helena Finished first cut of implementation and got rid
+* of nextDouble/previousDouble and replaced with
+* boolean array.
+* 4/10/97 aliu Clean up. Modified to work on AIX.
+* 8/6/97 nos Removed overloaded constructor, member var 'buffer'.
+* 07/22/98 stephen Removed operator!= (implemented in Format)
+********************************************************************************
+*/
+
+#ifndef CHOICFMT_H
+#define CHOICFMT_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C++ API: Choice Format.
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/unistr.h"
+#include "unicode/numfmt.h"
+#include "unicode/fieldpos.h"
+#include "unicode/format.h"
+
+U_NAMESPACE_BEGIN
+
+class MessageFormat;
+
+/**
+ * ChoiceFormat converts between ranges of numeric values
+ * and string names for those ranges. A <code>ChoiceFormat</code> splits
+ * the real number line <code>-Inf</code> to <code>+Inf</code> into two
+ * or more contiguous ranges. Each range is mapped to a
+ * string. <code>ChoiceFormat</code> is generally used in a
+ * <code>MessageFormat</code> for displaying grammatically correct
+ * plurals such as &quot;There are 2 files.&quot;</p>
+ *
+ * <p>There are two methods of defining a <code>ChoiceFormat</code>; both
+ * are equivalent. The first is by using a string pattern. This is the
+ * preferred method in most cases. The second method is through direct
+ * specification of the arrays that make up the
+ * <code>ChoiceFormat</code>.</p>
+ *
+ * <p><strong>Patterns</strong></p>
+ *
+ * <p>In most cases, the preferred way to define a
+ * <code>ChoiceFormat</code> is with a pattern. Here is an example of a
+ * <code>ChoiceFormat</code> pattern:</p>
+ *
+ * \htmlonly<pre> 0&#x2264;are no files|1&#x2264;is one file|1&lt;are many files</pre>\endhtmlonly
+ *
+ * <p>or equivalently,</p>
+ *
+ * \htmlonly<pre> 0#are no files|1#is one file|1&lt;are many files</pre>\endhtmlonly
+ *
+ * <p>The pattern consists of a number or <em>range specifiers</em>
+ * separated by vertical bars '|' (U+007C). There is no
+ * vertical bar after the last range. Each range specifier is of the
+ * form:</p>
+ *
+ * \htmlonly<blockquote><em>Number Separator String</em></blockquote>\endhtmlonly
+ *
+ * <p><em>Number</em> is a floating point number that can be parsed by a
+ * default <code>NumberFormat</code> for the US locale. It gives the
+ * lower limit of this range. The lower limit is either inclusive or
+ * exclusive, depending on the <em>separator</em>. The upper limit is
+ * given by the lower limit of the next range. The Unicode infinity
+ * sign \htmlonly&#x221E; \endhtmlonly (U+221E) is recognized for positive infinity. It may be preceded by
+ * '-' (U+002D) to indicate negative infinity.</p>
+ *
+ * <p><em>String</em> is the format string for this range, with special
+ * characters enclosed in single quotes (<code>'The #
+ * sign'</code>). Single quotes themselves are indicated by two single
+ * quotes in a row (<code>'o''clock'</code>).</p>
+ *
+ * <p><em>Separator</em> is one of the following single characters:
+ *
+ * <ul>
+ * <li>\htmlonly'&#x2264;' \endhtmlonly (U+2264) or '#' (U+0023)
+ * indicates that the lower limit given by <em>Number</em> is
+ * inclusive. (The two characters are equivalent to ChoiceFormat.)
+ * This means that the limit value <em>Number</em> belongs to this
+ * range. Another way of saying this is that the corresponding
+ * closure is <code>FALSE</code>.</li>
+ *
+ * <li>'<' (U+003C) indicates that the lower limit given by
+ * <em>Number</em> is exclusive. This means that the value
+ * <em>Number</em> belongs to the prior range. Another way of
+ * saying this is that the corresponding closure is
+ * <code>TRUE</code>.</li>
+ * </ul>
+ *
+ * <p>See below for more information about closures.</p>
+ *
+ * <p><strong>Arrays</strong></p>
+ *
+ * <p>A <code>ChoiceFormat</code> defining <code>n</code> intervals
+ * (<code>n</code> &gt;= 2) is specified by three arrays of
+ * <code>n</code> items:
+ *
+ * <ul>
+ * <li><code>double limits[]</code> gives the start of each
+ * interval. This must be a non-decreasing list of values, none of
+ * which may be <code>NaN</code>.</li>
+ * <li><code>UBool closures[]</code> determines whether each limit
+ * value is contained in the interval below it or in the interval
+ * above it. If <code>closures[i]</code> is <code>FALSE</code>, then
+ * <code>limits[i]</code> is a member of interval
+ * <code>i</code>. Otherwise it is a member of interval
+ * <code>i+1</code>. If no closures array is specified, this is
+ * equivalent to having all closures be <code>FALSE</code>. Closures
+ * allow one to specify half-open, open, or closed intervals.</li>
+ * <li><code>UnicodeString formats[]</code> gives the string label
+ * associated with each interval.</li>
+ * </ul>
+ *
+ * <p><strong>Formatting and Parsing</strong></p>
+ *
+ * <p>During formatting, a number is converted to a
+ * string. <code>ChoiceFormat</code> accomplishes this by mapping the
+ * number to an interval using the following rule. Given a number
+ * <code>X</code> and an index value <code>j</code> in the range
+ * <code>0..n-1</code>, where <code>n</code> is the number of ranges:</p>
+ *
+ * \htmlonly<blockquote>\endhtmlonly<code>X</code> matches <code>j</code> if and only if
+ * <code>limit[j] &lt;= X &lt; limit[j+1]</code>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * <p>(This assumes that all closures are <code>FALSE</code>. If some
+ * closures are <code>TRUE</code> then the relations must be changed to
+ * <code>&lt;=</code> or <code>&lt;</code> as appropriate.) If there is
+ * no match, then either the first or last index is used, depending on
+ * whether the number is too low or too high. Once a number is mapped to
+ * an interval <code>j</code>, the string <code>formats[j]</code> is
+ * output.</p>
+ *
+ * <p>During parsing, a string is converted to a
+ * number. <code>ChoiceFormat</code> finds the element
+ * <code>formats[j]</code> equal to the string, and returns
+ * <code>limits[j]</code> as the parsed value.</p>
+ *
+ * <p><strong>Notes</strong></p>
+ *
+ * <p>The first limit value does not define a range boundary. For
+ * example, in the pattern \htmlonly&quot;<code>1.0#a|2.0#b</code>&quot;\endhtmlonly, the
+ * intervals are [-Inf, 2.0) and [2.0, +Inf]. It appears that the first
+ * interval should be [1.0, 2.0). However, since all values that are too
+ * small are mapped to range zero, the first interval is effectively
+ * [-Inf, 2.0). However, the first limit value <em>is</em> used during
+ * parsing. In this example, <code>parse(&quot;a&quot;)</code> returns
+ * 1.0.</p>
+ *
+ * <p>There are no gaps between intervals and the entire number line is
+ * covered. A <code>ChoiceFormat</code> maps <em>all</em> possible
+ * double values to a finite set of intervals.</p>
+ *
+ * <p>The non-number <code>NaN</code> is mapped to interval zero during
+ * formatting.</p>
+ *
+ * <p><strong>Examples</strong></p>
+ *
+ * <p>Here is an example of two arrays that map the number
+ * <code>1..7</code> to the English day of the week abbreviations
+ * <code>Sun..Sat</code>. No closures array is given; this is the same as
+ * specifying all closures to be <code>FALSE</code>.</p>
+ *
+ * <pre> {1,2,3,4,5,6,7},
+ * {&quot;Sun&quot;,&quot;Mon&quot;,&quot;Tue&quot;,&quot;Wed&quot;,&quot;Thur&quot;,&quot;Fri&quot;,&quot;Sat&quot;}</pre>
+ *
+ * <p>Here is an example that maps the ranges [-Inf, 1), [1, 1], and (1,
+ * +Inf] to three strings. That is, the number line is split into three
+ * ranges: x &lt; 1.0, x = 1.0, and x &gt; 1.0.</p>
+ *
+ * <pre> {0, 1, 1},
+ * {FALSE, FALSE, TRUE},
+ * {&quot;no files&quot;, &quot;one file&quot;, &quot;many files&quot;}</pre>
+ *
+ * <p>Here is a simple example that shows formatting and parsing: </p>
+ *
+ * \code
+ * #include <unicode/choicfmt.h>
+ * #include <unicode/unistr.h>
+ * #include <iostream.h>
+ *
+ * int main(int argc, char *argv[]) {
+ * double limits[] = {1,2,3,4,5,6,7};
+ * UnicodeString monthNames[] = {
+ * "Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
+ * ChoiceFormat fmt(limits, monthNames, 7);
+ * UnicodeString str;
+ * char buf[256];
+ * for (double x = 1.0; x <= 8.0; x += 1.0) {
+ * fmt.format(x, str);
+ * str.extract(0, str.length(), buf, 256, "");
+ * str.truncate(0);
+ * cout << x << " -> "
+ * << buf << endl;
+ * }
+ * cout << endl;
+ * return 0;
+ * }
+ * \endcode
+ *
+ * <p>Here is a more complex example using a <code>ChoiceFormat</code>
+ * constructed from a pattern together with a
+ * <code>MessageFormat</code>.</p>
+ *
+ * \code
+ * #include <unicode/choicfmt.h>
+ * #include <unicode/msgfmt.h>
+ * #include <unicode/unistr.h>
+ * #include <iostream.h>
+ *
+ * int main(int argc, char *argv[]) {
+ * UErrorCode status = U_ZERO_ERROR;
+ * double filelimits[] = {0,1,2};
+ * UnicodeString filepart[] =
+ * {"are no files","is one file","are {0} files"};
+ * ChoiceFormat* fileform = new ChoiceFormat(filelimits, filepart, 3 );
+ * Format* testFormats[] =
+ * {fileform, NULL, NumberFormat::createInstance(status)};
+ * MessageFormat pattform("There {0} on {1}", status );
+ * pattform.adoptFormats(testFormats, 3);
+ * Formattable testArgs[] = {0L, "Disk A"};
+ * FieldPosition fp(0);
+ * UnicodeString str;
+ * char buf[256];
+ * for (int32_t i = 0; i < 4; ++i) {
+ * Formattable fInt(i);
+ * testArgs[0] = fInt;
+ * pattform.format(testArgs, 2, str, fp, status );
+ * str.extract(0, str.length(), buf, "");
+ * str.truncate(0);
+ * cout << "Output for i=" << i << " : " << buf << endl;
+ * }
+ * cout << endl;
+ * return 0;
+ * }
+ * \endcode
+ *
+ * <p><em>User subclasses are not supported.</em> While clients may write
+ * subclasses, such code will not necessarily work and will not be
+ * guaranteed to work stably from release to release.
+ */
+class U_I18N_API ChoiceFormat: public NumberFormat {
+public:
+ /**
+ * Construct a new ChoiceFormat with the limits and the corresponding formats
+ * based on the pattern.
+ *
+ * @param pattern Pattern used to construct object.
+ * @param status Output param to receive success code. If the
+ * pattern cannot be parsed, set to failure code.
+ * @stable ICU 2.0
+ */
+ ChoiceFormat(const UnicodeString& pattern,
+ UErrorCode& status);
+
+
+ /**
+ * Construct a new ChoiceFormat with the given limits and formats. Copy
+ * the limits and formats instead of adopting them.
+ *
+ * @param limits Array of limit values.
+ * @param formats Array of formats.
+ * @param count Size of 'limits' and 'formats' arrays.
+ * @stable ICU 2.0
+ */
+
+ ChoiceFormat(const double* limits,
+ const UnicodeString* formats,
+ int32_t count );
+
+ /**
+ * Construct a new ChoiceFormat with the given limits and formats.
+ * Copy the limits and formats (instead of adopting them). By
+ * default, each limit in the array specifies the inclusive lower
+ * bound of its range, and the exclusive upper bound of the previous
+ * range. However, if the isLimitOpen element corresponding to a
+ * limit is TRUE, then the limit is the exclusive lower bound of its
+ * range, and the inclusive upper bound of the previous range.
+ * @param limits Array of limit values
+ * @param closures Array of booleans specifying whether each
+ * element of 'limits' is open or closed. If FALSE, then the
+ * corresponding limit is a member of the range above it. If TRUE,
+ * then the limit belongs to the range below it.
+ * @param formats Array of formats
+ * @param count Size of 'limits', 'closures', and 'formats' arrays
+ * @stable ICU 2.4
+ */
+ ChoiceFormat(const double* limits,
+ const UBool* closures,
+ const UnicodeString* formats,
+ int32_t count);
+
+ /**
+ * Copy constructor.
+ *
+ * @param that ChoiceFormat object to be copied from
+ * @stable ICU 2.0
+ */
+ ChoiceFormat(const ChoiceFormat& that);
+
+ /**
+ * Assignment operator.
+ *
+ * @param that ChoiceFormat object to be copied
+ * @stable ICU 2.0
+ */
+ const ChoiceFormat& operator=(const ChoiceFormat& that);
+
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~ChoiceFormat();
+
+ /**
+ * Clone this Format object polymorphically. The caller owns the
+ * result and should delete it when done.
+ *
+ * @return a copy of this object
+ * @stable ICU 2.0
+ */
+ virtual Format* clone(void) const;
+
+ /**
+ * Return true if the given Format objects are semantically equal.
+ * Objects of different subclasses are considered unequal.
+ *
+ * @param other ChoiceFormat object to be compared
+ * @return true if other is the same as this.
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const Format& other) const;
+
+ /**
+ * Sets the pattern.
+ * @param pattern The pattern to be applied.
+ * @param status Output param set to success/failure code on
+ * exit. If the pattern is invalid, this will be
+ * set to a failure result.
+ * @stable ICU 2.0
+ */
+ virtual void applyPattern(const UnicodeString& pattern,
+ UErrorCode& status);
+
+ /**
+ * Sets the pattern.
+ * @param pattern The pattern to be applied.
+ * @param parseError Struct to receive information on position
+ * of error if an error is encountered
+ * @param status Output param set to success/failure code on
+ * exit. If the pattern is invalid, this will be
+ * set to a failure result.
+ * @stable ICU 2.0
+ */
+ virtual void applyPattern(const UnicodeString& pattern,
+ UParseError& parseError,
+ UErrorCode& status);
+ /**
+ * Gets the pattern.
+ *
+ * @param pattern Output param which will receive the pattern
+ * Previous contents are deleted.
+ * @return A reference to 'pattern'
+ * @stable ICU 2.0
+ */
+ virtual UnicodeString& toPattern(UnicodeString &pattern) const;
+
+ /**
+ * Set the choices to be used in formatting.
+ *
+ * @param limitsToCopy Contains the top value that you want
+ * parsed with that format, and should be in
+ * ascending sorted order. When formatting X,
+ * the choice will be the i, where limit[i]
+ * &lt;= X &lt; limit[i+1].
+ * @param formatsToCopy The format strings you want to use for each limit.
+ * @param count The size of the above arrays.
+ * @stable ICU 2.0
+ */
+ virtual void setChoices(const double* limitsToCopy,
+ const UnicodeString* formatsToCopy,
+ int32_t count );
+
+ /**
+ * Set the choices to be used in formatting. See class description
+ * for documentation of the limits, closures, and formats arrays.
+ * @param limits Array of limits
+ * @param closures Array of limit booleans
+ * @param formats Array of format string
+ * @param count The size of the above arrays
+ * @stable ICU 2.4
+ */
+ virtual void setChoices(const double* limits,
+ const UBool* closures,
+ const UnicodeString* formats,
+ int32_t count);
+
+ /**
+ * Get the limits passed in the constructor.
+ *
+ * @param count The size of the limits arrays
+ * @return the limits.
+ * @stable ICU 2.0
+ */
+ virtual const double* getLimits(int32_t& count) const;
+
+ /**
+ * Get the limit booleans passed in the constructor. The caller
+ * must not delete the result.
+ *
+ * @param count The size of the arrays
+ * @return the closures
+ * @stable ICU 2.4
+ */
+ virtual const UBool* getClosures(int32_t& count) const;
+
+ /**
+ * Get the formats passed in the constructor.
+ *
+ * @param count The size of the arrays
+ * @return the formats.
+ * @stable ICU 2.0
+ */
+ virtual const UnicodeString* getFormats(int32_t& count) const;
+
+ /**
+ * Format a double or long number using this object's choices.
+ *
+ * @param number The value to be formatted.
+ * @param appendTo Output parameter to receive result.
+ * Result is appended to existing contents.
+ * @param pos On input: an alignment field, if desired.
+ * On output: the offsets of the alignment field.
+ * @return Reference to 'appendTo' parameter.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeString& format(double number,
+ UnicodeString& appendTo,
+ FieldPosition& pos) const;
+ /**
+ * Format an int32_t number using this object's choices.
+ *
+ * @param number The value to be formatted.
+ * @param appendTo Output parameter to receive result.
+ * Result is appended to existing contents.
+ * @param pos On input: an alignment field, if desired.
+ * On output: the offsets of the alignment field.
+ * @return Reference to 'appendTo' parameter.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeString& format(int32_t number,
+ UnicodeString& appendTo,
+ FieldPosition& pos) const;
+
+ /**
+ * Format an int64_t number using this object's choices.
+ *
+ * @param number The value to be formatted.
+ * @param appendTo Output parameter to receive result.
+ * Result is appended to existing contents.
+ * @param pos On input: an alignment field, if desired.
+ * On output: the offsets of the alignment field.
+ * @return Reference to 'appendTo' parameter.
+ * @stable ICU 2.8
+ */
+ virtual UnicodeString& format(int64_t number,
+ UnicodeString& appendTo,
+ FieldPosition& pos) const;
+
+ /**
+ * Format an array of objects using this object's choices.
+ *
+ * @param objs The array of objects to be formatted.
+ * @param cnt The size of objs.
+ * @param appendTo Output parameter to receive result.
+ * Result is appended to existing contents.
+ * @param pos On input: an alignment field, if desired.
+ * On output: the offsets of the alignment field.
+ * @param success Output param set to success/failure code on
+ * exit.
+ * @return Reference to 'appendTo' parameter.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeString& format(const Formattable* objs,
+ int32_t cnt,
+ UnicodeString& appendTo,
+ FieldPosition& pos,
+ UErrorCode& success) const;
+ /**
+ * Format an object using this object's choices.
+ *
+ *
+ * @param obj The object to be formatted.
+ * @param appendTo Output parameter to receive result.
+ * Result is appended to existing contents.
+ * @param pos On input: an alignment field, if desired.
+ * On output: the offsets of the alignment field.
+ * @param status Output param set to success/failure code on
+ * exit.
+ * @return Reference to 'appendTo' parameter.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeString& format(const Formattable& obj,
+ UnicodeString& appendTo,
+ FieldPosition& pos,
+ UErrorCode& status) const;
+
+ /**
+ * Redeclared NumberFormat method.
+ *
+ * @param obj The object to be formatted.
+ * @param appendTo Output parameter to receive result.
+ * Result is appended to existing contents.
+ * @param status Output param set to success/failure code on
+ * exit.
+ * @return Reference to 'appendTo' parameter.
+ * @stable ICU 2.0
+ */
+ UnicodeString& format(const Formattable& obj,
+ UnicodeString& appendTo,
+ UErrorCode& status) const;
+
+ /**
+ * Redeclared NumberFormat method.
+ * Format a double number. These methods call the NumberFormat
+ * pure virtual format() methods with the default FieldPosition.
+ *
+ * @param number The value to be formatted.
+ * @param appendTo Output parameter to receive result.
+ * Result is appended to existing contents.
+ * @return Reference to 'appendTo' parameter.
+ * @stable ICU 2.0
+ */
+ UnicodeString& format( double number,
+ UnicodeString& appendTo) const;
+
+ /**
+ * Redeclared NumberFormat method.
+ * Format a long number. These methods call the NumberFormat
+ * pure virtual format() methods with the default FieldPosition.
+ *
+ * @param number The value to be formatted.
+ * @param appendTo Output parameter to receive result.
+ * Result is appended to existing contents.
+ * @return Reference to 'appendTo' parameter.
+ * @stable ICU 2.0
+ */
+ UnicodeString& format( int32_t number,
+ UnicodeString& appendTo) const;
+
+ /**
+ * Return a long if possible (e.g. within range [LONG_MIN,
+ * LONG_MAX], and with no decimals), otherwise a double. If
+ * IntegerOnly is set, will stop at a decimal point (or equivalent;
+ * e.g. for rational numbers "1 2/3", will stop after the 1).
+ * <P>
+ * If no object can be parsed, parsePosition is unchanged, and NULL is
+ * returned.
+ *
+ * @param text The text to be parsed.
+ * @param result Formattable to be set to the parse result.
+ * If parse fails, return contents are undefined.
+ * @param parsePosition The position to start parsing at on input.
+ * On output, moved to after the last successfully
+ * parse character. On parse failure, does not change.
+ * @see NumberFormat::isParseIntegerOnly
+ * @stable ICU 2.0
+ */
+ virtual void parse(const UnicodeString& text,
+ Formattable& result,
+ ParsePosition& parsePosition) const;
+
+ /**
+ * Return a long if possible (e.g. within range [LONG_MIN,
+ * LONG_MAX], and with no decimals), otherwise a double. If
+ * IntegerOnly is set, will stop at a decimal point (or equivalent;
+ * e.g. for rational numbers "1 2/3", will stop after the 1).
+ * <P>
+ * If no object can be parsed, the failure is reported through status
+ * and the contents of result are undefined.
+ *
+ * @param text The text to be parsed.
+ * @param result Formattable to be set to the parse result.
+ * If parse fails, return contents are undefined.
+ * @param status Output param set to success/failure code on exit.
+ * @see NumberFormat::isParseIntegerOnly
+ * @stable ICU 2.0
+ */
+ virtual void parse(const UnicodeString& text,
+ Formattable& result,
+ UErrorCode& status) const;
+
+
+public:
+ /**
+ * Returns a unique class ID POLYMORPHICALLY. Pure virtual override.
+ * This method is to implement a simple version of RTTI, since not all
+ * C++ compilers support genuine RTTI. Polymorphic operator==() and
+ * clone() methods call this method.
+ *
+ * @return The class ID for this object. All objects of a
+ * given class have the same class ID. Objects of
+ * other classes have different class IDs.
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const;
+
+ /**
+ * Return the class ID for this class. This is useful only for
+ * comparing to a return value from getDynamicClassID(). For example:
+ * <pre>
+ * . Base* polymorphic_pointer = createPolymorphicObject();
+ * . if (polymorphic_pointer->getDynamicClassID() ==
+ * . Derived::getStaticClassID()) ...
+ * </pre>
+ * @return The class ID for all objects of this class.
+ * @stable ICU 2.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+private:
+ // static cache management (thread-safe)
+ // static NumberFormat* getNumberFormat(UErrorCode &status); // call this function to 'check out' a numberformat from the cache.
+ // static void releaseNumberFormat(NumberFormat *adopt); // call this function to 'return' the number format to the cache.
+
+ /**
+ * Converts a string to a double value using a default NumberFormat object
+ * which is static (shared by all ChoiceFormat instances).
+ * @param string the string to be converted with.
+ * @return the converted double number.
+ */
+ static double stod(const UnicodeString& string);
+
+ /**
+ * Converts a double value to a string using a default NumberFormat object
+ * which is static (shared by all ChoiceFormat instances).
+ * @param value the double number to be converted with.
+ * @param string the result string.
+ * @return the converted string.
+ */
+ static UnicodeString& dtos(double value, UnicodeString& string);
+
+ ChoiceFormat(); // default constructor not implemented
+
+ /**
+ * Construct a new ChoiceFormat with the limits and the corresponding formats
+ * based on the pattern.
+ *
+ * @param newPattern Pattern used to construct object.
+ * @param parseError Struct to receive information on position
+ * of error if an error is encountered.
+ * @param status Output param to receive success code. If the
+ * pattern cannot be parsed, set to failure code.
+ * @stable ICU 2.0
+ */
+ ChoiceFormat(const UnicodeString& newPattern,
+ UParseError& parseError,
+ UErrorCode& status);
+
+ friend class MessageFormat;
+ /**
+ * Each ChoiceFormat divides the range -Inf..+Inf into fCount
+ * intervals. The intervals are:
+ *
+ * 0: fChoiceLimits[0]..fChoiceLimits[1]
+ * 1: fChoiceLimits[1]..fChoiceLimits[2]
+ * ...
+ * fCount-2: fChoiceLimits[fCount-2]..fChoiceLimits[fCount-1]
+ * fCount-1: fChoiceLimits[fCount-1]..+Inf
+ *
+ * Interval 0 is special; during formatting (mapping numbers to
+ * strings), it also contains all numbers less than
+ * fChoiceLimits[0], as well as NaN values.
+ *
+ * Interval i maps to and from string fChoiceFormats[i]. When
+ * parsing (mapping strings to numbers), then intervals map to
+ * their lower limit, that is, interval i maps to fChoiceLimits[i].
+ *
+ * The intervals may be closed, half open, or open. This affects
+ * formatting but does not affect parsing. Interval i is affected
+ * by fClosures[i] and fClosures[i+1]. If fClosures[i]
+ * is FALSE, then the value fChoiceLimits[i] is in interval i.
+ * That is, intervals i-1 and i are:
+ *
+ * i-1: ... x < fChoiceLimits[i]
+ * i: fChoiceLimits[i] <= x ...
+ *
+ * If fClosures[i] is TRUE, then the value fChoiceLimits[i] is
+ * in interval i-1. That is, intervals i-1 and i are:
+ *
+ * i-1: ... x <= fChoiceLimits[i]
+ * i: fChoiceLimits[i] < x ...
+ *
+ * Because of the nature of interval 0, fClosures[0] has no
+ * effect.
+
+ */
+ double* fChoiceLimits;
+ UBool* fClosures;
+ UnicodeString* fChoiceFormats;
+ int32_t fCount;
+};
+
+/**
+ * Inline definition of the redeclared NumberFormat method that formats a
+ * Formattable without a FieldPosition. It simply forwards to NumberFormat.
+ */
+inline UnicodeString& ChoiceFormat::format(const Formattable& obj,
+ UnicodeString& appendTo,
+ UErrorCode& status) const
+{
+ // Call the immediate base class (NumberFormat) rather than Format, so
+ // that any behavior the immediate base adds later is picked up here.
+ return NumberFormat::format(obj, appendTo, status);
+}
+
+/**
+ * Inline definition of the redeclared NumberFormat method that formats a
+ * double and appends the result to appendTo. Forwards to NumberFormat.
+ */
+inline UnicodeString& ChoiceFormat::format(double number,
+ UnicodeString& appendTo) const
+{
+ return NumberFormat::format(number, appendTo);
+}
+
+/**
+ * Inline definition of the redeclared NumberFormat method that formats an
+ * int32_t and appends the result to appendTo. Forwards to NumberFormat.
+ */
+inline UnicodeString& ChoiceFormat::format(int32_t number,
+ UnicodeString& appendTo) const
+{
+ return NumberFormat::format(number, appendTo);
+}
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _CHOICFMT
+//eof
View
400 icu-1060/unicode/coleitr.h
@@ -0,0 +1,400 @@
+/*
+ ******************************************************************************
+ * Copyright (C) 1997-2008, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ ******************************************************************************
+ */
+
+/**
+ * \file
+ * \brief C++ API: Collation Element Iterator.
+ */
+
+/**
+* File coleitr.h
+*
+*
+*
+* Created by: Helena Shih
+*
+* Modification History:
+*
+* Date Name Description
+*
+* 8/18/97 helena Added internal API documentation.
+* 08/03/98 erm Synched with 1.2 version CollationElementIterator.java
+* 12/10/99 aliu Ported Thai collation support from Java.
+* 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h)
+* 02/19/01 swquek Removed CollationElementsIterator() since it is
+* private constructor and no calls are made to it
+*/
+
+#ifndef COLEITR_H
+#define COLEITR_H
+
+#include "unicode/utypes.h"
+
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/uobject.h"
+#include "unicode/tblcoll.h"
+#include "unicode/ucoleitr.h"
+
+/**
+ * The UCollationElements struct.
+ * For usage in C programs.
+ * @stable ICU 2.0
+ */
+typedef struct UCollationElements UCollationElements;
+
+U_NAMESPACE_BEGIN
+
+/**
+* The CollationElementIterator class is used as an iterator to walk through
+* each character of an international string. Use the iterator to return the
+* ordering priority of the positioned character. The ordering priority of a
+* character, which we refer to as a key, defines how a character is collated in
+* the given collation object.
+* For example, consider the following in Spanish:
+* <pre>
+* "ca" -> the first key is key('c') and second key is key('a').
+* "cha" -> the first key is key('ch') and second key is key('a').</pre>
+* And in German,
+* <pre> \htmlonly "&#x00E6;b"-> the first key is key('a'), the second key is key('e'), and
+* the third key is key('b'). \endhtmlonly </pre>
+* The key of a character, is an integer composed of primary order(short),
+* secondary order(char), and tertiary order(char). Java strictly defines the
+* size and signedness of its primitive data types. Therefore, the static
+* functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return
+* int32_t to ensure the correctness of the key value.
+* <p>Example of the iterator usage: (without error checking)
+* <pre>
+* \code
+* void CollationElementIterator_Example()
+* {
+* UnicodeString str = "This is a test";
+* UErrorCode success = U_ZERO_ERROR;
+* RuleBasedCollator* rbc =
+* (RuleBasedCollator*) RuleBasedCollator::createInstance(success);
+* CollationElementIterator* c =
+* rbc->createCollationElementIterator( str );
+* int32_t order = c->next(success);
+* c->reset();
+* order = c->previous(success);
+* delete c;
+* delete rbc;
+* }
+* \endcode
+* </pre>
+* <p>
+* CollationElementIterator::next returns the collation order of the next
+* character based on the comparison level of the collator.
+* CollationElementIterator::previous returns the collation order of the
+* previous character based on the comparison level of the collator.
+* The Collation Element Iterator moves only in one direction between calls to
+* CollationElementIterator::reset. That is, CollationElementIterator::next()
+* and CollationElementIterator::previous can not be inter-used. Whenever
+* CollationElementIterator::previous is to be called after
+* CollationElementIterator::next() or vice versa,
+* CollationElementIterator::reset has to be called first to reset the status,
+* shifting pointers to either the end or the start of the string. Hence at the
+* next call of CollationElementIterator::previous or
+* CollationElementIterator::next(), the first or last collation order will be
+* returned.
+* If a change of direction is done without a CollationElementIterator::reset(),
+* the result is undefined.
+* The result of a forward iterate (CollationElementIterator::next) and
+* reversed result of the backward iterate (CollationElementIterator::previous)
+* on the same string are equivalent, if collation orders with the value
+* UCOL_IGNORABLE are ignored.
+* Character based on the comparison level of the collator. A collation order
+* consists of primary order, secondary order and tertiary order. The data
+* type of the collation order is <strong>int32_t</strong>.
+*
+* Note, CollationElementIterator should not be subclassed.
+* @see Collator
+* @see RuleBasedCollator
+* @version 1.8 Jan 16 2001
+*/
+class U_I18N_API CollationElementIterator : public UObject {
+public:
+
+ // CollationElementIterator public data member ------------------------------
+
+ enum {
+ /**
+ * NULLORDER indicates that an error has occurred while processing.
+ * It is also the value documented for next() and previous() when the
+ * end or the start of the string has been reached.
+ * @stable ICU 2.0
+ */
+ NULLORDER = (int32_t)0xffffffff
+ };
+
+ // CollationElementIterator public constructor/destructor -------------------
+
+ /**
+ * Copy constructor.
+ *
+ * @param other the object to be copied from
+ * @stable ICU 2.0
+ */
+ CollationElementIterator(const CollationElementIterator& other);
+
+ /**
+ * Destructor
+ * @stable ICU 2.0
+ */
+ virtual ~CollationElementIterator();
+
+ // CollationElementIterator public methods ----------------------------------
+
+ /**
+ * Returns true if "other" is the same as "this"
+ *
+ * @param other the object to be compared
+ * @return true if "other" is the same as "this"
+ * @stable ICU 2.0
+ */
+ UBool operator==(const CollationElementIterator& other) const;
+
+ /**
+ * Returns true if "other" is not the same as "this".
+ *
+ * @param other the object to be compared
+ * @return true if "other" is not the same as "this"
+ * @stable ICU 2.0
+ */
+ UBool operator!=(const CollationElementIterator& other) const;
+
+ /**
+ * Resets the cursor to the beginning of the string.
+ * @stable ICU 2.0
+ */
+ void reset(void);
+
+ /**
+ * Gets the ordering priority of the next character in the string.
+ * @param status the error code status.
+ * @return the next character's ordering. otherwise returns NULLORDER if an
+ * error has occurred or if the end of string has been reached
+ * @stable ICU 2.0
+ */
+ int32_t next(UErrorCode& status);
+
+ /**
+ * Get the ordering priority of the previous collation element in the string.
+ * @param status the error code status.
+ * @return the previous element's ordering. otherwise returns NULLORDER if an
+ * error has occurred or if the start of string has been reached
+ * @stable ICU 2.0
+ */
+ int32_t previous(UErrorCode& status);
+
+ /**
+ * Gets the primary order of a collation order.
+ * @param order the collation order
+ * @return the primary order of a collation order.
+ * @stable ICU 2.0
+ */
+ static inline int32_t primaryOrder(int32_t order);
+
+ /**
+ * Gets the secondary order of a collation order.
+ * @param order the collation order
+ * @return the secondary order of a collation order.
+ * @stable ICU 2.0
+ */
+ static inline int32_t secondaryOrder(int32_t order);
+
+ /**
+ * Gets the tertiary order of a collation order.
+ * @param order the collation order
+ * @return the tertiary order of a collation order.
+ * @stable ICU 2.0
+ */
+ static inline int32_t tertiaryOrder(int32_t order);
+
+ /**
+ * Return the maximum length of any expansion sequences that end with the
+ * specified comparison order.
+ * @param order a collation order returned by previous or next.
+ * @return maximum size of the expansion sequences ending with the collation
+ * element or 1 if collation element does not occur at the end of any
+ * expansion sequence
+ * @stable ICU 2.0
+ */
+ int32_t getMaxExpansion(int32_t order) const;
+
+ /**
+ * Gets the comparison order in the desired strength. Ignore the other
+ * differences.
+ * @param order The order value
+ * @stable ICU 2.0
+ */
+ int32_t strengthOrder(int32_t order) const;
+
+ /**
+ * Sets the source string.
+ * @param str the source string.
+ * @param status the error code status.
+ * @stable ICU 2.0
+ */
+ void setText(const UnicodeString& str, UErrorCode& status);
+
+ /**
+ * Sets the source text from a character iterator.
+ * @param str the source character iterator.
+ * @param status the error code status.
+ * @stable ICU 2.0
+ */
+ void setText(CharacterIterator& str, UErrorCode& status);
+
+ /**
+ * Checks if a comparison order is ignorable.
+ * @param order the collation order.
+ * @return TRUE if a character is ignorable, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+ static inline UBool isIgnorable(int32_t order);
+
+ /**
+ * Gets the offset of the currently processed character in the source string.
+ * @return the offset of the character.
+ * @stable ICU 2.0
+ */
+ int32_t getOffset(void) const;
+
+ /**
+ * Sets the offset of the currently processed character in the source string.
+ * This method returns nothing; errors are reported through status.
+ * @param newOffset the new offset.
+ * @param status the error code status.
+ * @stable ICU 2.0
+ */
+ void setOffset(int32_t newOffset, UErrorCode& status);
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+protected:
+
+ // CollationElementIterator protected constructors --------------------------
+ /**
+ * Friend declaration: gives RuleBasedCollator access to the protected
+ * constructors declared below.
+ * @stable ICU 2.0
+ */
+ friend class RuleBasedCollator;
+
+ /**
+ * CollationElementIterator constructor. This takes the source string and the
+ * collation object. The cursor will walk thru the source string based on the
+ * predefined collation rules. If the source string is empty, NULLORDER will
+ * be returned on the calls to next().
+ * @param sourceText the source string.
+ * @param order the collation object.
+ * @param status the error code status.
+ * @stable ICU 2.0
+ */
+ CollationElementIterator(const UnicodeString& sourceText,
+ const RuleBasedCollator* order, UErrorCode& status);
+
+ /**
+ * CollationElementIterator constructor. This takes the source text as a
+ * character iterator and the collation object. The cursor will walk thru
+ * the source text based on the predefined collation rules. If the source
+ * text is empty, NULLORDER will be returned on the calls to next().
+ * @param sourceText the source character iterator.
+ * @param order the collation object.
+ * @param status the error code status.
+ * @stable ICU 2.0
+ */
+ CollationElementIterator(const CharacterIterator& sourceText,
+ const RuleBasedCollator* order, UErrorCode& status);
+
+ // CollationElementIterator protected methods -------------------------------
+
+ /**
+ * Assignment operator
+ *
+ * @param other the object to be copied
+ * @stable ICU 2.0
+ */
+ const CollationElementIterator&
+ operator=(const CollationElementIterator& other);
+
+private:
+ CollationElementIterator(); // default constructor not implemented
+
+ // CollationElementIterator private data members ----------------------------
+
+ /**
+ * Data wrapper for collation elements
+ */
+ UCollationElements *m_data_;
+
+ /**
+ * Indicates if m_data_ belongs to this object.
+ */
+ UBool isDataOwned_;
+
+};
+
+// CollationElementIterator inline method definition --------------------------
+
+/**
+* Extracts the primary order from a collation order: the order is masked
+* with RuleBasedCollator::PRIMARYORDERMASK and the masked value is shifted
+* right by RuleBasedCollator::PRIMARYORDERSHIFT.
+* @param order the collation order
+* @return the primary order of the collation order.
+*/
+inline int32_t CollationElementIterator::primaryOrder(int32_t order)
+{
+ // Keep the masked value in an int32_t before shifting, as the mask is
+ // declared elsewhere and the shift must operate on a signed 32-bit value.
+ const int32_t primaryBits = order & RuleBasedCollator::PRIMARYORDERMASK;
+ return (primaryBits >> RuleBasedCollator::PRIMARYORDERSHIFT);
+}
+
+/**
+* Extracts the secondary order from a collation order: the order is masked
+* with RuleBasedCollator::SECONDARYORDERMASK and the masked value is shifted
+* right by RuleBasedCollator::SECONDARYORDERSHIFT.
+* @param order the collation order
+* @return the secondary order of the collation order.
+*/
+inline int32_t CollationElementIterator::secondaryOrder(int32_t order)
+{
+ order &= RuleBasedCollator::SECONDARYORDERMASK;
+ return order >> RuleBasedCollator::SECONDARYORDERSHIFT;
+}
+
+/**
+* Extracts the tertiary order from a collation order by masking it with
+* RuleBasedCollator::TERTIARYORDERMASK; no shift is applied.
+* @param order the collation order
+* @return the tertiary order of the collation order.
+*/
+inline int32_t CollationElementIterator::tertiaryOrder(int32_t order)
+{
+ const int32_t tertiaryBits = order & RuleBasedCollator::TERTIARYORDERMASK;
+ return tertiaryBits;
+}
+
+/**
+* Forwards to the C API ucol_getMaxExpansion() on the wrapped
+* UCollationElements, passing the order as an unsigned 32-bit value.
+* @param order a collation order
+* @return the value returned by ucol_getMaxExpansion().
+*/
+inline int32_t CollationElementIterator::getMaxExpansion(int32_t order) const
+{
+ const uint32_t unsignedOrder = static_cast<uint32_t>(order);
+ return ucol_getMaxExpansion(m_data_, unsignedOrder);
+}
+
+/**
+* Tests whether a collation order is ignorable, i.e. whether its primary
+* order equals RuleBasedCollator::PRIMIGNORABLE.
+* @param order the collation order
+* @return the result of comparing the primary order with PRIMIGNORABLE.
+*/
+inline UBool CollationElementIterator::isIgnorable(int32_t order)
+{
+ const int32_t primary = primaryOrder(order);
+ return (primary == RuleBasedCollator::PRIMIGNORABLE);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif
View
1,035 icu-1060/unicode/coll.h
@@ -0,0 +1,1035 @@
+/*
+******************************************************************************
+* Copyright (C) 1996-2008, International Business Machines *
+* Corporation and others. All Rights Reserved. *
+******************************************************************************
+*/
+
+/**
+ * \file
+ * \brief C++ API: Collation Service.
+ */
+
+/**
+* File coll.h
+*
+* Created by: Helena Shih
+*
+* Modification History:
+*
+* Date Name Description
+* 02/5/97 aliu Modified createDefault to load collation data from
+* binary files when possible. Added related methods
+* createCollationFromFile, chopLocale, createPathName.
+* 02/11/97 aliu Added members addToCache, findInCache, and fgCache.
+* 02/12/97 aliu Modified to create objects from RuleBasedCollator cache.
+* Moved cache out of Collation class.
+* 02/13/97 aliu Moved several methods out of this class and into
+* RuleBasedCollator, with modifications. Modified
+* createDefault() to call new RuleBasedCollator(Locale&)
+* constructor. General clean up and documentation.
+* 02/20/97 helena Added clone, operator==, operator!=, operator=, copy
+* constructor and getDynamicClassID.
+* 03/25/97 helena Updated with platform independent data types.
+* 05/06/97 helena Added memory allocation error detection.
+* 06/20/97 helena Java class name change.
+* 09/03/97 helena Added createCollationKeyValues().
+* 02/10/98 damiba Added compare() with length as parameter.
+* 04/23/99 stephen Removed EDecompositionMode, merged with
+* Normalizer::EMode.
+* 11/02/99 helena Collator performance enhancements. Eliminates the
+* UnicodeString construction and special case for NO_OP.
+* 11/23/99 srl More performance enhancements. Inlining of
+* critical accessors.
+* 05/15/00 helena Added version information API.
+* 01/29/01 synwee Modified into a C++ wrapper which calls C apis
+* (ucoll.h).
+*/
+
+#ifndef COLL_H
+#define COLL_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/uobject.h"
+#include "unicode/ucol.h"
+#include "unicode/normlzr.h"
+#include "unicode/locid.h"
+#include "unicode/uniset.h"
+#include "unicode/umisc.h"
+
+U_NAMESPACE_BEGIN
+
+class StringEnumeration;
+
+#if !UCONFIG_NO_SERVICE
+/**
+ * @stable ICU 2.6
+ */
+class CollatorFactory;
+#endif
+
+/**
+* @stable ICU 2.0
+*/
+class CollationKey;
+
+/**
+* The <code>Collator</code> class performs locale-sensitive string
+* comparison.<br>
+* You use this class to build searching and sorting routines for natural
+* language text.<br>
+* <em>Important: </em>The ICU collation service has been reimplemented
+* in order to achieve better performance and UCA compliance.
+* For details, see the
+* <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
+* collation design document</a>.
+* <p>
+* <code>Collator</code> is an abstract base class. Subclasses implement
+* specific collation strategies. One subclass,
+* <code>RuleBasedCollator</code>, is currently provided and is applicable
+* to a wide set of languages. Other subclasses may be created to handle more
+* specialized needs.
+* <p>
+* Like other locale-sensitive classes, you can use the static factory method,
+* <code>createInstance</code>, to obtain the appropriate
+* <code>Collator</code> object for a given locale. You will only need to
+* look at the subclasses of <code>Collator</code> if you need to
+* understand the details of a particular collation strategy or if you need to
+* modify that strategy.
+* <p>
+* The following example shows how to compare two strings using the
+* <code>Collator</code> for the default locale.
+* \htmlonly<blockquote>\endhtmlonly
+* <pre>
+* \code
+* // Compare two strings in the default locale
+* UErrorCode success = U_ZERO_ERROR;
+* Collator* myCollator = Collator::createInstance(success);
+* if (myCollator->compare("abc", "ABC") < 0)
+* cout << "abc is less than ABC" << endl;
+* else
+* cout << "abc is greater than or equal to ABC" << endl;
+* \endcode
+* </pre>
+* \htmlonly</blockquote>\endhtmlonly
+* <p>
+* You can set a <code>Collator</code>'s <em>strength</em> property to
+* determine the level of difference considered significant in comparisons.
+* Five strengths are provided: <code>PRIMARY</code>, <code>SECONDARY</code>,
+* <code>TERTIARY</code>, <code>QUATERNARY</code> and <code>IDENTICAL</code>.
+* The exact assignment of strengths to language features is locale dependent.
+* For example, in Czech, "e" and "f" are considered primary differences,
+* while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary
+* differences and "e" and "e" are identical. The following shows how both case
+* and accents could be ignored for US English.
+* \htmlonly<blockquote>\endhtmlonly
+* <pre>
+* \code
+* //Get the Collator for US English and set its strength to PRIMARY
+* UErrorCode success = U_ZERO_ERROR;
+* Collator* usCollator = Collator::createInstance(Locale::US, success);
+* usCollator->setStrength(Collator::PRIMARY);
+* if (usCollator->compare("abc", "ABC") == 0)
+* cout << "'abc' and 'ABC' strings are equivalent with strength PRIMARY" << endl;
+* \endcode
+* </pre>
+* \htmlonly</blockquote>\endhtmlonly
+* <p>
+* For comparing strings exactly once, the <code>compare</code> method
+* provides the best performance. When sorting a list of strings however, it
+* is generally necessary to compare each string multiple times. In this case,
+* sort keys provide better performance. The <code>getSortKey</code> methods
+* convert a string to a series of bytes that can be compared bitwise against
+* other sort keys using <code>strcmp()</code>. Sort keys are written as
+* zero-terminated byte strings. They consist of several substrings, one for
+* each collation strength level, that are delimited by 0x01 bytes.
+* If the string code points are appended for UCOL_IDENTICAL, then they are
+* processed for correct code point order comparison and may contain 0x01
+* bytes but not zero bytes.
+* </p>
+* <p>
+* An older set of APIs returns a <code>CollationKey</code> object that wraps
+* the sort key bytes instead of returning the bytes themselves.
+* Its use is deprecated, but it is still available for compatibility with
+* Java.
+* </p>
+* <p>
+* <strong>Note:</strong> <code>Collator</code>s with different Locale,
+* and CollationStrength settings will return different sort
+* orders for the same set of strings. Locales have specific collation rules,
+* and the way in which secondary and tertiary differences are taken into
+* account, for example, will result in a different sorting order for same
+* strings.
+* </p>
+* @see RuleBasedCollator
+* @see CollationKey
+* @see CollationElementIterator
+* @see Locale
+* @see Normalizer
+* @version 2.0 11/15/01
+*/
+
+class U_I18N_API Collator : public UObject {
+public:
+
+ // Collator public enums -----------------------------------------------
+
+ /**
+ * Base letter represents a primary difference. Set comparison level to
+ * PRIMARY to ignore secondary and tertiary differences.<br>
+ * Use this to set the strength of a Collator object.<br>
+ * Example of primary difference, "abc" &lt; "abd"
+ *
+ * Diacritical differences on the same base letter represent a secondary
+ * difference. Set comparison level to SECONDARY to ignore tertiary
+ * differences. Use this to set the strength of a Collator object.<br>
+ * Example of secondary difference, "&auml;" >> "a".
+ *
+ * Uppercase and lowercase versions of the same character represents a
+ * tertiary difference. Set comparison level to TERTIARY to include all
+ * comparison differences. Use this to set the strength of a Collator
+ * object.<br>
+ * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
+ *
+ * Two characters are considered "identical" when they have the same unicode
+ * spellings.<br>
+ * For example, "&auml;" == "&auml;".
+ *
+ * UCollationStrength is also used to determine the strength of sort keys
+ * generated from Collator objects.
+ * @stable ICU 2.0
+ */
+ enum ECollationStrength
+ {
+ PRIMARY = 0, // base-letter differences only, e.g. "abc" < "abd"
+ SECONDARY = 1, // also diacritical differences on the same base letter
+ TERTIARY = 2, // also uppercase/lowercase differences, e.g. "abc" <<< "ABC"
+ QUATERNARY = 3, // NOTE(review): not described in the comment above - confirm meaning
+ IDENTICAL = 15 // characters with the same unicode spellings
+ };
+
+ /**
+ * LESS is returned if source string is compared to be less than target
+ * string in the compare() method.
+ * EQUAL is returned if source string is compared to be equal to target
+ * string in the compare() method.
+ * GREATER is returned if source string is compared to be greater than
+ * target string in the compare() method.
+ * @see Collator#compare
+ * @deprecated ICU 2.6. Use C enum UCollationResult defined in ucol.h
+ */
+ enum EComparisonResult
+ {
+ LESS = -1, // source string compared less than target string
+ EQUAL = 0, // source string compared equal to target string
+ GREATER = 1 // source string compared greater than target string
+ };
+
+ // Collator public destructor -----------------------------------------
+
+ /**
+ * Destructor
+ * @stable ICU 2.0
+ */
+ virtual ~Collator();
+
+ // Collator public methods --------------------------------------------
+
+ /**
+ * Returns true if "other" is the same as "this"
+ * @param other Collator object to be compared
+ * @return true if other is the same as this.
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const Collator& other) const;
+
+ /**
+ * Returns true if "other" is not the same as "this".
+ * @param other Collator object to be compared
+ * @return true if other is not the same as this.
+ * @stable ICU 2.0
+ */
+ virtual UBool operator!=(const Collator& other) const;
+
+ /**
+ * Makes a shallow copy of the current object.
+ * @return a copy of this object
+ * @stable ICU 2.0
+ */
+ virtual Collator* clone(void) const = 0;
+
+ /**
+ * Creates the Collator object for the current default locale.
+ * The default locale is determined by Locale::getDefault.
+ * The UErrorCode& err parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check the
+ * value of U_SUCCESS(err). If you wish more detailed information, you can
+ * check for informational error results which still indicate success.
+ * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_ERROR indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ *
+ * @param err the error code status.
+ * @return the collation object of the default locale.(for example, en_US)
+ * @see Locale#getDefault
+ * @stable ICU 2.0
+ */
+ static Collator* U_EXPORT2 createInstance(UErrorCode& err);
+
+ /**
+ * Gets the table-based collation object for the desired locale. The
+ * resource of the desired locale will be loaded by ResourceLoader.
+ * Locale::ENGLISH is the base collation table and all other languages are
+ * built on top of it with additional language-specific modifications.
+ * The UErrorCode& err parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check
+ * the value of U_SUCCESS(err). If you wish more detailed information, you
+ * can check for informational error results which still indicate success.
+ * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_ERROR indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ * @param loc The locale ID for which to open a collator.
+ * @param err the error code status.
+ * @return the created table-based collation object based on the desired
+ * locale.
+ * @see Locale
+ * @see ResourceLoader
+ * @stable ICU 2.2
+ */
+ static Collator* U_EXPORT2 createInstance(const Locale& loc, UErrorCode& err);
+
+#ifdef U_USE_COLLATION_OBSOLETE_2_6
+ /**
+ * Create a Collator with a specific version.
+ * This is the same as createInstance(loc, err) except that getVersion() of
+ * the returned object is guaranteed to be the same as the version
+ * parameter.
+ * This is designed to be used to open the same collator for a given
+ * locale even when ICU is updated.
+ * The same locale and version guarantees the same sort keys and
+ * comparison results.
+ * <p>
+ * Note: this API will be removed in a future release. Use
+ * <tt>createInstance(const Locale&, UErrorCode&) instead.</tt></p>
+ *
+ * @param loc The locale ID for which to open a collator.
+ * @param version The requested collator version.
+ * @param err A reference to a UErrorCode,
+ * must not indicate a failure before calling this function.
+ * @return A pointer to a Collator, or 0 if an error occurred
+ * or a collator with the requested version is not available.
+ *
+ * @see getVersion
+ * @obsolete ICU 2.6
+ */
+ static Collator *createInstance(const Locale &loc, UVersionInfo version, UErrorCode &err);
+#endif
+
+ /**
+ * The comparison function compares the character data stored in two
+ * different strings. Returns information about whether a string is less
+ * than, greater than or equal to another string.
+ * @param source the source string to be compared with.
+ * @param target the string that is to be compared with the source string.
+ * @return Returns a byte value. GREATER if source is greater
+ * than target; EQUAL if source is equal to target; LESS if source is less
+ * than target
+ * @deprecated ICU 2.6 use the overload with UErrorCode &
+ */
+ virtual EComparisonResult compare(const UnicodeString& source,
+ const UnicodeString& target) const;
+
+ /**
+ * The comparison function compares the character data stored in two
+ * different strings. Returns information about whether a string is less
+ * than, greater than or equal to another string.
+ * @param source the source string to be compared with.
+ * @param target the string that is to be compared with the source string.
+ * @param status possible error code
+ * @return Returns an enum value. UCOL_GREATER if source is greater
+ * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
+ * than target
+ * @stable ICU 2.6
+ */
+ virtual UCollationResult compare(const UnicodeString& source,
+ const UnicodeString& target,
+ UErrorCode &status) const = 0;
+
+ /**
+ * Does the same thing as compare but limits the comparison to a specified
+ * length
+ * @param source the source string to be compared with.
+ * @param target the string that is to be compared with the source string.
+ * @param length the length the comparison is limited to
+ * @return Returns a byte value. GREATER if source (up to the specified
+ * length) is greater than target; EQUAL if source (up to specified
+ * length) is equal to target; LESS if source (up to the specified
+ * length) is less than target.
+ * @deprecated ICU 2.6 use the overload with UErrorCode &
+ */
+ virtual EComparisonResult compare(const UnicodeString& source,
+ const UnicodeString& target,
+ int32_t length) const;
+
+ /**
+ * Does the same thing as compare but limits the comparison to a specified
+ * length
+ * @param source the source string to be compared with.
+ * @param target the string that is to be compared with the source string.
+ * @param length the length the comparison is limited to
+ * @param status possible error code
+ * @return Returns an enum value. UCOL_GREATER if source (up to the specified
+ * length) is greater than target; UCOL_EQUAL if source (up to specified
+ * length) is equal to target; UCOL_LESS if source (up to the specified
+ * length) is less than target.
+ * @stable ICU 2.6
+ */
+ virtual UCollationResult compare(const UnicodeString& source,
+ const UnicodeString& target,
+ int32_t length,
+ UErrorCode &status) const = 0;
+
+ /**
+ * The comparison function compares the character data stored in two
+ * different string arrays. Returns information about whether a string array
+ * is less than, greater than or equal to another string array.
+ * @param source the source string array to be compared with.
+ * @param sourceLength the length of the source string array. If this value
+ * is equal to -1, the string array is null-terminated.
+ * @param target the string that is to be compared with the source string.
+ * @param targetLength the length of the target string array. If this value
+ * is equal to -1, the string array is null-terminated.
+ * @return Returns a byte value. GREATER if source is greater than target;
+ * EQUAL if source is equal to target; LESS if source is less than
+ * target
+ * @deprecated ICU 2.6 use the overload with UErrorCode &
+ */
+ virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
+ const UChar* target, int32_t targetLength)
+ const;
+
+ /**
+ * The comparison function compares the character data stored in two
+ * different string arrays. Returns information about whether a string array
+ * is less than, greater than or equal to another string array.
+ * @param source the source string array to be compared with.
+ * @param sourceLength the length of the source string array. If this value
+ * is equal to -1, the string array is null-terminated.
+ * @param target the string that is to be compared with the source string.
+ * @param targetLength the length of the target string array. If this value
+ * is equal to -1, the string array is null-terminated.
+ * @param status possible error code
+ * @return Returns an enum value. UCOL_GREATER if source is greater
+ * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
+ * than target
+ * @stable ICU 2.6
+ */
+ virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
+ const UChar* target, int32_t targetLength,
+ UErrorCode &status) const = 0;
+
+ /**
+ * Transforms the string into a series of characters that can be compared
+ * with CollationKey::compareTo. It is not possible to restore the original
+ * string from the chars in the sort key. The generated sort key handles
+ * only a limited number of ignorable characters.
+ * <p>Use CollationKey::equals or CollationKey::compare to compare the
+ * generated sort keys.
+ * If the source string is null, a null collation key will be returned.
+ * @param source the source string to be transformed into a sort key.
+ * @param key the collation key to be filled in
+ * @param status the error code status.
+ * @return the collation key of the string based on the collation rules.
+ * @see CollationKey#compare
+ * @deprecated ICU 2.8 Use getSortKey(...) instead
+ */
+ virtual CollationKey& getCollationKey(const UnicodeString& source,
+ CollationKey& key,
+ UErrorCode& status) const = 0;
+
+ /**
+ * Transforms the string into a series of characters that can be compared
+ * with CollationKey::compareTo. It is not possible to restore the original
+ * string from the chars in the sort key. The generated sort key handles
+ * only a limited number of ignorable characters.
+ * <p>Use CollationKey::equals or CollationKey::compare to compare the
+ * generated sort keys.
+ * <p>If the source string is null, a null collation key will be returned.
+ * @param source the source string to be transformed into a sort key.
+ * @param sourceLength length of the source string
+ * @param key the collation key to be filled in
+ * @param status the error code status.
+ * @return the collation key of the string based on the collation rules.
+ * @see CollationKey#compare
+ * @deprecated ICU 2.8 Use getSortKey(...) instead
+ */
+ virtual CollationKey& getCollationKey(const UChar*source,
+ int32_t sourceLength,
+ CollationKey& key,
+ UErrorCode& status) const = 0;
+ /**
+ * Generates the hash code for the collation object
+ * @stable ICU 2.0
+ */
+ virtual int32_t hashCode(void) const = 0;
+
+ /**
+ * Gets the locale of the Collator
+ *
+ * @param type can be either requested, valid or actual locale. For more
+ * information see the definition of ULocDataLocaleType in
+ * uloc.h
+ * @param status the error code status.
+ * @return locale where the collation data lives. If the collator
+ * was instantiated from rules, locale is empty.
+ * @deprecated ICU 2.8 This API is under consideration for revision
+ * in ICU 3.0.
+ */
+ virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const = 0;
+
+ /**
+ * Convenience method for comparing two strings based on the collation rules.
+ * @param source the source string to be compared with.
+ * @param target the target string to be compared with.
+ * @return true if the first string is greater than the second one,
+ * according to the collation rules. false, otherwise.
+ * @see Collator#compare
+ * @stable ICU 2.0
+ */
+ UBool greater(const UnicodeString& source, const UnicodeString& target)
+ const;
+
+ /**
+ * Convenience method for comparing two strings based on the collation rules.
+ * @param source the source string to be compared with.
+ * @param target the target string to be compared with.
+ * @return true if the first string is greater than or equal to the second
+ * one, according to the collation rules. false, otherwise.
+ * @see Collator#compare
+ * @stable ICU 2.0
+ */
+ UBool greaterOrEqual(const UnicodeString& source,
+ const UnicodeString& target) const;
+
+ /**
+ * Convenience method for comparing two strings based on the collation rules.
+ * @param source the source string to be compared with.
+ * @param target the target string to be compared with.
+ * @return true if the strings are equal according to the collation rules.
+ * false, otherwise.
+ * @see Collator#compare
+ * @stable ICU 2.0
+ */
+ UBool equals(const UnicodeString& source, const UnicodeString& target) const;
+
+ /**
+ * Determines the minimum strength that will be used in comparison or
+ * transformation.
+ * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
+ * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference
+ * are ignored.
+ * @return the current comparison level.
+ * @see Collator#setStrength
+ * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
+ */
+ virtual ECollationStrength getStrength(void) const = 0;
+
+ /**
+ * Sets the minimum strength to be used in comparison or transformation.
+ * <p>Example of use:
+ * <pre>
+ * \code
+ * UErrorCode status = U_ZERO_ERROR;
+ * Collator*myCollation = Collator::createInstance(Locale::US, status);
+ * if (U_FAILURE(status)) return;
+ * myCollation->setStrength(Collator::PRIMARY);
+ * // result will be "abc" == "ABC"
+ * // tertiary differences will be ignored
+ * Collator::EComparisonResult result = myCollation->compare("abc", "ABC");
+ * \endcode
+ * </pre>
+ * @see Collator#getStrength
+ * @param newStrength the new comparison level.
+ * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
+ */
+ virtual void setStrength(ECollationStrength newStrength) = 0;
+
+ /**
+ * Get name of the object for the desired Locale, in the desired language
+ * @param objectLocale must be from getAvailableLocales
+ * @param displayLocale specifies the desired locale for output
+ * @param name the fill-in parameter of the return value
+ * @return display-able name of the object for the object locale in the
+ * desired language
+ * @stable ICU 2.0
+ */
+ static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
+ const Locale& displayLocale,
+ UnicodeString& name);
+
+ /**
+ * Get name of the object for the desired Locale, in the language of the
+ * default locale.
+ * @param objectLocale must be from getAvailableLocales
+ * @param name the fill-in parameter of the return value
+ * @return name of the object for the desired locale in the default language
+ * @stable ICU 2.0
+ */
+ static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
+ UnicodeString& name);
+
+ /**
+ * Get the set of Locales for which Collations are installed.
+ *
+ * <p>Note this does not include locales supported by registered collators.
+ * If collators might have been registered, use the overload of getAvailableLocales
+ * that returns a StringEnumeration.</p>
+ *
+ * @param count the output parameter of number of elements in the locale list
+ * @return the list of available locales for which collations are installed
+ * @stable ICU 2.0
+ */
+ static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
+
+ /**
+ * Return a StringEnumeration over the locales available at the time of the call,
+ * including registered locales. If a severe error occurs (such as out of memory
+ * condition) this will return null. If there is no locale data, an empty enumeration
+ * will be returned.
+ * @return a StringEnumeration over the locales available at the time of the call
+ * @stable ICU 2.6
+ */
+ static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
+
+ /**
+ * Create a string enumerator of all possible keywords that are relevant to
+ * collation. At this point, the only recognized keyword for this
+ * service is "collation".
+ * @param status input-output error code
+ * @return a string enumeration over locale strings. The caller is
+ * responsible for closing the result.
+ * @stable ICU 3.0
+ */
+ static StringEnumeration* U_EXPORT2 getKeywords(UErrorCode& status);
+
+ /**
+ * Given a keyword, create a string enumeration of all values
+ * for that keyword that are currently in use.
+ * @param keyword a particular keyword as enumerated by
+ * ucol_getKeywords. If any other keyword is passed in, status is set
+ * to U_ILLEGAL_ARGUMENT_ERROR.
+ * @param status input-output error code
+ * @return a string enumeration over collation keyword values, or NULL
+ * upon error. The caller is responsible for deleting the result.
+ * @stable ICU 3.0
+ */
+ static StringEnumeration* U_EXPORT2 getKeywordValues(const char *keyword, UErrorCode& status);
+
+ /**
+ * Return the functionally equivalent locale for the given
+ * requested locale, with respect to given keyword, for the
+ * collation service. If two locales return the same result, then
+ * collators instantiated for these locales will behave
+ * equivalently. The converse is not always true; two collators
+ * may in fact be equivalent, but return different results, due to
+ * internal details. The return result has no other meaning than
+ * that stated above, and implies nothing as to the relationship
+ * between the two locales. This is intended for use by
+ * applications who wish to cache collators, or otherwise reuse
+ * collators when possible. The functional equivalent may change
+ * over time. For more information, please see the <a
+ * href="http://icu-project.org/userguide/locale.html#services">
+ * Locales and Services</a> section of the ICU User Guide.
+ * @param keyword a particular keyword as enumerated by
+ * ucol_getKeywords.
+ * @param locale the requested locale
+ * @param isAvailable reference to a fillin parameter that
+ * indicates whether the requested locale was 'available' to the
+ * collation service. A locale is defined as 'available' if it
+ * physically exists within the collation locale data.
+ * @param status reference to input-output error code
+ * @return the functionally equivalent collation locale, or the root
+ * locale upon error.
+ * @stable ICU 3.0
+ */
+ static Locale U_EXPORT2 getFunctionalEquivalent(const char* keyword, const Locale& locale,
+ UBool& isAvailable, UErrorCode& status);
+
+#if !UCONFIG_NO_SERVICE
+ /**
+ * Register a new Collator. The collator will be adopted.
+ * @param toAdopt the Collator instance to be adopted
+ * @param locale the locale with which the collator will be associated
+ * @param status the in/out status code, no special meanings are assigned
+ * @return a registry key that can be used to unregister this collator
+ * @stable ICU 2.6
+ */
+ static URegistryKey U_EXPORT2 registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status);
+
+ /**
+ * Register a new CollatorFactory. The factory will be adopted.
+ * @param toAdopt the CollatorFactory instance to be adopted
+ * @param status the in/out status code, no special meanings are assigned
+ * @return a registry key that can be used to unregister this factory
+ * @stable ICU 2.6
+ */
+ static URegistryKey U_EXPORT2 registerFactory(CollatorFactory* toAdopt, UErrorCode& status);
+
+ /**
+ * Unregister a previously-registered Collator or CollatorFactory
+ * using the key returned from the register call. Key becomes
+ * invalid after a successful call and should not be used again.
+ * The object corresponding to the key will be deleted.
+ * @param key the registry key returned by a previous call to registerInstance
+ * or registerFactory
+ * @param status the in/out status code, no special meanings are assigned
+ * @return TRUE if the collator for the key was successfully unregistered
+ * @stable ICU 2.6
+ */
+ static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
+#endif /* UCONFIG_NO_SERVICE */
+
+ /**
+ * Gets the version information for a Collator.
+ * @param info the version # information, the result will be filled in
+ * @stable ICU 2.0
+ */
+ virtual void getVersion(UVersionInfo info) const = 0;
+
+ /**
+ * Returns a unique class ID POLYMORPHICALLY. Pure virtual method.
+ * This method is to implement a simple version of RTTI, since not all C++
+ * compilers support genuine RTTI. Polymorphic operator==() and clone()
+ * methods call this method.
+ * @return The class ID for this object. All objects of a given class have
+ * the same class ID. Objects of other classes have different class
+ * IDs.
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const = 0;
+
+ /**
+ * Universal attribute setter
+ * @param attr attribute type
+ * @param value attribute value
+ * @param status to indicate whether the operation went on smoothly or
+ * there were errors
+ * @stable ICU 2.2
+ */
+ virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
+ UErrorCode &status) = 0;
+
+ /**
+ * Universal attribute getter
+ * @param attr attribute type
+ * @param status to indicate whether the operation went on smoothly or
+ * there were errors
+ * @return attribute value
+ * @stable ICU 2.2
+ */
+ virtual UColAttributeValue getAttribute(UColAttribute attr,
+ UErrorCode &status) = 0;
+
+ /**
+ * Sets the variable top to a collation element value of a string supplied.
+ * @param varTop one or more (if contraction) UChars to which the variable top should be set
+ * @param len length of variable top string. If -1 it is considered to be zero terminated.
+ * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
+ * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
+ * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
+ * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
+ * @stable ICU 2.0
+ */
+ virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) = 0;
+
+ /**
+ * Sets the variable top to a collation element value of a string supplied.
+ * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
+ * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
+ * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
+ * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
+ * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
+ * @stable ICU 2.0
+ */
+ virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status) = 0;
+
+ /**
+ * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
+ * Lower 16 bits are ignored.
+ * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
+ * @param status error code (not changed by function)
+ * @stable ICU 2.0
+ */
+ virtual void setVariableTop(const uint32_t varTop, UErrorCode &status) = 0;
+
+ /**
+ * Gets the variable top value of a Collator.
+ * Lower 16 bits are undefined and should be ignored.
+ * @param status error code (not changed by function). If error code is set, the return value is undefined.
+ * @return the variable top value; the lower 16 bits are undefined and should be ignored.
+ * @stable ICU 2.0
+ */
+ virtual uint32_t getVariableTop(UErrorCode &status) const = 0;
+
+ /**
+ * Get an UnicodeSet that contains all the characters and sequences
+ * tailored in this collator.
+ * @param status error code of the operation
+ * @return a pointer to a UnicodeSet object containing all the
+ * code points and sequences that may sort differently than
+ * in the UCA. The object must be disposed of by using delete
+ * @stable ICU 2.4
+ */
+ virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
+
+
+ /**
+ * Thread safe cloning operation
+ * @return pointer to the new clone, user should remove it.
+ * @stable ICU 2.2
+ */
+ virtual Collator* safeClone(void) = 0;
+
+ /**
+ * Get the sort key as an array of bytes from an UnicodeString.
+ * Sort key byte arrays are zero-terminated and can be compared using
+ * strcmp().
+ * @param source string to be processed.
+ * @param result buffer to store result in. If NULL, number of bytes needed
+ * will be returned.
+ * @param resultLength length of the result buffer. If it is not large enough, the
+ * buffer will be filled to capacity.
+ * @return Number of bytes needed for storing the sort key
+ * @stable ICU 2.2
+ */
+ virtual int32_t getSortKey(const UnicodeString& source,
+ uint8_t* result,
+ int32_t resultLength) const = 0;
+
+ /**
+ * Get the sort key as an array of bytes from an UChar buffer.
+ * Sort key byte arrays are zero-terminated and can be compared using
+ * strcmp().
+ * @param source string to be processed.
+ * @param sourceLength length of string to be processed.
+ * If -1, the string is 0 terminated and length will be decided by the
+ * function.
+ * @param result buffer to store result in. If NULL, number of bytes needed
+ * will be returned.
+ * @param resultLength length of the result buffer. If it is not large enough, the
+ * buffer will be filled to capacity.
+ * @return Number of bytes needed for storing the sort key
+ * @stable ICU 2.2
+ */
+ virtual int32_t getSortKey(const UChar*source, int32_t sourceLength,
+ uint8_t*result, int32_t resultLength) const = 0;
+
+ /**
+ * Produce a bound for a given sortkey and a number of levels.
+ * Return value is always the number of bytes needed, regardless of
+ * whether the result buffer was big enough or even valid.<br>
+ * Resulting bounds can be used to produce a range of strings that are
+ * between upper and lower bounds. For example, if bounds are produced
+ * for a sortkey of string "smith", strings between upper and lower
+ * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
+ * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
+ * is produced, strings matched would be as above. However, if bound
+ * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
+ * also match "Smithsonian" and similar.<br>
+ * For more on usage, see example in cintltst/capitst.c in procedure
+ * TestBounds.
+ * Sort keys may be compared using <TT>strcmp</TT>.
+ * @param source The source sortkey.
+ * @param sourceLength The length of source, or -1 if null-terminated.
+ * (If an unmodified sortkey is passed, it is always null
+ * terminated).
+ * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which
+ * produces a lower inclusive bound, UCOL_BOUND_UPPER, that
+ * produces upper bound that matches strings of the same length
+ * or UCOL_BOUND_UPPER_LONG that matches strings that have the
+ * same starting substring as the source string.
+ * @param noOfLevels Number of levels required in the resulting bound (for most
+ * uses, the recommended value is 1). See users guide for
+ * explanation on number of levels a sortkey can have.
+ * @param result A pointer to a buffer to receive the resulting sortkey.
+ * @param resultLength The maximum size of result.
+ * @param status Used for returning error code if something went wrong. If the
+ * number of levels requested is higher than the number of levels
+ * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is
+ * issued.
+ * @return The size needed to fully store the bound.
+ * @see ucol_keyHashCode
+ * @stable ICU 2.1
+ */
+ static int32_t U_EXPORT2 getBound(const uint8_t *source,
+ int32_t sourceLength,
+ UColBoundMode boundType,
+ uint32_t noOfLevels,
+ uint8_t *result,
+ int32_t resultLength,
+ UErrorCode &status);
+
+
+protected:
+
+ // Collator protected constructors -------------------------------------
+
+ /**
+ * Default constructor.
+ * Constructor is different from the old default Collator constructor.
+ * The task for determining the default collation strength and normalization
+ * mode is left to the child class.
+ * @stable ICU 2.0
+ */
+ Collator();
+
+ /**
+ * Constructor.
+ * Empty constructor, does not handle the arguments.
+ * This constructor is done for backward compatibility with 1.7 and 1.8.
+ * The task for handling the argument collation strength and normalization
+ * mode is left to the child class.
+ * @param collationStrength collation strength
+ * @param decompositionMode normalization mode
+ * @deprecated ICU 2.4. Subclasses should use the default constructor
+ * instead and handle the strength and normalization mode themselves.
+ */
+ Collator(UCollationStrength collationStrength,
+ UNormalizationMode decompositionMode);
+
+ /**
+ * Copy constructor.
+ * @param other Collator object to be copied from
+ * @stable ICU 2.0
+ */
+ Collator(const Collator& other);
+
+ // Collator protected methods -----------------------------------------
+
+
+ /**
+ * Used internally by registration to define the requested and valid locales.
+ * @param requestedLocale the requested locale
+ * @param validLocale the valid locale
+ * @param actualLocale the actual locale
+ * @internal
+ */
+ virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
+
+public:
+#if !UCONFIG_NO_SERVICE
+ /**
+ * used only by ucol_open, not for public use
+ * @internal
+ */
+ static UCollator* createUCollator(const char* loc, UErrorCode* status);
+#endif
+private:
+ /**
+ * Assignment operator. Private for now.
+ * @internal
+ */
+ Collator& operator=(const Collator& other);
+
+ friend class CFactory;
+ friend class SimpleCFactory;
+ friend class ICUCollatorFactory;
+ friend class ICUCollatorService;
+ static Collator* makeInstance(const Locale& desiredLocale,
+ UErrorCode& status);
+
+ // Collator private data members ---------------------------------------
+
+ /*
+ synwee : removed as attributes to be handled by child class
+ UCollationStrength strength;
+ Normalizer::EMode decmp;
+ */
+ /* This is useless information */
+/* static const UVersionInfo fVersion;*/
+};
+
+#if !UCONFIG_NO_SERVICE
+/**
+ * A factory, used with registerFactory, that creates multiple collators and provides
+ * display names for them. A factory supports some number of locales-- these are the
+ * locales for which it can create collators. The factory can be visible, in which
+ * case the supported locales will be enumerated by getAvailableLocales, or invisible,
+ * in which they are not. Invisible locales are still supported, they are just not
+ * listed by getAvailableLocales.
+ * <p>
+ * If standard locale display names are sufficient, Collator instances can
+ * be registered using registerInstance instead.</p>
+ * <p>
+ * Note: if the collators are to be used from C APIs, they must be instances