<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -751,6 +751,10 @@ class PrepareBuild(CommandLineApp):
                 self.sys_include_dirs.insert(0, '/usr/local/stow/cppunit/include')
                 self.sys_library_dirs.insert(0, '/usr/local/stow/cppunit/lib')
 
+            if exists('/usr/local/stow/icu/include'):
+                self.sys_include_dirs.insert(0, '/usr/local/stow/icu/include')
+                self.sys_library_dirs.insert(0, '/usr/local/stow/icu/lib')
+
             self.CXXFLAGS.append('-march=nocona')
             self.CXXFLAGS.append('-msse3')
             self.CPPFLAGS.append('-D_GLIBCXX_FULLY_DYNAMIC_STRING=1')
@@ -979,6 +983,14 @@ class PrepareBuild(CommandLineApp):
                 self.sys_include_dirs.insert(0, '/usr/local/stow/cppunit-debug/include')
                 self.sys_library_dirs.insert(0, '/usr/local/stow/cppunit-debug/lib')
 
+            if exists('/usr/local/stow/icu-debug/include'):
+                if '/usr/local/stow/icu/include' in self.sys_include_dirs:
+                    self.sys_include_dirs.remove('/usr/local/stow/icu/include')
+                    self.sys_library_dirs.remove('/usr/local/stow/icu/lib')
+
+                self.sys_include_dirs.insert(0, '/usr/local/stow/icu-debug/include')
+                self.sys_library_dirs.insert(0, '/usr/local/stow/icu-debug/lib')
+
             if exists('/opt/local/lib/libboost_regex-d.a'):
                 self.envvars['BOOST_HOME']   = '/opt/local'
                 self.envvars['BOOST_SUFFIX'] = '-d'
@@ -988,9 +1000,9 @@ class PrepareBuild(CommandLineApp):
 
                 self.sys_include_dirs.append('/opt/local/include/boost')
 
-            elif exists('/usr/local/lib/libboost_regex-xgcc44-sd-1_40.a'):
+            elif exists('/usr/local/lib/libboost_regex-xgcc44-d-1_40.a'):
                 self.envvars['BOOST_HOME']   = '/usr/local'
-                self.envvars['BOOST_SUFFIX'] = '-xgcc44-sd-1_40'
+                self.envvars['BOOST_SUFFIX'] = '-xgcc44-d-1_40'
                 self.log.info('Setting BOOST_SUFFIX  =&gt; %s' %
                               self.envvars['BOOST_SUFFIX'])
 
@@ -1005,9 +1017,9 @@ class PrepareBuild(CommandLineApp):
 
                 self.sys_include_dirs.append('/opt/local/include/boost')
 
-            elif exists('/usr/local/lib/libboost_regex-xgcc44-s-1_40.a'):
+            elif exists('/usr/local/lib/libboost_regex-xgcc44-1_40.a'):
                 self.envvars['BOOST_HOME']   = '/usr/local'
-                self.envvars['BOOST_SUFFIX'] = '-xgcc44-s-1_40'
+                self.envvars['BOOST_SUFFIX'] = '-xgcc44-1_40'
                 self.log.info('Setting BOOST_SUFFIX  =&gt; %s' %
                               self.envvars['BOOST_SUFFIX'])
 </diff>
      <filename>acprep</filename>
    </modified>
    <modified>
      <diff>@@ -24,7 +24,7 @@ N $
     Income:Salary
 
 2004/05/14 * Another d&#224;y in which there is P&#225;ying
-    &#1056;&#1091;&#1089;&#1089;&#1082;&#1080;&#1081; &#1103;&#1079;&#1099;&#1082;:&#1056;&#1091;&#1089;&#1089;&#1082;&#1080;&#1081; &#1103;&#1079;&#1099;&#1082;:&#1056;&#1091;&#1089;&#1089;&#1082;&#1080;&#1081; &#1103;&#1079;&#1099;&#1082;:&#1056;&#1091;&#1089;&#1089;&#1082;&#1080;&#1081; &#1103;&#1079;&#1099;&#1082;  $1000.00
+    &#1056;&#1091;&#1089;&#1089;&#1082;&#1080;&#1081; &#1103;&#1079;&#1099;&#1082;:&#1040;&#1082;&#1090;&#1080;&#1074;&#1099;:&#1056;&#1091;&#1089;&#1089;&#1082;&#1080;&#1081; &#1103;&#1079;&#1099;&#1082;:&#1056;&#1091;&#1089;&#1089;&#1082;&#1080;&#1081; &#1103;&#1079;&#1099;&#1082;    $1000.00
     Income:Salary
 
 2004/05/27 Book Store</diff>
      <filename>doc/sample.dat</filename>
    </modified>
    <modified>
      <diff>@@ -2,34 +2,57 @@
 # This is only important if you intend to produce a Ledger binary for
 # installation.
 
-#ARCH_CFLAGS = -g -arch i386 -arch ppc -isysroot /Developer/SDKs/MacOSX10.5.sdk
-#ARCH_LDFLAGS = -g -arch i386 -arch ppc -Wl,-syslibroot,/Developer/SDKs/MacOSX10.5.sdk
-
 STOW_ROOT = /usr/local/stow
 
 BOOST_SOURCE  = boost
 BOOST_VERSION = 1_40_0
 
-# architecture=combined
+icu-release:
+	-(cd icu/source; make distclean)
+	(cd icu/source; sh autogen.sh; \
+	 ./configure CPPFLAGS=&quot;&quot; \
+		     CFLAGS=&quot;$(ARCH_CFLAGS)&quot; \
+		     LDFLAGS=&quot;$(ARCH_LDFLAGS)&quot; \
+		     CC=&quot;$(CC)&quot; CXX=&quot;$(CXX)&quot; LD=&quot;$(LD)&quot; \
+		     --enable-static \
+		     --prefix=$(STOW_ROOT)/icu &amp;&amp; \
+	 make install)
+
+icu-debug:
+	-(cd icu/source; make distclean)
+	(cd icu/source; sh autogen.sh; \
+	 ./configure CPPFLAGS=&quot;-D_GLIBCXX_DEBUG=1&quot; \
+		     CFLAGS=&quot;-g $(ARCH_CFLAGS)&quot; \
+		     LDFLAGS=&quot;-g $(ARCH_LDFLAGS)&quot; \
+		     CC=&quot;$(CC)&quot; CXX=&quot;$(CXX)&quot; LD=&quot;$(LD)&quot; \
+		     --enable-static --enable-debug \
+		     --prefix=$(STOW_ROOT)/icu-debug &amp;&amp; \
+	 make install)
+
+icu-build: icu-release icu-debug
+
 boost-release:
 	(cd $(BOOST_SOURCE) &amp;&amp; \
 	bjam release --prefix=$(STOW_ROOT)/boost_$(BOOST_VERSION) \
 	    --build-dir=$(HOME)/Products/boost_$(BOOST_VERSION) \
-	    --toolset=darwin --build-type=complete --layout=versioned install)
+	    --toolset=darwin --build-type=complete --layout=versioned \
+	    -sHAVE_ICU=1 -sICU_PATH=/usr/local/stow/icu install)
 
 boost-debug:
 	(cd $(BOOST_SOURCE) &amp;&amp; \
 	bjam debug --prefix=$(STOW_ROOT)/boost_$(BOOST_VERSION) \
 	    --build-dir=$(HOME)/Products/boost_$(BOOST_VERSION) \
 	    --toolset=darwin --build-type=complete --layout=versioned \
-	    define=_GLIBCXX_DEBUG=1 install)
+	    define=_GLIBCXX_DEBUG=1 \
+	    -sHAVE_ICU=1 -sICU_PATH=/usr/local/stow/icu-debug install)
 
 boost-build: boost-release boost-debug
 
 cppunit-release:
 	-(cd cppunit; make distclean)
 	(cd cppunit; sh autogen.sh; \
-	 ./configure CFLAGS=&quot;$(ARCH_CFLAGS)&quot; \
+	 ./configure CPPFLAGS=&quot;&quot; \
+		     CFLAGS=&quot;$(ARCH_CFLAGS)&quot; \
 		     LDFLAGS=&quot;$(ARCH_LDFLAGS)&quot; \
 		     CC=&quot;$(CC)&quot; CXX=&quot;$(CXX)&quot; LD=&quot;$(LD)&quot; \
 		     --prefix=$(STOW_ROOT)/cppunit &amp;&amp; \
@@ -47,4 +70,4 @@ cppunit-debug:
 
 cppunit-build: cppunit-release cppunit-debug
 
-build-all: boost-build cppunit-build
+all: boost-build cppunit-build</diff>
      <filename>lib/Makefile</filename>
    </modified>
    <modified>
      <diff>@@ -307,7 +307,7 @@ namespace {
 	DEBUG(&quot;derive.xact&quot;, &quot;Setting note  from match: &quot; &lt;&lt; *added-&gt;note);
 #endif
     } else {
-      added-&gt;payee = tmpl.payee_mask.expr.str();
+      added-&gt;payee = tmpl.payee_mask.str();
       DEBUG(&quot;derive.xact&quot;, &quot;Setting payee from template: &quot; &lt;&lt; added-&gt;payee);
     }
 
@@ -403,14 +403,14 @@ namespace {
 
 	    account_t * acct = NULL;
 	    if (! acct) {
-	      acct = journal.find_account_re(post.account_mask-&gt;expr.str());
+	      acct = journal.find_account_re(post.account_mask-&gt;str());
 #if defined(DEBUG_ON)
 	      if (acct)
 		DEBUG(&quot;derive.xact&quot;, &quot;Found account as a regular expression&quot;);
 #endif
 	    }
 	    if (! acct) {
-	      acct = journal.find_account(post.account_mask-&gt;expr.str());
+	      acct = journal.find_account(post.account_mask-&gt;str());
 #if defined(DEBUG_ON)
 	      if (acct)
 		DEBUG(&quot;derive.xact&quot;, &quot;Found (or created) account by name&quot;);</diff>
      <filename>src/derive.cc</filename>
    </modified>
    <modified>
      <diff>@@ -43,7 +43,11 @@ mask_t::mask_t(const string&amp; pat) : expr()
 
 mask_t&amp; mask_t::operator=(const string&amp; pat)
 {
-  expr.assign(pat.c_str(), regex::perl | regex::icase);
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+  expr = boost::make_u32regex(pat.c_str(), boost::regex::perl | boost::regex::icase);
+#else
+  expr.assign(pat.c_str(), boost::regex::perl | boost::regex::icase);
+#endif
   VERIFY(valid());
   return *this;
 }</diff>
      <filename>src/mask.cc</filename>
    </modified>
    <modified>
      <diff>@@ -45,6 +45,9 @@
 #define _MASK_H
 
 #include &quot;utils.h&quot;
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+#include &quot;unistring.h&quot;
+#endif
 
 namespace ledger {
 
@@ -56,7 +59,11 @@ namespace ledger {
 class mask_t
 {
 public:
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+  boost::u32regex expr;
+#else
   boost::regex expr;
+#endif
 
   explicit mask_t(const string&amp; pattern);
 
@@ -76,17 +83,41 @@ public:
     return expr == other.expr;
   }
 
-  bool match(const string&amp; str) const {
+  bool match(const string&amp; text) const {
+#if defined(HAVE_BOOST_REGEX_UNICODE)
     DEBUG(&quot;mask.match&quot;,
-	  &quot;Matching: \&quot;&quot; &lt;&lt; str &lt;&lt; &quot;\&quot; =~ /&quot; &lt;&lt; expr.str() &lt;&lt; &quot;/ = &quot;
-	  &lt;&lt; (boost::regex_search(str, expr) ? &quot;true&quot; : &quot;false&quot;));
-    return boost::regex_search(str, expr);
+	  &quot;Matching: \&quot;&quot; &lt;&lt; text &lt;&lt; &quot;\&quot; =~ /&quot; &lt;&lt; str() &lt;&lt; &quot;/ = &quot;
+	  &lt;&lt; (boost::u32regex_search(text, expr) ? &quot;true&quot; : &quot;false&quot;));
+    return boost::u32regex_search(text, expr);
+#else
+    DEBUG(&quot;mask.match&quot;,
+	  &quot;Matching: \&quot;&quot; &lt;&lt; text &lt;&lt; &quot;\&quot; =~ /&quot; &lt;&lt; str() &lt;&lt; &quot;/ = &quot;
+	  &lt;&lt; (boost::regex_search(text, expr) ? &quot;true&quot; : &quot;false&quot;));
+    return boost::regex_search(text, expr);
+#endif
   }
 
   bool empty() const {
     return expr.empty();
   }
 
+  string str() const {
+    if (! empty()) {
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+      assert(sizeof(boost::uint32_t) == sizeof(UChar32));
+      unistring ustr;
+      std::basic_string&lt;UChar32&gt; expr_str = expr.str();
+      std::copy(expr_str.begin(), expr_str.end(),
+		std::back_inserter(ustr.utf32chars));
+      return ustr.extract();
+#else
+      return expr.str();
+#endif
+    } else {
+      return empty_string;
+    }
+  }
+
   bool valid() const {
     if (expr.status() != 0) {
       DEBUG(&quot;ledger.validate&quot;, &quot;mask_t: expr.status() != 0&quot;);
@@ -108,7 +139,7 @@ private:
       ar &amp; temp;
       *this = temp;
     } else {
-      temp = expr.str();
+      temp = str();
       ar &amp; temp;
     }
   }
@@ -116,7 +147,7 @@ private:
 };
 
 inline std::ostream&amp; operator&lt;&lt;(std::ostream&amp; out, const mask_t&amp; mask) {
-  out &lt;&lt; mask.expr.str();
+  out &lt;&lt; mask.str();
   return out;
 }
 </diff>
      <filename>src/mask.h</filename>
    </modified>
    <modified>
      <diff>@@ -246,7 +246,7 @@ namespace {
       if (env.value_at(0).is_string())
 	account = master-&gt;find_account(env.get&lt;string&gt;(0), false);
       else if (env.value_at(0).is_mask())
-	account = master-&gt;find_account_re(env.get&lt;mask_t&gt;(0).expr.str());
+	account = master-&gt;find_account_re(env.get&lt;mask_t&gt;(0).str());
     } else {
       account = env-&gt;reported_account();
     }</diff>
      <filename>src/post.cc</filename>
    </modified>
    <modified>
      <diff>@@ -322,7 +322,7 @@ value_t report_t::fn_account_total(call_scope_t&amp; args)
     acct = session.journal-&gt;find_account(name, false);
   }
   else if (args[0].is_mask()) {
-    name = args[0].as_mask().expr.str();
+    name = args[0].as_mask().str();
     acct = session.journal-&gt;find_account_re(name);
   }
   else {</diff>
      <filename>src/report.cc</filename>
    </modified>
    <modified>
      <diff>@@ -164,7 +164,11 @@ typedef std::ostream::pos_type ostream_pos_type;
 #include &lt;boost/random/uniform_int.hpp&gt;
 #include &lt;boost/random/uniform_real.hpp&gt;
 #include &lt;boost/random/variate_generator.hpp&gt;
+#if defined(HAVE_BOOST_REGEX_UNICODE)
+#include &lt;boost/regex/icu.hpp&gt;
+#else
 #include &lt;boost/regex.hpp&gt;
+#endif // HAVE_BOOST_REGEX_UNICODE
 #include &lt;boost/variant.hpp&gt;
 #include &lt;boost/version.hpp&gt;
 </diff>
      <filename>src/system.hh.in</filename>
    </modified>
    <modified>
      <diff>@@ -59,12 +59,15 @@ namespace ledger {
  */
 class unistring
 {
+public:
   std::vector&lt;boost::uint32_t&gt; utf32chars;
 
-public:
+  unistring() {
+    TRACE_CTOR(unistring, &quot;&quot;);
+  }
   unistring(const std::string&amp; input)
   {
-    TRACE_CTOR(unistring, &quot;&quot;);
+    TRACE_CTOR(unistring, &quot;std::string&quot;);
 
     const char * p   = input.c_str();
     std::size_t	 len = input.length();</diff>
      <filename>src/unistring.h</filename>
    </modified>
    <modified>
      <diff>@@ -193,6 +193,29 @@ else
   AC_MSG_FAILURE(&quot;Could not find boost_regex library (set CPPFLAGS and LDFLAGS?)&quot;)
 fi
 
+AC_CACHE_CHECK(
+  [if boost_regex w/ICU is available],
+  [boost_regex_icu_avail_cv_],
+  [boost_regex_icu_save_libs=$LIBS
+   LIBS=&quot;-licuuc $LIBS&quot;
+   AC_LANG_PUSH(C++)
+   AC_LINK_IFELSE(
+     [AC_LANG_PROGRAM(
+       [[#include &lt;boost/regex/icu.hpp&gt;
+         using namespace boost;]],
+       [[std::string text = &quot;&#1040;&#1082;&#1090;&#1080;&#1074;&#1099;&quot;;
+         u32regex r = make_u32regex(&quot;&#1072;&#1082;&#1090;&#1080;&#1074;&#1099;&quot;, regex::perl | regex::icase);
+	 return u32regex_search(text, r) ? 0 : 1;]])],
+     [boost_regex_icu_avail_cv_=true],
+     [boost_regex_icu_avail_cv_=false])
+   AC_LANG_POP
+   LIBS=$boost_regex_icu_save_libs])
+
+if [test x$boost_regex_icu_avail_cv_ = xtrue ]; then
+  AC_DEFINE([HAVE_BOOST_REGEX_UNICODE], [1], [If the boost_regex library w/ICU is available])
+  LIBS=&quot;-licuuc $LIBS&quot;
+fi
+
 # check for boost_date_time
 AC_CACHE_CHECK(
   [if boost_date_time is available],</diff>
      <filename>tools/configure.ac</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>95a068f5e4b0e5c06fd9824f7f999248e28fee7b</id>
    </parent>
  </parents>
  <author>
    <name>John Wiegley</name>
    <email>johnw@newartisans.com</email>
  </author>
  <url>http://github.com/jwiegley/ledger/commit/c8641a6de65670b8833992c94c51a586a6434a74</url>
  <id>c8641a6de65670b8833992c94c51a586a6434a74</id>
  <committed-date>2009-11-07T05:34:13-08:00</committed-date>
  <authored-date>2009-11-07T05:32:44-08:00</authored-date>
  <message>Added support for Boost.Regex w/ ICU

This allows for correct searching of UTF-8 encoded strings, such as
lower-case versions of Russian words to find mixed-case words.</message>
  <tree>eb59642cd3296a98ec4c7a73ca319b1c57c2f7ad</tree>
  <committer>
    <name>John Wiegley</name>
    <email>johnw@newartisans.com</email>
  </committer>
</commit>
