<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array">
    <added>
      <filename>activesupport/lib/active_support/multibyte/utils.rb</filename>
    </added>
    <added>
      <filename>activesupport/test/multibyte_utils_test.rb</filename>
    </added>
  </added>
  <modified type="array">
    <modified>
      <diff>@@ -29,7 +29,35 @@ module ActiveSupport #:nodoc:
     #
     # Example:
     #   ActiveSupport::Multibyte.proxy_class = CharsForUTF32
-    mattr_accessor :proxy_class
-    self.proxy_class = ActiveSupport::Multibyte::Chars
+    def self.proxy_class=(klass)
+      @proxy_class = klass
+    end
+
+    # Returns the currect proxy class
+    def self.proxy_class
+      @proxy_class ||= ActiveSupport::Multibyte::Chars
+    end
+
+    # Regular expressions that describe valid byte sequences for a character
+    VALID_CHARACTER = {
+      # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
+      'UTF-8' =&gt; /\A(?:
+                  [\x00-\x7f]                                         |
+                  [\xc2-\xdf] [\x80-\xbf]                             |
+                  \xe0        [\xa0-\xbf] [\x80-\xbf]                 |
+                  [\xe1-\xef] [\x80-\xbf] [\x80-\xbf]                 |
+                  \xf0        [\x90-\xbf] [\x80-\xbf] [\x80-\xbf]     |
+                  [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf]     |
+                  \xf4        [\x80-\x8f] [\x80-\xbf] [\x80-\xbf])\z /xn,
+      # Quick check for valid Shift-JIS characters, disregards the odd-even pairing
+      'Shift_JIS' =&gt; /\A(?:
+                  [\x00-\x7e \xa1-\xdf]                                     |
+                  [\x81-\x9f \xe0-\xef] [\x40-\x7e \x80-\x9e \x9f-\xfc])\z /xn
+    }
   end
 end
+
+require 'active_support/multibyte/chars'
+require 'active_support/multibyte/exceptions'
+require 'active_support/multibyte/unicode_database'
+require 'active_support/multibyte/utils'</diff>
      <filename>activesupport/lib/active_support/multibyte.rb</filename>
    </modified>
    <modified>
      <diff>@@ -74,16 +74,7 @@ module ActiveSupport #:nodoc:
       UNICODE_TRAILERS_PAT = /(#{codepoints_to_pattern(UNICODE_LEADERS_AND_TRAILERS)})+\Z/
       UNICODE_LEADERS_PAT = /\A(#{codepoints_to_pattern(UNICODE_LEADERS_AND_TRAILERS)})+/
 
-      # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
-      UTF8_PAT = /\A(?:
-                     [\x00-\x7f]                                     |
-                     [\xc2-\xdf] [\x80-\xbf]                         |
-                     \xe0        [\xa0-\xbf] [\x80-\xbf]             |
-                     [\xe1-\xef] [\x80-\xbf] [\x80-\xbf]             |
-                     \xf0        [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
-                     [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
-                     \xf4        [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
-                    )*\z/xn
+      UTF8_PAT = ActiveSupport::Multibyte::VALID_CHARACTER['UTF-8']
 
       attr_reader :wrapped_string
       alias to_s wrapped_string
@@ -308,23 +299,23 @@ module ActiveSupport #:nodoc:
       def rstrip
         chars(@wrapped_string.gsub(UNICODE_TRAILERS_PAT, ''))
       end
-      
+
       # Strips entire range of Unicode whitespace from the left of the string.
       def lstrip
         chars(@wrapped_string.gsub(UNICODE_LEADERS_PAT, ''))
       end
-      
+
       # Strips entire range of Unicode whitespace from the right and left of the string.
       def strip
         rstrip.lstrip
       end
-      
+
       # Returns the number of codepoints in the string
       def size
         self.class.u_unpack(@wrapped_string).size
       end
       alias_method :length, :size
-      
+
       # Reverses all characters in the string.
       #
       # Example:
@@ -332,7 +323,7 @@ module ActiveSupport #:nodoc:
       def reverse
         chars(self.class.u_unpack(@wrapped_string).reverse.pack('U*'))
       end
-      
+
       # Implements Unicode-aware slice with codepoints. Slicing on one point returns the codepoints for that
       # character.
       #
@@ -647,7 +638,7 @@ module ActiveSupport #:nodoc:
           string.split(//u).map do |c|
             c.force_encoding(Encoding::ASCII) if c.respond_to?(:force_encoding)
 
-            if !UTF8_PAT.match(c)
+            if !ActiveSupport::Multibyte::VALID_CHARACTER['UTF-8'].match(c)
               n = c.unpack('C')[0]
               n &lt; 128 ? n.chr :
               n &lt; 160 ? [UCD.cp1252[n] || n].pack('U') :</diff>
      <filename>activesupport/lib/active_support/multibyte/chars.rb</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>5e6dab8b34152bc48c89032d20e5bda1511e28fb</id>
    </parent>
  </parents>
  <author>
    <name>Michael Koziarski</name>
    <login>NZKoz</login>
    <email>michael@koziarski.com</email>
  </author>
  <url>http://github.com/rails/rails/commit/9a73630d935e360f3dc896e50dd673afb97cf3b5</url>
  <id>9a73630d935e360f3dc896e50dd673afb97cf3b5</id>
  <committed-date>2009-09-03T14:25:38-07:00</committed-date>
  <authored-date>2009-08-31T12:16:22-07:00</authored-date>
  <message>Add verify and clean methods to ActiveSupport::Multibyte.

When accepting character input from outside of your application you can't
blindly trust that all strings are properly encoded. With these methods
you can check incoming strings and clean them up if necessary.

Signed-off-by: Michael Koziarski &lt;michael@koziarski.com&gt;

Conflicts:

	activesupport/lib/active_support/multibyte.rb</message>
  <tree>e404f7dbfc142a10b45b758f0cea23812abc9f23</tree>
  <committer>
    <name>Michael Koziarski</name>
    <login>NZKoz</login>
    <email>michael@koziarski.com</email>
  </committer>
</commit>
