0
module ActiveSupport #:nodoc:
0
module Multibyte #:nodoc:
0
- # Chars enables you to work transparently with
multibyte encodings in the Ruby String class without having extensive
0
+ # Chars enables you to work transparently with
UTF-8 encoding in the Ruby String class without having extensive
0
# knowledge about the encoding. A Chars object accepts a string upon initialization and proxies String methods in an
0
# encoding safe manner. All the normal String methods are also implemented on the proxy.
0
@@ -88,14 +88,14 @@ module ActiveSupport #:nodoc:
0
alias to_s wrapped_string
0
alias to_str wrapped_string
0
- # Creates a new Chars instance. +string+ is the wrapped string.
0
if '1.9'.respond_to?(:force_encoding)
0
+ # Creates a new Chars instance by wrapping _string_.
0
@wrapped_string = string
0
@wrapped_string.force_encoding(Encoding::UTF_8) unless @wrapped_string.frozen?
0
- def initialize(string)
0
+ def initialize(string)
#:nodoc:0
@wrapped_string = string
0
@@ -116,10 +116,10 @@ module ActiveSupport #:nodoc:
0
super || @wrapped_string.respond_to?(method, include_private) || false
0
- # Returns +true+ if the Chars class can and should act as a proxy for the string
+string+. Returns
0
+ # Returns +true+ if the Chars class can and should act as a proxy for the string
_string_. Returns
0
def self.wants?(string)
0
-
RUBY_VERSION < '1.9' && $KCODE == 'UTF8' && consumes?(string)
0
+
$KCODE == 'UTF8' && consumes?(string)
0
# Returns +true+ when the proxy class can handle the string. Returns +false+ otherwise.
0
@@ -133,9 +133,9 @@ module ActiveSupport #:nodoc:
0
- # Returns -1, 0 or +1 depending on whether the Chars object is to be sorted before, equal or after the
0
- # object on the right side of the operation. It accepts any object that implements +to_s+. See String.<=>
0
+ # Returns <tt>-1</tt>, <tt>0</tt> or <tt>+1</tt> depending on whether the Chars object is to be sorted before,
0
+ # equal or after the object on the right side of the operation. It accepts any object that implements +to_s+.
0
+ # See <tt>String#<=></tt> for more details.
0
# 'é'.mb_chars <=> 'ü'.mb_chars #=> -1
0
@@ -143,7 +143,7 @@ module ActiveSupport #:nodoc:
0
@wrapped_string <=> other.to_s
0
- # Returns a new Chars object containing the
other object concatenated to the string.
0
+ # Returns a new Chars object containing the
_other_ object concatenated to the string.
0
# ('Café'.mb_chars + ' périferôl').to_s #=> "Café périferôl"
0
@@ -151,7 +151,7 @@ module ActiveSupport #:nodoc:
0
- # Like
String.=~ only it returns the character offset (in codepoints) instead of the byte offset.
0
+ # Like
<tt>String#=~</tt> only it returns the character offset (in codepoints) instead of the byte offset.
0
# 'Café périferôl'.mb_chars =~ /ô/ #=> 12
0
@@ -159,7 +159,7 @@ module ActiveSupport #:nodoc:
0
translate_offset(@wrapped_string =~ other)
0
- # Works just like
String#split, with the exception that the items in the resulting list are Chars
0
+ # Works just like
<tt>String#split</tt>, with the exception that the items in the resulting list are Chars
0
# instances instead of String. This makes chaining methods easier.
0
@@ -168,7 +168,7 @@ module ActiveSupport #:nodoc:
0
@wrapped_string.split(*args).map { |i| i.mb_chars }
0
- # Inserts the passed string at specified codepoint offsets
0
+ # Inserts the passed string at specified codepoint offsets
.0
# 'Café'.mb_chars.insert(4, ' périferôl').to_s #=> "Café périferôl"
0
@@ -184,7 +184,7 @@ module ActiveSupport #:nodoc:
0
- # Returns
true if contained string contains +other+. Returns false otherwise.
0
+ # Returns
+true+ if contained string contains _other_. Returns +false+ otherwise.
0
# 'Café'.mb_chars.include?('é') #=> true
0
@@ -193,17 +193,17 @@ module ActiveSupport #:nodoc:
0
@wrapped_string.include?(other)
0
- # Returns the position
of the passed argument in the string, counting in codepoints0
+ # Returns the position of
_needle_ in the string, counting in codepoints. Returns +nil+ if _needle_ isn't found.0
# 'Café périferôl'.mb_chars.index('ô') #=> 12
0
- index = @wrapped_string.index(*args)
0
+ # 'Café périferôl'.mb_chars.index(/\w/u) #=> 0
0
+ def index(needle, offset=0)
0
+ index = @wrapped_string.index(needle, offset)
0
index ? (self.class.u_unpack(@wrapped_string.slice(0...index)).size) : nil
0
- # Works just like the indexed replace method on string, except instead of byte offsets you specify
0
+ # Like <tt>String#[]=</tt>, except instead of byte offsets you specify character offsets.
0
@@ -243,7 +243,7 @@ module ActiveSupport #:nodoc:
0
- # Works just like
String#rjust, only integer specifies characters instead of bytes.
0
+ # Works just like
<tt>String#rjust</tt>, only integer specifies characters instead of bytes.
0
@@ -256,7 +256,7 @@ module ActiveSupport #:nodoc:
0
justify(integer, :right, padstr)
0
- # Works just like
String#ljust, only integer specifies characters instead of bytes.
0
+ # Works just like
<tt>String#ljust</tt>, only integer specifies characters instead of bytes.
0
@@ -269,7 +269,7 @@ module ActiveSupport #:nodoc:
0
justify(integer, :left, padstr)
0
- # Works just like
String#center, only integer specifies characters instead of bytes.
0
+ # Works just like
<tt>String#center</tt>, only integer specifies characters instead of bytes.
0
@@ -303,7 +303,7 @@ module ActiveSupport #:nodoc:
0
alias_method :length, :size
0
- # Reverses all characters in the string
0
+ # Reverses all characters in the string
.0
# 'Café'.mb_chars.reverse.to_s #=> 'éfaC'
0
@@ -338,7 +338,7 @@ module ActiveSupport #:nodoc:
0
alias_method :[], :slice
0
- # Convert characters in the string to uppercase
0
+ # Convert characters in the string to uppercase
.0
# 'Laurent, òu sont les tests?'.mb_chars.upcase.to_s #=> "LAURENT, ÒU SONT LES TESTS?"
0
@@ -346,7 +346,7 @@ module ActiveSupport #:nodoc:
0
apply_mapping :uppercase_mapping
0
- # Convert characters in the string to lowercase
0
+ # Convert characters in the string to lowercase
.0
# 'VĚDA A VÝZKUM'.mb_chars.downcase.to_s #=> "věda a výzkum"
0
@@ -354,7 +354,7 @@ module ActiveSupport #:nodoc:
0
apply_mapping :lowercase_mapping
0
- # Converts the first character to uppercase and the remainder to lowercase
0
+ # Converts the first character to uppercase and the remainder to lowercase
.0
# 'über'.mb_chars.capitalize.to_s #=> "Über"
0
@@ -413,6 +413,7 @@ module ActiveSupport #:nodoc:
0
self.class.g_unpack(@wrapped_string).length
0
+ # Replaces all ISO-8859-1 or CP1252 characters with their UTF-8 equivalents, resulting in a valid UTF-8 string.
0
chars(self.class.tidy_bytes(@wrapped_string))
0
@@ -430,24 +431,35 @@ module ActiveSupport #:nodoc:
0
- # Unpack the string at codepoints boundaries
0
+ # Unpack the string at codepoints boundaries. Raises an EncodingError when the encoding of the string isn't
0
+ # Chars.u_unpack('Café') #=> [67, 97, 102, 233]
0
raise EncodingError.new('malformed UTF-8 character')
0
- # Detect whether the codepoint is in a certain character class. Primarily used by the
0
- # grapheme cluster support.
0
+ # Detect whether the codepoint is in a certain character class. Returns +true+ when it's in the specified
0
+ # character class and +false+ otherwise. Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>,
0
+ # <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>.
0
+ # Primarily used by the grapheme cluster support.
0
def in_char_class?(codepoint, classes)
0
classes.detect { |c| UCD.boundary[c] === codepoint } ? true : false
0
- # Unpack the string at grapheme boundaries
0
- codepoints = u_unpack(str)
0
+ # Unpack the string at grapheme boundaries. Returns a list of character lists.
0
+ # Chars.g_unpack('क्षि') #=> [[2325, 2381], [2359], [2367]]
0
+ # Chars.g_unpack('Café') #=> [[67], [97], [102], [233]]
0
+ codepoints = u_unpack(string)
0
@@ -476,13 +488,15 @@ module ActiveSupport #:nodoc:
0
- # Reverse operation of g_unpack
0
+ # Reverse operation of g_unpack.
0
+ # Chars.g_pack(Chars.g_unpack('क्षि')) #=> 'क्षि'
0
(unpacked.flatten).pack('U*')
0
- # Generates a padding string of a certain size.
0
- def padding(padsize, padstr=' ')
0
+ def padding(padsize, padstr=' ') #:nodoc:
0
new(padstr * ((padsize / u_unpack(padstr).size) + 1)).slice(0, padsize)
0
@@ -490,7 +504,7 @@ module ActiveSupport #:nodoc:
0
- # Re-order codepoints so the string becomes canonical
0
+ # Re-order codepoints so the string becomes canonical
.0
def reorder_characters(codepoints)
0
length = codepoints.length- 1
0
@@ -506,7 +520,7 @@ module ActiveSupport #:nodoc:
0
- # Decompose composed characters to the decomposed form
0
+ # Decompose composed characters to the decomposed form
.0
def decompose_codepoints(type, codepoints)
0
codepoints.inject([]) do |decomposed, cp|
0
# if it's a hangul syllable starter character
0
@@ -527,7 +541,7 @@ module ActiveSupport #:nodoc:
0
- # Compose decomposed characters to the composed form
0
+ # Compose decomposed characters to the composed form
.0
def compose_codepoints(codepoints)
0
eoa = codepoints.length - 1
0
@@ -586,9 +600,9 @@ module ActiveSupport #:nodoc:
0
- # Replaces all the non-UTF-8 bytes by their iso-8859-1 or cp1252 equivalent resulting in a valid UTF-8 string
0
- str.split(//u).map do |c|
0
+ # Replaces all ISO-8859-1 or CP1252 characters with their UTF-8 equivalents, resulting in a valid UTF-8 string.
0
+ def tidy_bytes(string)
0
+ string.split(//u).map do |c|
0
@@ -603,8 +617,7 @@ module ActiveSupport #:nodoc:
0
- # Translate a byte offset in the wrapped string to a character offset by looking for the character boundary
0
- def translate_offset(byte_offset)
0
+ def translate_offset(byte_offset) #:nodoc:
0
return nil if byte_offset.nil?
0
return 0 if @wrapped_string == ''
0
chunk = @wrapped_string[0..byte_offset]
0
@@ -624,9 +637,7 @@ module ActiveSupport #:nodoc:
0
- # Justifies a string in a certain way. Valid values for <tt>way</tt> are <tt>:right</tt>, <tt>:left</tt> and
0
- def justify(integer, way, padstr=' ')
0
+ def justify(integer, way, padstr=' ') #:nodoc:
0
raise ArgumentError, "zero width padding" if padstr.length == 0
0
padsize = integer - size
0
padsize = padsize > 0 ? padsize : 0
0
@@ -643,8 +654,7 @@ module ActiveSupport #:nodoc:
0
- # Map codepoints to one of it's attributes.
0
- def apply_mapping(mapping)
0
+ def apply_mapping(mapping) #:nodoc:
0
chars(self.class.u_unpack(@wrapped_string).map do |codepoint|
0
cp = UCD.codepoints[codepoint]
0
if cp and (ncp = cp.send(mapping)) and ncp > 0
0
@@ -655,9 +665,8 @@ module ActiveSupport #:nodoc:
0
- # Creates a new instance
0
+ def chars(string) #:nodoc:
0
+ self.class.new(string)
Comments
No one has commented yet.