public
Fork of rails/rails
Description: Ruby on Rails
Homepage: http://rubyonrails.org
Clone URL: git://github.com/Manfred/rails.git
Improve documentation.
Manfred (author)
Fri Jun 13 05:27:07 -0700 2008
commit  1a3e4e46f132457946ce84725b5aaa6ebee2dfdb
tree    8dc77c72a8773a6aab37bbe5e323605b274d0dda
parent  0f98d27d4da5037ab1423f99226152531b60e52e
...
6
7
8
9
 
 
 
10
11
12
...
19
20
21
22
23
24
 
 
25
26
 
27
28
29
...
32
33
34
35
 
36
37
38
39
40
 
41
42
43
...
56
57
58
59
60
61
62
 
63
64
65
66
67
 
68
69
70
...
6
7
8
 
9
10
11
12
13
14
...
21
22
23
 
 
 
24
25
26
 
27
28
29
30
...
33
34
35
 
36
37
38
39
40
 
41
42
43
44
...
57
58
59
 
 
 
 
60
61
62
63
 
 
64
65
66
67
0
@@ -6,7 +6,9 @@ module ActiveSupport #:nodoc:
0
       # Implements multibyte methods for easier access to multibyte characters in a String instance.
0
       module Multibyte
0
         unless '1.9'.respond_to?(:force_encoding)
0
- # +mb_chars+ is a multibyte safe proxy method for string methods.
0
+ # == Multibyte proxy
0
+ #
0
+ # +mb_chars+ is a multibyte safe proxy for string methods.
0
           #
0
           # In Ruby 1.8 and older it creates and returns an instance of the ActiveSupport::Multibyte::Chars class which
0
           # encapsulates the original string. Unicode safe versions of all the String methods are defined on this proxy
0
@@ -19,11 +21,10 @@ module ActiveSupport #:nodoc:
0
           # name.mb_chars.reverse.to_s #=> "rellüM sualC"
0
           # name.mb_chars.length #=> 12
0
           #
0
- # In Ruby 1.9 and newer +mb_chars+ returns +self+ because String is (mostly) encoding aware so we don't need
0
- # a proxy class any more. This means that +mb_chars+ makes it easier to write code that runs on multiple Ruby
0
- # versions.
0
+ # In Ruby 1.9 and newer +mb_chars+ returns +self+ because String is (mostly) encoding aware. This means that
0
+ # it becomes easy to run one version of your code on multiple Ruby versions.
0
           #
0
- # == Method chaining
0
+ # == Method chaining
0
           #
0
           # All the methods on the Chars proxy which normally return a string will return a Chars object. This allows
0
           # method chaining on the result of any of these methods.
0
@@ -32,12 +33,12 @@ module ActiveSupport #:nodoc:
0
           #
0
           # == Interoperability and configuration
0
           #
0
- # The Char object tries to be as interchangeable with String objects as possible: sorting and comparing between
0
+ # The Chars object tries to be as interchangeable with String objects as possible: sorting and comparing between
0
           # String and Chars work as expected. The bang! methods change the internal string representation in the Chars
0
           # object. Interoperability problems can be resolved easily with a +to_s+ call.
0
           #
0
           # For more information about the methods defined on the Chars proxy see ActiveSupport::Multibyte::Chars. For
0
- # information about how to change the default Multibyte behaviour, see ActiveSupport::Multibyte.
0
+ # information about how to change the default Multibyte behaviour see ActiveSupport::Multibyte.
0
           def mb_chars
0
             if ActiveSupport::Multibyte.proxy_class.wants?(self)
0
               ActiveSupport::Multibyte.proxy_class.new(self)
0
@@ -56,15 +57,11 @@ module ActiveSupport #:nodoc:
0
             alias chars mb_chars
0
           end
0
         else
0
- # In Ruby 1.9 and newer +mb_chars+ returns self. In Ruby 1.8 and older +mb_chars+ creates and returns an
0
- # Unicode safe proxy for string operations, this makes it easier to write code that runs on multiple Ruby
0
- # versions.
0
- def mb_chars
0
+ def mb_chars #:nodoc:
0
             self
0
           end
0
           
0
- # Returns true if the string has valid UTF-8 encoding.
0
- def is_utf8?
0
+ def is_utf8? #:nodoc:
0
             case encoding
0
             when Encoding::UTF_8
0
               valid_encoding?
...
5
6
7
8
 
9
10
11
...
30
31
32
33
34
 
...
5
6
7
 
8
9
10
11
...
30
31
32
 
33
34
0
@@ -5,7 +5,7 @@ require 'active_support/multibyte/exceptions'
0
 require 'active_support/multibyte/unicode_database'
0
 
0
 module ActiveSupport #:nodoc:
0
- module Multibyte #:nodoc:
0
+ module Multibyte
0
     # A list of all available normalization forms. See http://www.unicode.org/reports/tr15/tr15-29.html for more
0
     # information about normalization.
0
     NORMALIZATIONS_FORMS = [:c, :kc, :d, :kd]
0
@@ -30,4 +30,4 @@ module ActiveSupport #:nodoc:
0
     mattr_accessor :proxy_class
0
     self.proxy_class = ActiveSupport::Multibyte::Chars
0
   end
0
-end
0
\ No newline at end of file
0
+end
...
2
3
4
5
 
6
7
8
...
88
89
90
91
92
 
93
94
95
96
97
98
 
99
100
101
...
116
117
118
119
 
120
121
122
 
123
124
125
...
133
134
135
136
137
138
 
 
 
139
140
141
...
143
144
145
146
 
147
148
149
...
151
152
153
154
 
155
156
157
...
159
160
161
162
 
163
164
165
...
168
169
170
171
 
172
173
174
...
184
185
186
187
 
188
189
190
...
193
194
195
196
 
197
198
199
200
201
 
 
 
202
203
204
205
206
 
207
208
209
...
243
244
245
246
 
247
248
249
...
256
257
258
259
 
260
261
262
...
269
270
271
272
 
273
274
275
...
303
304
305
306
 
307
308
309
...
338
339
340
341
 
342
343
344
...
346
347
348
349
 
350
351
352
...
354
355
356
357
 
358
359
360
...
413
414
415
 
416
417
418
...
430
431
432
433
434
 
 
 
 
 
 
435
436
 
437
438
439
440
441
442
443
 
 
 
 
 
444
445
446
447
448
449
450
 
 
 
 
 
 
 
451
452
453
...
476
477
478
479
 
 
 
 
480
481
482
483
484
485
 
486
487
488
...
490
491
492
493
 
494
495
496
...
506
507
508
509
 
510
511
512
...
527
528
529
530
 
531
532
533
...
586
587
588
589
590
591
 
 
 
592
593
594
...
603
604
605
606
607
 
608
609
610
...
624
625
626
627
628
629
 
630
631
632
...
643
644
645
646
647
 
648
649
650
...
655
656
657
658
659
660
 
 
661
662
663
...
2
3
4
 
5
6
7
8
...
88
89
90
 
91
92
93
94
95
96
97
 
98
99
100
101
...
116
117
118
 
119
120
121
 
122
123
124
125
...
133
134
135
 
 
 
136
137
138
139
140
141
...
143
144
145
 
146
147
148
149
...
151
152
153
 
154
155
156
157
...
159
160
161
 
162
163
164
165
...
168
169
170
 
171
172
173
174
...
184
185
186
 
187
188
189
190
...
193
194
195
 
196
197
198
199
 
 
200
201
202
203
204
205
 
 
206
207
208
209
...
243
244
245
 
246
247
248
249
...
256
257
258
 
259
260
261
262
...
269
270
271
 
272
273
274
275
...
303
304
305
 
306
307
308
309
...
338
339
340
 
341
342
343
344
...
346
347
348
 
349
350
351
352
...
354
355
356
 
357
358
359
360
...
413
414
415
416
417
418
419
...
431
432
433
 
 
434
435
436
437
438
439
440
 
441
442
443
444
445
446
 
 
447
448
449
450
451
452
453
454
455
 
 
 
456
457
458
459
460
461
462
463
464
465
...
488
489
490
 
491
492
493
494
495
496
497
498
 
 
499
500
501
502
...
504
505
506
 
507
508
509
510
...
520
521
522
 
523
524
525
526
...
541
542
543
 
544
545
546
547
...
600
601
602
 
 
 
603
604
605
606
607
608
...
617
618
619
 
 
620
621
622
623
...
637
638
639
 
 
 
640
641
642
643
...
654
655
656
 
 
657
658
659
660
...
665
666
667
 
 
 
668
669
670
671
672
0
@@ -2,7 +2,7 @@
0
 
0
 module ActiveSupport #:nodoc:
0
   module Multibyte #:nodoc:
0
- # Chars enables you to work transparently with multibyte encodings in the Ruby String class without having extensive
0
+ # Chars enables you to work transparently with UTF-8 encoding in the Ruby String class without having extensive
0
     # knowledge about the encoding. A Chars object accepts a string upon initialization and proxies String methods in an
0
     # encoding safe manner. All the normal String methods are also implemented on the proxy.
0
     #
0
@@ -88,14 +88,14 @@ module ActiveSupport #:nodoc:
0
       alias to_s wrapped_string
0
       alias to_str wrapped_string
0
 
0
- # Creates a new Chars instance. +string+ is the wrapped string.
0
       if '1.9'.respond_to?(:force_encoding)
0
+ # Creates a new Chars instance by wrapping _string_.
0
         def initialize(string)
0
           @wrapped_string = string
0
           @wrapped_string.force_encoding(Encoding::UTF_8) unless @wrapped_string.frozen?
0
         end
0
       else
0
- def initialize(string)
0
+ def initialize(string) #:nodoc:
0
           @wrapped_string = string
0
         end
0
       end
0
@@ -116,10 +116,10 @@ module ActiveSupport #:nodoc:
0
         super || @wrapped_string.respond_to?(method, include_private) || false
0
       end
0
 
0
- # Returns +true+ if the Chars class can and should act as a proxy for the string +string+. Returns
0
+ # Returns +true+ if the Chars class can and should act as a proxy for the string _string_. Returns
0
       # +false+ otherwise.
0
       def self.wants?(string)
0
- RUBY_VERSION < '1.9' && $KCODE == 'UTF8' && consumes?(string)
0
+ $KCODE == 'UTF8' && consumes?(string)
0
       end
0
 
0
       # Returns +true+ when the proxy class can handle the string. Returns +false+ otherwise.
0
@@ -133,9 +133,9 @@ module ActiveSupport #:nodoc:
0
 
0
       include Comparable
0
 
0
- # Returns -1, 0 or +1 depending on whether the Chars object is to be sorted before, equal or after the
0
- # object on the right side of the operation. It accepts any object that implements +to_s+. See String.<=>
0
- # for more details.
0
+ # Returns <tt>-1</tt>, <tt>0</tt> or <tt>+1</tt> depending on whether the Chars object is to be sorted before,
0
+ # equal or after the object on the right side of the operation. It accepts any object that implements +to_s+.
0
+ # See <tt>String#<=></tt> for more details.
0
       #
0
       # Example:
0
       # 'é'.mb_chars <=> 'ü'.mb_chars #=> -1
0
@@ -143,7 +143,7 @@ module ActiveSupport #:nodoc:
0
         @wrapped_string <=> other.to_s
0
       end
0
 
0
- # Returns a new Chars object containing the other object concatenated to the string.
0
+ # Returns a new Chars object containing the _other_ object concatenated to the string.
0
       #
0
       # Example:
0
       # ('Café'.mb_chars + ' périferôl').to_s #=> "Café périferôl"
0
@@ -151,7 +151,7 @@ module ActiveSupport #:nodoc:
0
         self << other
0
       end
0
 
0
- # Like String.=~ only it returns the character offset (in codepoints) instead of the byte offset.
0
+ # Like <tt>String#=~</tt> only it returns the character offset (in codepoints) instead of the byte offset.
0
       #
0
       # Example:
0
       # 'Café périferôl'.mb_chars =~ /ô/ #=> 12
0
@@ -159,7 +159,7 @@ module ActiveSupport #:nodoc:
0
         translate_offset(@wrapped_string =~ other)
0
       end
0
 
0
- # Works just like String#split, with the exception that the items in the resulting list are Chars
0
+ # Works just like <tt>String#split</tt>, with the exception that the items in the resulting list are Chars
0
       # instances instead of String. This makes chaining methods easier.
0
       #
0
       # Example:
0
@@ -168,7 +168,7 @@ module ActiveSupport #:nodoc:
0
         @wrapped_string.split(*args).map { |i| i.mb_chars }
0
       end
0
 
0
- # Inserts the passed string at specified codepoint offsets
0
+ # Inserts the passed string at specified codepoint offsets.
0
       #
0
       # Example:
0
       # 'Café'.mb_chars.insert(4, ' périferôl').to_s #=> "Café périferôl"
0
@@ -184,7 +184,7 @@ module ActiveSupport #:nodoc:
0
         self
0
       end
0
 
0
- # Returns true if contained string contains +other+. Returns false otherwise.
0
+ # Returns +true+ if contained string contains _other_. Returns +false+ otherwise.
0
       #
0
       # Example:
0
       # 'Café'.mb_chars.include?('é') #=> true
0
@@ -193,17 +193,17 @@ module ActiveSupport #:nodoc:
0
         @wrapped_string.include?(other)
0
       end
0
 
0
- # Returns the position of the passed argument in the string, counting in codepoints
0
+ # Returns the position of _needle_ in the string, counting in codepoints. Returns +nil+ if _needle_ isn't found.
0
       #
0
       # Example:
0
       # 'Café périferôl'.mb_chars.index('ô') #=> 12
0
- def index(*args)
0
- index = @wrapped_string.index(*args)
0
+ # 'Café périferôl'.mb_chars.index(/\w/u) #=> 0
0
+ def index(needle, offset=0)
0
+ index = @wrapped_string.index(needle, offset)
0
         index ? (self.class.u_unpack(@wrapped_string.slice(0...index)).size) : nil
0
       end
0
 
0
- # Works just like the indexed replace method on string, except instead of byte offsets you specify
0
- # character offsets.
0
+ # Like <tt>String#[]=</tt>, except instead of byte offsets you specify character offsets.
0
       #
0
       # Example:
0
       #
0
@@ -243,7 +243,7 @@ module ActiveSupport #:nodoc:
0
         end
0
       end
0
 
0
- # Works just like String#rjust, only integer specifies characters instead of bytes.
0
+ # Works just like <tt>String#rjust</tt>, only integer specifies characters instead of bytes.
0
       #
0
       # Example:
0
       #
0
@@ -256,7 +256,7 @@ module ActiveSupport #:nodoc:
0
         justify(integer, :right, padstr)
0
       end
0
 
0
- # Works just like String#ljust, only integer specifies characters instead of bytes.
0
+ # Works just like <tt>String#ljust</tt>, only integer specifies characters instead of bytes.
0
       #
0
       # Example:
0
       #
0
@@ -269,7 +269,7 @@ module ActiveSupport #:nodoc:
0
         justify(integer, :left, padstr)
0
       end
0
 
0
- # Works just like String#center, only integer specifies characters instead of bytes.
0
+ # Works just like <tt>String#center</tt>, only integer specifies characters instead of bytes.
0
       #
0
       # Example:
0
       #
0
@@ -303,7 +303,7 @@ module ActiveSupport #:nodoc:
0
       end
0
       alias_method :length, :size
0
       
0
- # Reverses all characters in the string
0
+ # Reverses all characters in the string.
0
       #
0
       # Example:
0
       # 'Café'.mb_chars.reverse.to_s #=> 'éfaC'
0
@@ -338,7 +338,7 @@ module ActiveSupport #:nodoc:
0
       end
0
       alias_method :[], :slice
0
 
0
- # Convert characters in the string to uppercase
0
+ # Convert characters in the string to uppercase.
0
       #
0
       # Example:
0
       # 'Laurent, òu sont les tests?'.mb_chars.upcase.to_s #=> "LAURENT, ÒU SONT LES TESTS?"
0
@@ -346,7 +346,7 @@ module ActiveSupport #:nodoc:
0
         apply_mapping :uppercase_mapping
0
       end
0
 
0
- # Convert characters in the string to lowercase
0
+ # Convert characters in the string to lowercase.
0
       #
0
       # Example:
0
       # 'VĚDA A VÝZKUM'.mb_chars.downcase.to_s #=> "věda a výzkum"
0
@@ -354,7 +354,7 @@ module ActiveSupport #:nodoc:
0
         apply_mapping :lowercase_mapping
0
       end
0
 
0
- # Converts the first character to uppercase and the remainder to lowercase
0
+ # Converts the first character to uppercase and the remainder to lowercase.
0
       #
0
       # Example:
0
       # 'über'.mb_chars.capitalize.to_s #=> "Über"
0
@@ -413,6 +413,7 @@ module ActiveSupport #:nodoc:
0
         self.class.g_unpack(@wrapped_string).length
0
       end
0
 
0
+ # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
0
       def tidy_bytes
0
         chars(self.class.tidy_bytes(@wrapped_string))
0
       end
0
@@ -430,24 +431,35 @@ module ActiveSupport #:nodoc:
0
 
0
       class << self
0
 
0
- # Unpack the string at codepoints boundaries
0
- def u_unpack(str)
0
+ # Unpack the string at codepoints boundaries. Raises an EncodingError when the encoding of the string isn't
0
+ # valid UTF-8.
0
+ #
0
+ # Example:
0
+ # Chars.u_unpack('Café') #=> [67, 97, 102, 233]
0
+ def u_unpack(string)
0
           begin
0
- str.unpack 'U*'
0
+ string.unpack 'U*'
0
           rescue ArgumentError
0
             raise EncodingError.new('malformed UTF-8 character')
0
           end
0
         end
0
 
0
- # Detect whether the codepoint is in a certain character class. Primarily used by the
0
- # grapheme cluster support.
0
+ # Detect whether the codepoint is in a certain character class. Returns +true+ when it's in the specified
0
+ # character class and +false+ otherwise. Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>,
0
+ # <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>.
0
+ #
0
+ # Primarily used by the grapheme cluster support.
0
         def in_char_class?(codepoint, classes)
0
           classes.detect { |c| UCD.boundary[c] === codepoint } ? true : false
0
         end
0
 
0
- # Unpack the string at grapheme boundaries
0
- def g_unpack(str)
0
- codepoints = u_unpack(str)
0
+ # Unpack the string at grapheme boundaries. Returns a list of character lists.
0
+ #
0
+ # Example:
0
+ # Chars.g_unpack('क्षि') #=> [[2325, 2381], [2359], [2367]]
0
+ # Chars.g_unpack('Café') #=> [[67], [97], [102], [233]]
0
+ def g_unpack(string)
0
+ codepoints = u_unpack(string)
0
           unpacked = []
0
           pos = 0
0
           marker = 0
0
@@ -476,13 +488,15 @@ module ActiveSupport #:nodoc:
0
           unpacked
0
         end
0
 
0
- # Reverse operation of g_unpack
0
+ # Reverse operation of g_unpack.
0
+ #
0
+ # Example:
0
+ # Chars.g_pack(Chars.g_unpack('क्षि')) #=> 'क्षि'
0
         def g_pack(unpacked)
0
           (unpacked.flatten).pack('U*')
0
         end
0
 
0
- # Generates a padding string of a certain size.
0
- def padding(padsize, padstr=' ')
0
+ def padding(padsize, padstr=' ') #:nodoc:
0
           if padsize != 0
0
             new(padstr * ((padsize / u_unpack(padstr).size) + 1)).slice(0, padsize)
0
           else
0
@@ -490,7 +504,7 @@ module ActiveSupport #:nodoc:
0
           end
0
         end
0
 
0
- # Re-order codepoints so the string becomes canonical
0
+ # Re-order codepoints so the string becomes canonical.
0
         def reorder_characters(codepoints)
0
           length = codepoints.length- 1
0
           pos = 0
0
@@ -506,7 +520,7 @@ module ActiveSupport #:nodoc:
0
           codepoints
0
         end
0
 
0
- # Decompose composed characters to the decomposed form
0
+ # Decompose composed characters to the decomposed form.
0
         def decompose_codepoints(type, codepoints)
0
           codepoints.inject([]) do |decomposed, cp|
0
             # if it's a hangul syllable starter character
0
@@ -527,7 +541,7 @@ module ActiveSupport #:nodoc:
0
           end
0
         end
0
 
0
- # Compose decomposed characters to the composed form
0
+ # Compose decomposed characters to the composed form.
0
         def compose_codepoints(codepoints)
0
           pos = 0
0
           eoa = codepoints.length - 1
0
@@ -586,9 +600,9 @@ module ActiveSupport #:nodoc:
0
           codepoints
0
         end
0
 
0
- # Replaces all the non-UTF-8 bytes by their iso-8859-1 or cp1252 equivalent resulting in a valid UTF-8 string
0
- def tidy_bytes(str)
0
- str.split(//u).map do |c|
0
+ # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
0
+ def tidy_bytes(string)
0
+ string.split(//u).map do |c|
0
             if !UTF8_PAT.match(c)
0
               n = c.unpack('C')[0]
0
               n < 128 ? n.chr :
0
@@ -603,8 +617,7 @@ module ActiveSupport #:nodoc:
0
 
0
       protected
0
 
0
- # Translate a byte offset in the wrapped string to a character offset by looking for the character boundary
0
- def translate_offset(byte_offset)
0
+ def translate_offset(byte_offset) #:nodoc:
0
           return nil if byte_offset.nil?
0
           return 0 if @wrapped_string == ''
0
           chunk = @wrapped_string[0..byte_offset]
0
@@ -624,9 +637,7 @@ module ActiveSupport #:nodoc:
0
           end
0
         end
0
 
0
- # Justifies a string in a certain way. Valid values for <tt>way</tt> are <tt>:right</tt>, <tt>:left</tt> and
0
- # <tt>:center</tt>.
0
- def justify(integer, way, padstr=' ')
0
+ def justify(integer, way, padstr=' ') #:nodoc:
0
           raise ArgumentError, "zero width padding" if padstr.length == 0
0
           padsize = integer - size
0
           padsize = padsize > 0 ? padsize : 0
0
@@ -643,8 +654,7 @@ module ActiveSupport #:nodoc:
0
           chars(result)
0
         end
0
 
0
- # Map codepoints to one of it's attributes.
0
- def apply_mapping(mapping)
0
+ def apply_mapping(mapping) #:nodoc:
0
           chars(self.class.u_unpack(@wrapped_string).map do |codepoint|
0
             cp = UCD.codepoints[codepoint]
0
             if cp and (ncp = cp.send(mapping)) and ncp > 0
0
@@ -655,9 +665,8 @@ module ActiveSupport #:nodoc:
0
           end.pack('U*'))
0
         end
0
 
0
- # Creates a new instance
0
- def chars(str)
0
- self.class.new(str)
0
+ def chars(string) #:nodoc:
0
+ self.class.new(string)
0
         end
0
     end
0
   end
...
2
3
4
 
5
6
7
8
...
2
3
4
5
6
7
8
9
0
@@ -2,6 +2,7 @@
0
 
0
 module ActiveSupport #:nodoc:
0
   module Multibyte #:nodoc:
0
+ # Raised when a problem with the encoding was found.
0
     class EncodingError < StandardError; end
0
   end
0
 end
0
\ No newline at end of file

Comments

    No one has commented yet.