public
Fork of mojombo/chronic
Description: Chronic is a pure Ruby natural language date parser.
Homepage: http://chronic.rubyforge.org
Clone URL: git://github.com/technoweenie/chronic.git
fix numerizer bugs
mojombo (author)
Sat Jan 05 18:33:21 -0800 2008
commit  79527a32052d3234454286595118a6aa5b0f0efa
tree    27c5c25015ef4cecb3e1c95169ae88f65d039cfd
parent  90530c515be41894982fd58da4fdd578b378f996
...
1
 
 
 
 
 
 
 
 
2
3
4
...
 
1
2
3
4
5
6
7
8
9
10
11
0
@@ -1,4 +1,11 @@
0
-= HEAD
0
+= 0.2.4
0
+
0
+* fixed numerizer number combination bug (27 Oct 2006 7:30pm works now) (reported by reynard h)
0
+* allow numeric timezone offset (e.g. -0500)
0
+* disregard commas (so as to not return nil)
0
+* fix parse of (am|pm|oclock) separation to handle "Ham sandwich" properly
0
+
0
+= 0.2.3
0
 
0
 * fixed 12am/12pm (by Nicholas Schlueter)
0
 
...
9
10
11
 
 
12
13
14
...
39
40
41
42
 
 
 
 
 
43
44
 
45
46
47
...
9
10
11
12
13
14
15
16
...
41
42
43
 
44
45
46
47
48
49
 
50
51
52
53
0
@@ -9,6 +9,8 @@
0
 
0
 $:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
0
 
0
+require 'time'
0
+
0
 require 'chronic/chronic'
0
 require 'chronic/handlers'
0
 
0
@@ -39,9 +41,13 @@ require 'chronic/time_zone'
0
 require 'numerizer/numerizer'
0
 
0
 module Chronic
0
- VERSION = "0.2.2"
0
+ VERSION = "0.2.3"
0
+
0
+ class << self
0
+ attr_accessor :debug
0
+ end
0
   
0
- def self.debug; false; end
0
+ self.debug = false
0
 end
0
 
0
 alias p_orig p
...
19
20
21
22
 
23
24
25
...
39
40
41
 
 
42
43
44
45
46
47
 
 
 
 
 
48
49
50
...
102
103
104
105
 
 
106
107
108
...
117
118
119
120
 
121
122
123
...
19
20
21
 
22
23
24
25
...
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
...
109
110
111
 
112
113
114
115
116
...
125
126
127
 
128
129
130
131
0
@@ -19,7 +19,7 @@ module Chronic
0
     # Time (defaults to Time.now)
0
     #
0
     # By setting <tt>:now</tt> to a Time, all computations will be based off
0
- # of that time instead of Time.now
0
+ # of that time instead of Time.now. If set to nil, Chronic will use Time.now.
0
     #
0
     # [<tt>:guess</tt>]
0
     # +true+ or +false+ (defaults to +true+)
0
@@ -39,12 +39,19 @@ module Chronic
0
     # will be made, and the first matching instance of that time will
0
     # be used.
0
     def parse(text, specified_options = {})
0
+ @text = text
0
+
0
       # get options and set defaults if necessary
0
       default_options = {:context => :future,
0
                          :now => Time.now,
0
                          :guess => true,
0
                          :ambiguous_time_range => 6}
0
       options = default_options.merge specified_options
0
+
0
+ # handle options that were set to nil
0
+ options[:context] = :future unless options[:context]
0
+ options[:now] = Time.now unless options[:context]
0
+ options[:ambiguous_time_range] = 6 unless options[:ambiguous_time_range]
0
             
0
       # ensure the specified options are valid
0
       specified_options.keys.each do |key|
0
@@ -102,7 +109,8 @@ module Chronic
0
     def pre_normalize(text) #:nodoc:
0
       normalized_text = text.to_s.downcase
0
       normalized_text = numericize_numbers(normalized_text)
0
- normalized_text.gsub!(/['"\.]/, '')
0
+ normalized_text.gsub!(/['"\.,]/, '')
0
+ normalized_text.gsub!(/ \-(\d{4})\b/, ' tzminus\1')
0
       normalized_text.gsub!(/([\/\-\,\@])/) { ' ' + $1 + ' ' }
0
       normalized_text.gsub!(/\btoday\b/, 'this day')
0
       normalized_text.gsub!(/\btomm?orr?ow\b/, 'next day')
0
@@ -117,7 +125,7 @@ module Chronic
0
       normalized_text.gsub!(/\b(?:in|during) the (morning)\b/, '\1')
0
       normalized_text.gsub!(/\b(?:in the|during the|at) (afternoon|evening|night)\b/, '\1')
0
       normalized_text.gsub!(/\btonight\b/, 'this night')
0
- normalized_text.gsub!(/(?=\w)([ap]m|oclock)\b/, ' \1')
0
+ normalized_text.gsub!(/(\d)([ap]m|oclock)\b/, '\1 \2')
0
       normalized_text.gsub!(/\b(hence|after|from)\b/, 'future')
0
       normalized_text = numericize_ordinals(normalized_text)
0
     end
...
6
7
8
9
 
10
11
12
...
34
35
36
37
 
38
39
40
...
133
134
135
136
137
138
139
140
141
142
143
144
145
 
 
146
147
148
...
6
7
8
 
9
10
11
12
...
34
35
36
 
37
38
39
40
...
133
134
135
 
 
 
 
 
 
 
 
 
 
136
137
138
139
140
0
@@ -6,7 +6,7 @@ module Chronic
0
    @definitions ||=
0
       {:time => [Handler.new([:repeater_time, :repeater_day_portion?], nil)],
0
         
0
- :date => [Handler.new([:repeater_day_name, :repeater_month_name, :scalar_day, :repeater_time, :time_zone, :scalar_year], :handle_rdn_rmn_sd_t_tz_sy),
0
+ :date => [Handler.new([:repeater_day_name, :repeater_month_name, :scalar_day, :repeater_time, :separator_slash_or_dash?, :time_zone, :scalar_year], :handle_rdn_rmn_sd_t_tz_sy),
0
                  Handler.new([:repeater_month_name, :scalar_day, :scalar_year], :handle_rmn_sd_sy),
0
                  Handler.new([:repeater_month_name, :scalar_day, :scalar_year, :separator_at?, 'time?'], :handle_rmn_sd_sy),
0
                  Handler.new([:repeater_month_name, :scalar_day, :separator_at?, 'time?'], :handle_rmn_sd),
0
@@ -34,7 +34,7 @@ module Chronic
0
       }
0
     end
0
     
0
- def tokens_to_span(tokens, options) #:nodoc:
0
+ def tokens_to_span(tokens, options) #:nodoc:
0
       # maybe it's a specific date
0
       
0
       self.definitions[:date].each do |handler|
0
@@ -133,16 +133,8 @@ module Chronic
0
     end
0
     
0
     def handle_rdn_rmn_sd_t_tz_sy(tokens, options) #:nodoc:
0
- month = tokens[1].get_tag(RepeaterMonthName).index
0
- day = tokens[2].get_tag(ScalarDay).type
0
- year = tokens[5].get_tag(ScalarYear).type
0
-
0
- begin
0
- day_start = Time.local(year, month, day)
0
- day_or_time(day_start, [tokens[3]], options)
0
- rescue ArgumentError
0
- nil
0
- end
0
+ t = Time.parse(@text)
0
+ Span.new(t, t + 1)
0
     end
0
     
0
     def handle_rmn_sd_sy(tokens, options) #:nodoc:
...
26
27
28
 
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
48
49
...
26
27
28
29
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
0
@@ -26,24 +26,26 @@ class Chronic::RepeaterTime < Chronic::Repeater #:nodoc:
0
   
0
   def initialize(time, options = {})
0
     t = time.gsub(/\:/, '')
0
+
0
     @type =
0
- if (1..2) === t.size
0
- hours = t.to_i
0
- hours == 12 ? Tick.new(0 * 60 * 60, true) : Tick.new(hours * 60 * 60, true)
0
- elsif t.size == 3
0
- Tick.new((t[0..0].to_i * 60 * 60) + (t[1..2].to_i * 60), true)
0
- elsif t.size == 4
0
- ambiguous = time =~ /:/ && t[0..0].to_i != 0 && t[0..1].to_i <= 12
0
- hours = t[0..1].to_i
0
- hours == 12 ? Tick.new(0 * 60 * 60 + t[2..3].to_i * 60, ambiguous) : Tick.new(hours * 60 * 60 + t[2..3].to_i * 60, ambiguous)
0
- elsif t.size == 5
0
- Tick.new(t[0..0].to_i * 60 * 60 + t[1..2].to_i * 60 + t[3..4].to_i, true)
0
- elsif t.size == 6
0
- ambiguous = time =~ /:/ && t[0..0].to_i != 0 && t[0..1].to_i <= 12
0
- hours = t[0..1].to_i
0
- hours == 12 ? Tick.new(0 * 60 * 60 + t[2..3].to_i * 60 + t[4..5].to_i, ambiguous) : Tick.new(hours * 60 * 60 + t[2..3].to_i * 60 + t[4..5].to_i, ambiguous)
0
- else
0
- raise("Time cannot exceed six digits")
0
+ case t.size
0
+ when 1..2
0
+ hours = t.to_i
0
+ hours == 12 ? Tick.new(0 * 60 * 60, true) : Tick.new(hours * 60 * 60, true)
0
+ when 3
0
+ Tick.new((t[0..0].to_i * 60 * 60) + (t[1..2].to_i * 60), true)
0
+ when 4
0
+ ambiguous = time =~ /:/ && t[0..0].to_i != 0 && t[0..1].to_i <= 12
0
+ hours = t[0..1].to_i
0
+ hours == 12 ? Tick.new(0 * 60 * 60 + t[2..3].to_i * 60, ambiguous) : Tick.new(hours * 60 * 60 + t[2..3].to_i * 60, ambiguous)
0
+ when 5
0
+ Tick.new(t[0..0].to_i * 60 * 60 + t[1..2].to_i * 60 + t[3..4].to_i, true)
0
+ when 6
0
+ ambiguous = time =~ /:/ && t[0..0].to_i != 0 && t[0..1].to_i <= 12
0
+ hours = t[0..1].to_i
0
+ hours == 12 ? Tick.new(0 * 60 * 60 + t[2..3].to_i * 60 + t[4..5].to_i, ambiguous) : Tick.new(hours * 60 * 60 + t[2..3].to_i * 60 + t[4..5].to_i, ambiguous)
0
+ else
0
+ raise("Time cannot exceed six digits")
0
     end
0
   end
0
   
...
1
2
 
3
4
5
...
8
9
10
11
 
 
12
13
14
...
1
 
2
3
4
5
...
8
9
10
 
11
12
13
14
15
0
@@ -1,5 +1,5 @@
0
 module Chronic
0
- class TimeZone < Tag #:nodoc:
0
+ class TimeZone < Tag #:nodoc:
0
     def self.scan(tokens)
0
       tokens.each_index do |i|
0
         if t = self.scan_for_all(tokens[i]) then tokens[i].tag(t); next end
0
@@ -8,7 +8,8 @@ module Chronic
0
     end
0
 
0
     def self.scan_for_all(token)
0
- scanner = {/[PMCE][DS]T/i => :tz}
0
+ scanner = {/[PMCE][DS]T/i => :tz,
0
+ /(tzminus)?\d{4}/ => :tz}
0
       scanner.keys.each do |scanner_item|
0
         return self.new(scanner[scanner_item]) if scanner_item =~ token.word
0
       end
...
44
45
46
47
48
 
49
50
51
52
 
53
54
 
55
56
 
57
58
 
59
60
 
61
62
 
63
64
 
65
66
 
 
 
 
 
67
68
 
69
70
 
71
72
73
74
 
75
76
77
78
79
80
 
 
81
82
83
84
 
 
 
85
86
 
87
88
 
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
...
44
45
46
 
 
47
48
49
50
 
51
52
 
53
54
 
55
56
 
57
58
 
59
60
 
61
62
 
63
64
 
65
66
67
68
69
70
 
71
72
 
73
74
 
75
 
76
77
78
79
80
 
 
81
82
83
84
 
 
85
86
87
88
 
89
90
 
91
92
93
94
95
96
 
 
 
 
 
 
 
 
 
97
98
0
@@ -44,60 +44,54 @@ class Numerizer
0
                     ['trillion', 1_000_000_000_000],
0
                   ]
0
 
0
-class << self
0
- def numerize(string)
0
+ def self.numerize(string)
0
     string = string.dup
0
   
0
     # preprocess
0
- string.gsub!(/ +|([^\d])-([^d])/, '\1 \2') # will mutilate hyphenated-words but shouldn't matter for date extraction
0
+ string.gsub!(/ +|([^\d])-([^\d])/, '\1 \2') # will mutilate hyphenated-words but shouldn't matter for date extraction
0
     string.gsub!(/a half/, 'haAlf') # take the 'a' out so it doesn't turn into a 1, save the half for the end
0
-
0
+
0
     # easy/direct replacements
0
-
0
+
0
     DIRECT_NUMS.each do |dn|
0
- string.gsub!(/#{dn[0]}/i, dn[1])
0
+ string.gsub!(/#{dn[0]}/i, '<num>' + dn[1])
0
     end
0
-
0
+
0
     # ten, twenty, etc.
0
-
0
+
0
     TEN_PREFIXES.each do |tp|
0
- string.gsub!(/(?:#{tp[0]})( *\d(?=[^\d]|$))*/i) { (tp[1] + $1.to_i).to_s }
0
+ string.gsub!(/(?:#{tp[0]}) *<num>(\d(?=[^\d]|$))*/i) { '<num>' + (tp[1] + $1.to_i).to_s }
0
     end
0
-
0
+
0
+ TEN_PREFIXES.each do |tp|
0
+ string.gsub!(/#{tp[0]}/i) { '<num>' + tp[1].to_s }
0
+ end
0
+
0
     # hundreds, thousands, millions, etc.
0
-
0
+
0
     BIG_PREFIXES.each do |bp|
0
- string.gsub!(/(\d*) *#{bp[0]}/i) { (bp[1] * $1.to_i).to_s}
0
+ string.gsub!(/(?:<num>)?(\d*) *#{bp[0]}/i) { '<num>' + (bp[1] * $1.to_i).to_s}
0
       andition(string)
0
- #combine_numbers(string) # Should to be more efficient way to do this
0
     end
0
-
0
+
0
     # fractional addition
0
     # I'm not combining this with the previous block as using float addition complicates the strings
0
     # (with extraneous .0's and such )
0
     string.gsub!(/(\d+)(?: | and |-)*haAlf/i) { ($1.to_f + 0.5).to_s }
0
-
0
- string
0
+
0
+ string.gsub(/<num>/, '')
0
   end
0
 
0
-private
0
- def andition(string)
0
+ private
0
+
0
+ def self.andition(string)
0
     sc = StringScanner.new(string)
0
- while(sc.scan_until(/(\d+)( | and )(\d+)(?=[^\w]|$)/i))
0
+ while(sc.scan_until(/<num>(\d+)( | and )<num>(\d+)(?=[^\w]|$)/i))
0
       if sc[2] =~ /and/ || sc[1].size > sc[3].size
0
- string[(sc.pos - sc.matched_size)..(sc.pos-1)] = (sc[1].to_i + sc[3].to_i).to_s
0
+ string[(sc.pos - sc.matched_size)..(sc.pos-1)] = '<num>' + (sc[1].to_i + sc[3].to_i).to_s
0
         sc.reset
0
       end
0
     end
0
   end
0
 
0
-# def combine_numbers(string)
0
-# sc = StringScanner.new(string)
0
-# while(sc.scan_until(/(\d+)(?: | and |-)(\d+)(?=[^\w]|$)/i))
0
-# string[(sc.pos - sc.matched_size)..(sc.pos-1)] = (sc[1].to_i + sc[2].to_i).to_s
0
-# sc.reset
0
-# end
0
-# end
0
-
0
-end
0
 end
0
\ No newline at end of file
...
23
24
25
26
 
27
28
29
...
45
46
47
 
 
 
 
48
49
...
23
24
25
 
26
27
28
29
...
45
46
47
48
49
50
51
52
53
0
@@ -23,7 +23,7 @@ class ParseNumbersTest < Test::Unit::TestCase
0
                100 => 'a hundred',
0
                100 => 'one hundred',
0
                150 => 'one hundred and fifty',
0
- # 150 => 'one fifty',
0
+ # 150 => 'one fifty',
0
                200 => 'two-hundred',
0
                500 => '5 hundred',
0
                999 => 'nine hundred and ninety nine',
0
@@ -45,4 +45,8 @@ class ParseNumbersTest < Test::Unit::TestCase
0
       assert_equal key, Numerizer.numerize(strings[key]).to_i
0
     end
0
   end
0
+
0
+ def test_edges
0
+ assert_equal "27 Oct 2006 7:30am", Numerizer.numerize("27 Oct 2006 7:30am")
0
+ end
0
 end
0
\ No newline at end of file
...
1
2
3
4
5
...
70
71
72
 
 
 
73
74
75
...
87
88
89
 
 
 
90
91
92
...
140
141
142
143
 
144
145
146
...
163
164
165
166
167
168
169
170
171
172
...
424
425
426
 
 
 
 
 
 
427
428
429
...
563
564
565
 
 
 
566
567
568
...
591
592
593
 
 
 
 
 
594
595
596
...
601
602
603
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
604
605
606
...
1
 
2
3
4
...
69
70
71
72
73
74
75
76
77
...
89
90
91
92
93
94
95
96
97
...
145
146
147
 
148
149
150
151
...
168
169
170
 
 
 
 
171
172
173
...
425
426
427
428
429
430
431
432
433
434
435
436
...
570
571
572
573
574
575
576
577
578
...
601
602
603
604
605
606
607
608
609
610
611
...
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
0
@@ -1,5 +1,4 @@
0
 require 'chronic'
0
-require 'time'
0
 require 'test/unit'
0
 
0
 class TestParsing < Test::Unit::TestCase
0
@@ -70,6 +69,9 @@ class TestParsing < Test::Unit::TestCase
0
     #time = parse_now("January 12, '00")
0
     #assert_equal Time.local(2000, 1, 12, 12), time
0
     
0
+ time = parse_now("may 27, 1979")
0
+ assert_equal Time.local(1979, 5, 27, 12), time
0
+
0
     time = parse_now("may 27 79")
0
     assert_equal Time.local(1979, 5, 27, 12), time
0
     
0
@@ -87,6 +89,9 @@ class TestParsing < Test::Unit::TestCase
0
     time = parse_now("3 jan 2010 4pm")
0
     assert_equal Time.local(2010, 1, 3, 16), time
0
     
0
+ time = parse_now("27 Oct 2006 7:30pm")
0
+ assert_equal Time.local(2006, 10, 27, 19, 30), time
0
+
0
     # sm_sd_sy
0
     
0
     time = parse_now("5/27/1979")
0
@@ -140,7 +145,7 @@ class TestParsing < Test::Unit::TestCase
0
     # rdn_rm_rd_rt_rtz_ry
0
     
0
     time = parse_now("Mon Apr 02 17:00:00 PDT 2007")
0
- assert_equal Time.local(2007, 4, 2, 17), time
0
+ assert_equal 1175558400, time.to_i
0
     
0
     now = Time.now
0
     time = parse_now(now.to_s)
0
@@ -163,10 +168,6 @@ class TestParsing < Test::Unit::TestCase
0
     assert_equal nil, time
0
   end
0
   
0
- def test_foo
0
- Chronic.parse('two months ago this friday')
0
- end
0
-
0
   def test_parse_guess_r
0
     time = parse_now("friday")
0
     assert_equal Time.local(2006, 8, 18, 12), time
0
@@ -424,6 +425,12 @@ class TestParsing < Test::Unit::TestCase
0
     
0
     time = parse_now("tomorrow morning at 5:30")
0
     assert_equal Time.local(2006, 8, 17, 5, 30), time
0
+
0
+ time = parse_now("next monday at 12:01 am")
0
+ assert_equal Time.local(2006, 8, 21, 00, 1), time
0
+
0
+ time = parse_now("next monday at 12:01 pm")
0
+ assert_equal Time.local(2006, 8, 21, 12, 1), time
0
   end
0
   
0
   def test_parse_guess_rgr
0
@@ -563,6 +570,9 @@ class TestParsing < Test::Unit::TestCase
0
   def test_parse_guess_nonsense
0
     time = parse_now("some stupid nonsense")
0
     assert_equal nil, time
0
+
0
+ time = parse_now("Ham Sandwich")
0
+ assert_equal nil, time
0
   end
0
   
0
   def test_parse_span
0
@@ -591,6 +601,11 @@ class TestParsing < Test::Unit::TestCase
0
     assert_equal parse_now("meeting today at 2pm"), @time_2006_08_16_14_00_00
0
   end
0
   
0
+ def test_am_pm
0
+ assert_equal Time.local(2006, 8, 16), parse_now("8/16/2006 at 12am")
0
+ assert_equal Time.local(2006, 8, 16, 12), parse_now("8/16/2006 at 12pm")
0
+ end
0
+
0
   def test_argument_validation
0
     assert_raise(Chronic::InvalidArgumentException) do
0
       time = Chronic.parse("may 27", :foo => :bar)
0
@@ -601,6 +616,31 @@ class TestParsing < Test::Unit::TestCase
0
     end
0
   end
0
   
0
+ # regression
0
+
0
+ # def test_partial
0
+ # assert_equal '', parse_now("2 hours")
0
+ # end
0
+
0
+ def test_days_in_november
0
+ t1 = Chronic.parse('1st thursday in november', :now => Time.local(2007))
0
+ assert_equal Time.local(2007, 11, 1, 12), t1
0
+
0
+ t1 = Chronic.parse('1st friday in november', :now => Time.local(2007))
0
+ assert_equal Time.local(2007, 11, 2, 12), t1
0
+
0
+ t1 = Chronic.parse('1st saturday in november', :now => Time.local(2007))
0
+ assert_equal Time.local(2007, 11, 3, 12), t1
0
+
0
+ t1 = Chronic.parse('1st sunday in november', :now => Time.local(2007))
0
+ assert_equal Time.local(2007, 11, 4, 11), t1
0
+
0
+ # Chronic.debug = true
0
+ #
0
+ # t1 = Chronic.parse('1st monday in november', :now => Time.local(2007))
0
+ # assert_equal Time.local(2007, 11, 5, 11), t1
0
+ end
0
+
0
   private
0
   def parse_now(string, options={})
0
     Chronic.parse(string, {:now => TIME_2006_08_16_14_00_00 }.merge(options))

Comments

    No one has commented yet.