public
Description: Generic support for extracting GMail-style search keywords/values from strings
Homepage: http://codefluency.rubyforge.org/keyword_search
Clone URL: git://github.com/bruce/keyword_search.git
Search Repo:
Switch to lexer

git-svn-id: svn+ssh://rubyforge.org/var/svn/codefluency/keyword_search/trunk@20 a36c4c08-a44a-49c9-8a5d-6074d9d18ea7
wbruce (author)
Sat Mar 24 22:08:41 -0700 2007
commit  a4afe8183fc747c1f1bf0049ac6ad3949eef596f
tree    29ba98a6ba672307e46de4d87c984edc554449d0
parent  31ccbf78eae0c09a287d7811d041caa59ee4c75b
...
23
24
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
...
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
0
@@ -23,4 +23,20 @@ task :rebuild_parser do
0
   end
0
 end
0
 
0
+task :rebuild_lexer do
0
+ require 'dhaka'
0
+ lexer = Dhaka::Lexer.new(KeywordSearch::LexerSpec)
0
+ File.open('lib/keyword_search/lexer.rb', 'w') do |file|
0
+ file << lexer.compile_to_ruby_source_as('KeywordSearch::Lexer')
0
+ end
0
+end
0
+
0
+task :rebuild_lexer do
0
+ require 'dhaka'
0
+ lexer = Dhaka::Lexer.new(KeywordSearch::LexerSpec)
0
+ File.open('lib/keyword_search/lexer.rb', 'w') do |file|
0
+ file << lexer.compile_to_ruby_source_as('KeywordSearch::Lexer')
0
+ end
0
+end
0
+
0
 # vim: syntax=Ruby
...
1
2
3
4
 
5
6
7
...
13
14
15
16
17
 
18
19
20
...
1
2
3
 
4
5
6
7
...
13
14
15
 
 
16
17
18
19
0
@@ -1,7 +1,7 @@
0
 require 'dhaka'
0
 
0
 dirname = File.join(File.dirname(__FILE__), 'keyword_search')
0
-%w|grammar tokenizer parser evaluator definition|.each do |dependency|
0
+%w|grammar parser lexer_spec lexer evaluator definition|.each do |dependency|
0
   require File.join(dirname, dependency)
0
 end
0
 
0
@@ -13,8 +13,7 @@ module KeywordSearch
0
     def search(input_string, definition=nil, &block)
0
       @evaluator ||= Evaluator.new
0
       definition ||= Definition.new(&block)
0
- tokens = Tokenizer.tokenize(input_string)
0
- parse_result = Parser.parse(tokens)
0
+ parse_result = Parser.parse(Lexer.lex(input_string))
0
       unless parse_result.has_error?
0
         results = @evaluator.evaluate(parse_result.parse_tree)
0
         results.each do |key, terms|
...
12
13
14
15
 
16
17
18
...
12
13
14
 
15
16
17
18
0
@@ -12,7 +12,7 @@ module KeywordSearch
0
     end
0
   
0
     for_symbol 'Pair' do
0
- keyword_and_term ['k', 's']
0
+ keyword_and_term ['s', ':', 's']
0
       default_keyword_term ['s']
0
     end
0
     
...
4
5
6
7
8
9
10
11
12
 
 
 
13
14
15
16
 
 
17
18
19
20
21
22
23
 
 
24
25
26
27
 
28
29
30
31
32
33
34
 
 
 
35
36
37
38
 
 
 
 
 
 
 
39
40
41
42
...
4
5
6
 
 
 
 
7
 
8
9
10
11
12
 
 
13
14
15
16
 
 
 
 
 
17
18
19
20
21
 
22
23
24
25
 
 
 
 
26
27
28
29
30
 
 
31
32
33
34
35
36
37
38
39
40
41
0
@@ -4,38 +4,37 @@ class KeywordSearch::Parser < Dhaka::CompiledParser
0
 
0
   start_with 0
0
 
0
- at_state(3) {
0
- for_symbols("k", "_End_", "s") { reduce_with "one_pair" }
0
- }
0
-
0
   at_state(1) {
0
- for_symbols("s") { shift_to 2 }
0
+ for_symbols("_End_") { reduce_with "start" }
0
+ for_symbols("s") { shift_to 3 }
0
+ for_symbols("Pair") { shift_to 2 }
0
   }
0
 
0
- at_state(6) {
0
- for_symbols("k", "_End_", "s") { reduce_with "multiple_pairs" }
0
+ at_state(5) {
0
+ for_symbols("_End_", "s") { reduce_with "keyword_and_term" }
0
   }
0
 
0
- at_state(5) {
0
- for_symbols("s") { shift_to 4 }
0
- for_symbols("_End_") { reduce_with "start" }
0
- for_symbols("k") { shift_to 1 }
0
- for_symbols("Pair") { shift_to 6 }
0
+ at_state(4) {
0
+ for_symbols("s") { shift_to 5 }
0
   }
0
 
0
   at_state(2) {
0
- for_symbols("k", "_End_", "s") { reduce_with "keyword_and_term" }
0
+ for_symbols("_End_", "s") { reduce_with "multiple_pairs" }
0
   }
0
 
0
   at_state(0) {
0
- for_symbols("s") { shift_to 4 }
0
- for_symbols("k") { shift_to 1 }
0
- for_symbols("Pair") { shift_to 3 }
0
- for_symbols("Pairs") { shift_to 5 }
0
+ for_symbols("Pair") { shift_to 6 }
0
+ for_symbols("s") { shift_to 3 }
0
+ for_symbols("Pairs") { shift_to 1 }
0
   }
0
 
0
- at_state(4) {
0
- for_symbols("k", "_End_", "s") { reduce_with "default_keyword_term" }
0
+ at_state(6) {
0
+ for_symbols("_End_", "s") { reduce_with "one_pair" }
0
+ }
0
+
0
+ at_state(3) {
0
+ for_symbols(":") { shift_to 4 }
0
+ for_symbols("_End_", "s") { reduce_with "default_keyword_term" }
0
   }
0
 
0
 end
0
\ No newline at end of file
...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
@@ -1,105 +0,0 @@
0
-module KeywordSearch
0
-
0
- class Tokenizer < Dhaka::Tokenizer
0
-
0
- def accumulator
0
- @accumulator ||= ''
0
- end
0
-
0
- def accumulate(string)
0
- accumulator << string
0
- end
0
-
0
- def clear_accumulator
0
- @accumulator = ''
0
- end
0
-
0
-
0
- # TODO: Add further character support; this is just for initial release
0
- letters = ('a'..'z').to_a + ('A'..'Z').to_a
0
- numbers = ('0'..'9').to_a
0
- extras = %w|_ - ' / \ [ ] { } 1 @ # $ % ^ & * ( ) . , ? < > |
0
- printables = letters + numbers + extras
0
- whitespace = [' ']
0
- quotes = %w|' "|
0
- keyword_separator = [':']
0
- all_characters = keyword_separator + printables + whitespace + quotes
0
-
0
- for_state :idle_state do
0
-
0
- for_characters(printables) do
0
- clear_accumulator
0
- switch_to :unquoted_literal_state
0
- end
0
-
0
- for_characters(quotes) do
0
- advance unless accumulator.empty?
0
- clear_accumulator
0
- case curr_char
0
- when %<">
0
- advance
0
- switch_to :double_quoted_literal_state
0
- when %<'>
0
- advance
0
- switch_to :single_quoted_literal_state
0
- end
0
- end
0
-
0
- for_characters whitespace do
0
- advance
0
- end
0
-
0
- end
0
-
0
- for_state :unquoted_literal_state do
0
-
0
- for_characters(printables) do
0
- accumulate curr_char
0
- advance
0
- create_token('s', accumulator) unless curr_char
0
- end
0
-
0
- for_characters(keyword_separator) do
0
- create_token 'k', accumulator
0
- clear_accumulator
0
- advance
0
- switch_to :idle_state
0
- end
0
-
0
- for_characters(whitespace) do
0
- create_token 's', accumulator
0
- clear_accumulator
0
- switch_to :idle_state
0
- end
0
-
0
- end
0
-
0
- for_state :double_quoted_literal_state do
0
- for_characters(all_characters - %w<">) do
0
- accumulate curr_char
0
- advance
0
- end
0
- for_characters %w<"> do
0
- create_token 's', accumulator
0
- clear_accumulator
0
- advance
0
- switch_to :idle_state
0
- end
0
- end
0
-
0
- for_state :single_quoted_literal_state do
0
- for_characters(all_characters - %w<'>) do
0
- accumulate curr_char
0
- advance
0
- end
0
- for_characters %w<'> do
0
- create_token 's', accumulator
0
- clear_accumulator
0
- advance
0
- switch_to :idle_state
0
- end
0
- end
0
-
0
- end
0
-
0
-end

Comments

    No one has commented yet.