public
Rubygem
Description: Client API for Sphinx
Homepage: http://riddle.freelancing-gods.com
Clone URL: git://github.com/freelancing-god/riddle.git
Changes for 0.9.8 release candidate 1

git-svn-id: 
http://rails-oceania.googlecode.com/svn/patallan/riddle/trunk@567 
44c325e4-a42c-0410-87d1-89eb90bb738d
Wed Mar 12 23:56:55 -0700 2008
commit  242a432d99bde389f288ea2e39cdca0b5636d385
tree    5e21cf39f2b124e894952a4ebfa93c1d9b807580
parent  0149837b1e76bf9d7d784a08cfc9d1c43a51f64a
0
...
5
6
7
8
 
 
 
 
 
9
10
11
...
14
15
16
17
 
 
 
 
 
 
 
 
18
19
20
...
57
58
59
60
61
 
 
 
62
...
5
6
7
 
8
9
10
11
12
13
14
15
...
18
19
20
 
21
22
23
24
25
26
27
28
29
30
31
...
68
69
70
 
71
72
73
74
75
0
@@ -5,7 +5,11 @@ client and the original PHP client - credit where credit's due, after all.
0
 It does not follow the same syntax as those two, though (not much point writing this otherwise) -
0
 opting for a more Ruby-like structure.
0
 
0
-While it doesn't (yet) exist as a gem, you can obtain the sourcecode via subversion. If you
0
+The easiest way to install is to grab the gem (available since 0.9.8r1112 only):
0
+
0
+ sudo gem install riddle
0
+
0
+However, if you're so inclined, you can grab the source code via subversion. If you
0
 are after a specific release, use the tag as follows:
0
 
0
   svn co http://rails-oceania.googlecode.com/svn/patallan/riddle/tags/0.9.8-r1112 riddle
0
@@ -14,7 +18,14 @@ Or for the most current, just use trunk:
0
 
0
   svn co http://rails-oceania.googlecode.com/svn/patallan/riddle/trunk riddle
0
 
0
-Please note that at the time of writing, only 0.9.8r871 through to 0.9.8r1112 are supported.
0
+Please note that at the time of writing, the following versions are supported (if you get the appropriate tag):
0
+
0
+* 0.9.8r871
0
+* 0.9.8r909
0
+* 0.9.8r985
0
+* 0.9.8r1065
0
+* 0.9.8r1112
0
+* 0.9.8 rc1 (gem version is 0.9.8.1198)
0
 
0
 To get started, just instantiate a Client object:
0
 
0
@@ -57,4 +68,6 @@ total number of matches (which may be greater than the maximum available), and t
0
 that the query took to run.
0
 
0
 <tt>:status</tt> is the error code for the query - and if there was a related warning, it will be under
0
-the <tt>:warning</tt> key. Fatal errors will be described under <tt>:error</tt>.
0
\ No newline at end of file
0
+the <tt>:warning</tt> key. Fatal errors will be described under <tt>:error</tt>.
0
+
0
+If you've installed the gem and are wondering why there are no tests - check out the svn version. I've kept the specs out of the gem as I have a decent amount of test data in there, which really isn't needed unless you want to submit patches.
0
\ No newline at end of file
...
1
 
2
3
4
...
1
2
3
4
5
0
@@ -1,4 +1,5 @@
0
 require 'rake'
0
+require 'rake/packagetask'
0
 require 'rake/rdoctask'
0
 require 'spec/rake/spectask'
0
 require 'rdoc/rdoc'
...
12
13
14
15
 
 
 
16
17
 
 
18
19
20
...
12
13
14
 
15
16
17
18
 
19
20
21
22
23
0
@@ -12,8 +12,11 @@ module Riddle #:nodoc:
0
     Major = 0
0
     Minor = 9
0
     Tiny = 8
0
- Rev = 1112
0
+ # Revision number for RubyForge's sake, taken from what Sphinx
0
+ # outputs to the command line.
0
+ Rev = 1198
0
     
0
- String = [Major, Minor, Tiny].join('.') + "r#{Rev}"
0
+ String = [Major, Minor, Tiny].join('.') + "rc1"
0
+ GemVersion = [Major, Minor, Tiny, Rev].join('.')
0
   end
0
 end
0
\ No newline at end of file
...
26
27
28
29
30
31
 
 
 
 
32
33
34
35
36
37
 
 
 
 
38
39
40
...
158
159
160
161
162
 
 
163
164
165
...
275
276
277
278
279
 
 
280
281
282
...
358
359
360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
362
363
...
439
440
441
442
 
443
444
445
...
500
501
502
 
 
503
504
505
...
544
545
546
 
 
 
 
 
 
 
 
 
 
 
547
548
...
26
27
28
 
 
 
29
30
31
32
33
34
35
 
 
 
36
37
38
39
40
41
42
...
160
161
162
 
 
163
164
165
166
167
...
277
278
279
 
 
280
281
282
283
284
...
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
...
465
466
467
 
468
469
470
471
...
526
527
528
529
530
531
532
533
...
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
0
@@ -26,15 +26,17 @@ module Riddle
0
   #
0
   class Client
0
     Commands = {
0
- :search => 0, # SEARCHD_COMMAND_SEARCH
0
- :excerpt => 1, # SEARCHD_COMMAND_EXCERPT
0
- :update => 2 # SEARCHD_COMMAND_UPDATE
0
+ :search => 0, # SEARCHD_COMMAND_SEARCH
0
+ :excerpt => 1, # SEARCHD_COMMAND_EXCERPT
0
+ :update => 2, # SEARCHD_COMMAND_UPDATE
0
+ :keywords => 3 # SEARCHD_COMMAND_KEYWORDS
0
     }
0
     
0
     Versions = {
0
- :search => 0x112, # VER_COMMAND_SEARCH
0
- :excerpt => 0x100, # VER_COMMAND_EXCERPT
0
- :update => 0x101 # VER_COMMAND_UPDATE
0
+ :search => 0x113, # VER_COMMAND_SEARCH
0
+ :excerpt => 0x100, # VER_COMMAND_EXCERPT
0
+ :update => 0x101, # VER_COMMAND_UPDATE
0
+ :keywords => 0x100 # VER_COMMAND_KEYWORDS
0
     }
0
     
0
     Statuses = {
0
@@ -158,8 +160,8 @@ module Riddle
0
     
0
     # Append a query to the queue. This uses the same parameters as the query
0
     # method.
0
- def append_query(search, index = '*')
0
- @queue << query_message(search, index)
0
+ def append_query(search, index = '*', comments = '')
0
+ @queue << query_message(search, index, comments)
0
     end
0
     
0
     # Run all the queries currently in the queue. This will return an array of
0
@@ -275,8 +277,8 @@ module Riddle
0
     # related warning, it will be under the <tt>:warning</tt> key. Fatal errors
0
     # will be described under <tt>:error</tt>.
0
     #
0
- def query(search, index = '*')
0
- @queue << query_message(search, index)
0
+ def query(search, index = '*', comments = '')
0
+ @queue << query_message(search, index, comments)
0
       self.run.first
0
     end
0
     
0
@@ -358,6 +360,30 @@ module Riddle
0
       response.next_int
0
     end
0
     
0
+ # Generates a keyword list for a given query. Each keyword is represented
0
+ # by a hash, with keys :tokenised and :normalised. If return_hits is set to
0
+ # true it will also report on the number of hits and documents for each
0
+ # keyword (see :hits and :docs keys respectively).
0
+ def keywords(query, index, return_hits = false)
0
+ response = Response.new request(
0
+ :keywords,
0
+ keywords_message(query, index, return_hits)
0
+ )
0
+
0
+ (0...response.next_int).collect do
0
+ hash = {}
0
+ hash[:tokenised] = response.next
0
+ hash[:normalised] = response.next
0
+
0
+ if return_hits
0
+ hash[:docs] = response.next_int
0
+ hash[:hits] = response.next_int
0
+ end
0
+
0
+ hash
0
+ end
0
+ end
0
+
0
     private
0
     
0
     # Connects to the Sphinx daemon, and yields a socket to use. The socket is
0
@@ -439,7 +465,7 @@ module Riddle
0
     end
0
     
0
     # Generation of the message to send to Sphinx for a search.
0
- def query_message(search, index)
0
+ def query_message(search, index, comments = '')
0
       message = Message.new
0
       
0
       # Mode, Limits, Sort Mode
0
@@ -500,6 +526,8 @@ module Riddle
0
         message.append_int val
0
       end
0
       
0
+ message.append_string comments
0
+
0
       message.to_s
0
     end
0
     
0
@@ -544,5 +572,16 @@ module Riddle
0
       
0
       message.to_s
0
     end
0
+
0
+ # Generates the simple message to send to the daemon for a keywords request.
0
+ def keywords_message(query, index, return_hits)
0
+ message = Message.new
0
+
0
+ message.append_string query
0
+ message.append_string index
0
+ message.append_int return_hits ? 1 : 0
0
+
0
+ message.to_s
0
+ end
0
   end
0
 end
...
122
123
124
 
 
 
 
 
125
126
127
128
129
 
 
 
 
 
 
 
 
 
 
130
131
...
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
0
@@ -122,9 +122,24 @@ fclose($file);
0
 
0
 $client->SetFieldWeights(array());
0
 
0
+// comment
0
+$file = fopen("spec/fixtures/data/comment.bin", "w");
0
+fwrite($file, $client->_reqs[$client->AddQuery("test ", "*", "commenting")]);
0
+fclose($file);
0
+
0
 // update_simple
0
 $file = fopen("spec/fixtures/data/update_simple.bin", "w");
0
 fwrite($file, $client->UpdateAttributes("people", array("birthday"), array(1 => array(191163600))));
0
 fclose($file);
0
 
0
+// keywords_without_hits
0
+$file = fopen("spec/fixtures/data/keywords_without_hits.bin", "w");
0
+fwrite($file, $client->BuildKeywords("pat", "people", false));
0
+fclose($file);
0
+
0
+// keywords_with_hits
0
+$file = fopen("spec/fixtures/data/keywords_with_hits.bin", "w");
0
+fwrite($file, $client->BuildKeywords("pat", "people", true));
0
+fclose($file);
0
+
0
 ?>
0
\ No newline at end of file
...
21
22
23
24
25
26
27
 
 
 
 
28
29
30
...
21
22
23
 
 
 
 
24
25
26
27
28
29
30
0
@@ -21,10 +21,10 @@ source peoples
0
   sql_pass = <%= @password %>
0
   sql_db = riddle_sphinx_spec
0
 
0
- sql_query = SELECT id, first_name, middle_initial, last_name, gender, street_address, city, state, postcode, email, UNIX_TIMESTAMP(birthday) AS birthday FROM people WHERE id >= $start AND id <= $end
0
- sql_query_range = SELECT MIN(id), MAX(id) FROM people
0
- sql_query_info = SELECT * FROM people WHERE id = $id
0
- sql_date_column = birthday
0
+ sql_query = SELECT id, first_name, middle_initial, last_name, gender, street_address, city, state, postcode, email, UNIX_TIMESTAMP(birthday) AS birthday FROM people WHERE id >= $start AND id <= $end
0
+ sql_query_range = SELECT MIN(id), MAX(id) FROM people
0
+ sql_query_info = SELECT * FROM people WHERE id = $id
0
+ sql_attr_timestamp = birthday
0
 }
0
 
0
 index people
...
1
2
3
4
 
5
6
7
...
21
22
23
 
24
25
26
 
27
28
 
29
30
31
...
591
592
593
594
 
595
596
597
598
 
599
600
601
...
619
620
621
622
 
623
624
625
...
696
697
698
 
 
 
699
700
701
...
817
818
819
820
 
 
 
 
 
 
 
 
 
 
821
822
823
...
989
990
991
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
992
993
994
...
1060
1061
1062
1063
 
1064
1065
1066
1067
...
1
2
3
 
4
5
6
7
...
21
22
23
24
25
26
 
27
28
29
30
31
32
33
...
593
594
595
 
596
597
598
599
 
600
601
602
603
...
621
622
623
 
624
625
626
627
...
698
699
700
701
702
703
704
705
706
...
822
823
824
 
825
826
827
828
829
830
831
832
833
834
835
836
837
...
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
...
1171
1172
1173
 
1174
1175
1176
1177
1178
0
@@ -1,7 +1,7 @@
0
 <?php
0
 
0
 //
0
-// $Id: sphinxapi.php 1103 2008-01-24 18:42:57Z shodan $
0
+// $Id: sphinxapi.php 1163 2008-02-19 21:00:40Z glook $
0
 //
0
 
0
 //
0
@@ -21,11 +21,13 @@
0
 define ( "SEARCHD_COMMAND_SEARCH",  0 );
0
 define ( "SEARCHD_COMMAND_EXCERPT",  1 );
0
 define ( "SEARCHD_COMMAND_UPDATE",  2 );
0
+define ( "SEARCHD_COMMAND_KEYWORDS",3 );
0
 
0
 /// current client-side command implementation versions
0
-define ( "VER_COMMAND_SEARCH", 0x112 );
0
+define ( "VER_COMMAND_SEARCH", 0x113 );
0
 define ( "VER_COMMAND_EXCERPT",    0x100 );
0
 define ( "VER_COMMAND_UPDATE",    0x101 );
0
+define ( "VER_COMMAND_KEYWORDS", 0x100 );
0
 
0
 /// known searchd status codes
0
 define ( "SEARCHD_OK",        0 );
0
@@ -591,11 +593,11 @@ class SphinxClient
0
 
0
   /// connect to searchd server, run given search query through given indexes,
0
   /// and return the search results
0
- function Query ( $query, $index="*" )
0
+ function Query ( $query, $index="*", $comment="" )
0
   {
0
     assert ( empty($this->_reqs) );
0
 
0
- $this->AddQuery ( $query, $index );
0
+ $this->AddQuery ( $query, $index, $comment );
0
     $results = $this->RunQueries ();
0
 
0
     if ( !is_array($results) )
0
@@ -619,7 +621,7 @@ class SphinxClient
0
 
0
   /// add query to multi-query batch
0
   /// returns index into results array from RunQueries() call
0
- function AddQuery ( $query, $index="*" )
0
+ function AddQuery ( $query, $index="*", $comment="" )
0
   {
0
     // mbstring workaround
0
     $this->_MBPush ();
0
@@ -696,6 +698,9 @@ class SphinxClient
0
     foreach ( $this->_fieldweights as $field=>$weight )
0
       $req .= pack ( "N", strlen($field) ) . $field . pack ( "N", $weight );
0
 
0
+ // comment
0
+ $req .= pack ( "N", strlen($comment) ) . $comment;
0
+
0
     // mbstring workaround
0
     $this->_MBPop ();
0
 
0
@@ -817,7 +822,16 @@ class SphinxClient
0
           list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
0
             substr ( $response, $p, 8 ) ) );
0
           $p += 8;
0
- $doc = sprintf ( "%u", $doc ); // workaround for php signed/unsigned braindamage
0
+
0
+ if ( PHP_INT_SIZE>=8 )
0
+ {
0
+ // x64 route, workaround broken unpack() in 5.2.2+
0
+ if ( $doc<0 ) $doc += (1<<32);
0
+ } else
0
+ {
0
+ // x32 route, workaround php signed/unsigned braindamage
0
+ $doc = sprintf ( "%u", $doc );
0
+ }
0
         }
0
         $weight = sprintf ( "%u", $weight );
0
 
0
@@ -989,6 +1003,103 @@ class SphinxClient
0
     return $res;
0
   }
0
 
0
+
0
+ /////////////////////////////////////////////////////////////////////////////
0
+ // keyword generation
0
+ /////////////////////////////////////////////////////////////////////////////
0
+
0
+ /// connect to searchd server, and generate keyword list for a given query
0
+ /// returns false on failure,
0
+ /// an array of words on success
0
+ function BuildKeywords ( $query, $index, $hits )
0
+ {
0
+ assert ( is_string($query) );
0
+ assert ( is_string($index) );
0
+ assert ( is_bool($hits) );
0
+
0
+ // Commented out for testing Riddle
0
+ // $this->_MBPush ();
0
+ //
0
+ // if (!( $fp = $this->_Connect() ))
0
+ // {
0
+ // $this->_MBPop();
0
+ // return false;
0
+ // }
0
+
0
+ /////////////////
0
+ // build request
0
+ /////////////////
0
+
0
+ // v.1.0 req
0
+ $req = pack ( "N", strlen($query) ) . $query; // req query
0
+ $req .= pack ( "N", strlen($index) ) . $index; // req index
0
+ $req .= pack ( "N", (int)$hits );
0
+
0
+ // Line for testing Riddle:
0
+ return $req;
0
+
0
+ ////////////////////////////
0
+ // send query, get response
0
+ ////////////////////////////
0
+
0
+ $len = strlen($req);
0
+ $req = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $req; // add header
0
+ $wrote = fwrite ( $fp, $req, $len+8 );
0
+ if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS ) ))
0
+ {
0
+ $this->_MBPop ();
0
+ return false;
0
+ }
0
+
0
+ //////////////////
0
+ // parse response
0
+ //////////////////
0
+
0
+ $pos = 0;
0
+ $res = array ();
0
+ $rlen = strlen($response);
0
+ list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) );
0
+ $pos += 4;
0
+ for ( $i=0; $i<$nwords; $i++ )
0
+ {
0
+ list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
0
+ $tokenized = $len ? substr ( $response, $pos, $len ) : "";
0
+ $pos += $len;
0
+
0
+ list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
0
+ $normalized = $len ? substr ( $response, $pos, $len ) : "";
0
+ $pos += $len;
0
+
0
+ $res[] = array ( "tokenized"=>$tokenized, "normalized"=>$normalized );
0
+
0
+ if ( $hits )
0
+ {
0
+ list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
0
+ $pos += 8;
0
+ $res [$i]["docs"] = $ndocs;
0
+ $res [$i]["hits"] = $nhits;
0
+ }
0
+
0
+ if ( $pos > $rlen )
0
+ {
0
+ $this->_error = "incomplete reply";
0
+ $this->_MBPop ();
0
+ return false;
0
+ }
0
+ }
0
+
0
+ $this->_MBPop ();
0
+ return $res;
0
+ }
0
+
0
+ function EscapeString ( $string )
0
+ {
0
+ $from = array ( '(',')','|','-','!','@','~','\"','&' );
0
+ $to = array ( '\\(','\\)','\\|','\\-','\\!','\\@','\\~','\\\"', '\\&' );
0
+
0
+ return str_replace ( $from, $to, $string );
0
+ }
0
+
0
   /////////////////////////////////////////////////////////////////////////////
0
   // attribute updates
0
   /////////////////////////////////////////////////////////////////////////////
0
@@ -1060,7 +1171,7 @@ class SphinxClient
0
 }
0
 
0
 //
0
-// $Id: sphinxapi.php 1103 2008-01-24 18:42:57Z shodan $
0
+// $Id: sphinxapi.php 1163 2008-02-19 21:00:40Z glook $
0
 //
0
 
0
 ?>
0
\ No newline at end of file
...
1
 
2
3
4
...
1
2
3
4
5
0
@@ -1,4 +1,5 @@
0
 require 'riddle'
0
+require 'spec'
0
 require 'spec/sphinx_helper'
0
 
0
 Spec::Runner.configure do |config|
...
123
124
125
 
 
 
 
 
 
126
127
128
...
151
152
153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
155
...
123
124
125
126
127
128
129
130
131
132
133
134
...
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
0
@@ -123,6 +123,12 @@ describe Riddle::Client do
0
     client.queue.first.should == query_contents(:field_weights)
0
   end
0
   
0
+ it "should build a message with acomment correctly" do
0
+ client = Riddle::Client.new
0
+ client.append_query "test ", "*", "commenting"
0
+ client.queue.first.should == query_contents(:comment)
0
+ end
0
+
0
   it "should keep multiple messages in the queue" do
0
     client = Riddle::Client.new
0
     client.weights = [100, 1]
0
@@ -151,4 +157,24 @@ describe Riddle::Client do
0
       {1 => [191163600]}
0
     ).should == query_contents(:update_simple)
0
   end
0
+
0
+ it "should build a keywords request without hits correctly" do
0
+ client = Riddle::Client.new
0
+ client.send(
0
+ :keywords_message,
0
+ "pat",
0
+ "people",
0
+ false
0
+ ).should == query_contents(:keywords_without_hits)
0
+ end
0
+
0
+ it "should build a keywords request with hits correctly" do
0
+ client = Riddle::Client.new
0
+ client.send(
0
+ :keywords_message,
0
+ "pat",
0
+ "people",
0
+ true
0
+ ).should == query_contents(:keywords_with_hits)
0
+ end
0
 end
0
\ No newline at end of file

Comments

    No one has commented yet.