dougal / acts_as_indexed

Acts As Indexed is a plugin which provides a pain-free way to add full-text search to your Ruby on Rails app.

acts_as_indexed / lib / acts_as_indexed.rb
100644 224 lines (184 sloc) 8.6 kb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
# ActsAsIndexed
# Copyright (c) 2007 Douglas F Shearer.
# http://douglasfshearer.com
# Distributed under the MIT license as included with this plugin.
 
require 'active_record'
 
require File.dirname(__FILE__) + '/search_index'
require File.dirname(__FILE__) + '/search_atom'
 
module Foo #:nodoc:
  module Acts #:nodoc:
    module Indexed #:nodoc:
 
      def self.included(mod)
        mod.extend(ClassMethods)
      end
 
      module ClassMethods
 
        # Declares a class as searchable.
        #
        # ====options:
        # fields:: Names of fields to include in the index. Symbols pointing to
        # instance methods of your model may also be given here.
        # index_file_depth:: Tuning value for the index partitioning. Larger
        # values result in quicker searches, but slower
        # indexing. Default is 3.
        # min_word_size:: Sets the minimum length for a word in a query. Words
        # shorter than this value are ignored in searches
        # unless preceded by the '+' operator. Default is 3.
        # index_file:: Sets the location for the index. By default this is
        # RAILS_ROOT/index. Specify as an array. Heroku, for
        # example would use RAILS_ROOT/tmp/index, which would be
        # set as [RAILS_ROOT,'tmp','index]
 
        def acts_as_indexed(options = {})
          class_eval do
            extend Foo::Acts::Indexed::SingletonMethods
          end
          include Foo::Acts::Indexed::InstanceMethods
 
          after_create :add_to_index
          before_update :update_index
          after_destroy :remove_from_index
 
          cattr_accessor :aai_config
 
          # default config
          self.aai_config = {
            :index_file => [RAILS_ROOT,'index'],
            :index_file_depth => 3,
            :min_word_size => 3,
            :fields => []
          }
 
          aai_config[:index_file] += [RAILS_ENV,name]
 
          aai_config.merge!(options)
 
          raise(ArgumentError, 'no fields specified') unless aai_config.include?(:fields)
          raise(ArgumentError, 'index_file_depth cannot be less than one (1)') if aai_config[:index_file_depth].to_i < 1
        end
 
        # Adds the passed +record+ to the index. Index is built if it does not already exist. Clears the query cache.
 
        def index_add(record)
          build_index if !File.exists?(File.join(aai_config[:index_file]))
          index = SearchIndex.new(aai_config[:index_file], aai_config[:index_file_depth], aai_config[:fields], aai_config[:min_word_size])
          index.add_record(record)
          index.save
          @query_cache = {}
          true
        end
 
        # Removes the passed +record+ from the index. Clears the query cache.
 
        def index_remove(record)
          index = SearchIndex.new(aai_config[:index_file], aai_config[:index_file_depth], aai_config[:fields], aai_config[:min_word_size])
          # record won't be in index if it doesn't exist. Just return true.
          return true if !index.exists?
          index.remove_record(record)
          index.save
          @query_cache = {}
          true
        end
        
        # Updates the index.
        # 1. Removes the previous version of the record from the index
        # 2. Adds the new version to the index.
        
        def index_update(record)
          build_index if !File.exists?(File.join(aai_config[:index_file]))
          index = SearchIndex.new(aai_config[:index_file], aai_config[:index_file_depth], aai_config[:fields], aai_config[:min_word_size])
          #index.remove_record(find(record.id))
          #index.add_record(record)
          index.update_record(record,find(record.id))
          index.save
          @query_cache = {}
          true
        end
 
        # Finds instances matching the terms passed in +query+. Terms are ANDed by
        # default. Returns an array of model instances or, if +ids_only+ is
        # true, an array of integer IDs.
        #
        # Keeps a cache of matched IDs for the current session to speed up
        # multiple identical searches.
        #
        # ====find_options
        # Same as ActiveRecord#find options hash. An :order key will override
        # the relevance ranking
        #
        # ====options
        # ids_only:: Method returns an array of integer IDs when set to true.
        # no_query_cache:: Turns off the query cache when set to true. Useful for testing.
 
        def search_index(query, find_options={}, options={})
          # Clear the query cache off if the key is set.
          @query_cache = {} if (options.has_key?('no_query_cache') || options[:no_query_cache])
          if !@query_cache || !@query_cache[query]
            logger.debug('Query not in cache, running search.')
            build_index if !File.exists?(File.join(aai_config[:index_file]))
            index = SearchIndex.new(aai_config[:index_file], aai_config[:index_file_depth], aai_config[:fields], aai_config[:min_word_size])
            @query_cache = {} if !@query_cache
            @query_cache[query] = index.search(query)
          else
            logger.debug('Query held in cache.')
          end
          return @query_cache[query].sort.reverse.map(&:first) if (options.has_key?(:ids_only) && options[:ids_only]) || @query_cache[query].empty?
          
          # slice up the results by offset and limit
          offset = find_options[:offset] || 0
          limit = find_options.include?(:limit) ? find_options[:limit] : @query_cache[query].size
          part_query = @query_cache[query].sort.reverse.slice(offset,limit).map(&:first)
          
          # Set these to nil as we are dealing with the pagination by setting
          # exactly what records we want.
          find_options[:offset] = nil
          find_options[:limit] = nil
          
          with_scope :find => find_options do
            # Doing the find like this eliminates the possibility of errors occuring
            # on either missing records (out-of-sync) or an empty results array.
            records = find(:all, :conditions => [ "#{class_name.tableize}.id IN (?)", part_query])
            
            if find_options.include?(:order)
             records # Just return the records without ranking them.
           else
             # Results come back in random order from SQL, so order again.
             ranked_records = {}
             records.each do |r|
               ranked_records[r] = @query_cache[query][r.id]
             end
          
             ranked_records.to_a.sort_by{|a| a.last }.reverse.map(&:first)
           end
          end
          
        end
 
        private
 
        # Builds an index from scratch for the current model class.
        def build_index
          increment = 500
          offset = 0
          while (records = find(:all, :limit => increment, :offset => offset)).size > 0
            #p "offset is #{offset}, increment is #{increment}"
            index = SearchIndex.new(aai_config[:index_file], aai_config[:index_file_depth], aai_config[:fields], aai_config[:min_word_size])
            offset += increment
            index.add_records(records)
            index.save
          end
        end
 
      end
 
      # Adds model class singleton methods.
      module SingletonMethods
 
        # Finds instances matching the terms passed in +query+.
        #
        # See Foo::Acts::Indexed::ClassMethods#search_index.
        def find_with_index(query='', find_options = {}, options = {})
          search_index(query, find_options, options)
        end
 
      end
 
      # Adds model class instance methods.
      # Methods are called automatically by ActiveRecord on +save+, +destroy+,
      # and +update+ of model instances.
      module InstanceMethods
 
        # Adds the current model instance to index.
        # Called by ActiveRecord on +save+.
        def add_to_index
          self.class.index_add(self)
        end
 
        # Removes the current model instance to index.
        # Called by ActiveRecord on +destroy+.
        def remove_from_index
          self.class.index_remove(self)
        end
 
        # Updates current model instance index.
        # Called by ActiveRecord on +update+.
        def update_index
          self.class.index_update(self)
        end
      end
 
    end
  end
end
 
# Mix the plugin into ActiveRecord::Base so that every model can opt in
# by calling +acts_as_indexed+.

ActiveRecord::Base.send(:include, Foo::Acts::Indexed)