# ActsAsIndexed
# Copyright (c) 2007 Douglas F Shearer.
# http://douglasfshearer.com
# Distributed under the MIT license as included with this plugin.
require 'active_record'
require File.dirname(__FILE__) + '/search_index'
require File.dirname(__FILE__) + '/search_atom'
module Foo #:nodoc:
module Acts #:nodoc:
module Indexed #:nodoc:
# Hook run by Ruby when this module is included somewhere: extends the
# including class/module +mod+ with ClassMethods so that +acts_as_indexed+
# becomes callable on it.
def self.included(mod)
  mod.extend ClassMethods
end
module ClassMethods
# Declares a class as searchable.
#
# ====options:
# fields:: Names of fields to include in the index. Symbols pointing to
#          instance methods of your model may also be given here.
#          At least one field is required.
# index_file_depth:: Tuning value for the index partitioning. Larger
#                    values result in quicker searches, but slower
#                    indexing. Default is 3.
# min_word_size:: Sets the minimum length for a word in a query. Words
#                 shorter than this value are ignored in searches
#                 unless preceded by the '+' operator. Default is 3.
# index_file:: Sets the base location for the index. By default this is
#              RAILS_ROOT/index. Specify as an array. Heroku, for
#              example would use RAILS_ROOT/tmp/index, which would be
#              set as [RAILS_ROOT,'tmp','index']. RAILS_ENV and the model
#              class name are always appended to this location, so every
#              environment and model gets its own index directory.
#
# Raises ArgumentError when no fields are given or when index_file_depth
# is less than one.
def acts_as_indexed(options = {})
  class_eval do
    extend Foo::Acts::Indexed::SingletonMethods
  end
  include Foo::Acts::Indexed::InstanceMethods

  after_create :add_to_index
  before_update :update_index
  after_destroy :remove_from_index

  cattr_accessor :aai_config

  # default config
  self.aai_config = {
    :index_file => [RAILS_ROOT, 'index'],
    :index_file_depth => 3,
    :min_word_size => 3,
    :fields => []
  }
  aai_config.merge!(options)

  # The defaults always contain a :fields key, so test the value itself.
  raise(ArgumentError, 'no fields specified') if Array(aai_config[:fields]).empty?
  raise(ArgumentError, 'index_file_depth cannot be less than one (1)') if aai_config[:index_file_depth].to_i < 1

  # Appended after the merge so that a user-supplied :index_file is also
  # namespaced by environment and model name. Builds a new array, leaving
  # the array passed in +options+ untouched.
  aai_config[:index_file] = Array(aai_config[:index_file]) + [RAILS_ENV, name]
end
# Adds the passed +record+ to the index. The index is built first if its
# directory does not exist yet. Clears the query cache.
#
# Always returns true.
def index_add(record)
  # File.exist? is used because the File.exists? alias is deprecated and
  # no longer available on current Ruby versions.
  build_index unless File.exist?(File.join(aai_config[:index_file]))
  index = SearchIndex.new(aai_config[:index_file],
                          aai_config[:index_file_depth],
                          aai_config[:fields],
                          aai_config[:min_word_size])
  index.add_record(record)
  index.save
  # Cached search results may no longer be accurate.
  @query_cache = {}
  true
end
# Removes the passed +record+ from the index and clears the query cache.
# When no index exists the record cannot be in it, so nothing is done.
#
# Always returns true.
def index_remove(record)
  config = aai_config
  index = SearchIndex.new(config[:index_file],
                          config[:index_file_depth],
                          config[:fields],
                          config[:min_word_size])
  if index.exists?
    index.remove_record(record)
    index.save
    @query_cache = {}
  end
  true
end
# Updates the index entry for +record+ and clears the query cache. The
# index is built first if its directory does not exist yet.
#
# The index is handed both the new version (+record+) and the result of
# <tt>find(record.id)</tt>, so that the previous version can be replaced.
# NOTE(review): this relies on being invoked from the +before_update+
# callback, i.e. presumably while the stored row still holds the old values.
#
# Always returns true.
def index_update(record)
  # File.exist? is used because the File.exists? alias is deprecated and
  # no longer available on current Ruby versions.
  build_index unless File.exist?(File.join(aai_config[:index_file]))
  index = SearchIndex.new(aai_config[:index_file],
                          aai_config[:index_file_depth],
                          aai_config[:fields],
                          aai_config[:min_word_size])
  index.update_record(record, find(record.id))
  index.save
  # Cached search results may no longer be accurate.
  @query_cache = {}
  true
end
# Finds instances matching the terms passed in +query+. Terms are ANDed by
# default. Returns an array of model instances or, if +ids_only+ is
# true, an array of IDs. Unless an :order is given, results are ranked by
# score, highest first.
#
# Keeps a cache of matched IDs for the current session to speed up
# multiple identical searches.
#
# ====find_options
# Same as ActiveRecord#find options hash. An :order key will override
# the relevance ranking. :offset and :limit are applied to the ranked list
# of matches before the records are loaded. The hash passed in is not
# modified.
#
# ====options
# ids_only:: Method returns an array of IDs (all matches, ranked) when set
#            to true.
# no_query_cache:: Turns off the query cache when set to true. Useful for testing.
def search_index(query, find_options = {}, options = {})
  # Clear the query cache if the key is set.
  @query_cache = {} if options.has_key?('no_query_cache') || options[:no_query_cache]
  @query_cache ||= {}

  if @query_cache[query]
    logger.debug('Query held in cache.')
  else
    logger.debug('Query not in cache, running search.')
    build_index unless File.exist?(File.join(aai_config[:index_file]))
    index = SearchIndex.new(aai_config[:index_file],
                            aai_config[:index_file_depth],
                            aai_config[:fields],
                            aai_config[:min_word_size])
    @query_cache[query] = index.search(query)
  end

  # Search results map record id => score. Rank by score, best first; the
  # id breaks ties so that paging through the results is stable.
  scores = @query_cache[query]
  ranked_ids = scores.sort_by { |id, score| [score, id] }.reverse.map { |pair| pair.first }
  return ranked_ids if options[:ids_only] || ranked_ids.empty?

  # Slice up the results by offset and limit.
  offset = find_options[:offset] || 0
  limit = find_options[:limit] || ranked_ids.size
  # slice returns nil when the offset lies beyond the last result.
  part_query = ranked_ids.slice(offset, limit) || []
  return [] if part_query.empty?

  # Pagination has been dealt with above by picking exactly the records we
  # want, so offset and limit are blanked - on a copy, to leave the caller's
  # hash untouched.
  scope_options = find_options.merge(:offset => nil, :limit => nil)

  with_scope :find => scope_options do
    # Doing the find like this eliminates the possibility of errors occurring
    # on missing records (index out-of-sync with the table).
    records = find(:all, :conditions => ["#{table_name}.id IN (?)", part_query])

    if find_options.include?(:order)
      records # Just return the records without ranking them.
    else
      # Results come back from SQL in no particular order, so rank again.
      # uniq drops duplicate rows (e.g. produced by :joins in find_options).
      records.uniq.sort_by { |record| [scores[record.id], record.id] }.reverse
    end
  end
end
private
# Builds an index from scratch for the current model class.
#
# Records are loaded and indexed in batches of 500; every batch is written
# through its own SearchIndex instance and saved before the next batch is
# fetched. Returns nil.
def build_index
  increment = 500
  offset = 0
  # LIMIT/OFFSET paging is only reliable over a fixed row order; without
  # an ORDER BY the database may skip or repeat rows between batches.
  batch_order = "#{table_name}.#{primary_key}"

  loop do
    records = find(:all, :limit => increment, :offset => offset, :order => batch_order)
    break if records.empty?

    index = SearchIndex.new(aai_config[:index_file],
                            aai_config[:index_file_depth],
                            aai_config[:fields],
                            aai_config[:min_word_size])
    index.add_records(records)
    index.save
    offset += increment
  end
end
end
# Singleton methods added to a model class by +acts_as_indexed+.
module SingletonMethods
  # Finds instances matching the terms passed in +query+ (defaults to an
  # empty string). All three arguments are handed on unchanged.
  #
  # See Foo::Acts::Indexed::ClassMethods#search_index.
  def find_with_index(query = '', find_options = {}, options = {})
    search_index(query, find_options, options)
  end
end
# Instance methods added to a model by +acts_as_indexed+.
# They are registered there as ActiveRecord callbacks (+after_create+,
# +before_update+ and +after_destroy+) and each hands the instance to the
# matching index method of its class.
module InstanceMethods
  # Adds the current model instance to the index.
  # Registered as the +after_create+ callback.
  def add_to_index
    model = self.class
    model.index_add(self)
  end

  # Removes the current model instance from the index.
  # Registered as the +after_destroy+ callback.
  def remove_from_index
    model = self.class
    model.index_remove(self)
  end

  # Updates the index entry of the current model instance.
  # Registered as the +before_update+ callback.
  def update_index
    model = self.class
    model.index_update(self)
  end
end
end
end
end
# Mix the plugin into ActiveRecord::Base so that +acts_as_indexed+ is
# available to every model that wants it.
ActiveRecord::Base.send(:include, Foo::Acts::Indexed)