From 64028541c889bd44b906a26d2484167eff376683 Mon Sep 17 00:00:00 2001 From: Mike Boone Date: Thu, 15 May 2008 22:42:15 -0400 Subject: [PATCH 1/6] Minor fixes to README.txt --- README.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.txt b/README.txt index 84fa14f..70ef086 100644 --- a/README.txt +++ b/README.txt @@ -96,11 +96,12 @@ e.g. :texts => [ :title, :body ] * :values, things which have a range of values for sorting, or for collapsing. Specify an array quadruple of [ field, identifier, prefix, type ] where -** number is an arbitary numeric identifier for use in the Xapian database +** identifier is an arbitrary numeric identifier for use in the Xapian database ** prefix is the part to use in search queries that goes before the : ** type can be any of :string, :number or :date -e.g. :values => [ [ :created_at, 0, "created_at" ], [ :size, 1, "size"] ] +e.g. :values => [ [ :created_at, 0, "created_at", :date ], +[ :size, 1, "size", :string ] ] * :terms, things which come after a : in search queries. Specify an array triple of [ field, char, prefix ] where @@ -139,7 +140,7 @@ object isn't indexed end end -3. Call 'rake xapian::rebuild_index models="ModelName1 ModelName2"' to build the index +3. Call 'rake xapian:rebuild_index models="ModelName1 ModelName2"' to build the index the first time (you must specify all your indexed models). It's put in a development/test/production dir in acts_as_xapian/xapiandbs. From 1ac2a13f3218c6c872686976b0ec851159782ec2 Mon Sep 17 00:00:00 2001 From: Mike Boone Date: Thu, 15 May 2008 23:05:16 -0400 Subject: [PATCH 2/6] Fixed migration rollback text. 
--- README.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.txt b/README.txt index 70ef086..29e761c 100644 --- a/README.txt +++ b/README.txt @@ -136,7 +136,7 @@ object isn't indexed add_index :acts_as_xapian_jobs, [:model, :model_id], :unique => true end def self.down - remove_table :acts_as_xapian_jobs + drop_table :acts_as_xapian_jobs end end From 6ac85e6c604d29674708e04e88c0b629233e421a Mon Sep 17 00:00:00 2001 From: Mike Boone Date: Thu, 15 May 2008 23:31:00 -0400 Subject: [PATCH 3/6] Created generator for the acts_as_xapian database migration Created a generator for the acts_as_xapian database migration that can be run with script/generate acts_as_xapian. Updated the README.txt file to remove the published migration and show the generate command instead. --- README.txt | 19 ++++--------------- generators/acts_as_xapian/USAGE | 1 + .../acts_as_xapian_generator.rb | 13 +++++++++++++ .../acts_as_xapian/templates/migration.rb | 14 ++++++++++++++ 4 files changed, 32 insertions(+), 15 deletions(-) create mode 100644 generators/acts_as_xapian/USAGE create mode 100644 generators/acts_as_xapian/acts_as_xapian_generator.rb create mode 100644 generators/acts_as_xapian/templates/migration.rb diff --git a/README.txt b/README.txt index 29e761c..a74b30f 100644 --- a/README.txt +++ b/README.txt @@ -124,21 +124,10 @@ database * :if, either an attribute or a function which if returns false means the object isn't indexed -2. Make and run this database migration to create the ActsAsXapianJob model. - - class ActsAsXapianMigration < ActiveRecord::Migration - def self.up - create_table :acts_as_xapian_jobs do |t| - t.column :model, :string, :null => false - t.column :model_id, :integer, :null => false - t.column :action, :string, :null => false - end - add_index :acts_as_xapian_jobs, [:model, :model_id], :unique => true - end - def self.down - drop_table :acts_as_xapian_jobs - end - end +2. 
Generate a database migration to create the ActsAsXapianJob model: + + script/generate acts_as_xapian + rake db:migrate 3. Call 'rake xapian:rebuild_index models="ModelName1 ModelName2"' to build the index the first time (you must specify all your indexed models). It's put in a diff --git a/generators/acts_as_xapian/USAGE b/generators/acts_as_xapian/USAGE new file mode 100644 index 0000000..2d027c4 --- /dev/null +++ b/generators/acts_as_xapian/USAGE @@ -0,0 +1 @@ +./script/generate acts_as_xapian diff --git a/generators/acts_as_xapian/acts_as_xapian_generator.rb b/generators/acts_as_xapian/acts_as_xapian_generator.rb new file mode 100644 index 0000000..a1cd180 --- /dev/null +++ b/generators/acts_as_xapian/acts_as_xapian_generator.rb @@ -0,0 +1,13 @@ +class ActsAsXapianGenerator < Rails::Generator::Base + def manifest + record do |m| + m.migration_template 'migration.rb', 'db/migrate', + :migration_file_name => "create_acts_as_xapian" + end + end + + protected + def banner + "Usage: #{$0} acts_as_xapian" + end +end diff --git a/generators/acts_as_xapian/templates/migration.rb b/generators/acts_as_xapian/templates/migration.rb new file mode 100644 index 0000000..84a9dd7 --- /dev/null +++ b/generators/acts_as_xapian/templates/migration.rb @@ -0,0 +1,14 @@ +class CreateActsAsXapian < ActiveRecord::Migration + def self.up + create_table :acts_as_xapian_jobs do |t| + t.column :model, :string, :null => false + t.column :model_id, :integer, :null => false + t.column :action, :string, :null => false + end + add_index :acts_as_xapian_jobs, [:model, :model_id], :unique => true + end + def self.down + drop_table :acts_as_xapian_jobs + end +end + From 7bbdaa2510c89f848a2b1d74824137c71dbfe17d Mon Sep 17 00:00:00 2001 From: Mike Boone Date: Fri, 16 May 2008 01:29:00 -0400 Subject: [PATCH 4/6] Updated rake tasks to work in my environment. 
--- tasks/xapian.rake | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tasks/xapian.rake b/tasks/xapian.rake index 4489a0c..3b8d0ae 100644 --- a/tasks/xapian.rake +++ b/tasks/xapian.rake @@ -1,6 +1,8 @@ require 'rubygems' require 'rake' require 'rake/testtask' +require 'activerecord' +require File.dirname(__FILE__) + '/../lib/acts_as_xapian.rb' namespace :xapian do # Parameters - specify "flush=true" to save changes to the Xapian database @@ -16,7 +18,7 @@ namespace :xapian do # web server afterwards to make sure it gets the changes, rather than # still pointing to the old deleted database. desc 'Completely rebuilds Xapian search index (must specify all models)' - task :rebuild_index do + task (:rebuild_index => :environment) do raise "specify ALL your models with models=\"ModelName1 ModelName2\" as parameter" if ENV['models'].nil? ActsAsXapian.rebuild_index(ENV['models'].split(" ").map{|m| m.constantize}) end @@ -24,7 +26,7 @@ namespace :xapian do # Parameters - are models, query, offset, limit, sort_by_prefix, # collapse_by_prefix desc 'Run a query, return YAML of results' - task :query do + task (:query => :environment) do raise "specify models=\"ModelName1 ModelName2\" as parameter" if ENV['models'].nil? raise "specify query=\"your terms\" as parameter" if ENV['query'].nil? s = ActsAsXapian::Search.new(ENV['models'].split(" ").map{|m| m.constantize}, From 9fc8d615eb680b50b8e51042158db56c17957f1f Mon Sep 17 00:00:00 2001 From: Mike Boone Date: Fri, 16 May 2008 01:30:37 -0400 Subject: [PATCH 5/6] Modified to allow terms, values and texts to be optional. These changes allowed me to get a simple model working with just :texts specified and using the rake tasks. 
--- lib/acts_as_xapian.rb | 90 ++++++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 40 deletions(-) diff --git a/lib/acts_as_xapian.rb b/lib/acts_as_xapian.rb index a8944bb..43bf924 100644 --- a/lib/acts_as_xapian.rb +++ b/lib/acts_as_xapian.rb @@ -249,40 +249,44 @@ def ActsAsXapian.init_query_parser # and error check them - i.e. check for consistency between models @@query_parser.add_boolean_prefix("model", "M") @@query_parser.add_boolean_prefix("modelid", "I") - for term in options[:terms] - raise "Use a single capital letter for term code" if not term[1].match(/^[A-Z]$/) - raise "M and I are reserved for use as the model/id term" if term[1] == "M" or term[1] == "I" - raise "model and modelid are reserved for use as the model/id prefixes" if term[2] == "model" or term[2] == "modelid" - raise "Z is reserved for stemming terms" if term[1] == "Z" - raise "Already have code '" + term[1] + "' in another model but with different prefix '" + @@terms_by_capital[term[1]] + "'" if @@terms_by_capital.include?(term[1]) && @@terms_by_capital[term[1]] != term[2] - @@terms_by_capital[term[1]] = term[2] - @@query_parser.add_boolean_prefix(term[2], term[1]) + if options[:terms] + for term in options[:terms] + raise "Use a single capital letter for term code" if not term[1].match(/^[A-Z]$/) + raise "M and I are reserved for use as the model/id term" if term[1] == "M" or term[1] == "I" + raise "model and modelid are reserved for use as the model/id prefixes" if term[2] == "model" or term[2] == "modelid" + raise "Z is reserved for stemming terms" if term[1] == "Z" + raise "Already have code '" + term[1] + "' in another model but with different prefix '" + @@terms_by_capital[term[1]] + "'" if @@terms_by_capital.include?(term[1]) && @@terms_by_capital[term[1]] != term[2] + @@terms_by_capital[term[1]] = term[2] + @@query_parser.add_boolean_prefix(term[2], term[1]) + end end - for value in options[:values] - raise "Value index '"+value[1].to_s+"' must be an 
integer, is " + value[1].class.to_s if value[1].class != 1.class - raise "Already have value index '" + value[1].to_s + "' in another model but with different prefix '" + @@values_by_number[value[1]].to_s + "'" if @@values_by_number.include?(value[1]) && @@values_by_number[value[1]] != value[2] - - # date types are special, mark them so the first model they're seen for - if !@@values_by_number.include?(value[1]) - if value[3] == :date - value_range = Xapian::DateValueRangeProcessor.new(value[1]) - elsif value[3] == :string - value_range = Xapian::StringValueRangeProcessor.new(value[1]) - elsif value[3] == :number - value_range = Xapian::NumberValueRangeProcessor.new(value[1]) - else - raise "Unknown value type '" + value[3].to_s + "'" - end - - @@query_parser.add_valuerangeprocessor(value_range) - - # stop it being garbage collected, as - # add_valuerangeprocessor ref is outside Ruby's GC - @@value_ranges_store.push(value_range) - end - - @@values_by_number[value[1]] = value[2] - @@values_by_prefix[value[2]] = value[1] + if options[:values] + for value in options[:values] + raise "Value index '"+value[1].to_s+"' must be an integer, is " + value[1].class.to_s if value[1].class != 1.class + raise "Already have value index '" + value[1].to_s + "' in another model but with different prefix '" + @@values_by_number[value[1]].to_s + "'" if @@values_by_number.include?(value[1]) && @@values_by_number[value[1]] != value[2] + + # date types are special, mark them so the first model they're seen for + if !@@values_by_number.include?(value[1]) + if value[3] == :date + value_range = Xapian::DateValueRangeProcessor.new(value[1]) + elsif value[3] == :string + value_range = Xapian::StringValueRangeProcessor.new(value[1]) + elsif value[3] == :number + value_range = Xapian::NumberValueRangeProcessor.new(value[1]) + else + raise "Unknown value type '" + value[3].to_s + "'" + end + + @@query_parser.add_valuerangeprocessor(value_range) + + # stop it being garbage collected, as + # 
add_valuerangeprocessor ref is outside Ruby's GC + @@value_ranges_store.push(value_range) + end + + @@values_by_number[value[1]] = value[2] + @@values_by_prefix[value[2]] = value[1] + end end end end @@ -564,15 +568,21 @@ def xapian_index doc.add_term("M" + self.class.to_s) doc.add_term("I" + doc.data) - for term in self.xapian_options[:terms] - doc.add_term(term[1] + xapian_value(term[0])) + if self.xapian_options[:terms] + for term in self.xapian_options[:terms] + doc.add_term(term[1] + xapian_value(term[0])) + end end - for value in self.xapian_options[:values] - doc.add_value(value[1], xapian_value(value[0], value[3])) + if self.xapian_options[:values] + for value in self.xapian_options[:values] + doc.add_value(value[1], xapian_value(value[0], value[3])) + end end - for text in self.xapian_options[:texts] - ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields - ActsAsXapian.term_generator.index_text(xapian_value(text)) + if self.xapian_options[:texts] + for text in self.xapian_options[:texts] + ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields + ActsAsXapian.term_generator.index_text(xapian_value(text)) + end end ActsAsXapian.writable_db.replace_document("I" + doc.data, doc) From a23c6d1df44c4074dd957af6369c88b9d18b1660 Mon Sep 17 00:00:00 2001 From: Mike Boone Date: Fri, 16 May 2008 01:35:16 -0400 Subject: [PATCH 6/6] Updated documentation. --- lib/acts_as_xapian.rb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/acts_as_xapian.rb b/lib/acts_as_xapian.rb index 43bf924..8aff96d 100644 --- a/lib/acts_as_xapian.rb +++ b/lib/acts_as_xapian.rb @@ -88,7 +88,7 @@ # e.g. :texts => [ :title, :body ] # :values, things which have a range of values for indexing, or for collapsing. 
# Specify an array quadruple of [ field, identifier, prefix, type ] where -# - number is an arbitary numeric identifier for use in the Xapian database +# - identifier is an arbitrary numeric identifier for use in the Xapian database # - prefix is the part to use in search queries that goes before the : # - type can be any of :string, :number or :date # e.g. :values => [ [ :created_at, 0, "created_at" ], [ :size, 1, "size"] ] @@ -111,8 +111,10 @@ # :if, either an attribute or a function which if returns false means the # object isn't indexed # -# 2. Make and run the migration to create the ActsAsXapianJob model, code below -# (search for ActsAsXapianJob). +# 2. Make and run the migration to create the ActsAsXapianJob model: +# script/generate acts_as_xapian +# +# (Or see the code below under ActsAsXapianJob). # # 3. Call 'rake xapian::rebuild_index models="ModelName1 ModelName2"' to build the index # the first time (you must specify all your indexed models). It's put in a @@ -437,6 +439,7 @@ def results # Index # Offline indexing job queue model, create with this migration: + # (or use script/generate acts_as_xapian) # class ActsAsXapianMigration < ActiveRecord::Migration # def self.up # create_table :acts_as_xapian_jobs do |t| @@ -449,7 +452,7 @@ def results # end # # def self.down - # remove_table :acts_as_xapian_jobs + # drop_table :acts_as_xapian_jobs # end # end class ActsAsXapianJob < ActiveRecord::Base