From e86e250a2d4cb0debb7a3631c16341341b0be11f Mon Sep 17 00:00:00 2001 From: andreas Date: Wed, 3 Mar 2010 00:01:51 +0100 Subject: [PATCH] Updated IMDB example to use migrations. Updated som documentation. [#111 state:resolved] [#108 state:resolved] --- README.rdoc | 7 +++ examples/imdb/1_create_neo_db.rb | 66 +++++++++++++++++++++++ examples/imdb/2_index_db.rb | 21 ++++++++ examples/imdb/README | 11 +++- examples/imdb/create_neo_db.rb | 70 ------------------------- examples/imdb/find_actors.rb | 61 ++++++++++++++------- examples/imdb/model.rb | 6 --- lib/lucene/config.rb | 16 ++---- lib/lucene/index.rb | 8 ++- lib/neo4j/indexer.rb | 26 --------- lib/neo4j/mixins/java_property_mixin.rb | 3 +- lib/neo4j/mixins/migration_mixin.rb | 2 +- test/neo4j/batch_inserter_spec.rb | 7 +++ 13 files changed, 163 insertions(+), 141 deletions(-) create mode 100644 examples/imdb/1_create_neo_db.rb create mode 100644 examples/imdb/2_index_db.rb delete mode 100644 examples/imdb/create_neo_db.rb diff --git a/README.rdoc b/README.rdoc index a2542981d..304c0a152 100644 --- a/README.rdoc +++ b/README.rdoc @@ -1491,12 +1491,19 @@ The neo database starts at version 0 by default. If the code above has been loaded before the neo database starts it will automatically upgrade to version 1 (running all the migrations to the higest migration available). You can force the neo to go to a specific version by using Neo4j#migrate! method. +For more information see the example/imdb application or the RSpecs. === Lazy Migration The example above can also be run as lazy migration. i.e. perform the upgrade/downgrade when the node is loaded instead of all at once. The following example demonstrates this feature: + class Person + include Neo4j::NodeMixin + include Neo4j::MigrationMixin # you need to include this in order to use lazy migrations + ... + end + Person.migration 1, :split_name do up do surname = self[:name].split[0] diff --git a/examples/imdb/1_create_neo_db.rb b/examples/imdb/1_create_neo_db.rb new file mode 100644 index 000000000..cb9190c79 --- /dev/null +++ b/examples/imdb/1_create_neo_db.rb @@ -0,0 +1,66 @@ +IMDB_FILE = 'data/test-actors.list' + +Neo4j.migration 1, "Create DB by parsing IMDB file" do + up do + puts "Migration 1, processing #{IMDB_FILE} file ..." + Neo4j::Transaction.run do + movies = {} + current_actor = nil + actors = 0 + no_films = 0 + + File.open(IMDB_FILE).each_line do |line| + next if line.strip.empty? + + tab_items = line.split("\t") + + unless tab_items.empty? + if !tab_items[0].empty? + current_actor = Actor.new + current_actor.name = tab_items.shift.strip + actors += 1 +# puts "Parse new actor no. #{actors} '#{current_actor.name}'" + end + tab_items.shift + + film = tab_items.shift.strip + + # already created film ? + movie = movies[film] + if (movie.nil?) + movie = Movie.new + movie.title = film + movie.year = /\((\d+)(\/.)?\)/.match(film)[1] + movies[film] = movie +# puts "Created new film #{film}" + no_films += 1 + end + + role = tab_items.shift + roleNode = current_actor.acted_in.new(movie) + + unless (role.nil?) + role.strip! + # remove [] + role.slice!(0) + role.chop! + title, character = role.split('-') + roleNode.title = title.strip unless title.nil? + roleNode.character = character.strip unless character.nil? + end + + #puts "Actor: '#{current_actor}' Film '#{film}' Year '#{year}' Title '#{title}' Character '#{character}'" + end + end + puts "created #{actors} actors and #{no_films} films" + end + end + + down do + puts "deleting all movies and actors" + Neo4j::Transaction.run do + Actor.all.each {|a| a.del} + Movie.all.each {|m| m.del} + end + end +end diff --git a/examples/imdb/2_index_db.rb b/examples/imdb/2_index_db.rb new file mode 100644 index 000000000..e86f1e2bd --- /dev/null +++ b/examples/imdb/2_index_db.rb @@ -0,0 +1,21 @@ +Neo4j.migration 2, "Index DB" do + up do + + puts "Migration 2, Index DB on #{Lucene::Config[:storage_path]}" + + Neo4j::Transaction.run do + puts "Creating lucene index ..." + Actor.index :name, :tokenized => true + Actor.update_index + end + # only possible to access and query the index after the transaction commits + end + + down do + puts "removing lucene index" + Actor.remove_index :name + # Actor.update_index # maybe nicer way of deleting indexes - hmm, does it work ? + require 'fileutils' + FileUtils.rm_rf Lucene::Config[:storage_path] # quick and dirty way of killing the lucene index + end +end diff --git a/examples/imdb/README b/examples/imdb/README index 85b4bea0f..2611666a6 100644 --- a/examples/imdb/README +++ b/examples/imdb/README @@ -1,5 +1,12 @@ Shows how to store and query a neo4j database - 1. Download the database type: ./install.sh -2. Create the database: jruby create_neo_db.rb +2. Run the application: jruby find_actors.rb willis + +The find_actors.rb uses migration which means that the first time the it is used it will take +a while to process the imdb file and create the index. + +Notice that you can play around with migrations by require the 'find_actor' in JIRB and then for example + Neo4j.migrate! 1 +which will delete the lucene index (migration number 2) + diff --git a/examples/imdb/create_neo_db.rb b/examples/imdb/create_neo_db.rb deleted file mode 100644 index c9e287b1d..000000000 --- a/examples/imdb/create_neo_db.rb +++ /dev/null @@ -1,70 +0,0 @@ -$LOAD_PATH << File.expand_path(File.dirname(__FILE__) + "/../../lib") - -require "rubygems" -require "neo4j" -require "model" - -def parse_actors(file) - movies = {} - current_actor = nil - actors = 0 - no_films = 0 - - File.open(file).each_line do |line| - next if line.strip.empty? - - tab_items = line.split("\t") - - unless tab_items.empty? - if !tab_items[0].empty? - current_actor = Actor.new - current_actor.name = tab_items.shift.strip - actors += 1 - puts "Parse new actor no. #{actors} '#{current_actor.name}'" - end - tab_items.shift - - film = tab_items.shift.strip - - # already created film ? - movie = movies[film] - if (movie.nil?) - movie = Movie.new - movie.title = film - movie.year = /\((\d+)(\/.)?\)/.match(film)[1] - movies[film] = movie - puts "Created new film #{film}" - no_films += 1 - end - - role = tab_items.shift - roleNode = current_actor.acted_in.new(movie) - - unless (role.nil?) - role.strip! - # remove [] - role.slice!(0) - role.chop! - title, character = role.split('-') - roleNode.title = title.strip unless title.nil? - roleNode.character = character.strip unless character.nil? - end - - #puts "Actor: '#{current_actor}' Film '#{film}' Year '#{year}' Title '#{title}' Character '#{character}'" - end - end - puts "created #{actors} actors and #{no_films} films" -end - - -#Neo4j::Config[:storage_path] = DB_NEO_DIR -Neo4j.start - -t1 = Time.now -Neo4j::Transaction.run do - parse_actors('data/test-actors.list') -end - -# For me it takes 33.78 sec -puts "Created database in #{Time.now - t1} seconds" -Neo4j.stop diff --git a/examples/imdb/find_actors.rb b/examples/imdb/find_actors.rb index fec2d015d..dcd34a094 100644 --- a/examples/imdb/find_actors.rb +++ b/examples/imdb/find_actors.rb @@ -1,29 +1,54 @@ $LOAD_PATH << File.expand_path(File.dirname(__FILE__) + "/../../lib") -#require "rubygems" +require "rubygems" require "neo4j" + +# we have to configure these before the model is loaded +Lucene::Config[:store_on_file] = true +Lucene::Config[:storage_path] = "tmp/lucene" + + require "model" +require "neo4j/extensions/reindexer" -#Neo4j::Config[:storage_path] = DB_NEO_DIR -Neo4j.start -Neo4j::Transaction.run do - Actor.index :name, :tokenized => true - Actor.update_index -end -# have to let the previous transaction finish in order to lucene indexing -# to take place +# Keep lucene index on file system instead of in memory + -Neo4j::Transaction.run do - puts "Find all actors named willis" - result = Actor.find(:name => "willis") #, Bruce") +# Load Migrations +# Create Database +require '1_create_neo_db' - puts "Found #{result.size} actors" - result.each {|x| puts x} +# just for fun I have two migrations - first one for importing the database and second for indexing it. +require '2_index_db' - willis = result[0] - puts "#{willis} acted in:" - willis.rels.outgoing.each {|r| puts r.to_s } +def find_actor(name) + Neo4j::Transaction.run do + puts "Find all actors named #{name}" + result = Actor.find(:name => name) - willis.acted_in.each { |movie| puts movie } + puts "Found #{result.size} actors" + result.each {|x| puts "#{x.neo_id}\t#{x}"} + end end + +def find_movies(neo_id) + Neo4j::Transaction.run do + actor = Neo4j.load_node(neo_id) + puts "No actor found with neo id #{neo_id}" if actor.nil? + return if actor.nil? + + puts "#{actor} acted in:" + actor.acted_in_rels.each {|r| puts "Movie #{r.end_node.title} title: #{r.title}"} + end +end + +Neo4j.start +if (ARGV.size == 1) + find_actor(ARGV[0]) +elsif ARGV.size == 2 && ARGV[0] == "-m" + find_movies(ARGV[1]) +else + puts "Usage: jruby find_actors.rb [-m] \n\n -m \tfinds the movies for the given actor neo_id" +end + Neo4j.stop diff --git a/examples/imdb/model.rb b/examples/imdb/model.rb index 35f47dec5..672eedb1f 100644 --- a/examples/imdb/model.rb +++ b/examples/imdb/model.rb @@ -1,9 +1,3 @@ -#DB_NEO_DIR = File.expand_path(File.dirname(__FILE__) + "/db/neo") -#DB_LUCENE_DIR = File.expand_path(File.dirname(__FILE__) + "/db/lucene") - -require 'rubygems' -require 'neo4j' -require "neo4j/extensions/reindexer" class Movie; end diff --git a/lib/lucene/config.rb b/lib/lucene/config.rb index c6bd04410..e8a1a99e9 100644 --- a/lib/lucene/config.rb +++ b/lib/lucene/config.rb @@ -8,6 +8,11 @@ module Lucene # as specific configuration for each index (TODO). # This code is copied from merb-core/config.rb. # + # Contains three default configurations (Config.defaults) + # * :store_on_file:: default false, which will only keep the index in memory + # * :id_field:: default :id + # * :storage_path:: where the index is kept on file system if stored as a file (instead of just in memory) + # class Config class << self # Returns the hash of default config values for lucene. @@ -15,7 +20,6 @@ class << self # ==== Returns # Hash:: The defaults for the config. # - # :api: private def defaults @defaults ||= { :store_on_file => false, @@ -38,7 +42,6 @@ def defaults # ==== Returns # nil # - # :api: public def use @configuration ||= {} yield @configuration @@ -52,7 +55,6 @@ def use # key:: The key to set the parameter for. # val:: The value of the parameter. # - # :api: public def []=(key, val) (@configuration ||= setup)[key] = val end @@ -63,8 +65,6 @@ def []=(key, val) # ==== Parameters # key:: The key of the config entry value we want # - # :api: public - # def [](key) (@configuration ||= setup)[key] end @@ -78,7 +78,6 @@ def [](key) # ==== Returns # Object:: The value of the removed entry. # - # :api: public def delete(key) @configuration.delete(key) end @@ -90,7 +89,6 @@ def delete(key) # ==== Returns # nil # - # :api: private def delete_all @configuration = nil IndexInfo.delete_all @@ -107,7 +105,6 @@ def delete_all # ==== Returns # Object:: The value of the configuration parameter or the default. # - # :api: public def fetch(key, default) @configuration.fetch(key, default) end @@ -117,7 +114,6 @@ def fetch(key, default) # ==== Returns # The configuration as a hash. # - # :api: private def setup() @configuration = {} @configuration.merge!(defaults) @@ -130,7 +126,6 @@ def setup() # ==== Returns # Hash:: The config as a hash. # - # :api: public def to_hash @configuration end @@ -140,7 +135,6 @@ def to_hash # ==== Returns # String:: The config as YAML. # - # :api: public def to_yaml require "yaml" @configuration.to_yaml diff --git a/lib/lucene/index.rb b/lib/lucene/index.rb index 5cf12ab34..5bd5c8cd4 100644 --- a/lib/lucene/index.rb +++ b/lib/lucene/index.rb @@ -18,7 +18,7 @@ class IdFieldMissingException < StandardError; end # Represents a Lucene Index. # The index is written/updated only when the commit method is called. # This is done since writing to the index file should be done as a batch operation. - # (Performace will be bad otherwise). + # (Performance will be bad otherwise). # class Index attr_reader :path, :uncommited @@ -58,7 +58,6 @@ def field_infos # ==== Returns # Returns a new or an already existing Index # - # :api: public def self.new(path) # make sure no one modifies the index specified at given path lock(path).synchronize do @@ -114,7 +113,6 @@ def self.clear(path) # ==== Returns # Returns the index instance so that this method can be chained # - # :api: public def <<(key_values) doc = Document.new(field_infos, key_values) lock.synchronize do @@ -252,7 +250,7 @@ def exist? # private - def update_documents + def update_documents # :nodoc: index_writer = org.apache.lucene.index.IndexWriter.new(@index_info.storage, @index_info.analyzer, ! exist?) @uncommited.each_value do |doc| # removes the document and adds it again @@ -264,7 +262,7 @@ def update_documents end - def delete_documents + def delete_documents # :nodoc: return unless exist? # if no index exists then there is nothing to do writer = org.apache.lucene.index.IndexWriter.new(@index_info.storage, @index_info.analyzer, false) diff --git a/lib/neo4j/indexer.rb b/lib/neo4j/indexer.rb index dadf09e91..7f2b4075c 100644 --- a/lib/neo4j/indexer.rb +++ b/lib/neo4j/indexer.rb @@ -19,7 +19,6 @@ def initialize(indexed_class, query_for_nodes) end # Returns the Indexer for the given Neo4j::NodeMixin class - # :api:private def self.instance(clazz, query_for_nodes = true) @instances ||= {} @instances[clazz.root_class] ||= Indexer.new(clazz.root_class, query_for_nodes) @@ -33,28 +32,23 @@ def self.remove_instance(clazz) # (Re)index the given node - # :api: private def self.index(node) indexer = instance(node.class) indexer.index(node) end - # :api: private def find(query,block) SearchResult.new lucene_index, query, @query_for_nodes, &block end - # :api: private def add_index_on_property(prop) @property_indexer.properties << prop.to_sym end - # :api: private def remove_index_on_property(prop) @property_indexer.properties.delete prop.to_sym end - # :api: private def add_index_in_relationship_on_property(updater_clazz, rel_name, rel_type, prop, namespace_type) unless relationship_indexer_for?(namespace_type) indexer = new_relationship_indexer_for(namespace_type, rel_name.to_sym) @@ -65,7 +59,6 @@ def add_index_in_relationship_on_property(updater_clazz, rel_name, rel_type, pro relationship_indexer_for(namespace_type).properties << prop.to_sym end - # :api: private def index(node) document = {:id => node.neo_id } @@ -76,49 +69,40 @@ def index(node) lucene_index << document end - # :api: private def delete_index(node) lucene_index.delete(node.neo_id) end - # :api: private def lucene_index Lucene::Index.new(@index_id) end - # :api: private def field_infos lucene_index.field_infos end - # :api: private def on_property_changed(node, prop) @relationship_indexers.values.each {|indexer| indexer.on_property_changed(node, prop.to_sym)} @property_indexer.on_property_changed(node,prop.to_sym) end - # :api: private def on_relationship_created(node, rel_type) @relationship_indexers.values.each {|indexer| indexer.on_relationship_created(node, rel_type.to_sym)} end - # :api: private def on_relationship_deleted(node, rel_type) @relationship_indexers.values.each {|indexer| indexer.on_relationship_deleted(node, rel_type.to_sym)} end - # :api: private def relationship_indexer_for(rel_type) @relationship_indexers[rel_type.to_sym] end - # :api: private def relationship_indexer_for?(rel_type) !relationship_indexer_for(rel_type.to_sym).nil? end - # :api: private def new_relationship_indexer_for(rel_type, rel_name) @relationship_indexers[rel_type.to_sym] = RelationshipIndexer.new(rel_name.to_sym, rel_type.to_sym) end @@ -126,7 +110,6 @@ def new_relationship_indexer_for(rel_type, rel_name) end - # :api: private class PropertyIndexer #:nodoc: attr_reader :properties @@ -134,12 +117,10 @@ def initialize @properties = [] end - # :api: private def on_property_changed(node, prop) Indexer.index(node) if @properties.include?(prop) end - # :api: private def update_document(document, node) @properties.each {|prop| document[prop.to_sym] = node.send(prop)} end @@ -153,7 +134,6 @@ def update_document(document, node) # index document with key field 'd.y' and values of property y of all nodes in the # relationship 'd' # - # :api: private class RelationshipIndexer #:nodoc: attr_reader :rel_type, :properties @@ -163,18 +143,15 @@ def initialize(rel_name, rel_type) @rel_name = rel_name end - # :api: private def on_property_changed(node, prop) # make sure we're interested in indexing this property reindex_related_nodes(node) if @properties.include?(prop) end - # :api: private def on_relationship_deleted(node, rel_type) Indexer.index(node) if @rel_type == rel_type end - # :api: private def on_relationship_created(node, rel_type) # make sure we're interested in indexing this relationship if @rel_type == rel_type @@ -183,7 +160,6 @@ def on_relationship_created(node, rel_type) end end - # :api: private def reindex_related_nodes(node) related_nodes = node.rels.both(@rel_type).nodes related_nodes.each do |related_node| @@ -191,12 +167,10 @@ def reindex_related_nodes(node) end end - # :api: private def index_key(property) "#@rel_name.#{property}".to_sym end - # :api: private def update_document(document, node) relationships = node.rels.both(@rel_type).nodes relationships.each do |other_node| diff --git a/lib/neo4j/mixins/java_property_mixin.rb b/lib/neo4j/mixins/java_property_mixin.rb index 5f84429f3..86a5e377d 100644 --- a/lib/neo4j/mixins/java_property_mixin.rb +++ b/lib/neo4j/mixins/java_property_mixin.rb @@ -135,7 +135,6 @@ def ==(o) # Same as neo_id but returns a String instead of a Fixnum. # Used by Ruby on Rails. # - # :api: public def to_param neo_id.to_s end @@ -152,7 +151,7 @@ def wrapper? property?(CLASSNAME_PROPERTY) end - def wrapper_class + def wrapper_class # :nodoc: return nil unless wrapper? classname = get_property(CLASSNAME_PROPERTY) classname.split("::").inject(Kernel) do |container, name| diff --git a/lib/neo4j/mixins/migration_mixin.rb b/lib/neo4j/mixins/migration_mixin.rb index 89e470573..23d6c0aa0 100644 --- a/lib/neo4j/mixins/migration_mixin.rb +++ b/lib/neo4j/mixins/migration_mixin.rb @@ -137,7 +137,7 @@ class << self # version:: optional, if given then will set the property db_version on the context def execute(context, version=nil, &block) context.instance_eval &block - context[:db_version] = version if version + Neo4j::Transaction.run { context[:db_version] = version} if version end end end diff --git a/test/neo4j/batch_inserter_spec.rb b/test/neo4j/batch_inserter_spec.rb index 4c1346c8b..221f7ad93 100644 --- a/test/neo4j/batch_inserter_spec.rb +++ b/test/neo4j/batch_inserter_spec.rb @@ -9,6 +9,13 @@ before(:all) { stop } after(:each) { stop } + it "should yield the Java Batch Inserter object" do + Neo4j::BatchInserter.new do |b| + b.should respond_to(:createNode) + b.should respond_to(:createRelationship) + end + end + it "should accept Neo4j::Node.new and Neo4j::Relationship.new" do class Foo include Neo4j::NodeMixin