## Exploration of full-text indexing

In [1]:
import set_path      # Importing this module will add the project's home directory to sys.path

Added 'D:\Docs\- MY CODE\Brain Annex\BA-Win7' to sys.path


In [2]:
import os
import sys
import getpass

from neoaccess import NeoAccess

from BrainAnnex.modules.neo_schema.neo_schema import NeoSchema
from BrainAnnex.modules.full_text_indexing.full_text_indexing import FullTextIndexing
from BrainAnnex.modules.media_manager.media_manager import MediaManager

# Connect to the database

In [3]:
# Save your credentials here - or use the prompts given by the next cell
# WARNING: do not save/share/commit this notebook with a real password filled in below
host = ""      # EXAMPLES:  bolt://123.456.789.012   OR   neo4j://localhost
password = ""

In [4]:
# Prompt only for the values left blank in the credentials cell,
# so that anything saved there is honored rather than silently overwritten
if not host:
    print("To create a database connection, enter the host IP, but leave out the port number: (EXAMPLES:  bolt://1.2.3.4  OR  neo4j://localhost )\n")
    host = input("Enter host IP WITHOUT the port number.  EXAMPLE: bolt://123.456.789.012 ")

host = host.strip()     # Stray whitespace from typing/pasting would otherwise end up inside the URL

# Append port 7687 only if the host doesn't already end with a port number
# (this also makes the cell safe to re-run without growing ":7687:7687")
if not host.rsplit(":", 1)[-1].isdigit():
    host += ":7687"    # EXAMPLE of host value:  "bolt://123.456.789.012:7687"

if not password:
    password = getpass.getpass("Enter the database password:")

# The password is deliberately never echoed
print(f"\n=> Will be using: host='{host}', username='neo4j', password=**********")

To create a database connection, enter the host IP, but leave out the port number: (EXAMPLES:  bolt://1.2.3.4  OR  neo4j://localhost )



Enter host IP WITHOUT the port number.  EXAMPLE: bolt://123.456.789.012  bolt://123.456.789.012
Enter the database password: ········



=> Will be using: host='bolt://123.456.789.012:7687', username='neo4j', password=**********


In [8]:
# Open the database connection as user "neo4j".  Notice the debug option being OFF
db = NeoAccess(
    host=host,
    credentials=("neo4j", password),
    debug=False,
)

Attempting to connect to Neo4j database


In [9]:
print("Version of the Neo4j driver: ", db.version())

Version of the Neo4j driver:  4.4.11


# Explorations

In [10]:
# Verify that the database is empty: count ALL the nodes (the expected result is 0)
q = "MATCH (n) RETURN COUNT(n) AS number_nodes"

db.query(q, single_cell="number_nodes")

0

In [None]:
q = "MATCH (n:Notes) RETURN n LIMIT 3"

In [7]:
db.query(q)

[]

In [8]:
match = db.match(labels="Notes")

In [9]:
# Retrieve up to 3 of the nodes specified by `match` (i.e. nodes with the `Notes` label)
db.get_nodes(match, limit=3)

[]

In [11]:
# Store the database connection object as the `db` class attribute
# of both the schema class and the full-text indexing class
NeoSchema.db = db
FullTextIndexing.db = db

In [12]:
# NOTE(review): machine-specific absolute path - change it to suit your system.
#               Presumably this is the folder from which MediaManager.get_from_file() reads; confirm
MediaManager.set_media_folder("D:/tmp/")

In [13]:
FullTextIndexing.initialize_schema()

In [16]:
filename = "test1.txt"
file_contents = MediaManager.get_from_file(filename)
file_contents

'hello to the world !!! ?  Welcome to learning how she cooks with potatoes...'

In [17]:
word_list = FullTextIndexing.extract_unique_good_words(file_contents)
word_list

['hello', 'world', 'welcome', 'learning', 'cooks', 'potatoes']

In [18]:
content_item_id = NeoSchema.create_data_node(class_node="Content Item", properties = {"name": filename})
content_item_id

25

In [19]:
FullTextIndexing.new_indexing(content_item_id = content_item_id, unique_words = word_list)

In [20]:
filename = "test2.htm"
file_contents = MediaManager.get_from_file(filename)
file_contents

"<p>Let's make a <i>much better world</i>, shall we?  What do you say to that enticing prospect?</p>\n\n<p>Starting on a small scale &ndash; we&rsquo;ll learn cooking a potato well.</p>"

In [21]:
word_list = FullTextIndexing.extract_unique_good_words(file_contents)
word_list

['world', 'say', 'enticing', 'prospect', 'scale', 'learn', 'cooking', 'potato']

In [22]:
content_item_id = NeoSchema.create_data_node(class_node="Content Item", properties = {"name": filename})
content_item_id

0

In [23]:
FullTextIndexing.new_indexing(content_item_id = content_item_id, unique_words = word_list)

In [30]:
def search_word(word :str):
    """
    Look up the names of all the `Content Item` nodes that are linked, thru an `Indexer` node,
    to at least one `Word` node whose name contains the given string.
    The search string is converted to lower case by the query before the comparison.

    :param word:    The string to search for (it may be just a part of an indexed word)
    :return:        The values of the `name` property of the matching `Content Item` nodes,
                        as returned by db.query() with the `single_column` option
    """
    # The search string is passed as a query parameter ($word) rather than being spliced
    # into the Cypher text: quotes or Cypher fragments inside `word` can no longer
    # break, or alter the meaning of, the query
    q = '''MATCH (w:Word)-[:occurs]->(:Indexer)<-[:has_index]-(ci:`Content Item`)
         WHERE w.name CONTAINS toLower($word)
         RETURN ci.name AS content_name
         '''
    result = db.query(q, data_binding={"word": word}, single_column="content_name")
    return result

In [31]:
search_word("hello")

['test1.txt']

In [32]:
search_word("world")

['test2.htm', 'test1.txt']

In [33]:
search_word("POTATO")

['test2.htm', 'test1.txt']

In [34]:
search_word("POTATOES")

['test1.txt']

In [35]:
search_word("Learn")

['test2.htm', 'test1.txt']

In [37]:
search_word("Learning")

['test1.txt']