In [6]:
from langchain_neo4j import Neo4jVector
from langchain_ollama import ChatOllama
from langchain_ollama.embeddings import OllamaEmbeddings

local_embeddings = OllamaEmbeddings(model="nomic-embed-text")

vector_store = Neo4jVector.from_existing_graph(
    url="bolt+ssc://10.20.13.89:7687",
    username="neo4j",
    password="password",  # noqa: S106
    embedding=local_embeddings,
    node_label="Resource",
    text_node_properties=["uri", "rdfs_comment"],
    embedding_node_property="embedding",
)

llm = ChatOllama(model="qwen2.5-coder:7b")

In [None]:
query = llm.invoke(
    "Identify the features in the following log. \
        Please output just the comma-separated names of the features, \
            without the value of the features or any other text. \
                Log: '2022-01-21 00:09:11 jhall/192.168.230.165:46011 VERIFY KU O'.",
)

In [8]:
query.content

'timestamp,username,ip_address,application_name,action'

In [9]:
res = vector_store.similarity_search(query.content, k=30)
res

[Document(metadata={'rdfs__comment': 'Event Start Time'}, page_content='\nuri: https://w3id.org/sepses/ns/log#time\nrdfs_comment: '),
 Document(metadata={'rdfs__comment': 'Name of the application that generated the event'}, page_content='\nuri: https://w3id.org/sepses/ns/log#appname\nrdfs_comment: '),
 Document(metadata={'rdfs__comment': 'Application name'}, page_content='\nuri: https://w3id.org/sepses/ns/log#app.name\nrdfs_comment: '),
 Document(metadata={'rdfs__comment': 'IPv4 address'}, page_content='\nuri: https://w3id.org/sepses/ns/log#address.ipv4\nrdfs_comment: '),
 Document(metadata={'rdfs__label': 'Application', 'rdfs__comment': 'Application'}, page_content='\nuri: https://w3id.org/sepses/ns/log#App\nrdfs_comment: '),
 Document(metadata={}, page_content='\nuri: http://purl.org/vocab/vann/preferredNamespacePrefix\nrdfs_comment: '),
 Document(metadata={'rdfs__comment': 'IPv6 address'}, page_content='\nuri: https://w3id.org/sepses/ns/log#address.ipv6\nrdfs_comment: '),
 Document(