#### Imports

In [1]:
import os
import json
from tqdm import tqdm

from configs import ConfigPath
from data_preprocessing.text_splitter import TextSplitter
from utils.utils import read_json_file
from llms.embedding_model import EmbeddingModel
from knowledge_graph.loader import GraphLoader
from knowledge_graph.crud import GraphCrud
from configs.config import ConfigEnv
from knowledge_graph.connection import Neo4jConnection
from llms.llm import LLM
from retrieval.tools.vector_search_tool import VectorSearchTool


#### Initializations

In [2]:
# Models shared by the rest of the notebook: the embedder is built first,
# then the chat LLM is resolved by its model name.
llm_model_name = "mixtral-8x7b-32768"
embedding_model = EmbeddingModel()
llm = LLM.from_name(model_name=llm_model_name)

2025-03-05 23:38:17,061 [DEBUG] embedding_model - CUDA is available, using GPU
2025-03-05 23:38:33,451 [DEBUG] embedding_model - Embedding model initialized: neuml/pubmedbert-base-embeddings


In [3]:
# Contents of pqa_labeled.json, read from the configured raw-data directory.
raw_data_path = os.path.join(ConfigPath.RAW_DATA_DIR, "pqa_labeled.json")
data = read_json_file(file_path=raw_data_path)

# Graph stack, built bottom-up: splitter, Neo4j connection (settings come
# from ConfigEnv), CRUD layer on top of the connection, then the loader that
# ties them together with the embedding model and the data.
text_splitter = TextSplitter()
neo4j_connection = Neo4jConnection(
    uri=ConfigEnv.NEO4J_URI,
    user=ConfigEnv.NEO4J_USER,
    password=ConfigEnv.NEO4J_PASSWORD,
    database=ConfigEnv.NEO4J_DB,
)
crud = GraphCrud(neo4j_connection=neo4j_connection)
graph_loader = GraphLoader(
    text_splitter=text_splitter,
    embedding_model=embedding_model,
    crud=crud,
    data=data,
)

# Retrieval tool wired to the LLM, the embedder and the same Neo4j connection
# used by the graph stack above. `return_direct` is left disabled.
vector_search_tool_kwargs = dict(
    llm=llm,
    embedding_model=embedding_model,
    neo4j_connection=neo4j_connection,
    return_direct=False,
)
vector_search_tool = VectorSearchTool(**vector_search_tool_kwargs)


2025-03-05 23:38:38,565 [DEBUG] connection - Connection successful!


In [4]:
# Send one question through the vector-search tool and print what it returns.
question = "Can tailored interventions increase mammography use among HMO women?"
answer = vector_search_tool.invoke(question)
print(answer)

2025-03-05 23:38:39,539 [INFO] vector_search_tool - Results: [{'content': 'Telephone counseling and tailored print communications have emerged as promising methods for promoting mammography screening. However, there has been little research testing, within the same randomized field trial, of the efficacy of these two methods compared to a high-quality usual care system for enhancing screening. This study addressed the question: Compared to usual care, is tailored telephone counseling more effective than tailored print materials for promoting mammography screening? Three-year randomized field trial. One thousand ninety-nine women aged 50 and older recruited from a health maintenance organization in North Carolina. Women were randomized to 1 of 3 groups: (1) usual care, (2) tailored print communications, and (3) tailored telephone counseling. Adherence to mammography screening based on self-reports obtained during 1995, 1996, and 1997. Compared to usual care alone, telephone counseling p

No.

The context provided does not contain information about tailored interventions increasing mammography use among HMO women. Instead, it discusses the efficacy of telephone counseling and tailored print materials compared to usual care in promoting mammography screening.


### Manual vector search (raw Cypher query)

In [None]:
# Search settings: which nodes to scan, how many hits to keep, and the
# minimum cosine score a hit must exceed.
context_label = "CONTEXT"
context_alias = "context"
context_index_name = "contextEmbeddings"
threshold = 0.5
k = 5

# Free-text probe and its embedding, computed with the shared embedder.
query = "Programmed cell death (PCD) is the regulated death of cells within an organism. "
query_embedding = embedding_model.embed_query(text=query)

# The alias and label are interpolated into the Cypher text here; $query, $k
# and $threshold stay as Cypher parameters to be supplied at execution time.
_query_lines = [
    f"MATCH ({context_alias}:{context_label})",
    f"WITH {context_alias},",
    f"    vector.similarity.cosine($query, {context_alias}.embedding) AS score",
    "    ORDER BY score DESCENDING",
    "    LIMIT $k WHERE score > $threshold",
    f"RETURN {context_alias}.text_content as content, score as score",
]
VECTOR_SEARCH_QUERY = "\n".join(_query_lines)
print(VECTOR_SEARCH_QUERY)

In [None]:
# Execute the similarity query built in the previous cell.
# Only the values the Cypher text references as $-parameters are passed
# ($k, $query, $threshold). The node alias and label were interpolated into
# VECTOR_SEARCH_QUERY by the f-string when it was built, so they are not
# query parameters and are not sent.
results = neo4j_connection.execute_query(
    VECTOR_SEARCH_QUERY,
    {
        "k": k,
        "query": query_embedding,
        "threshold": threshold,
    },
)

results

