#### Imports

In [1]:
import json
import os

from tqdm import tqdm

from configs import ConfigPath
from data_preprocessing.text_splitter import TextSplitter
from utils.utils import read_json_file
from llms.embedding_model import EmbeddingModel
from knowledge_graph.loader import GraphLoader
from knowledge_graph.crud import GraphCrud
from configs.config import ConfigEnv
from knowledge_graph.connection import Neo4jConnection
from llms.llm import LLM
from retrieval.tools.vector_search_tool import VectorSearchTool
from evalution.evaluation import Evaluator


#### Initializations

In [2]:
# models
# Provider and model name are named constants so the LLM used by every later cell
# can be changed in one place instead of being buried in the call below.
LLM_PROVIDER = "google"
LLM_MODEL_NAME = "gemini-2.0-flash"

embedding_model = EmbeddingModel()
llm = LLM.initialize_model(provider=LLM_PROVIDER, model_name=LLM_MODEL_NAME)

2025-03-06 22:55:14,407 [DEBUG] embedding_model - CUDA is available, using GPU
2025-03-06 22:55:33,462 [DEBUG] embedding_model - Embedding model initialized: neuml/pubmedbert-base-embeddings


In [3]:
# data: contents of pqa_labeled.json from the configured raw-data directory
pqa_labeled_path = os.path.join(ConfigPath.RAW_DATA_DIR, "pqa_labeled.json")
data = read_json_file(file_path=pqa_labeled_path)

# modules: Neo4j connection, with every setting taken from ConfigEnv
neo4j_connection = Neo4jConnection(
    uri=ConfigEnv.NEO4J_URI,
    user=ConfigEnv.NEO4J_USER,
    password=ConfigEnv.NEO4J_PASSWORD,
    database=ConfigEnv.NEO4J_DB,
)

# Graph-loading pipeline, left disabled in this notebook.
# NOTE(review): presumably the graph is already populated — confirm before
# trusting retrieval results.
# text_splitter = TextSplitter()
# crud = GraphCrud(neo4j_connection=neo4j_connection)
# graph_loader = GraphLoader(
#     text_splitter=text_splitter,
#     embedding_model=embedding_model,
#     crud=crud,
#     data=data,
# )

# Retrieval tool that is later handed to the Evaluator as its retriever.
vector_search_tool = VectorSearchTool(
    llm=llm,
    embedding_model=embedding_model,
    neo4j_connection=neo4j_connection,
    return_direct=False,
)


2025-03-06 22:55:37,668 [DEBUG] connection - Connection successful!


In [None]:
# Single-question smoke test of the vector search tool, currently disabled.
# NOTE(review): this cell still carries a saved output even though both lines are
# commented out — it looks like it came from an earlier run with them enabled.
# Either re-enable and re-run the cell, or clear the stale output.
# answer = vector_search_tool.invoke("Can tailored interventions increase mammography use among HMO women?")
# print(answer)

yes


In [None]:
# Build the evaluator over the ground-truth records, using the vector search tool
# as the retriever.
# NOTE(review): the saved log shows result generation starting as soon as the
# Evaluator is constructed (roughly 5 s per item over 1000 items), so expect this
# cell to run for over an hour — confirm against the Evaluator implementation.
evaluator = Evaluator(ground_truth_data=data, retriever=vector_search_tool)

2025-03-06 22:55:49,097 [INFO] evaluation - Generating results...
Generating results...:   6%|▋         | 64/1000 [05:07<1:16:50,  4.93s/it]

In [5]:
# Compute the evaluation metrics and show the returned dict as the cell output.
# NOTE(review): the saved output for this cell is timestamped earlier than the
# other logs in this notebook and its execution count is duplicated — it appears
# to come from a previous session; re-run after the evaluator cell finishes.
metrics = evaluator.compute_metrics()
metrics

2025-03-06 22:04:24,015 [INFO] evaluation - Computing metrics...
2025-03-06 22:04:24,020 [INFO] evaluation - Accuracy: 0.5000
2025-03-06 22:04:24,020 [INFO] evaluation - Precision: 0.5000
2025-03-06 22:04:24,022 [INFO] evaluation - Recall: 1.0000
2025-03-06 22:04:24,023 [INFO] evaluation - F1: 0.6667


{'accuracy': 0.5, 'precision': 0.5, 'recall': 1.0, 'f1': 0.6666666666666666}

In [5]:
# Minimal round-trip check that the chat model is reachable and responds.
probe_prompt = "hey"
llm.invoke(probe_prompt)

AIMessage(content='Hey there! How can I help you today?', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-7c4fea5e-cdb8-4395-9fd1-6f27607d40e4-0', usage_metadata={'input_tokens': 1, 'output_tokens': 11, 'total_tokens': 12, 'input_token_details': {'cache_read': 0}})