#### Imports

In [None]:
import os
from configs import ConfigPath
from utils.utils import read_json_file
from llms.embedding_model import EmbeddingModel
from configs.config import ConfigEnv
from knowledge_graph.connection import Neo4jConnection
from llms.llm import ChatModel
from retrieval.tools.vector_search_tool import VectorSimilaritySearchTool
from data_collection.reader import BioASQDataReader
from evaluation.retrieval_evaluation import run_evaluation_on_retrieved_chunks
from tqdm import tqdm

#### Initializations

In [None]:
# --- Models ---
# Embedding model, constructed with no arguments.
embedding_model = EmbeddingModel()

# Chat LLM that is handed to the retrieval tool below.
llm = ChatModel(
    provider="google",
    model_name="gemini-2.0-flash-lite",
).initialize_model()

# --- Neo4j connection (every setting is read from ConfigEnv) ---
neo4j_connection = Neo4jConnection(
    uri=ConfigEnv.NEO4J_URI,
    user=ConfigEnv.NEO4J_USER,
    password=ConfigEnv.NEO4J_PASSWORD,
    database=ConfigEnv.NEO4J_DB,
)

# --- Retriever ---
vector_search_tool = VectorSimilaritySearchTool(
    llm=llm,
    embedding_model=embedding_model,
    neo4j_connection=neo4j_connection,
    return_direct=True,
)
retriever_model_name = vector_search_tool.get_model_name()

# --- Benchmark data ---
# Only 3 samples are requested here (samples_limit=3).
data_path = os.path.join(ConfigPath.RAW_DATA_DIR, "bioasq_train.parquet")
reader = BioASQDataReader(samples_limit=3)
data = reader.read_parquet_file(file_path=data_path)
print(f"Data length: {len(data)}")

2025-04-01 23:57:33,572 [DEBUG] embedding_model - CUDA is available, using GPU
2025-04-01 23:57:53,555 [DEBUG] embedding_model - Embedding model initialized: neuml/pubmedbert-base-embeddings
2025-04-01 23:57:53,569 [DEBUG] llm - Initialized model gemini-2.0-flash-lite
2025-04-01 23:57:57,675 [DEBUG] connection - Connection successful!
2025-04-01 23:57:57,717 [INFO] reader - Limiting the number of rows to 3...
2025-04-01 23:57:57,719 [INFO] reader - Data file loaded with shape: (3, 4)


Data length: 3


In [3]:


def run_retriever(benchmark_data: list, retriever) -> dict:
    """Invoke the retriever once per benchmark sample and collect the outputs.

    Args:
        benchmark_data: Iterable of samples exposing ``.get``; the ``'id'``
            and ``'question'`` entries of each sample are read.
        retriever: Any object with an ``invoke(question)`` method.

    Returns:
        A dict mapping each sample's ``'id'`` to whatever ``retriever.invoke``
        returned for that sample's ``'question'``. When an id repeats, the
        result of the last sample carrying it is kept.
    """
    progress = tqdm(benchmark_data, desc="Executing retriever...")
    return {
        sample.get('id'): retriever.invoke(sample.get('question'))
        for sample in progress
    }

# Run the vector-search tool over every loaded benchmark sample.
results = run_retriever(benchmark_data=data, retriever=vector_search_tool)
# Display the whole id -> retrieved-chunks mapping as the cell output.
results

Executing retriever...: 100%|██████████| 3/3 [00:00<00:00,  3.21it/s]


{1682: [{'pmid': '26598646',
   'content': 'Covalent post-translational modifications (PTMs) of proteins can regulate the structural and functional state of a protein in the absence of primary changes in the underlying sequence. Common PTMs include phosphorylation, acetylation, and methylation. Histone proteins are critical regulators of the genome and are subject to a highly abundant and diverse array of PTMs. To highlight the functional complexity added to the proteome by lysine methylation signaling, here we will focus on lysine methylation of histone proteins, an important modification in the regulation of chromatin and epigenetic processes. We review the signaling pathways and functions associated with a single residue, H4K20, as a model chromatin and clinically important mark that regulates biological processes ranging from the DNA damage response and DNA replication to gene expression and silencing.',
   'score': 0.8392342925071716},
  {'pmid': '20735237',
   'content': 'The mol

In [None]:


# Score the retrieved chunks against the benchmark data and print the metrics.
# NOTE(review): `new_evaluator` is not used again in the visible cells — confirm it is needed.
metrics, new_evaluator = run_evaluation_on_retrieved_chunks(
    retrieval_results=results,
    benchmark_data=data
)
print(metrics)

defaultdict(<class 'dict'>, {'precision@1': 0.6666666666666666, 'recall@1': 0.056644880174291944, 'f1@1': 0.1037037037037037, 'mrr@1': 0.6666666666666666, 'ndcg@1': 0.6666666666666666, 'success@1': 0.6666666666666666, 'map@1': 0.056644880174291944, 'coverage@1': 0.06896551724137931, 'precision@3': 0.7777777777777777, 'recall@3': 0.24400871459694992, 'f1@3': 0.3323232323232323, 'mrr@3': 0.7777777777777777, 'ndcg@3': 0.6666666666666666, 'success@3': 1.0, 'map@3': 0.1699346405228758, 'coverage@3': 0.20689655172413793, 'precision@5': 0.6666666666666666, 'recall@5': 0.26361655773420484, 'f1@5': 0.3257575757575757, 'mrr@5': 0.7777777777777777, 'ndcg@5': 0.5959286110065073, 'success@5': 1.0, 'map@5': 0.20915032679738563, 'coverage@5': 0.2413793103448276, 'precision@10': 0.6222222222222222, 'recall@10': 0.4139433551198257, 'f1@10': 0.37762237762237766, 'mrr@10': 0.7777777777777777, 'ndcg@10': 0.5633377760765029, 'success@10': 1.0, 'map@10': 0.30980392156862746, 'coverage@10': 0.344827586206896

In [None]:
# NOTE(review): `RetrieverExecutor` and `answer_type` are not imported or defined in any
# cell visible in this notebook; on a fresh kernel this cell would raise NameError unless
# they are provided elsewhere — confirm.
# `results` is rebound here, replacing the retrieval output assigned in an earlier cell.
answer_pip_executor = RetrieverExecutor(source_qa_data=data)
results = answer_pip_executor.generate_answers(retriever=vector_search_tool, model_name=retriever_model_name, answer_type=answer_type)

In [None]:
import pandas as pd

# Tabulate the generated answers and add a boolean column that is True where the
# generated response is exactly equal to the actual response.
results_df = pd.DataFrame(results).assign(
    response_match=lambda frame: frame['actual_response'] == frame['generated_response']
)

In [None]:
# Preview the first rows of the results table.
results_df.head()

In [None]:
# Narrow the results table to the three columns of interest.
results_df[["context_found", "context_order", "response_match"]]

In [None]:
# NOTE(review): `Evaluator` is not imported in any cell visible in this notebook —
# confirm where it comes from before relying on Restart & Run All.
evaluator = Evaluator(ground_truth_data=data, short_answer_results=results)
evaluator.evaluate()

In [None]:
# Display the short-answer metrics computed by the evaluator built in the previous cell.
evaluator.compute_short_answer_metrics()

In [None]:
# Pull the 'content' field out of every entry stored under results['context'].
# NOTE(review): assumes `results` can be indexed by the key 'context' and yields
# dict-like entries — confirm against what generate_answers returns.
contexts = list(map(lambda entry: entry['content'], results['context']))
contexts

In [None]:
from ragas import EvaluationDataset


# One hand-written evaluation record: a fixed question, the contexts extracted
# in the previous cell, and a hard-coded response/reference pair.
dataset = [
    {
        "user_input": "Do mitochondria play a role in remodelling lace plant leaves during programmed cell death?",
        "retrieved_contexts": contexts,
        "response": "yes",
        "reference": "yes",  # expected (ground-truth) answer
    },
]
evaluation_dataset = EvaluationDataset.from_list(dataset)

In [None]:
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.metrics import ContextPrecision, ContextRecall, ResponseRelevancy, FactualCorrectness

# Wrap the notebook's LLM and embedding model in the ragas LangChain adapters.
# NOTE(review): `embedding_model` is the project's EmbeddingModel instance; whether it
# satisfies the interface LangchainEmbeddingsWrapper expects cannot be told from here — confirm.
evaluator_llm = LangchainLLMWrapper(llm)
evaluator_embedding = LangchainEmbeddingsWrapper(embedding_model)

# One instance of each ragas metric to compute.
context_precision = ContextPrecision()
context_recall = ContextRecall()
response_relevancy = ResponseRelevancy()
factual_correctness = FactualCorrectness()

# Evaluate the single-record dataset with all four metrics.
result = evaluate(
    dataset=evaluation_dataset,
    metrics=[context_precision, context_recall, response_relevancy, factual_correctness],
    llm=evaluator_llm,
    embeddings=evaluator_embedding
)

# Display the evaluation result as the cell output.
result

In [None]:
# read parquet data
import os
import pandas as pd
from configs.config import ConfigPath

from data_collection.reader import BioASQDataReader

In [None]:
# Reload the BioASQ training parquet file with a fresh reader; no samples_limit
# is passed this time. `data` is rebound to the result.
asq_reader = BioASQDataReader()
bioasq_train_path = os.path.join(ConfigPath.RAW_DATA_DIR, "bioasq_train.parquet")
data = asq_reader.read_parquet_file(file_path=bioasq_train_path)

In [None]:
# Find the first sample that lists PMID 20007090 among its relevant passage ids,
# then print that sample followed by its position in `data`.
# enumerate() supplies the position directly, so the list is not scanned a second
# time with an equality-based data.index(sample) lookup.
for position, sample in enumerate(data):
    if 20007090 in sample["relevant_passage_ids"]:
        print(sample)
        print(position)
        break

In [None]:
# Display whatever the reader's get_data_to_dict() returns for the loaded data.
asq_reader.get_data_to_dict()

In [None]:
from data_collection.fetcher import PubMedArticleFetcher

# Fetcher instance used in the next cell to retrieve an article by PMID.
fetcher = PubMedArticleFetcher()

In [None]:
# Fetch the article for PMID 20007090 (the same id searched for in `data` above, here
# passed as a string). Note that `results` is rebound once more by this cell.
results = fetcher.fetch_articles(pmids=['20007090'])