# Setup

In [None]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy, # uses Embeddings
    context_precision, # uses Embeddings
    context_recall, # uses ground_truth dataset (only LLM used as a jugdge)
)
from datasets import Dataset

from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOllama
from ragas.run_config import RunConfig
import pandas as pd

# Modules

In [5]:
def extract_queries(
    url_source: str = (
        "https://docs.google.com/spreadsheets/d/e/2PACX-1vR1hUlRhTJQgNzSbTyRtDNh1mCrbfy0iUm6oiHK7oHb_iQQ5t7XCB_xyUCwoZ2fdg/pub?output=xlsx"
    ),
    sheet_name: str = "queries",
) -> pd.DataFrame:
    """Read the evaluation queries sheet into a DataFrame.

    Args:
        url_source: Path or URL of the Excel workbook. Defaults to the
            published Google Sheets export used by this notebook.
        sheet_name: Name of the worksheet to read. Defaults to ``"queries"``.

    Returns:
        The worksheet contents exactly as parsed by ``pd.read_excel``.
    """
    # Source and sheet are parameters so a local copy of the workbook (or a
    # different tab) can be used without editing this function.
    return pd.read_excel(url_source, sheet_name=sheet_name)


def transform_queries(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` so later edits never touch the caller's frame.

    No cleaning is applied yet; this is the hook where query transformations
    would be added.
    """
    return df.copy()


def load_queries() -> pd.DataFrame:
    """Fetch the queries sheet and pass it through the transform step."""
    return transform_queries(extract_queries())

In [None]:
def get_contexts_and_answers(
    chatbot: "RAG_Chatbot",
    questions: "list[str] | None" = None,
    top_k: int = 5,
) -> tuple[list[list[str]], list[str]]:
    """Run each question through the chatbot and collect contexts and answers.

    Args:
        chatbot: Object exposing ``_retrieved_relevant_docs``, ``_get_messages``
            and ``_generate_response``. The annotation is quoted so this
            function can be defined before ``RAG_Chatbot`` is imported.
        questions: Iterable of question strings. When omitted, falls back to
            the notebook-level ``queries["query"]`` column for backward
            compatibility; passing it explicitly avoids the hidden dependency.
        top_k: Value forwarded as ``top_k`` to the retrieval call.

    Returns:
        ``(retrieved_texts, model_answers)``: one entry per question, in input
        order. Each retrieved entry is whatever the chatbot's retrieval call
        returned; each answer is the concatenation of the streamed response
        chunks.
    """
    if questions is None:
        # Legacy behavior: read the global frame created in the Dataset cell.
        questions = queries["query"]

    retrieved_texts = []
    model_answers = []
    for query in questions:
        relevant_docs = chatbot._retrieved_relevant_docs(query, top_k=top_k)
        messages = chatbot._get_messages(query=query, context=relevant_docs)
        # The response is produced in chunks; join them into one string.
        model_answers.append("".join(chatbot._generate_response(messages)))
        retrieved_texts.append(relevant_docs)
    return retrieved_texts, model_answers

# Main

## Dataset

In [6]:
# Load the evaluation questions together with their reference answers.
queries = load_queries()
# Bare last expression so the notebook renders the frame as rich output.
queries

Unnamed: 0,query_id,query,answer
0,1,Paano magapply ng Japan Visa?,Complete all the requirements based on the pur...
1,2,Saan pwede magprocess ng Japan Visa?,Pwede mag process ng Japan Visa sa Attic Tours...
2,3,Magkano ang processing fees ng Japan Visa?,Ang processing fee ng Japan Visa sa Attic Tour...
3,4,Ano ang mga requirements pag mag tourist sa Ja...,"Kailangan ng Philippine Passport original, Col..."
4,5,Ano ang mga types ng Japan Visa?,"Ang mga types ng Japan Visa ay tourist, visiti..."


## Chatbot

In [7]:
# NOTE(review): project-local import sits mid-notebook; consider moving it into
# the Setup imports cell so every later cell can rely on it after a restart.
from rag_chatbot import RAG_Chatbot

chatbot = RAG_Chatbot()
# Judging by the log printed below, this recreates the "Requirements"
# collection and re-inserts the PDF chunks, so expect it to take a while.
chatbot.prepare_docs()



Requirements has been removed!
New Collection Requirements created!
1/28 📄 Processing: MULTIPLE-ENTRY VISA FOR TEMPORARY VISITOR.pdf


  return forward_call(*args, **kwargs)


Inserted object chunk_id=MULTIPLE-ENTRY VISA FOR TEMPORARY VISITOR.pdf_chunk_0 title=MULTIPLE-ENTRY VISA FOR TEMPORARY VISITOR
Inserted object chunk_id=MULTIPLE-ENTRY VISA FOR TEMPORARY VISITOR.pdf_chunk_1 title=MULTIPLE-ENTRY VISA FOR TEMPORARY VISITOR
Inserted object chunk_id=MULTIPLE-ENTRY VISA FOR TEMPORARY VISITOR.pdf_chunk_2 title=MULTIPLE-ENTRY VISA FOR TEMPORARY VISITOR
2/28 📄 Processing: FREQUENTLY ASKED QUESTIONS.pdf
Inserted object chunk_id=FREQUENTLY ASKED QUESTIONS.pdf_chunk_0 title=Frequently Asked Questions (Visa
Inserted object chunk_id=FREQUENTLY ASKED QUESTIONS.pdf_chunk_1 title=Frequently Asked Questions (Visa
Inserted object chunk_id=FREQUENTLY ASKED QUESTIONS.pdf_chunk_2 title=Frequently Asked Questions (Visa
3/28 📄 Processing: ATTIC TOURS.pdf
Inserted object chunk_id=ATTIC TOURS.pdf_chunk_0 title=ATTIC TOURS
Inserted object chunk_id=ATTIC TOURS.pdf_chunk_1 title=ATTIC TOURS
Inserted object chunk_id=ATTIC TOURS.pdf_chunk_2 title=ATTIC TOURS
4/28 📄 Processing: VISIT

In [None]:
# Run every evaluation question through the chatbot and keep, per question,
# the retrieved contexts and the generated answer.
retrieved_texts, model_answers = get_contexts_and_answers(chatbot)

## Evaluations

In [10]:
# Judge model used by ragas; temperature 0.0 to keep its output as repeatable
# as possible.
# NOTE(review): the evaluate() log further down shows repeated output-parser
# failures and answer_relevancy comes out as 0.0 — possibly this 1B model is
# too small to follow the required output format; try a larger judge.
eval_llm = ChatOllama(model="gemma3:1b", temperature=0.0)
ragas_llm = LangchainLLMWrapper(eval_llm)

In [11]:
# Multilingual embedding model, wrapped for use by ragas.
# NOTE(review): the cell output echoes this line as a warning (text truncated
# here) — presumably a deprecation notice for this import path; confirm.
eval_embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-base")
ragas_embeddings = LangchainEmbeddingsWrapper(eval_embeddings)

  eval_embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-base")


In [12]:
# Assemble the evaluation table: one row per question, pairing the chatbot's
# answer and retrieved contexts with the reference answer from the sheet.
# All four lists are expected to have the same length and ordering, since
# model_answers / retrieved_texts were produced by iterating queries["query"].
data = {
    "question": queries["query"].to_list(),
    "answer": model_answers,
    "contexts": retrieved_texts,
    "ground_truth": queries["answer"].to_list(),
}
ds = Dataset.from_dict(data)

In [17]:
# Metrics passed to evaluate() below.
metrics = [
    faithfulness,
    answer_relevancy, # uses Embeddings
    context_precision, # NOTE(review): was marked "uses Embeddings"; the ragas docs describe it as LLM-judged against ground_truth — confirm
    context_recall, # uses ground_truth dataset (only LLM used as a judge)
]

In [18]:
# Use a generous custom timeout: 7200 seconds (2 hours).
# The progress log below shows roughly 160-200 s per evaluation job with the
# local judge model, so a short timeout would cut jobs off.
# Adjust this value based on your hardware and dataset size.
run_config = RunConfig(timeout=7200)

# Score the dataset with the local judge LLM and embedding model.
result = evaluate(
    ds,
    metrics=metrics,
    llm=ragas_llm,
    embeddings=ragas_embeddings,
    run_config=run_config
)

  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
Evaluating:  70%|███████   | 14/20 [36:47<19:52, 198.83s/it]Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt context_recall_classification_prompt failed to parse output: The output parser failed to parse the output including retries.
Exception raised in Job[15]: RagasOutputParserException(The output parser failed to parse the output including retries.)
Evaluating:  85%|████████▌ | 17/20 [44:39<08:12, 164.12s/it]Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_f

In [20]:
# Aggregate score per metric.
# NOTE(review): answer_relevancy is exactly 0.0 and the evaluate() log reports
# parser failures, so these numbers may reflect unparsed judge output rather
# than answer quality — verify before drawing conclusions.
result

{'faithfulness': 0.7956, 'answer_relevancy': 0.0000, 'context_precision': 1.0000, 'context_recall': 0.8042}