In [1]:
import json
import os
from dotenv import load_dotenv
from ragas import EvaluationDataset, evaluate
from ragas.llms import LangchainLLMWrapper
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)
from langchain_ollama import ChatOllama, OllamaEmbeddings
from ragas import evaluate

In [3]:
def load_data_from_json(file_path):
    """Load and return the parsed contents of a single JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialized document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # locale encoding, which can mis-decode or reject non-ASCII text.
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)

def load_data_from_multiple_files(directory_path, file_extension="json"):
    """Load every JSON or JSONL file found directly inside a directory.

    Args:
        directory_path: Directory to scan. Sub-directories are not descended
            into.
        file_extension: ``"json"`` or ``"jsonl"``. A leading dot (``".json"``)
            is accepted. Matching is case-sensitive.

    Returns:
        A list of parsed records, built from the files in sorted filename
        order. For ``"json"`` there is one entry per file (the whole parsed
        document). For ``"jsonl"`` there is one entry per non-blank line of
        each file. Any other extension yields an empty list.

    Raises:
        OSError: If the directory or a matching file cannot be read.
        json.JSONDecodeError: If a matching file contains invalid JSON.
    """
    extension = file_extension.lstrip(".")
    # Require the dot so a name such as "plainjson" is not treated as JSON.
    suffix = "." + extension

    data = []
    # os.listdir returns entries in arbitrary order; sort so the resulting
    # list is reproducible across runs and platforms.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith(suffix):
            continue
        file_path = os.path.join(directory_path, filename)
        # A directory whose name happens to carry the suffix is not a data file.
        if not os.path.isfile(file_path):
            continue
        if extension == "json":
            data.append(load_data_from_json(file_path))
        elif extension == "jsonl":
            # One JSON document per line; blank lines are skipped.
            with open(file_path, "r", encoding="utf-8") as f:
                data.extend(json.loads(line) for line in f if line.strip())
    return data

In [7]:
# Location and type of the QA result files, plus the name of the results file.
directory_path = "qa_results_simple_retriever"
file_extension = "json"
evaluation_file = "evaluation_results_simple_retriever.json"

# Read every matching file from the directory.
print("Loading data...")
data = load_data_from_multiple_files(directory_path, file_extension=file_extension)

# Re-key each loaded record to the field names passed to EvaluationDataset.
dataset = [
    {
        "user_input": record["question"],
        "retrieved_contexts": record["retrieved_documents"],
        "response": record["generated_answer"],
        "reference": record["ground_truth_answer"],
    }
    for record in data
]
evaluation_dataset = EvaluationDataset.from_list(dataset)

Loading data...


In [8]:
# The chat model and the embeddings are both built from the same Ollama model name.
ollama_model_name = "llama3.2"
langchain_llm = ChatOllama(model=ollama_model_name)
langchain_embeddings = OllamaEmbeddings(model=ollama_model_name)

In [None]:
# Metrics handed to evaluate(), in the order they are listed.
ragas_metrics = [
    context_precision,
    faithfulness,
    answer_relevancy,
    context_recall,
]

# Score the dataset using the Ollama chat model and embeddings defined above.
result = evaluate(
    evaluation_dataset,
    metrics=ragas_metrics,
    llm=langchain_llm,
    embeddings=langchain_embeddings,
)

In [None]:
# Bare last-line expression: shows the value returned by evaluate() as this cell's output.
result