Evaluating RAG (Retrieval-Augmented Generation) pipelines is crucial for assessing their performance. However, manually creating hundreds of QCA (Question-Context-Answer) samples from documents can be time-consuming and labor-intensive. Additionally, human-generated questions may struggle to reach the level of complexity required for a thorough evaluation, ultimately impacting the quality of the assessment. By using synthetic data generation, the developer time spent on the data aggregation process can be reduced by 90%.

In [22]:
import os
import Constants
# Turn on LangChain tracing and point it at the LangSmith endpoint.
os.environ.update({
    'LANGCHAIN_TRACING_V2': 'true',
    'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
})
# The API key is kept out of the notebook and read from the Constants module.
os.environ['LANGCHAIN_API_KEY'] = Constants.LANGCHAIN_API_KEY

In [23]:
# Hugging Face Hub token read from the Constants module. In the cells visible
# here it is only referenced by the commented-out HuggingFaceHub model setups.
HFHUB_API_KEY = Constants.HFHUB_API_KEY

In [24]:
import os
import pandas as pd
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from ragas.testset import TestsetGenerator
from langchain_community.llms import HuggingFaceHub
from langchain_chroma import Chroma
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader

In [25]:
from langchain_community.chat_models import ChatOllama
from ragas import evaluate
from langchain_community.embeddings import OllamaEmbeddings
# How to plug custom LLMs / embeddings into ragas: https://docs.ragas.io/en/latest/howtos/customisations/bring-your-own-llm-or-embs.html


In [26]:
# LLM handed to TestsetGenerator.from_langchain as its first argument.
# Earlier alternative using the Hugging Face Hub, kept for reference:
#data_generation_model = HuggingFaceHub(repo_id="gpt2", huggingfacehub_api_token=HFHUB_API_KEY,model_kwargs={"max_length": 200})
# NOTE(review): presumably needs an Ollama server with llama2 available -- confirm.
data_generation_model = ChatOllama(model="llama2:latest")

In [27]:
# LLM handed to TestsetGenerator.from_langchain as its second (critic) argument.
# Earlier alternative using the Hugging Face Hub, kept for reference:
#critic_model = HuggingFaceHub(repo_id="google/mt5-large", huggingfacehub_api_token=HFHUB_API_KEY,model_kwargs={"max_length": 200})
# NOTE(review): presumably needs an Ollama server with llama3 available -- confirm.
critic_model = ChatOllama(model="llama3:latest")

In [28]:
# Embedding backend: the BAAI/bge-small-en checkpoint, loaded on CPU, with
# embedding normalization requested at encode time.
model_name = "BAAI/bge-small-en"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)

embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

In [29]:
# Build the ragas test-set generator from the LangChain-compatible components
# configured in the previous cells.
generator = TestsetGenerator.from_langchain(
    data_generation_model,  # llama2 chat model defined above
    critic_model,           # llama3 chat model defined above
    embeddings,             # BGE embeddings defined above
)

In [30]:
# Relative weight of each question type; the weights add up to 1.0.
distributions = {
    'simple': 0.5,
    'multi_context': 0.4,
    'reasoning': 0.1,
}

In [31]:
# Gather the files matching "*.pdf" under ../Data, reading each one with PyPDFLoader.
loader = DirectoryLoader(
    "../Data",
    glob="*.pdf",
    loader_cls=PyPDFLoader,
)
docs = loader.load()  # list of Document objects

In [None]:
# Generate a 5-sample synthetic test set from the loaded PDFs.
# `docs` is already a list of Document objects, so it is passed through as-is.
from ragas.testset import evolutions

# ragas keys the question-type mix by evolution objects, whereas `distributions`
# is written with their names. Resolve each name to the matching object in
# ragas.testset.evolutions so the configured mix is applied rather than the
# library default; keys that are already evolution objects are kept unchanged.
evolution_weights = {
    getattr(evolutions, kind) if isinstance(kind, str) else kind: weight
    for kind, weight in distributions.items()
}
testset = generator.generate_with_langchain_docs(
    docs,
    5,
    distributions=evolution_weights,
)