In [1]:
from dotenv import load_dotenv
import pandas as pd
import os
from tqdm import tqdm
import time 
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import DirectoryLoader


# Load settings from a local .env file (python-dotenv) before reading the key.
load_dotenv()

# Evaluates to None when the variable is not set in the environment.
# NOTE(review): this name is not referenced by the cells visible here; the
# OpenAI clients presumably pick the key up from the environment — confirm.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [3]:
# Source documents live in a directory relative to this notebook's location.
documents_dir = "../data/documents"

# Build the loader first, then materialise the documents it finds.
loader = DirectoryLoader(documents_dir)
documents = loader.load()

In [4]:
# Test-set generator backed by OpenAI models.
# The same chat model serves as both generator and critic
# (an earlier revision used "gpt-3.5-turbo-16k" for the generator).
chat_model_name = "gpt-4o-mini"

generator_llm = ChatOpenAI(model=chat_model_name)
critic_llm = ChatOpenAI(model=chat_model_name)
embeddings = OpenAIEmbeddings()

# Wire the two LLMs and the embedding model into a ragas TestsetGenerator.
generator = TestsetGenerator.from_langchain(
    generator_llm=generator_llm,
    critic_llm=critic_llm,
    embeddings=embeddings,
)

In [5]:
# Generate a synthetic test set (test_size=10) from the loaded documents.
# `distributions` maps each ragas evolution type to its share of the
# generated questions; the three weights here sum to 1.0.
testset = generator.generate_with_langchain_docs(
    documents,
    test_size=10,
    distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},
)

# Convert to a DataFrame for inspection.
df_testset = testset.to_pandas()

# Leave the preview as the cell's last expression instead of print():
# the notebook then renders it as a table, whereas print() produces a
# wrapped, truncated plain-text dump.
df_testset.head()

embedding nodes:   0%|          | 0/12 [00:00<?, ?it/s]

Filename and doc_id are the same for all nodes.


Generating:   0%|          | 0/10 [00:00<?, ?it/s]

                                            question  \
0  What are the guidelines for caring for a joey ...   
1  What is the significance of the Red Kangaroo w...   
2  What is the process of wildlife rehabilitation...   
3  What characteristics define aquatic birds, spe...   
4  What are the characteristics of birds in the A...   

                                            contexts  \
0  [ case of a fatality from a kangaroo attack oc...   
1  [kangaroo\n\nA kangaroo is a marsupial from th...   
2  [ case of a fatality from a kangaroo attack oc...   
3  [duck\n\nDuck is the common name for a number ...   
4  [duck\n\nDuck is the common name for a number ...   

                                        ground_truth evolution_type  \
0  After a collision involving its mother, if a f...         simple   
1  The significance of the Red Kangaroo within th...         simple   
2  The process of wildlife rehabilitation for inj...         simple   
3  Ducks are primarily aquatic birds that 

In [None]:
from datasets import load_dataset

# Fetch the "english_v2" configuration of the amnesty_qa dataset.
amnesty_qa = load_dataset(
    path="explodinggradients/amnesty_qa",
    name="english_v2",
)
# Last expression of the cell: shows the loaded dataset's summary.
amnesty_qa

In [None]:
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)