In [23]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_community.document_loaders import DirectoryLoader

In [None]:
import os
from getpass import getpass

# Do not hardcode the key in the notebook. Reuse a key that is already
# exported in the environment; only when it is missing, prompt for it
# without echoing the input so it never lands in the saved file.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [32]:
# OpenAI chat models (an OpenAI API key must be configured beforehand):
# a larger model acts as the critic, a smaller one as the data generator.
critic_model = ChatOpenAI(model="gpt-4o")
data_generation_model = ChatOpenAI(model="gpt-4o-mini")

In [33]:
# OpenAI embedding model handed to the test set generator below.
# Swap the model name for another embedding model if needed.
embeddings = OpenAIEmbeddings(chunk_size=1000, model="text-embedding-ada-002")

In [34]:
# Read every Markdown file matched by the recursive glob under the sample folder
path = "Sample_Docs_Markdown/"
loader = DirectoryLoader(path=path, glob="**/*.md")
documents = loader.load()

In [35]:
# Wire the two chat models and the embedding model into a ragas generator
generator = TestsetGenerator.from_langchain(
    generator_llm=data_generation_model,
    critic_llm=critic_model,
    embeddings=embeddings,
)

In [36]:
# Share of each question type in the generated test set (values sum to 1.0);
# tune these weights to change the mix.
distributions = {simple: 0.4, multi_context: 0.4, reasoning: 0.2}

In [38]:
# Generate 10 samples from the loaded documents, using the mix of
# question types defined in `distributions`.
testset = generator.generate_with_langchain_docs(
    documents,
    test_size=10,
    distributions=distributions,
)

# Convert to a DataFrame and leave it as the cell's last expression so
# Jupyter renders a rich table instead of print()'s truncated text dump.
test_df = testset.to_pandas()
test_df

Filename and doc_id are the same for all nodes.                   
Generating: 100%|██████████| 10/10 [01:47<00:00, 10.73s/it]


                                            question  \
0  What advantages does the expert choice method ...   
1  What evaluation benchmarks are used for assess...   
2  What role do language modeling and machine tra...   
3  What is the impact of fine-grained expert segm...   
4  How does fine-grained expert segmentation impr...   
5  How does supervised fine-tuning boost DeepSeek...   
6  What eval strategies assess NLI performance in...   
7  How does the binary tensor route tokens to exp...   
8  What gains in capacity and efficiency come fro...   
9  What gains does Switch Transformer have over T...   

                                            contexts  \
0  [ number of experts to 2 degrades the perplexi...   
1  [ our MoE model to a larger scale with 16B tot...   
2  [Outrageously Large Neural Networks: The Spars...   
3  [ based on GShard. From Figure 3, we observe t...   
4  [ based on GShard. From Figure 3, we observe t...   
5  [, we take a broader set of open source mode

In [39]:
# Persist the generated test set as CSV, leaving out the DataFrame index
test_df.to_csv(path_or_buf="generated_testset.csv", index=False)

In [41]:
import ast

import pandas as pd


def _parse_literal(cell):
    """Turn a stringified Python literal back into the object it represents.

    Values that are not strings (e.g. NaN for an empty cell) and strings
    that do not parse as a Python literal are returned unchanged, so this
    never fails on a cell the plain CSV read would have accepted.
    """
    if not isinstance(cell, str):
        return cell
    try:
        return ast.literal_eval(cell)
    except (ValueError, SyntaxError):
        return cell


df = pd.read_csv('generated_testset.csv')

# CSV has no list type: the list-valued columns were written as their
# string repr, so parse them back into real Python lists after loading.
for column in ("contexts", "metadata"):
    if column in df.columns:
        df[column] = df[column].map(_parse_literal)

df

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What advantages does the expert choice method ...,[' number of experts to 2 degrades the perplex...,The expert choice method outperforms the top-2...,simple,[{'source': 'Sample_Docs_Markdown/2202.09368v2...,True
1,What evaluation benchmarks are used for assess...,[' our MoE model to a larger scale with 16B to...,The evaluation benchmarks used for assessing t...,simple,[{'source': 'Sample_Docs_Markdown/DeepSeekMoE_...,True
2,What role do language modeling and machine tra...,['Outrageously Large Neural Networks: The Spar...,Language modeling and machine translation are ...,simple,[{'source': 'Sample_Docs_Markdown/1701.06538v1...,True
3,What is the impact of fine-grained expert segm...,"[' based on GShard. From Figure 3, we observe ...",The impact of fine-grained expert segmentation...,simple,[{'source': 'Sample_Docs_Markdown/DeepSeekMoE_...,True
4,How does fine-grained expert segmentation impr...,"[' based on GShard. From Figure 3, we observe ...",Fine-grained expert segmentation improves mode...,multi_context,[{'source': 'Sample_Docs_Markdown/DeepSeekMoE_...,True
5,How does supervised fine-tuning boost DeepSeek...,"[', we take a broader set of open source model...",Supervised fine-tuning boosts DeepSeekMoE 16B'...,multi_context,[{'source': 'Sample_Docs_Markdown/DeepSeekMoE_...,True
6,What eval strategies assess NLI performance in...,"["" For both versions, we design a FLOP-matched...",The evaluation strategies that assess NLI perf...,multi_context,[{'source': 'Sample_Docs_Markdown/switch_trans...,True
7,How does the binary tensor route tokens to exp...,[' d model] router weights = mtf.Variable(shap...,The binary dispatch tensor in Switch Transform...,multi_context,[{'source': 'Sample_Docs_Markdown/switch_trans...,True
8,What gains in capacity and efficiency come fro...,[' The existing literature on conditional comp...,The Sparsely-Gated Mixture-of-Experts (MoE) ap...,reasoning,[{'source': 'Sample_Docs_Markdown/1701.06538v1...,True
9,What gains does Switch Transformer have over T...,"[""T5 (Xue et al., 2020), a multilingual extens...",Switch Transformer shows significant gains ove...,reasoning,[{'source': 'Sample_Docs_Markdown/switch_trans...,True
