In [1]:
from typing import List
from sentence_transformers import SentenceTransformer
from deepeval.models import DeepEvalBaseEmbeddingModel, OllamaModel
from deepeval.synthesizer import Synthesizer
from deepeval.synthesizer.config import ContextConstructionConfig

In [2]:
class HuggingFaceEmbeddingModel(DeepEvalBaseEmbeddingModel):
    def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)
        self.model_name = model_name

    def load_model(self):
        return self.model

    def embed_text(self, text: str) -> List[float]:
        return self.model.encode(text).tolist()

    def embed_texts(self, texts: List[str]) -> List[List[float]]:
        return self.model.encode(texts).tolist()
    async def a_embed_text(self, text: str) -> List[float]:
            # Simply call the sync version
            return self.embed_text(text)

    async def a_embed_texts(self, texts: List[str]) -> List[List[float]]:
            # Simply call the sync version
            return self.embed_texts(texts)

    def get_model_name(self):
        return f"Hugging Face ({self.model_name})"

In [3]:
local_qwen = OllamaModel(model="qwen2.5:3b")
hf_embedder = HuggingFaceEmbeddingModel() # Defaults to all-MiniLM-L6-v2

In [None]:
synthesizer = Synthesizer(model=local_qwen)
goldens = synthesizer.generate_goldens_from_docs(
    document_paths=['../../../Documents/Ragas/ragas_2309.15217v2.pdf'],
    max_goldens_per_context=2,
    context_construction_config=ContextConstructionConfig(
        embedder=hf_embedder ,
        critic_model=local_qwen
    )
)

# take documents
# load it with  getLoader() 
"""self.loader_mapping = {
                ".pdf": lc.PyPDFLoader,
                ".txt": lc.TextLoader,
                ".docx": lc.Docx2txtLoader,
                ".md": lc.TextLoader,
                ".markdown": lc.TextLoader,
                ".mdx": lc.TextLoader,
                """
# document objects are created with loading as sections( e.g. for pdfs, pages are sections)
# a_chunk_doc method chunks the objects with TokenTextSplitter
"""Load (File → LangChain)
Chunk (Sections → 1024-token strings with overlaps)
Embed (Strings → Vectors)
Store (Vectors → ChromaDB)
Critique (Random Chunks → LLM Score for whether it can be anchor or not→ Filter)
Group (Seed Chunk → Similarity Search → Final Context ( group the similar chunks and return the contexts))
Question ( write simple question for the contexts)
Evolution (rewrite the simple question for comlexity)
should_style(class StylingConfig:
    scenario: Optional[str] = None
    task: Optional[str] = None
    input_format: Optional[str] = None
    expected_output_format: Optional[str] = None
)  If user provides this, rewrite the question according to style
IncludeExpectedOutput(If we want expected output, give context and question and take the answer from the AI).
"""

Output()

'Load (File → LangChain)\nChunk (Sections → 1024-token strings with overlaps)\nEmbed (Strings → Vectors)\nStore (Vectors → ChromaDB)\nCritique (Random Chunks → LLM Score for whether it can be anchor or not→ Filter)\nGroup (Seed Chunk → Similarity Search → Final Context ( group the similar chunks and return the contexts))\nQuestion ( write simple question for the contexts)\nEvolution (rewrite the simple question for comlexity)\nshould_style(class StylingConfig:\n    scenario: Optional[str] = None\n    task: Optional[str] = None\n    input_format: Optional[str] = None\n    expected_output_format: Optional[str] = None\n)  If user provides this, rewrite the question according to style\nIncludeExpectedOutput(If we want expected output, give context and question and take the answer from the AI).\n'

In [7]:
goldens

[Golden(input='Faithfulness refers to claims in answers being grounded in context.', actual_output=None, expected_output='Faithfulness is measured by ensuring that the claims made in the generated answer can be inferred from the provided context. This involves extracting statements from the answer and verifying if these statements are supported by the context using a verification function. The final faithfulness score, \\( F \\), is calculated as the ratio of statements verified to the total number of statements extracted.', context=['we usually do not have access to human-annotated\ndatasets or reference answers. We therefore fo-\ncus on metrics that are fully self-contained and\nreference-free. We focus in particular three quality\naspects, which we argue are of central importance.\nFirst, Faithfulness refers to the idea that the an-\nswer should be grounded in the given context. This\nis important to avoid hallucinations, and to ensure\nthat the retrieved context can act as a justif

In [None]:
df = synthesizer.to_pandas()
display(df)

Unnamed: 0,input,actual_output,expected_output,context,retrieval_context,n_chunks_per_context,context_length,evolutions,context_quality,synthetic_input_quality,source_file
0,Faithfulness refers to claims in answers being...,,Faithfulness is measured by ensuring that the ...,[we usually do not have access to human-annota...,,3,8710,[Multi-context],,0.7,../../../Documents/Ragas/ragas_2309.15217v2.pdf
1,How does the generated answer address the ques...,,To measure how the generated answer addresses ...,[we usually do not have access to human-annota...,,3,8710,[Concretizing],,0.7,../../../Documents/Ragas/ragas_2309.15217v2.pdf
2,Compare Christopher Nolan’s directed Oppenheim...,,"Christopher Nolan directed the film ""Oppenheim...",[Question Context Answer\nWho directed the fil...,,2,2974,[Comparative],,1.0,../../../Documents/Ragas/ragas_2309.15217v2.pdf
3,When did the Sayajibai Clock Tower complete an...,,"The Sayajibai Clock Tower, also known as the R...",[Question Context Answer\nWho directed the fil...,,2,2974,[Comparative],,0.2,../../../Documents/Ragas/ragas_2309.15217v2.pdf
4,How does Demonstrate-search-predict by Matei Z...,,"Matei Zaharia's ""Demonstrate-search-predict"" c...",[ and\nMatei Zaharia. 2022. Demonstrate-search...,,3,8796,[Concretizing],,0.3,../../../Documents/Ragas/ragas_2309.15217v2.pdf
5,What aspects does Matei Zaharia's paper 'Demon...,,"Matei Zaharia's paper ""Demonstrate-search-pred...",[ and\nMatei Zaharia. 2022. Demonstrate-search...,,3,8796,[Reasoning],,0.6,../../../Documents/Ragas/ragas_2309.15217v2.pdf
