# Context Entities Recall Evaluation

Using LangChain, OpenAI, and Ragas

In [None]:
# Install pinned versions of the packages this notebook depends on.
# NOTE(review): ipython_secrets, langchain_openai, langchain_community and
# langchain_text_splitters are imported in later cells but are not listed
# here — presumably preinstalled or pulled in as dependencies; confirm.
!pip3 install ragas==0.1.13 datasets==2.20.0 langchain==0.2.12 openai==1.39.0 faiss-cpu==1.8.0.post1

In [None]:
from ipython_secrets import get_secret 
import os

# Look up the OpenAI key with get_secret and publish it in this process's
# environment; the evaluation code reads it back through os.getenv.
os.environ.update({'OPENAI_API_KEY': get_secret('OPENAI_API_KEY')})

In [None]:
from ragas.metrics import context_entity_recall
from ragas import evaluate, RunConfig
from datasets import load_dataset, Dataset
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
import os
from typing import List

# Read the OpenAI API key from the environment; it is passed explicitly to
# the embedding and chat clients created below.
openai_api_key = os.getenv("OPENAI_API_KEY")

# Load sample dataset.
dataset = load_dataset("explodinggradients/amnesty_qa", split="eval")

sample_size = 100
# Get sample questions from the sample dataset.
sample_questions = dataset['question'][:sample_size]

# Get sample context information from the sample dataset. Every row holds a
# list of passages; they are flattened into a single corpus because the
# passages are only used to build the search index, not matched to a row.
sample_contexts = [item for row in dataset["contexts"]
                   [:sample_size] for item in row]

# Keep exactly one reference answer per question. Flattening the per-row
# lists would shift answers against questions (or break the equal-length
# requirement of Dataset.from_dict) as soon as a row held zero or several
# references, so take the first entry of each row instead.
sample_ground_truths = [row[0]
                        for row in dataset["ground_truths"][:sample_size]]

# Break sample context into chunks to use with vector search.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=400, chunk_overlap=100, add_start_index=True
)
chunks: List[str] = []
for context in sample_contexts:
    split_chunks = text_splitter.split_text(context)
    chunks.extend(split_chunks)

# Embedding models that we are evaluating.
openai_embedding_models = ["text-embedding-ada-002", "text-embedding-3-large"]

# Ragas evaluation config to use in all evaluations.
ragas_run_config = RunConfig(max_workers=4, max_wait=180)

# The judge LLM is identical for every embedding model, so build it once
# instead of once per loop iteration.
evaluator_llm = ChatOpenAI(openai_api_key=openai_api_key,
                           model_name="gpt-4o-mini")

# Evaluate each embedding model.
for embedding_model in openai_embedding_models:

    # Create an in-memory vector store for the evaluation.
    db = FAISS.from_texts(
        chunks, OpenAIEmbeddings(openai_api_key=openai_api_key, model=embedding_model))

    # Get retrieved context using similarity search: one list of chunk texts
    # per question, in the same order as sample_questions.
    retrieval_contexts: List[List[str]] = []
    for question in sample_questions:
        search_results = db.similarity_search(question)
        retrieval_contexts.append(
            [result.page_content for result in search_results])

    # Run evaluation for context entity recall of the retrieved information.
    # raise_exceptions=False is passed through to Ragas so that a failing row
    # does not abort the whole run (see the Ragas evaluate() documentation).
    result = evaluate(
        dataset=Dataset.from_dict({
            "question": sample_questions,
            "contexts": retrieval_contexts,
            "ground_truth": sample_ground_truths
        }),
        metrics=[context_entity_recall],
        run_config=ragas_run_config,
        raise_exceptions=False,
        llm=evaluator_llm
    )
    # Print out results
    print(f"Results for embedding model '{embedding_model}':")
    print(result)
