In [2]:
import os

import faiss
import openai
import numpy as np
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
# Toy in-memory corpus: one short factual statement per entry. These strings
# are what gets embedded and indexed, and they are returned verbatim as the
# retrieved contexts.
docs = [
    "Paris is the capital and most populous city of France. The city is famed for the Eiffel Tower.",
    "Jane Austen was an English novelist best known for 'Pride and Prejudice' and 'Sense and Sensibility'.",
    "The Great Wall of China is a series of fortifications built to protect the ancient Chinese states.",
    "Mount Everest, part of the Himalayas, is Earth’s highest mountain above sea level.",
    "Mike loves the color pink more than any other color."
]

In [6]:
# Shared OpenAI client used for both embeddings and chat completions.
# No credentials are passed here, so they presumably come from the environment
# populated by load_dotenv() -- verify your .env provides them.
client = openai.OpenAI()


def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector for ``text``.

    Args:
        text: The input to embed. Only the first item of the response is
            returned, so pass a single text per call.
        model: Name of the embedding model to request. Documents and queries
            must be embedded with the same model for their vectors to be
            comparable, so override this consistently or not at all.

    Returns:
        The ``embedding`` attribute of the first entry in the response data.
    """
    response = client.embeddings.create(model=model, input=text)
    return response.data[0].embedding
# Embed every document into a single float32 matrix, one row per entry in docs.
embeddings = np.array([get_embedding(doc) for doc in docs], dtype="float32")

# Scale each row to unit L2 norm (done in place) so that the inner-product
# index below effectively ranks by cosine similarity.
faiss.normalize_L2(embeddings)

# Flat inner-product index sized to the embedding dimensionality.
index = faiss.IndexFlatIP(embeddings.shape[1])
index.add(embeddings)

In [7]:
def retrieve(query, k):
    """Return up to ``k`` documents from ``docs`` that best match ``query``.

    The query is embedded with ``get_embedding``, L2-normalised in the same
    way as the indexed document vectors, and searched against ``index``.

    Args:
        query: The query text to embed and search for.
        k: Maximum number of documents to return.

    Returns:
        A list of document strings in the order returned by the index search.
        The list is shorter than ``k`` when the index holds fewer than ``k``
        vectors.
    """
    query_embedding = np.array([get_embedding(query)]).astype("float32")

    faiss.normalize_L2(query_embedding)
    _, idx = index.search(query_embedding, k)

    # FAISS pads the label array with -1 when fewer than k neighbours exist.
    # Without this filter, docs[-1] would silently return the *last* document
    # as a bogus hit instead of signalling "no result".
    return [docs[i] for i in idx[0] if i >= 0]

In [10]:
def generate_answer(question, contexts, model="gpt-4o-mini"):
    """Answer ``question`` using only the supplied ``contexts``.

    Builds a single user prompt that lists each context as a bullet, followed
    by the question, and sends it to the chat completions endpoint with
    ``temperature=0``.

    Args:
        question: The user question to answer.
        contexts: Iterable of context strings to include in the prompt.
        model: Chat model name to request.

    Returns:
        The stripped text of the first choice, or an empty string when the
        response carries no text content.
    """
    context_block = "\n".join(f"- {c}" for c in contexts)
    prompt = (
        "Answer the user question **only** with facts found in the context.\n\n"
        "Context:\n"
        + context_block
        + f"\n\nQuestion: {question}\nAnswer:"
    )

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )

    # message.content is optional in the SDK response; calling .strip() on
    # None would raise AttributeError, so fall back to an empty answer.
    content = response.choices[0].message.content
    return (content or "").strip()

In [None]:
from datasets import Dataset

# Evaluation questions; questions[i] is paired with ground_truths[i].
questions = [
    "What is the capital of France?",
    "Who wrote Pride and Prejudice?",
    "Where is Mount Everest located?",
    "What is Mike's favorite color?"
]

# Reference answers, in the same order as the questions above.
ground_truths = [
    "Paris",
    "Jane Austen",
    "the Himalayas",
    "Pink"
]

# zip() stops at the shorter sequence, so a length mismatch would silently
# drop evaluation rows; fail loudly instead.
assert len(questions) == len(ground_truths), (
    "questions and ground_truths must have the same length"
)

# Number of documents retrieved per question.
TOP_K = 2

rows = []

# Run the full retrieve -> generate pipeline once per question and record the
# inputs and outputs needed for evaluation.
for question, ground_truth in zip(questions, ground_truths):
    context = retrieve(question, k=TOP_K)
    answer = generate_answer(question, context)
    rows.append(
        {
            "question": question,
            "contexts": context,
            "answer": answer,
            "reference": ground_truth,
        }
    )

evaluation_dataset = Dataset.from_list(rows)

In [12]:
# Display the dataset summary (column names and row count) as the cell output.
evaluation_dataset

Dataset({
    features: ['question', 'contexts', 'answer', 'reference'],
    num_rows: 4
})

In [None]:
from ragas import evaluate
from ragas.metrics import (
    answer_correctness,
    answer_relevancy,
    faithfulness,
    context_precision,
    context_recall,
)

# Metrics to compute for every row of the evaluation dataset.
selected_metrics = [
    answer_correctness,
    answer_relevancy,
    faithfulness,
    context_precision,
    context_recall,
]

# NOTE(review): the recorded output of this cell shows several jobs failing
# with embedding-related exceptions (a TypeError about aembed_text() on a
# synchronous client and an AttributeError for a missing embed_query), so some
# metric values in `scores` are likely missing. evaluate() is called here with
# its default LLM/embeddings; passing explicit, compatible ones is the likely
# fix -- TODO confirm against the installed ragas version.
scores = evaluate(evaluation_dataset, metrics=selected_metrics)

# Dump the raw pipeline rows followed by the aggregated metric scores.
print(rows)
print(scores)

  from ragas.metrics import (
  from ragas.metrics import (
  from ragas.metrics import (
  from ragas.metrics import (
  from ragas.metrics import (
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Exception raised in Job[0]: TypeError(Cannot use aembed_text() with a synchronous client. Use embed_text() instead.)
LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.
Exception raised in Job[1]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
Exception raised in Job[5]: TypeError(Cannot use aembed_text() with a synchronous client. Use embed_text() instead.)
LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.
Exception raised in Job[6]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
Exception raised in Job[10]: TypeError(Cannot use aembed_text() with a synchronous client. Use embed_text() instead.)
LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.


[{'question': 'What is the capital of France?', 'contexts': ['Paris is the capital and most populous city of France. The city is famed for the Eiffel Tower.', 'The Great Wall of China is a series of fortifications built to protect the ancient Chinese states.'], 'answer': 'The capital of France is Paris.', 'reference': 'Paris'}, {'question': 'Who wrote Pride and Prejudice?', 'contexts': ["Jane Austen was an English novelist best known for 'Pride and Prejudice' and 'Sense and Sensibility'.", 'Paris is the capital and most populous city of France. The city is famed for the Eiffel Tower.'], 'answer': "Jane Austen wrote 'Pride and Prejudice'.", 'reference': 'Jane Austen'}, {'question': 'Where is Mount Everest located?', 'contexts': ['Mount Everest, part of the Himalayas, is Earth’s highest mountain above sea level.', 'The Great Wall of China is a series of fortifications built to protect the ancient Chinese states.'], 'answer': 'Mount Everest is located in the Himalayas.', 'reference': 'the

: 