In [1]:
import ast

import pandas as pd
from tqdm.auto import tqdm

## Data ingestion

In [2]:
df_recipes = pd.read_csv("../data/recipes.csv")  # source data

In [3]:
# Format documents for the Qdrant vector DB: every recipe gets one flat "text"
# field that is embedded later and also stored in the point payload.
recipes_raw = df_recipes.to_dict(orient="records")
for recipe in tqdm(recipes_raw):
    description_stripped = recipe["recipe_description"].strip()
    # "directions" and "ingredients" are expected to hold stringified Python
    # lists. Parse them with ast.literal_eval instead of eval so that a
    # malformed or malicious CSV cell cannot execute arbitrary code.
    directions_joined = " ".join(ast.literal_eval(recipe["directions"]))
    ingredients_joined = "; ".join(ast.literal_eval(recipe["ingredients"]))

    text = (
        f"Recipe: {recipe['recipe_name'].strip()}"
        f" | Description: {description_stripped}"
        f" | Ratings: {recipe['ratings'].strip()}"
        f" | Ready in: {recipe['ready-in'].strip()}"
        f" | Directions: {directions_joined.strip()}"
        f" | Ingredients: {ingredients_joined.strip()}"
    )

    recipe["text"] = text

  0%|          | 0/88 [00:00<?, ?it/s]

In [4]:
# Qdrant
from qdrant_client import QdrantClient, models

# Client for the Qdrant instance expected at localhost:6333.
qdrant_client = QdrantClient("http://localhost:6333")

# Size of the dense vectors stored in the "recipe-rag" collection created below.
EMBEDDING_DIMENSIONALITY = 512
# Embedding model name passed to models.Document for both documents and queries.
model_handle = "jinaai/jina-embeddings-v2-small-en"

In [5]:
collection_name = "recipe-rag"

# Single unnamed dense vector per point, compared with cosine distance.
dense_vector_params = models.VectorParams(
    size=EMBEDDING_DIMENSIONALITY,
    distance=models.Distance.COSINE,
)

# Start from a clean slate so that re-running the cell is idempotent.
if qdrant_client.collection_exists(collection_name=collection_name):
    qdrant_client.delete_collection(collection_name=collection_name)

qdrant_client.create_collection(
    collection_name=collection_name,
    vectors_config=dense_vector_params,
)

True

In [6]:
# construct points: one per recipe, with the text wrapped in models.Document
# so that it is embedded with `model_handle`
payload_fields = (
    "recipe_id",
    "text",
    "recipe_name",
    "recipe_link",
    "recipe_description",
    "ratings",
    "ready-in",
    "directions",
    "ingredients",
)

points = [
    models.PointStruct(
        id=recipe["recipe_id"],
        vector=models.Document(text=recipe["text"], model=model_handle),
        payload={field: recipe[field] for field in payload_fields},
    )
    for recipe in tqdm(recipes_raw)
]

# upsert into DB
qdrant_client.upsert(collection_name=collection_name, points=points)

  0%|          | 0/88 [00:00<?, ?it/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/367 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

onnx/model.onnx:   0%|          | 0.00/130M [00:00<?, ?B/s]

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

## RAG pipeline

In [7]:
from dotenv import load_dotenv

# Load variables from a .env file into the process environment
# (presumably the API key for the OpenAI client created below; confirm).
load_dotenv()

True

In [8]:
from openai import OpenAI

# Client built without explicit arguments
# (presumably picks up its credentials from the environment; confirm).
openai_client = OpenAI()

In [9]:
# search
def vector_search(collection_name, query, limit=1):
    """Return the payloads of the points closest to ``query``.

    Args:
        collection_name: Qdrant collection to search.
        query: Free-text query; wrapped in ``models.Document`` with
            ``model_handle`` so it is embedded with the configured model.
        limit: Maximum number of matches to return.

    Returns:
        A list with one payload per matching point, in the order Qdrant
        returned them.
    """
    embedded_query = models.Document(text=query, model=model_handle)
    response = qdrant_client.query_points(
        collection_name=collection_name,
        query=embedded_query,
        limit=limit,
        with_payload=True,  # include the stored metadata with every hit
    )
    return [hit.payload for hit in response.points]

In [10]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt from the user question and the retrieved recipes.

    Args:
        query: The user's question, inserted after ``QUESTION:``.
        search_results: Iterable of payload dicts; each must provide a
            ``text`` entry holding the full recipe information.

    Returns:
        The formatted prompt with surrounding whitespace stripped.
    """
    template = (
        "You're a cooking assistant. Answer the QUESTION based on the CONTEXT from the recipe database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT: \n"
        "{context}"
    )

    # Each recipe text is followed by a blank line; the final strip() removes
    # the trailing one (and the dangling "CONTEXT: " whitespace when empty).
    context = "".join(f"{doc['text']}\n\n" for doc in search_results)

    return template.format(question=query, context=context).strip()

In [11]:
def llm(openai_client, prompt, llm_model="gpt-4o-mini"):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        openai_client: Client exposing ``chat.completions.create``.
        prompt: Full prompt text for the user message.
        llm_model: Name of the chat model to call.

    Returns:
        The ``content`` of the first choice's message.
    """
    user_message = {"role": "user", "content": prompt}
    completion = openai_client.chat.completions.create(
        model=llm_model,
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [12]:
def rag(query, openai_client, collection_name, llm_model="gpt-4o-mini", limit=5):
    """Answer ``query`` with retrieval-augmented generation.

    Retrieves up to ``limit`` recipes from ``collection_name``, builds the
    prompt from them and returns the reply of ``llm_model``.
    """
    retrieved = vector_search(collection_name, query, limit)
    return llm(openai_client, build_prompt(query, retrieved), llm_model)

In [13]:
# End-to-end smoke test of the pipeline; rag() retrieves with its default limit of 5.
query = "Are there any recommended fish recipes? Can you give me the details about the recipes as well?"
answer = rag(
    query=query,
    openai_client=openai_client,
    collection_name="recipe-rag",
    llm_model="gpt-4o-mini",
)
# print() so that the newlines in the answer are rendered instead of escaped.
print(answer)

Yes, there are several recommended fish recipes. Here are the details:

1. **Easy Oven-Baked Cod**
   - **Description**: Amazingly quick, fresh, and light; can be made with any type of fish fillet.
   - **Ratings**: 4.6 stars (60 ratings)
   - **Ready in**: 25 minutes
   - **Directions**: Preheat oven to 400°F. Line pan with aluminum foil and brush lightly with olive oil. Combine parsley, garlic, lemon zest, and salt; mix with breadcrumbs. Brush fillets with olive oil, press into breadcrumb mixture, and place in baking dish. Bake until firm, about 12-15 minutes. Serve with lemon wedges.
   - **Ingredients**: 1½ cups plain breadcrumbs, ½ cup fresh parsley, 2-3 garlic cloves, zest of 1 lemon, ¾ teaspoon coarse salt, 4 (6-8 ounce) cod fillets, olive oil.

2. **Lemon Baked Cod**
   - **Description**: Great baked cod recipe.
   - **Ratings**: 4.35 stars (209 ratings)
   - **Ready in**: 35 minutes
   - **Directions**: If fillets are large, cut into pieces. Mix butter and lemon juice. In anot

## Retrieval evaluation

In [14]:
# Ground-truth questions; evaluate() compares each row's "id" with the
# "recipe_id" of the retrieved payloads.
df_question = pd.read_csv("../data/ground-truth-retrieval.csv")

In [15]:
# Preview the ground-truth questions.
df_question

Unnamed: 0,id,question
0,0,What's the total cook time for the Ground Beef...
1,0,What kind of meat is recommended in the Ground...
2,0,How is the Ground Beef Gyros served according ...
3,0,What rating did the Ground Beef Gyros recipe r...
4,0,Do the Ground Beef Gyros include a sauce with ...
...,...,...
435,87,What's the main protein used in the Yakisoba r...
436,87,How much time does it take to cook the Yakisoba?
437,87,What are the main vegetables required for the ...
438,87,How is the Yakisoba dish commonly served accor...


In [16]:
# One dict per question row; this is the structure evaluate() iterates over.
ground_truth = df_question.to_dict(orient="records")

### Metric calculation and evaluation

In [17]:
# metric calculation
def hit_rate(relevance_total):
    """Fraction of queries that have at least one relevant result.

    Args:
        relevance_total: One sequence of booleans per query; element ``i``
            tells whether the result at rank ``i`` is the expected document.

    Returns:
        The hit rate in ``[0, 1]``, or ``0.0`` when no queries were evaluated.
    """
    if len(relevance_total) == 0:
        # Nothing evaluated: report 0.0 instead of raising ZeroDivisionError.
        return 0.0

    hits = sum(1 for line in relevance_total if any(line))
    return hits / len(relevance_total)


def mrr(relevance_total):
    """Mean reciprocal rank of the first relevant result per query.

    Args:
        relevance_total: One sequence of booleans per query; element ``i``
            tells whether the result at rank ``i`` (0-based) is relevant.

    Returns:
        The mean over all queries of ``1 / rank`` (1-based) of the first
        relevant result, counting ``0`` for queries without any relevant
        result. Returns ``0.0`` when no queries were evaluated.
    """
    if len(relevance_total) == 0:
        # Nothing evaluated: report 0.0 instead of raising ZeroDivisionError.
        return 0.0

    total_score = 0.0
    for line in relevance_total:
        for rank, is_relevant in enumerate(line, start=1):
            if is_relevant:
                total_score += 1 / rank
                # Only the first relevant result counts; without this break a
                # ranking with several relevant entries would be summed and
                # the score could exceed 1.
                break

    return total_score / len(relevance_total)


def evaluate(ground_truth, search_function):
    """Score a retrieval function against the ground-truth questions.

    Args:
        ground_truth: Iterable of question dicts; each needs an ``id`` entry
            naming the expected recipe.
        search_function: Callable that receives the whole question dict and
            returns the retrieved payloads (dicts with a ``recipe_id`` entry).

    Returns:
        A dict with the ``hit_rate`` and ``mrr`` over all questions.
    """
    relevance_total = []

    for question in tqdm(ground_truth):
        expected_id = question["id"]
        hits = search_function(question)
        relevance_total.append([hit["recipe_id"] == expected_id for hit in hits])

    metrics = {}
    metrics["hit_rate"] = hit_rate(relevance_total)
    metrics["mrr"] = mrr(relevance_total)
    return metrics

### Qdrant sparse vector search

In [18]:
# qdrant sparse vector search
collection_name = "recipe-rag-sparse"

# One named sparse vector ("bm25") configured with the IDF modifier.
sparse_vectors = {
    "bm25": models.SparseVectorParams(modifier=models.Modifier.IDF),
}

# Recreate the collection from scratch so the cell can be re-run safely.
if qdrant_client.collection_exists(collection_name=collection_name):
    qdrant_client.delete_collection(collection_name=collection_name)

qdrant_client.create_collection(
    collection_name=collection_name,
    sparse_vectors_config=sparse_vectors,
)

True

In [19]:
# construct points: one per recipe, indexed under the sparse "bm25" vector only
payload_fields = (
    "recipe_id",
    "text",
    "recipe_name",
    "recipe_link",
    "recipe_description",
    "ratings",
    "ready-in",
    "directions",
    "ingredients",
)

points = [
    models.PointStruct(
        id=recipe["recipe_id"],
        vector={
            "bm25": models.Document(text=recipe["text"], model="Qdrant/bm25"),
        },
        payload={field: recipe[field] for field in payload_fields},
    )
    for recipe in tqdm(recipes_raw)
]

# upsert into DB
qdrant_client.upsert(collection_name=collection_name, points=points)

  0%|          | 0/88 [00:00<?, ?it/s]

Fetching 30 files:   0%|          | 0/30 [00:00<?, ?it/s]

bengali.txt: 0.00B [00:00, ?B/s]

danish.txt:   0%|          | 0.00/424 [00:00<?, ?B/s]

azerbaijani.txt:   0%|          | 0.00/967 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

basque.txt: 0.00B [00:00, ?B/s]

catalan.txt: 0.00B [00:00, ?B/s]

arabic.txt: 0.00B [00:00, ?B/s]

dutch.txt:   0%|          | 0.00/453 [00:00<?, ?B/s]

english.txt:   0%|          | 0.00/936 [00:00<?, ?B/s]

finnish.txt: 0.00B [00:00, ?B/s]

chinese.txt: 0.00B [00:00, ?B/s]

french.txt:   0%|          | 0.00/813 [00:00<?, ?B/s]

german.txt: 0.00B [00:00, ?B/s]

greek.txt: 0.00B [00:00, ?B/s]

hebrew.txt: 0.00B [00:00, ?B/s]

hinglish.txt: 0.00B [00:00, ?B/s]

kazakh.txt: 0.00B [00:00, ?B/s]

italian.txt: 0.00B [00:00, ?B/s]

indonesian.txt: 0.00B [00:00, ?B/s]

hungarian.txt: 0.00B [00:00, ?B/s]

nepali.txt: 0.00B [00:00, ?B/s]

norwegian.txt:   0%|          | 0.00/851 [00:00<?, ?B/s]

portuguese.txt: 0.00B [00:00, ?B/s]

russian.txt: 0.00B [00:00, ?B/s]

swedish.txt:   0%|          | 0.00/559 [00:00<?, ?B/s]

slovene.txt: 0.00B [00:00, ?B/s]

romanian.txt: 0.00B [00:00, ?B/s]

spanish.txt: 0.00B [00:00, ?B/s]

tajik.txt: 0.00B [00:00, ?B/s]

turkish.txt:   0%|          | 0.00/260 [00:00<?, ?B/s]

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [20]:
def qdrant_sparse_search(query, collection_name, limit=5) -> list[dict]:
    """Search the ``bm25`` sparse vector of ``collection_name``.

    Args:
        query: Free-text query, wrapped in ``models.Document`` with the
            ``Qdrant/bm25`` model.
        collection_name: Collection that defines a ``bm25`` sparse vector.
        limit: Maximum number of matches to return.

    Returns:
        The payload dicts of the matching points (not the scored points
        themselves), in the order Qdrant returned them.
    """
    query_points = qdrant_client.query_points(
        collection_name=collection_name,
        query=models.Document(
            text=query,
            model="Qdrant/bm25",
        ),
        using="bm25",
        limit=limit,
        with_payload=True,  # payloads are what this function returns
    )

    return [point.payload for point in query_points.points]

In [21]:
# Score BM25 sparse retrieval on the ground-truth questions, top 10 results each.
evaluate(
    ground_truth,
    lambda q: qdrant_sparse_search(
        q["question"], collection_name="recipe-rag-sparse", limit=10
    ),
)

  0%|          | 0/440 [00:00<?, ?it/s]

{'hit_rate': 1.0, 'mrr': 0.9720643939393939}

### Qdrant vector search

In [22]:
def qdrant_search(query, collection_name, limit=5):
    """Dense vector search over ``collection_name``.

    Args:
        query: Free-text query; wrapped in ``models.Document`` with
            ``model_handle`` so it is embedded with the configured model.
        collection_name: Collection to search.
        limit: Maximum number of matches to return.

    Returns:
        The payloads of the matching points, in the order Qdrant returned them.
    """
    embedded_query = models.Document(text=query, model=model_handle)
    response = qdrant_client.query_points(
        collection_name=collection_name,
        query=embedded_query,
        limit=limit,
        with_payload=True,  # include the stored metadata with every hit
    )
    return [hit.payload for hit in response.points]

In [23]:
# Score dense vector retrieval on the ground-truth questions, top 10 results each.
evaluate(
    ground_truth,
    lambda q: qdrant_search(q["question"], collection_name="recipe-rag", limit=10),
)

  0%|          | 0/440 [00:00<?, ?it/s]

{'hit_rate': 1.0, 'mrr': 0.9898484848484848}

### Qdrant hybrid search

In [24]:
# hybrid search with Qdrant
collection_name = "recipe-rag-hybrid"

# Named dense vector for jinaai/jina-embeddings-v2-small-en
dense_vectors = {
    "jina-small": models.VectorParams(size=512, distance=models.Distance.COSINE),
}
# Named sparse vector for BM25, configured with the IDF modifier
sparse_vectors = {
    "bm25": models.SparseVectorParams(modifier=models.Modifier.IDF),
}

# Recreate the collection from scratch so the cell can be re-run safely.
if qdrant_client.collection_exists(collection_name=collection_name):
    qdrant_client.delete_collection(collection_name=collection_name)

qdrant_client.create_collection(
    collection_name=collection_name,
    vectors_config=dense_vectors,
    sparse_vectors_config=sparse_vectors,
)

True

In [25]:
# construct points: one per recipe, carrying both the dense ("jina-small")
# and the sparse ("bm25") representation of the same text
payload_fields = (
    "recipe_id",
    "text",
    "recipe_name",
    "recipe_link",
    "recipe_description",
    "ratings",
    "ready-in",
    "directions",
    "ingredients",
)

points = [
    models.PointStruct(
        id=recipe["recipe_id"],
        vector={
            "jina-small": models.Document(
                text=recipe["text"],
                model="jinaai/jina-embeddings-v2-small-en",
            ),
            "bm25": models.Document(text=recipe["text"], model="Qdrant/bm25"),
        },
        payload={field: recipe[field] for field in payload_fields},
    )
    for recipe in tqdm(recipes_raw)
]

# upsert into DB
qdrant_client.upsert(collection_name=collection_name, points=points)

  0%|          | 0/88 [00:00<?, ?it/s]

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [26]:
def qdrant_rrf_search(query, collection_name, limit=5) -> list[dict]:
    """Hybrid search: fuse dense and BM25 candidates with Reciprocal Rank Fusion.

    Args:
        query: Free-text query, used for both the dense and the sparse prefetch.
        collection_name: Collection that defines the ``jina-small`` dense
            vector and the ``bm25`` sparse vector.
        limit: Maximum number of fused results to return. Each prefetch
            collects ``5 * limit`` candidates so the fusion step has a wider
            pool to rank.

    Returns:
        The payload dicts of the fused results (not the scored points
        themselves), in the order Qdrant returned them.
    """
    prefetch_limit = 5 * limit

    query_points = qdrant_client.query_points(
        collection_name=collection_name,
        prefetch=[
            models.Prefetch(
                query=models.Document(
                    text=query,
                    model="jinaai/jina-embeddings-v2-small-en",
                ),
                using="jina-small",
                limit=prefetch_limit,
            ),
            models.Prefetch(
                query=models.Document(
                    text=query,
                    model="Qdrant/bm25",
                ),
                using="bm25",
                limit=prefetch_limit,
            ),
        ],
        # Fusion query enables fusion on the prefetched results
        query=models.FusionQuery(fusion=models.Fusion.RRF),
        # Cap the fused result list; without this the `limit` argument only
        # sized the prefetches and the client's default result count applied.
        limit=limit,
        with_payload=True,
    )

    return [point.payload for point in query_points.points]

In [27]:
# Score hybrid (dense + BM25, RRF-fused) retrieval on the ground-truth
# questions, requesting 10 results each.
evaluate(
    ground_truth,
    lambda q: qdrant_rrf_search(
        q["question"], collection_name="recipe-rag-hybrid", limit=10
    ),
)

  0%|          | 0/440 [00:00<?, ?it/s]

{'hit_rate': 1.0, 'mrr': 0.990909090909091}

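RRF hybrid search gives the best MRR (0.9909), marginally ahead of dense vector search (0.9898) and clearly ahead of BM25 sparse search (0.9721). All three reach a hit rate of 1.0 when retrieving 10 results per question.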

## RAG evaluation

In [28]:
# LLM-as-a-judge prompt: filled in with `question` and `answer_llm` via
# str.format, which is why the literal braces of the JSON skeleton are doubled.
# The judge is asked for raw JSON so the reply can be passed to json.loads.
prompt_template = """
You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()

In [29]:
# Try the judge flow on a single record first: generate the answer for the
# first ground-truth question with the dense "recipe-rag" collection.
record = ground_truth[0]
question = record["question"]
answer_llm = rag(question, openai_client, "recipe-rag", "gpt-4o-mini")

In [30]:
# Render the judge prompt for that record and inspect it before the full run.
prompt = prompt_template.format(question=question, answer_llm=answer_llm)
print(prompt)

You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: What's the total cook time for the Ground Beef Gyros?
Generated Answer: The total cook time for the Ground Beef Gyros is 22 minutes.

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}


In [31]:
import json

# Collected as (record, generated answer, parsed judge verdict) triples.
evaluations = []

# For every ground-truth question: generate an answer with rag() (its default
# model, "gpt-4o-mini"), then let llm() grade the answer with the judge prompt.
# NOTE: this makes two LLM calls per record.
for record in tqdm(ground_truth):
    question = record["question"]
    answer_llm = rag(
        query=question, openai_client=openai_client, collection_name="recipe-rag"
    )

    prompt = prompt_template.format(question=question, answer_llm=answer_llm)

    evaluation = llm(openai_client, prompt)
    # json.loads raises if the judge reply is not valid JSON, aborting the loop.
    evaluation = json.loads(evaluation)

    evaluations.append((record, answer_llm, evaluation))

  0%|          | 0/440 [00:00<?, ?it/s]

In [32]:
# Flatten the (record, answer, evaluation) triples into one row per question:
# pull the needed fields out of the nested dicts, then drop the dict columns.
df_eval = (
    pd.DataFrame(evaluations, columns=["record", "answer", "evaluation"])
    .assign(
        id=lambda frame: frame["record"].apply(lambda rec: rec["id"]),
        question=lambda frame: frame["record"].apply(lambda rec: rec["question"]),
        relevance=lambda frame: frame["evaluation"].apply(
            lambda verdict: verdict["Relevance"]
        ),
        explanation=lambda frame: frame["evaluation"].apply(
            lambda verdict: verdict["Explanation"]
        ),
    )
    .drop(columns=["record", "evaluation"])
)

In [33]:
# Preview the judged answers (gpt-4o-mini run).
df_eval

Unnamed: 0,answer,id,question,relevance,explanation
0,The total cook time for the Ground Beef Gyros ...,0,What's the total cook time for the Ground Beef...,RELEVANT,The generated answer directly addresses the qu...
1,The recommended meat in the Ground Beef Gyros ...,0,What kind of meat is recommended in the Ground...,RELEVANT,The generated answer directly addresses the qu...
2,"According to the directions, the Ground Beef G...",0,How is the Ground Beef Gyros served according ...,RELEVANT,The generated answer directly addresses the qu...
3,The Ground Beef Gyros recipe received a rating...,0,What rating did the Ground Beef Gyros recipe r...,RELEVANT,The generated answer directly addresses the qu...
4,"Yes, the Ground Beef Gyros include a sauce tha...",0,Do the Ground Beef Gyros include a sauce with ...,RELEVANT,The generated answer directly addresses the qu...
...,...,...,...,...,...
435,The main protein used in the Yakisoba recipe i...,87,What's the main protein used in the Yakisoba r...,RELEVANT,The generated answer directly addresses the qu...
436,It takes 50 minutes to cook Yakisoba.,87,How much time does it take to cook the Yakisoba?,RELEVANT,The generated answer directly addresses the qu...
437,The main vegetables required for the Yakisoba ...,87,What are the main vegetables required for the ...,RELEVANT,The generated answer directly addresses the qu...
438,The Yakisoba dish is commonly served on a serv...,87,How is the Yakisoba dish commonly served accor...,RELEVANT,The generated answer directly addresses the qu...


In [34]:
# Number of answers per relevance class.
df_eval.relevance.value_counts()

relevance
RELEVANT           424
PARTLY_RELEVANT     12
NON_RELEVANT         4
Name: count, dtype: int64

In [35]:
# Same distribution as proportions of all judged answers.
df_eval.relevance.value_counts(normalize=True)

relevance
RELEVANT           0.963636
PARTLY_RELEVANT    0.027273
NON_RELEVANT       0.009091
Name: proportion, dtype: float64

In [36]:
# Persist the gpt-4o-mini results before df_eval is reused for the next model.
df_eval.to_csv("../data/rag-eval-gpt-4o-mini.csv", index=False)

In [38]:
import json

# Results keyed by the record's position in `ground_truth`, stored as
# (record, generated answer, parsed judge verdict).
# Reuse whatever an earlier, interrupted run of this cell already collected:
# re-creating the dict here would make the skip check below dead code and
# repeat every LLM call. On a fresh kernel this starts empty. To force a full
# re-run, `del evaluations_gpt_4o` (or restart the kernel) first.
evaluations_gpt_4o = globals().get("evaluations_gpt_4o", {})

for record_id, record in enumerate(tqdm(ground_truth)):
    if record_id in evaluations_gpt_4o:
        continue  # already judged in a previous run of this cell
    question = record["question"]
    # Answers are generated with gpt-4o ...
    answer_llm = rag(
        query=question,
        openai_client=openai_client,
        collection_name="recipe-rag",
        llm_model="gpt-4o",
    )

    prompt = prompt_template.format(question=question, answer_llm=answer_llm)

    # ... while the judge call uses llm()'s default model.
    evaluation = llm(openai_client, prompt)
    # Raises if the judge reply is not valid JSON; results gathered so far
    # stay in the dict, so re-running the cell resumes at this record.
    evaluation = json.loads(evaluation)

    evaluations_gpt_4o[record_id] = (record, answer_llm, evaluation)

  0%|          | 0/440 [00:00<?, ?it/s]

In [39]:
# Same flattening as for the gpt-4o-mini run, applied to the gpt-4o results.
evaluations = list(evaluations_gpt_4o.values())

df_eval = (
    pd.DataFrame(evaluations, columns=["record", "answer", "evaluation"])
    .assign(
        id=lambda frame: frame["record"].apply(lambda rec: rec["id"]),
        question=lambda frame: frame["record"].apply(lambda rec: rec["question"]),
        relevance=lambda frame: frame["evaluation"].apply(
            lambda verdict: verdict["Relevance"]
        ),
        explanation=lambda frame: frame["evaluation"].apply(
            lambda verdict: verdict["Explanation"]
        ),
    )
    .drop(columns=["record", "evaluation"])
)

In [40]:
# Preview the judged answers (gpt-4o run).
df_eval

Unnamed: 0,answer,id,question,relevance,explanation
0,The total cook time for the Ground Beef Gyros ...,0,What's the total cook time for the Ground Beef...,RELEVANT,The generated answer directly responds to the ...
1,The recommended meat in the Ground Beef Gyros ...,0,What kind of meat is recommended in the Ground...,RELEVANT,The generated answer directly addresses the qu...
2,The Ground Beef Gyros are served stuffed into ...,0,How is the Ground Beef Gyros served according ...,RELEVANT,The generated answer directly addresses the qu...
3,The Ground Beef Gyros recipe received a rating...,0,What rating did the Ground Beef Gyros recipe r...,RELEVANT,The generated answer directly responds to the ...
4,"Yes, the Ground Beef Gyros include a sauce wit...",0,Do the Ground Beef Gyros include a sauce with ...,RELEVANT,The generated answer directly answers the ques...
...,...,...,...,...,...
435,The main protein used in the Yakisoba recipe i...,87,What's the main protein used in the Yakisoba r...,RELEVANT,The generated answer correctly identifies pork...
436,The Yakisoba takes 50 minutes to be ready.,87,How much time does it take to cook the Yakisoba?,RELEVANT,The generated answer directly responds to the ...
437,The main vegetables required for the Yakisoba ...,87,What are the main vegetables required for the ...,RELEVANT,The generated answer accurately lists the main...
438,The Yakisoba dish is commonly served on a serv...,87,How is the Yakisoba dish commonly served accor...,RELEVANT,The generated answer directly addresses the qu...


In [41]:
# Number of gpt-4o answers per relevance class.
df_eval.relevance.value_counts()

relevance
RELEVANT           418
PARTLY_RELEVANT     18
NON_RELEVANT         4
Name: count, dtype: int64

In [42]:
# Same distribution as proportions of all judged gpt-4o answers.
df_eval.relevance.value_counts(normalize=True)

relevance
RELEVANT           0.950000
PARTLY_RELEVANT    0.040909
NON_RELEVANT       0.009091
Name: proportion, dtype: float64

In [43]:
# Persist the gpt-4o results next to the gpt-4o-mini export.
df_eval.to_csv("../data/rag-eval-gpt-4o.csv", index=False)