In [1]:
import ast

import pandas as pd
import requests
from tqdm.auto import tqdm

## Data Ingestion

In [2]:
# Load the raw recipe table. Later cells read these columns from it:
# recipe_id, recipe_name, recipe_link, recipe_description, ratings, ready-in,
# directions and ingredients.
df_recipes = pd.read_csv('../data/recipes.csv') # source data

In [3]:
# Format documents for the Qdrant vector DB.
#
# Each CSV row becomes a dict, and a single 'text' field is added that flattens
# the recipe into one string; that string is what gets embedded and later shown
# to the LLM as context.
recipes_raw = df_recipes.to_dict(orient='records')
for recipe in tqdm(recipes_raw):
    description_stripped = recipe['recipe_description'].strip()

    # 'directions' and 'ingredients' are stored in the CSV as stringified Python
    # lists. ast.literal_eval parses literals only, so a malicious or corrupted
    # cell cannot execute code the way eval() would; it raises ValueError or
    # SyntaxError instead.
    directions_joined = " ".join(ast.literal_eval(recipe['directions']))
    ingredients_joined = "; ".join(ast.literal_eval(recipe['ingredients']))

    text = f"Recipe: {recipe['recipe_name'].strip()} | Description: {description_stripped} | Ratings: {recipe['ratings'].strip()} | Ready in: {recipe['ready-in'].strip()} | Directions: {directions_joined.strip()} | Ingredients: {ingredients_joined.strip()}"

    recipe['text'] = text

  0%|          | 0/88 [00:00<?, ?it/s]

In [4]:
# Qdrant
from qdrant_client import QdrantClient, models
# Client for the Qdrant instance addressed at http://localhost:6333; every
# collection, upsert and query call below goes through it.
qdrant_client = QdrantClient("http://localhost:6333")

# Vector size given to the collection when it is created. It has to agree with
# the dimension of the embeddings produced for model_handle.
# NOTE(review): 512 is presumably the output size of this Jina model -- confirm.
EMBEDDING_DIMENSIONALITY = 512
# Embedding model name passed to models.Document for both indexing and querying.
model_handle = "jinaai/jina-embeddings-v2-small-en"

In [5]:
collection_name = "recipe-rag"

# Cosine-distance vectors sized by EMBEDDING_DIMENSIONALITY.
vector_params = models.VectorParams(
    size=EMBEDDING_DIMENSIONALITY,
    distance=models.Distance.COSINE,
)

# Re-running this cell starts from an empty collection: an existing one is
# dropped before the new one is created.
already_exists = qdrant_client.collection_exists(collection_name=collection_name)
if already_exists:
    qdrant_client.delete_collection(collection_name=collection_name)

qdrant_client.create_collection(collection_name=collection_name, vectors_config=vector_params)

True

In [6]:
# construct points
# Fields copied verbatim from each recipe record into the point payload.
payload_keys = (
    "text",
    'recipe_name',
    'recipe_link',
    'recipe_description',
    'ratings',
    'ready-in',
    'directions',
    'ingredients',
)

points = []
for recipe in tqdm(recipes_raw):
    # Wrapping the text in models.Document means it is embedded with
    # model_handle rather than supplied as a precomputed vector.
    embedded_text = models.Document(text=recipe['text'], model=model_handle)
    payload = {key: recipe[key] for key in payload_keys}
    points.append(
        models.PointStruct(id=recipe['recipe_id'], vector=embedded_text, payload=payload)
    )

# upsert into DB
qdrant_client.upsert(collection_name=collection_name, points=points)

  0%|          | 0/88 [00:00<?, ?it/s]

UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

## RAG pipeline

In [7]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI credentials come from -- confirm.
load_dotenv()

True

In [8]:
from openai import OpenAI
# No key is passed explicitly, so the client has to find its credentials on its own.
# NOTE(review): presumably it reads OPENAI_API_KEY from the environment populated
# by load_dotenv() in the previous cell -- confirm.
openai_client = OpenAI()

In [9]:
# search
def vector_search(query, limit=1):
    """Return the payloads of the points closest to ``query``.

    Args:
        query: Free-text search string.
        limit: Maximum number of matches to request.

    Returns:
        A list with one payload per returned point, in the order the
        points were returned.
    """
    # Wrapping the text in models.Document means the query is embedded with
    # model_handle rather than supplied as a raw vector.
    embedded_query = models.Document(text=query, model=model_handle)

    response = qdrant_client.query_points(
        collection_name=collection_name,
        query=embedded_query,
        limit=limit,
        with_payload=True,  # payloads carry the recipe fields
    )

    return [hit.payload for hit in response.points]

In [10]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt from the user question and retrieved recipes.

    Args:
        query: The user's question, inserted after ``QUESTION:``.
        search_results: Iterable of dicts; each one's ``'text'`` value is
            appended to the context block followed by a blank line.

    Returns:
        The filled-in prompt with leading and trailing whitespace removed.
    """
    # Lines of the template, joined with newlines below. The trailing space
    # after "CONTEXT:" is part of the emitted prompt and is kept on purpose.
    template_lines = (
        "You're a cooking assistant. Answer the QUESTION based on the CONTEXT from the recipe database.",
        "Use only the facts from the CONTEXT when answering the QUESTION.",
        "",
        "QUESTION: {question}",
        "",
        "CONTEXT: ",
        "{context}",
    )
    prompt_template = "\n".join(template_lines)

    # doc['text'] should contain all the recipe information
    context = "".join(f"{doc['text']}\n\n" for doc in search_results)

    filled = prompt_template.format(question=query, context=context)
    return filled.strip()

In [11]:
def llm(openai_client, llm_model, prompt):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        openai_client: Client exposing ``chat.completions.create``.
        llm_model: Model name forwarded as ``model``.
        prompt: Content of the one user message.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    user_message = {"role": "user", "content": prompt}
    completion = openai_client.chat.completions.create(
        model=llm_model,
        messages=[user_message],
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

In [12]:
def rag(query, openai_client, llm_model, limit=5):
    """Answer ``query`` by retrieving recipes and prompting the LLM with them.

    Args:
        query: The user's question.
        openai_client: Client handed through to ``llm``.
        llm_model: Model name handed through to ``llm``.
        limit: Number of search results to retrieve for the context.

    Returns:
        Whatever ``llm`` returns for the assembled prompt.
    """
    retrieved_docs = vector_search(query, limit)
    grounded_prompt = build_prompt(query, retrieved_docs)
    return llm(openai_client, llm_model, grounded_prompt)

In [13]:
query = 'Are there any recommended fish recipes? Can you give me the details about the recipes as well?'
answer = rag(query, openai_client=openai_client, llm_model='gpt-4o-mini')
print(answer)

Yes, there are several recommended fish recipes. Here are the details:

1. **Easy Oven-Baked Cod**
   - **Description**: Amazingly quick, fresh, and light; can be made with any type of fish fillet.
   - **Ratings**: 4.6 stars (60 ratings)
   - **Ready in**: 25 mins
   - **Directions**: 
     - Preheat oven to 400°F. Line your pan with aluminum foil and lightly brush with olive oil or use cooking spray.
     - Combine parsley, garlic, lemon zest, and coarse salt on a cutting board; finely chop and mix with breadcrumbs.
     - Brush the top of each fillet with olive oil and press into the crumb mixture.
     - Place fillets in baking dish, crust-side up, and bake for 12-15 minutes until firm. Serve with lemon wedges.
   - **Ingredients**: 
     - 1 1/2 cups plain breadcrumbs
     - 1/2 cup fresh parsley
     - 2-3 garlic cloves
     - 1 lemon (zest of)
     - 3/4 teaspoon coarse salt
     - 4 (6-8 ounce) cod fish fillets
     - Olive oil

2. **Lemon Baked Cod**
   - **Description**: Grea