In [2]:
# install required dependencies
%pip install openai qdrant-client pandas numpy tqdm python-dotenv more-itertools

Defaulting to user installation because normal site-packages is not writeable
Collecting openai
  Downloading openai-1.84.0-py3-none-any.whl (725 kB)
     -------------------------------------- 725.5/725.5 KB 6.6 MB/s eta 0:00:00
Collecting qdrant-client
  Downloading qdrant_client-1.14.2-py3-none-any.whl (327 kB)
     ------------------------------------- 327.7/327.7 KB 21.2 MB/s eta 0:00:00
Collecting pandas
  Downloading pandas-2.3.0-cp310-cp310-win_amd64.whl (11.1 MB)
     --------------------------------------- 11.1/11.1 MB 14.6 MB/s eta 0:00:00
Collecting numpy
  Downloading numpy-2.2.6-cp310-cp310-win_amd64.whl (12.9 MB)
     --------------------------------------- 12.9/12.9 MB 14.9 MB/s eta 0:00:00
Collecting tqdm
  Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
     ---------------------------------------- 78.5/78.5 KB ? eta 0:00:00
Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Collecting more-itertools
  Downloading more_itertools-10.7.

You should consider upgrading via the 'c:\Program Files\Python310\python.exe -m pip install --upgrade pip' command.


In [9]:
import pandas as pd
import numpy as np
from openai import OpenAI
import json
import time
from tqdm import tqdm
from uuid import uuid4
import os
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, VectorParams, Distance

In [10]:


# Load environment variables
load_dotenv()
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

# Connect to Qdrant
qdrant = QdrantClient(host="localhost", port=6333)  # adjust if running in Docker or remote

COLLECTION_NAME = "recipes"
VECTOR_SIZE = 1536  # for text-embedding-3-small

# Load dataset
df = pd.read_csv("../datasets/Food Ingredients and Recipe Dataset with Image Name Mapping.csv")

# Take first 1000 recipes for testing
sample_df = df.head(1000).copy()
print(f"Working with {len(sample_df)} recipes")

# Prepare texts for embedding
def prepare_text_for_embedding(row):
    title = row['Title']
    ingredients = str(row['Cleaned_Ingredients'])
    ingredients_clean = ingredients.replace("['", "").replace("']", "").replace("', '", ", ")
    return f"Recipe: {title}\nIngredients: {ingredients_clean}"

print("Preparing texts for embedding...")
sample_df['embedding_text'] = sample_df.apply(prepare_text_for_embedding, axis=1)

# Batch embedding
def get_embeddings_batch(texts, model="text-embedding-3-small", batch_size=100):
    embeddings = []
    for i in tqdm(range(0, len(texts), batch_size), desc="Creating embeddings"):
        batch = texts[i:i+batch_size]
        try:
            response = client.embeddings.create(input=batch, model=model)
            batch_embeddings = [embedding.embedding for embedding in response.data]
            embeddings.extend(batch_embeddings)
            time.sleep(0.1)
        except Exception as e:
            print(f"Error on batch {i//batch_size+1}: {e}")
            embeddings.extend([None] * len(batch))
    return embeddings

print("Creating embeddings...")
texts = sample_df['embedding_text'].tolist()
embeddings = get_embeddings_batch(texts)
sample_df['embedding'] = embeddings

# Remove failed ones
sample_df = sample_df[sample_df['embedding'].notna()].copy()
print(f"✅ Successfully created {len(sample_df)} embeddings")

# Recreate collection if needed
print("Creating Qdrant collection...")
qdrant.recreate_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE)
)

# Upload to Qdrant in batches
print("Uploading to Qdrant...")

points = []
for _, row in sample_df.iterrows():
    payload = {
        "page_content": row['embedding_text'],
        "title": row.get("Title", ""),
        "ingredients": row.get("Cleaned_Ingredients", ""),
        "instructions": row.get("Instructions", ""),
        "image_name": row.get("Image_Name", ""),
        "id": row.get("Unnamed: 0", -1)
    }
    points.append(PointStruct(id=str(uuid4()), vector=row['embedding'], payload=payload))

# Batch upload
from more_itertools import chunked
for batch in chunked(points, 100):
    qdrant.upsert(collection_name=COLLECTION_NAME, points=batch)

print("✅ Upload complete.")
print(f"Total points uploaded: {len(points)}")


Working with 1000 recipes
Preparing texts for embedding...
Creating embeddings...


Creating embeddings: 100%|██████████| 10/10 [00:12<00:00,  1.28s/it]
  qdrant.recreate_collection(


✅ Successfully created 1000 embeddings
Creating Qdrant collection...
Uploading to Qdrant...
✅ Upload complete.
Total points uploaded: 1000


In [None]:

from qdrant_client import QdrantClient
from qdrant_client.models import Filter, SearchRequest

# Connect to Qdrant
qdrant = QdrantClient(host="localhost", port=6333)  # adjust as needed

# Test similarity function using vector DB
def find_similar_recipes_from_qdrant(query, top_k=5, collection_name="recipes"):
    """Find similar recipes using Qdrant vector search"""

    # Step 1: Embed the query using OpenAI
    try:
        response = client.embeddings.create(
            input=[query],
            model="text-embedding-3-small"
        )
        query_vector = response.data[0].embedding
    except Exception as e:
        print(f"❌ Failed to embed query: {e}")
        return []

    # Step 2: Search in Qdrant
    try:
        hits = qdrant.search(
            collection_name=collection_name,
            query_vector=query_vector,
            limit=top_k
        )

        results = []
        for hit in hits:
            payload = hit.payload
            results.append({
                "title": payload.get("title", "Unknown"),
                "similarity": hit.score,
                "ingredients": payload.get("ingredients", "")[:200] + "..." if len(payload.get("ingredients", "")) > 200 else payload.get("ingredients", "")
            })
        return results

    except Exception as e:
        print(f"❌ Qdrant search failed: {e}")
        return []

# Test the search
print("\n" + "="*60)
print("TESTING QDRANT RECIPE SEARCH")
print("="*60)

test_query = "chicken with vegetables"
print(f"Searching for: '{test_query}'")

try:
    similar_recipes = find_similar_recipes_from_qdrant(test_query)

    for i, recipe in enumerate(similar_recipes, 1):
        print(f"\n{i}. {recipe['title']}")
        print(f"   Similarity: {recipe['similarity']:.3f}")
        print(f"   Ingredients: {recipe['ingredients']}")
        print(f"   image: ")

except Exception as e:
    print(f"Search test failed: {e}")


TESTING QDRANT RECIPE SEARCH
Searching for: 'chicken with vegetables'

1. Soy-Glazed Chicken with Broccoli
   Similarity: 0.512
   Ingredients: ['3 Tbsp. honey', '3 Tbsp. soy sauce or tamari', '3 Tbsp. unseasoned rice vinegar', '1 tsp. finely grated ginger (from one 2" piece)', '1 Tbsp. vegetable oil', '4 skinless, boneless chicken thighs', '...

2. Golden Noodles With Chicken
   Similarity: 0.506
   Ingredients: ['Extra-virgin olive oil', '4 shallots, thinly sliced into rings', '1/4 cup unbleached all-purpose flour', '2 tablespoons extra virgin olive oil', '2 bone-in, skin-on chicken breasts', 'Kosher salt an...

3. Chicken Brodo with Spring Vegetables and Fried Bread
   Similarity: 0.499
   Ingredients: ['4 lb. raw chicken bones', '3 spring onions or 4 scallions, chopped', '4 garlic cloves, crushed', '3 oz. thinly sliced prosciutto, chopped', '3/4 cup dried porcini mushrooms, rinsed', '1/3 cup extra-...

4. Tandoori Chicken and Vegetable Sheet-Pan Supper
   Similarity: 0.493
   Ingr

  hits = qdrant.search(
