# Building a Personalized Recipe Finder with a FAISS and OpenAI Embeddings Pipeline






## Description
The goal of this experiment is to implement a personalized Recipe Finder that retrieves and adapts recipes based on user queries, using FAISS for similarity search and GPT-4 for refinement.



## References

The recipe dataset used in this project is available at this [link](https://raw.githubusercontent.com/tabatkins/recipe-db/master/db-recipes.json).

## Setup

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd "YOUR-PATH-HERE"

In [None]:
# Install necessary libraries
%%capture
!pip install openai faiss-cpu numpy json5 requests


In [None]:
# Import Libraries
import faiss
import json
import numpy as np
import os
import requests
import openai
from openai import OpenAI

In [None]:
# Set OpenAI API Key
# Prefer the OPENAI_API_KEY environment variable so the secret is not stored in the notebook;
# the placeholder is only used when that variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR-OPENAI-KEY-HERE")

In [None]:
# Function to generate text embeddings using OpenAI's API
# "text-embedding-ada-002" is an OpenAI embedding model designed for text similarity, retrieval, clustering, and classification tasks
# (OpenAI has since released newer embedding models, e.g. "text-embedding-3-small" and "text-embedding-3-large")

def get_openai_embedding(text):
    """Return the embedding of ``text`` from the "text-embedding-ada-002" model.

    A client is built from ``openai.api_key`` on every call, one embeddings
    request is sent, and the vector of the first returned item is handed back.
    """
    embeddings_api = openai.OpenAI(api_key=openai.api_key).embeddings
    result = embeddings_api.create(model="text-embedding-ada-002", input=text)
    first_item = result.data[0]
    return first_item.embedding

In [None]:
# Function to load recipes from a JSON file or URL
def load_recipes(source, timeout=30):
    """Load recipes from a local JSON file or an HTTP(S) URL.

    Args:
        source: Path to a local JSON file, or a URL that starts with
            ``http://`` or ``https://``.
        timeout: Seconds to wait for the server before giving up. Only used
            for URL sources.

    Returns:
        The parsed JSON content.

    Raises:
        RuntimeError: If the URL answers with a status code other than 200.
        FileNotFoundError: If ``source`` is neither a URL nor an existing path.
    """
    # Match the full scheme so that a local file such as "http_dump.json"
    # is not mistaken for a URL.
    if source.startswith(("http://", "https://")):
        # Without a timeout, requests can block indefinitely on a stalled server.
        response = requests.get(source, timeout=timeout)
        if response.status_code != 200:
            raise RuntimeError(f"Failed to fetch data from URL: {response.status_code}")
        return response.json()

    if os.path.exists(source):
        with open(source, "r", encoding="utf-8") as f:
            return json.load(f)

    raise FileNotFoundError(f"Invalid source: {source}")

In [None]:
# Function to extract the first `limit` recipes (50 by default) from a dictionary dataset
def extract_recipe_subset(recipes_dict, limit=50):
    """Return the first ``limit`` values of ``recipes_dict`` as a list.

    The values are taken in the dictionary's iteration order, and the number
    of extracted recipes is printed before returning.
    """
    all_recipes = [*recipes_dict.values()]
    chosen = all_recipes[0:limit]
    count = len(chosen)
    print(f"Extracted {count} recipes.")
    return chosen

In [None]:
# Function to create a FAISS index for fast recipe retrieval
def create_faiss_index(recipes):
    """Create a FAISS index for recipe retrieval based on embeddings.

    Each recipe is turned into one text made of its name, its ingredients
    joined by commas, and its instructions. Every text is embedded with
    ``get_openai_embedding`` and stored in a flat L2 index.

    Args:
        recipes: Sequence of recipe dicts. The keys "name", "ingredients"
            (a list of strings) and "instructions" are read; a missing key
            falls back to a default.

    Returns:
        A tuple ``(index, recipe_texts)`` where ``recipe_texts[i]`` is the
        text embedded as row ``i`` of ``index``.

    Raises:
        ValueError: If ``recipes`` is empty or no embeddings were produced.
    """
    if not recipes:
        raise ValueError("No recipes found to index!")

    candidate_texts = (
        r.get("name", "Unknown Name") + " "
        + ", ".join(r.get("ingredients", [])) + " "  # Convert list to comma-separated string
        + r.get("instructions", "No instructions provided")
        for r in recipes
    )
    # Filter once, before embedding, so the returned texts and the index rows
    # stay aligned by construction (filtering only the embeddings would shift
    # every later row relative to recipe_texts).
    recipe_texts = [text for text in candidate_texts if text]

    # Generate embeddings (one request per text)
    embeddings = np.array(
        [get_openai_embedding(text) for text in recipe_texts],
        dtype=np.float32,
    )

    # Ensure embeddings were generated
    if embeddings.shape[0] == 0:
        raise ValueError("No embeddings generated. Check input data and OpenAI API.")

    # Create FAISS index sized to the embedding dimension
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    print("FAISS index created with", index.ntotal, "entries.")
    return index, recipe_texts

In [None]:
# Function to search for the most relevant recipes using FAISS
def search_recipe(query, index, recipe_texts, k=3):
    """Search for the most relevant recipes using FAISS based on user query.

    Args:
        query: Free-text user query; it is embedded with ``get_openai_embedding``.
        index: FAISS index to search.
        recipe_texts: Texts aligned with the index rows, so that
            ``recipe_texts[i]`` belongs to row ``i``.
        k: Maximum number of recipes to return.

    Returns:
        A list of at most ``k`` recipe texts, in the order returned by FAISS.

    Raises:
        ValueError: If ``index`` is ``None`` or contains no vectors.
    """
    if index is None or index.ntotal == 0:
        raise ValueError("FAISS index is not initialized or empty!")

    query_embedding = np.array([get_openai_embedding(query)], dtype=np.float32)

    # Never ask for more neighbours than the index holds: FAISS pads missing
    # results with the label -1, which would silently select recipe_texts[-1].
    k = min(k, index.ntotal)

    # Perform the FAISS search
    _distances, indices = index.search(query_embedding, k)

    # Skip any -1 padding label defensively.
    return [recipe_texts[i] for i in indices[0] if i >= 0]

In [None]:
# Function to refine a retrieved recipe using OpenAI's GPT model

# Shared OpenAI client used by refine_recipe_with_gpt
client = openai.OpenAI(api_key=openai.api_key)


def refine_recipe_with_gpt(recipe_text, preference):
    """Ask the "gpt-4-turbo" chat model to adapt a recipe to a preference.

    The recipe text and the preference are combined into a single user
    message, and the content of the first returned choice is handed back.
    """
    user_message = {
        "role": "user",
        "content": f"Modify this recipe to fit user preferences ({preference}): \n{recipe_text}",
    }
    system_message = {"role": "system", "content": "You are a helpful chef assistant."}

    completion = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[system_message, user_message],
    )

    top_choice = completion.choices[0]
    return top_choice.message.content

In [None]:
# Function to display query, retrieved recipe, and modified recipe with separators
def display_result(query, retrieved_recipe, modified_recipe):
    """Print the query, the retrieved recipe and the modified recipe.

    Each section is preceded by a 60-character "=" rule, and one more rule
    closes the output.
    """
    rule = "=" * 60
    sections = (
        ("USER QUERY", query),
        ("RETRIEVED RECIPE", retrieved_recipe),
        ("MODIFIED RECIPE", modified_recipe),
    )
    for title, body in sections:
        print(rule)
        print(f"{title}:\n{body}")
    print(rule)

In [None]:
# Load recipes from a dataset (local file or URL)
# Here the source is a URL, so load_recipes downloads and parses the JSON file.
recipe_source = "https://raw.githubusercontent.com/tabatkins/recipe-db/master/db-recipes.json"
recipes = load_recipes(recipe_source)


In [None]:
# Print the top-level keys of the loaded dataset (the dataset is a dict, one entry per key)
print("Keys in recipes:", recipes.keys())

Keys in recipes: dict_keys(['2', '4', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '21', '28', '30', '32', '36', '42', '48', '51', '54', '55', '56', '57', '61', '63', 'id1', 'id3', 'id4', 'id5', 'id6', 'id8', 'id9', 'id12', 'id15', 'id17', 'id18', 'id19', 'id20', 'id21', 'id22', 'id25', 'id26', 'id27', 'id28', 'id29', 'id30', 'id32', 'id33', 'id34', 'id35', 'id36', 'id37', 'id38', 'id39', 'id43', 'id44', 'id45', 'id46', 'id49', 'id50', 'id51', 'id52', 'id53', 'id55', 'id57', 'id58', 'id63', 'id64', 'id65', 'id66', 'id67', 'id68', 'id70', 'id73', 'id74', 'id75', 'id79', 'id81', 'id82', 'id83', 'id84', 'id86', 'id88', 'id90', 'id93', 'id95', 'id96', 'id97', 'id99', 'id101', 'id104', 'id105', 'id107', 'id108', 'id109', 'id110', 'id111', 'id112', 'id113', 'id114', 'id116', 'id117', 'id118', 'id119', 'id123', 'id125', 'id126', 'id127', 'id128', 'id131', 'id132', 'id133', 'id134', 'id136', 'id137', 'id139', 'id140', 'id141', 'id142', 'id144', 'id145', 'id146', '

In [None]:
# Print one example recipe to inspect its structure
first_key = next(iter(recipes))  # first key, without building a list of all keys
print("Sample recipe structure:", recipes[first_key])

Sample recipe structure: {'id': '2', 'name': 'Baked Shrimp Scampi', 'source': 'Ina Garten: Barefoot Contessa Back to Basics', 'preptime': 0, 'waittime': 0, 'cooktime': 0, 'servings': 6, 'comments': 'Modified by reducing butter and salt.  Substituted frozen shrimp instead of fresh 12-15 count (butterflied, tails on).', 'calories': 2565, 'fat': 159, 'satfat': 67, 'carbs': 76, 'fiber': 4, 'sugar': 6, 'protein': 200, 'instructions': 'Preheat the oven to 425 degrees F.\r\n\r\nDefrost shrimp by putting in cold water, then drain and toss with wine, oil, salt, and pepper. Place in oven-safe dish and allow to sit at room temperature while you make the butter and garlic mixture.\r\n\r\nIn a small bowl, mash the softened butter with the rest of the ingredients and some salt and pepper.\r\n\r\nSpread the butter mixture evenly over the shrimp. Bake for 10 to 12 minutes until hot and bubbly. If you like the top browned, place under a broiler for 1-3 minutes (keep an eye on it). Serve with lemon wedg

In [None]:
# Extract the first 50 recipes from the dictionary dataset
# (indexing sends one embedding request per recipe, so the subset keeps the number of API calls small)
subset_recipes = extract_recipe_subset(recipes, limit=50)

Extracted 50 recipes.


In [None]:
# Create FAISS index for fast retrieval
# recipe_texts holds the text that was embedded for each recipe; it is needed later to map search hits back to text
index, recipe_texts = create_faiss_index(subset_recipes)

FAISS index created with 50 entries.


In [None]:
# User query for recipe search
user_query = "vegetarian pasta"
# k is left at search_recipe's default of 3, so up to three recipe texts are returned
relevant_recipes = search_recipe(user_query, index, recipe_texts)

In [None]:
# Modify the first retrieved recipe based on user preference
preference = "low-calorie alternative"
# Only the first search result (index 0) is sent to the model for rewriting
modified_recipe = refine_recipe_with_gpt(relevant_recipes[0], preference)


In [None]:
# Display the formatted output: the query, the first retrieved recipe, and its modified version
display_result(user_query, relevant_recipes[0], modified_recipe)

USER QUERY:
vegetarian pasta
RETRIEVED RECIPE:
Straw and Hay Fettuccine Tangle 4 serving [asparagus pesto](http://www.xanthir.com/recipes/showrecipe.php?id=id436), 1/4 cup toasted pine nuts, 4 tbsp parmesan, grated, 12 oz dried pasta Bring a large pot of water to a rolling boil.

Meanwhile, make asparagus pesto.

Salt the pasta water well and cook the pasta until just tender. Drain and toss immediately with 1 cup of the asparagus pesto (1/4 cup per serving), stirring in more afterward depending on how heavily coated you like your pasta. Serve sprinkled with the remaining toasted pine nuts, a dusting of Parmesan, and a quick drizzle of extra-virgin olive oil.
MODIFIED RECIPE:
Here's the recipe for Straw and Hay Fettuccine Tangle modified to be a lower-calorie option:

### Ingredients:
- **Asparagus Pesto:** Refer to the linked recipe but replace any oil with a lower amount or substitute part of the oil with vegetable broth to reduce calories. Additionally, use a reduced amount of 