In [None]:
from langchain_community.document_loaders import TextLoader # Importing a custom text loader for the recipe description
from langchain_text_splitters import CharacterTextSplitter #Splitting the text into smaller chunks
from langchain_openai import OpenAIEmbeddings # Importing OpenAI embeddings for vectorization
from langchain_chroma import Chroma #Vector database for storing the embeddings
import warnings 
warnings.filterwarnings("ignore")  # Suppressing all warnings to avoid cluttering the output.

from dotenv import load_dotenv
load_dotenv()

import pandas as pd
recipes = pd.read_csv("output_data/common_ingredients_recipes.csv")

In [None]:
#Append the recipe ID at the beggining of the description
#Helps to filter the recipe when querying the vector database from a description
recipes["description"] = recipes["id"].astype(str) + " " + recipes["description"]

recipes["description"]

In [None]:
#Load the description column into a txt file to be used with LangChaing
recipes["description"].to_csv("output_data/recipe_description.txt",
                                   sep = "\n",
                                   index = False,
                                   header = False)

In [None]:
#Instantiate the text splitter
raw_documents = TextLoader("output_data/recipe_description.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=0, chunk_overlap=0, separator="\n")
documents = text_splitter.split_documents(raw_documents)

In [6]:
#Returns the first recipe description
documents[0]

Document(metadata={'source': 'output_data/recipe_description.txt'}, page_content="149593 this is a never-fail muffin recipe, it's a blank canvas for anything you can dream up! i like coconut and canned chopped pineapple that has been well drained! you choose what you want to add in! if you like a sweet muffin add in more sugar.")

In [7]:
#Creating the document embeddings and storing them in a vector database
db_recipes = Chroma.from_documents(
    documents,
    embedding=OpenAIEmbeddings())

In [8]:
# Querying the vector database
query = "A warm winter soup"
docs = db_recipes.similarity_search(query, k = 10)
docs

[Document(id='c5bf314d-fb66-4a04-90c2-bba59291a8a3', metadata={'source': 'output_data/recipe_description.txt'}, page_content='38367 a mashed-potato type of soup. very comforting.'),
 Document(id='602afeba-b841-4faa-a405-f45275f1f1bb', metadata={'source': 'output_data/recipe_description.txt'}, page_content='147621 potatoes, cheese, broccoli, ham, and carrots. this is a nice comfort food for those cold winter days.  i sometimes add garlic in mine as im a garlic lover.'),
 Document(id='21b264a7-5a8b-45c8-9edb-832c8ab98b99', metadata={'source': 'output_data/recipe_description.txt'}, page_content='the pot pie part i make with leftover chicken, some frozen mixed veggies, and either leftover gravy or cream of chicken soup."'),
 Document(id='6db7c3a9-1122-4882-b274-c2cf6113798c', metadata={'source': 'output_data/recipe_description.txt'}, page_content='35859 this is a quick and very tasty soup that is very easy. i love it with a crusty bread and a salad for dinner.'),
 Document(id='95ff2d32-e86

In [9]:
#Retrieve the recipe from the ID
recipes[recipes["id"] == int(docs[0].page_content.split()[0].strip())]

Unnamed: 0,name,id,minutes,tags,n_steps,steps,description,ingredients,n_ingredients,average_rating
744,potato and kale soup,38367,55,"['60-minutes-or-less', 'time-to-make', 'course...",7,"['in a large pan , cook onion in oil until ten...",38367 a mashed-potato type of soup. very comfo...,"['olive oil', 'onion', 'garlic', 'potatoes', '...",7,4.4


In [10]:
# Initializing Phoenix
from phoenix.otel import register

# configure the Phoenix tracer
tracer_provider = register(
  project_name="default", # Default is 'default'
  auto_instrument=True # See 'Trace all calls made to a library' below
)
tracer = tracer_provider.get_tracer(__name__)

# Creating a function that retrieves the top 5 recipes based on a query
@tracer.chain
def retrieve_top_5_recipes(query: str) -> pd.DataFrame:
    top_docs = db_recipes.similarity_search(query, k=5)
    recipe_ids = [int(doc.page_content.split()[0].strip()) for doc in top_docs]
    return recipes[recipes["id"].isin(recipe_ids)]


retrieve_top_5_recipes("A quick and easy dinner")

🔭 OpenTelemetry Tracing Details 🔭
|  Phoenix Project: default
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: https://app.phoenix.arize.com/v1/traces
|  Transport: HTTP + protobuf
|  Transport Headers: {'api_key': '****', 'authorization': '****'}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



Unnamed: 0,name,id,minutes,tags,n_steps,steps,description,ingredients,n_ingredients,average_rating
515,individual potato pudding once a month cooking,30803,80,"['weeknight', 'time-to-make', 'course', 'main-...",10,"['peel and grate the potatoes', 'allow to drai...",30803 an easy side dish for those once a month...,"['potatoes', 'onions', 'eggs', 'flour', 'salt'...",8,4.9
533,kale with caramelized onions and garlic,51780,22,"['30-minutes-or-less', 'time-to-make', 'course...",9,"['heat oil in large pan', 'add chopped onions ...",51780 a quick and simple side dish made with a...,"['kale', 'onion', 'garlic cloves', 'olive oil'...",5,4.4
662,oodles of noodles butter poppy seed variation,339702,13,"['15-minutes-or-less', 'time-to-make', 'course...",5,['cook noodles in a large pot of boiling salte...,339702 these noodles are wonderful if you need...,"['egg noodles', 'butter', 'poppy seed']",3,4.8
836,simple spaghetti dish,256215,11,"['15-minutes-or-less', 'time-to-make', 'main-i...",4,['cook spaghetti according to package directio...,256215 a great simple meal or late night snack...,"['spaghetti', 'butter', 'salt', 'black pepper']",4,4.5
912,super quick pizza dough,28008,35,"['60-minutes-or-less', 'time-to-make', 'course...",6,"['mix yeast in warm water until dissolved', 's...",28008 only a 15-minute rise time! add a pinch ...,"['yeast', 'water', 'oil', 'all-purpose flour']",4,4.0
