In [17]:
import os
import re

import chromadb
import pandas as pd
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain.vectorstores import Chroma
from openai.types.chat import ChatCompletion

In [18]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI / LangSmith credentials used
# by later cells — confirm which keys the .env file is expected to define.
load_dotenv()

True

In [19]:
# Load the raw recipe table; the path is relative to the notebook's working directory.
# NOTE(review): the preview of this frame shows `Unnamed: 0.1` / `Unnamed: 0` columns that
# mirror the row index — presumably index columns written by earlier `to_csv` calls; confirm
# whether they should be dropped at load time.
df = pd.read_csv('vegan_recipes.csv')

In [20]:
# Quick look at the first rows to check the columns used by later cells
# (`title`, `href`, `ingredients`, `preparation`).
df.head()

Unnamed: 0.1,Unnamed: 0,href,title,ingredients,preparation
0,0,https://veganuary.com/recipes/rainbow-rice/,Rainbow Rice,\nIngredients\n\nCarrot ribbons (just use a pe...,\nMethod\n\nCook the rice as instructed on the...
1,1,https://veganuary.com/recipes/mfc-nachos/,Nachos,\nIngredients\n\n400g Meatless Farm Co mince (...,\nPreparation\n\nPreheat the oven to 350ºF\nHe...
2,2,https://veganuary.com/recipes/hazelnut-truffles/,Hazelnut Truffles,\nIngredients\n\n100g hazelnuts\n2 tablespoons...,\nMethod\n\nPreheat the oven to 200c\nPut the ...
3,3,https://veganuary.com/recipes/simple-roasted-r...,Simple Roasted Radish by ChicP,\nIngredients\n\n1 170g tub beetroot and horse...,\nPreparation\nPre heat the oven to 160°C\nCut...
4,4,https://veganuary.com/recipes/baked-apple-char...,Baked Apple Charlotte,\nIngredients\n\n2 tbsp rapeseed oil\n75g pitt...,\nPreparation\n\nServes 9\nYou will need an 8i...


In [21]:
# Helper Functions
def cleanNewLineAndParagraph(stringToBeCleaned):
    """Replace every newline character in the text with a period.

    Args:
        stringToBeCleaned: Raw text that may contain newline characters.

    Returns:
        The text with each newline replaced by ".".
    """
    return re.sub(r"\n", ".", stringToBeCleaned)

def cleanIngredients(IngredientString):
    """Turn a raw ingredients cell into a single-line string.

    The "Ingredients" header line is rewritten to the inline label
    "Ingredients:" and every remaining newline becomes a period.

    Args:
        IngredientString: Raw ingredients text as read from the CSV.

    Returns:
        The single-line, labelled ingredients text.
    """
    # The header may be followed by one or two newlines; accept both so a
    # header followed by a single newline is still recognised.
    labelled = re.sub(r'\nIngredients\n{1,2}', 'Ingredients:', IngredientString)
    return cleanNewLineAndParagraph(labelled)

def cleanPreparation(PreparationString):
    """Turn a raw preparation cell into a single-line string.

    The "Method" / "Preparation" header line is removed and every remaining
    newline becomes a period.

    Args:
        PreparationString: Raw preparation text as read from the CSV.

    Returns:
        The single-line preparation text without its header.
    """
    # Some rows have only ONE newline after the header (e.g. a cell starting
    # with newline + "Preparation" + newline + first step). Requiring exactly
    # two newlines left the header in place, so it ended up as
    # ".Preparation." inside the cleaned text. Accept one or two newlines.
    withoutHeader = re.sub(r'\n(?:Method|Preparation)\n{1,2}', '', PreparationString)
    return cleanNewLineAndParagraph(withoutHeader)

In [22]:
# Hand-written samples shaped like the raw CSV cells (header line, blank line, content)
# to eyeball what the two cleaning helpers produce.
raw_ingredients = "\nIngredients\n\n2 cups flour\n1 tsp sugar"
raw_preparation = "\nPreparation\n\nServes 9\nYou will need an 8i.."

# Expected: the ingredients header becomes an inline "Ingredients:" label, the
# preparation header is dropped, and the remaining newlines become periods.
print(cleanIngredients(raw_ingredients))
print(cleanPreparation(raw_preparation))

Ingredients:2 cups flour.1 tsp sugar
Serves 9.You will need an 8i..


In [23]:
# Work on a copy: a plain `dfCleaned = df` only creates a second name for the
# same DataFrame, so the derived columns below would also be added to the raw
# `df` loaded from the CSV.
dfCleaned = df.copy()

# Single-line versions of the raw text columns.
dfCleaned['ingredientsV2'] = dfCleaned['ingredients'].apply(cleanIngredients)
dfCleaned['preparationV2'] = dfCleaned['preparation'].apply(cleanPreparation)

# Text that gets embedded: a short lead-in naming the recipe, then the cleaned content.
dfCleaned['ingredientToken'] = 'These are the Ingredients for ' + dfCleaned['title'] + ': ' + dfCleaned['ingredientsV2']
dfCleaned['preparationToken'] = 'These are the steps for ' + dfCleaned['title'] + ': ' + dfCleaned['preparationV2']

In [24]:
# Preview only the columns that feed the vector store: the two token columns become the
# document text, `title` and `href` become document metadata.
dfCleaned[['title','href','ingredientToken','preparationToken']].head()

Unnamed: 0,title,href,ingredientToken,preparationToken
0,Rainbow Rice,https://veganuary.com/recipes/rainbow-rice/,These are the Ingredients for Rainbow Rice: In...,These are the steps for Rainbow Rice: Cook the...
1,Nachos,https://veganuary.com/recipes/mfc-nachos/,These are the Ingredients for Nachos: Ingredie...,These are the steps for Nachos: Preheat the ov...
2,Hazelnut Truffles,https://veganuary.com/recipes/hazelnut-truffles/,These are the Ingredients for Hazelnut Truffle...,These are the steps for Hazelnut Truffles: Pre...
3,Simple Roasted Radish by ChicP,https://veganuary.com/recipes/simple-roasted-r...,These are the Ingredients for Simple Roasted R...,These are the steps for Simple Roasted Radish ...
4,Baked Apple Charlotte,https://veganuary.com/recipes/baked-apple-char...,These are the Ingredients for Baked Apple Char...,These are the steps for Baked Apple Charlotte:...


In [25]:
# Create a Chroma client and make sure a collection named "veganRecipes" exists.
# NOTE(review): neither `chroma_client` nor `collection` is referenced again in the visible
# cells, and the LangChain vector store is later built without `client=` or
# `collection_name=`, so it presumably does not use this collection — confirm before relying
# on or removing it.
chroma_client = chromadb.Client()
collection = chroma_client.get_or_create_collection(name="veganRecipes")

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


In [26]:
# Embedding model (Hugging Face "all-MiniLM-L6-v2") used to vectorise the documents
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Directory in which the Chroma vector store is persisted
persist_directory = "veganRecipeChromaDB"

# Two documents per recipe row, appended in this order:
# the ingredients text first, then the preparation text.
documents = []

for idx, row in dfCleaned.iterrows():
    title, link = row['title'], row['href']

    for doc_type, token_column in (
        ("ingredients", "ingredientToken"),
        ("preparation", "preparationToken"),
    ):
        documents.append(Document(
            page_content=row[token_column],
            metadata={"type": doc_type, "title": title, "link": link},
        ))


In [27]:
# Give every document a deterministic id derived from its position in
# `documents`. Without explicit ids each run stores the same recipes again
# under freshly generated ids, so re-running this cell against an existing
# `persist_directory` fills the store with duplicates and the retriever then
# returns the same recipe several times. With stable ids a re-run targets the
# same entries instead of adding new ones.
# NOTE: entries written by earlier runs (without ids) are not removed by this
# change; delete the persist directory once to start from a clean store.
document_ids = [f"recipe-doc-{position}" for position in range(len(documents))]

vectorstore = Chroma.from_documents(
    documents,
    embedding=embedding_model,
    ids=document_ids,
    persist_directory=persist_directory
)

# Explicit flush of the store to `persist_directory`, kept as in the original.
vectorstore.persist()


Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


In [28]:
# Retriever over the vector store that returns the 5 best matches per query
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

query = "I like chicken curry. However since im now vegan What dish would you recommend?"
docs = retriever.get_relevant_documents(query)

# One separator-delimited record per hit: metadata fields first, then the stored text
for doc in docs:
    meta = doc.metadata
    print(
        "-----",
        f"Title: {meta.get('title')}",
        f"Type: {meta.get('type')}",
        f"Link: {meta.get('link')}",
        f"Content:\n {doc.page_content}",
        sep="\n",
    )


-----
Title: Thai Prawn-Style Vegan Curry
Type: preparation
Link: https://veganuary.com/recipes/thai-prawn-style-vegan-curry/
Content:
 These are the steps for Thai Prawn-Style Vegan Curry: Heat the oil in a large frying pan and fry the onion and red pepper until soft..Add the coconut milk and cook-in sauce to the pan and mix well..Add the zucchini and baby corn and simmer for 5–7 minutes..Stir in flash-fried Prawn-Style Pieces and sugar snap peas. Garnish with chopped spring onions and serve with rice...
-----
Title: Thai Prawn-Style Vegan Curry
Type: preparation
Link: https://veganuary.com/recipes/thai-prawn-style-vegan-curry/
Content:
 These are the steps for Thai Prawn-Style Vegan Curry: Heat the oil in a large frying pan and fry the onion and red pepper until soft..Add the coconut milk and cook-in sauce to the pan and mix well..Add the zucchini and baby corn and simmer for 5–7 minutes..Stir in flash-fried Prawn-Style Pieces and sugar snap peas. Garnish with chopped spring onions a

In [29]:
from langsmith import traceable
from openai import OpenAI
from typing import List
import nest_asyncio


# Model configuration. MODEL_NAME is the default `model` argument of `call_openai`.
# NOTE(review): MODEL_PROVIDER and APP_VERSION are not referenced in the visible cells —
# presumably intended as tracing metadata; confirm.
MODEL_PROVIDER = "openai"
MODEL_NAME = "gpt-4o-mini"
APP_VERSION = 1.0
# System message that `generate_response` sends ahead of the retrieved context and question.
RAG_SYSTEM_PROMPT = """You are an assistant for answer questions about vegan recipes. 
Use the following pieces of retrieved context to answer the latest question in the conversation. 
If you don't know the answer, just say that you don't know. 
Use three sentences maximum and keep the answer concise.
"""

# Client is built with no arguments.
# NOTE(review): presumably it picks up the API key from the environment — confirm.
openai_client = OpenAI()
# NOTE(review): presumably applied so async code can run inside the notebook's already-running
# event loop — confirm it is still needed.
nest_asyncio.apply()


@traceable(run_type="chain")
def retrieve_documents(question: str):
    """Return the documents fetched from the vector store for a question.

    Args:
        question: The user's question, used as the retrieval query.

    Returns:
        Whatever the module-level ``retriever`` returns from
        ``get_relevant_documents(question)``.
    """
    return retriever.get_relevant_documents(question)

@traceable(run_type="chain")
def generate_response(question: str, documents):
    """Format the retrieved documents into a prompt and call the model.

    Args:
        question: The user's question.
        documents: Retrieved documents; each must expose ``page_content``.

    Returns:
        The value returned by ``call_openai`` for the assembled messages.
    """
    # Separate the passages with a blank line inside the context section.
    formatted_docs = "\n\n".join(doc.page_content for doc in documents)
    messages = [
        {
            "role": "system",
            "content": RAG_SYSTEM_PROMPT
        },
        {
            "role": "user",
            "content": f"Context: {formatted_docs} \n\n Question: {question}"
        }
    ]
    return call_openai(messages)

@traceable(run_type="llm")
def call_openai(
    messages: List[dict], model: str = MODEL_NAME, temperature: float = 0.0
) -> ChatCompletion:
    """Request a chat completion from OpenAI and return the response object.

    Args:
        messages: Chat messages as dicts with ``role`` and ``content`` keys.
        model: Name of the chat model; defaults to ``MODEL_NAME``.
        temperature: Sampling temperature passed through to the API.

    Returns:
        The completion object returned by the client, not a plain string;
        callers read the text from ``.choices[0].message.content``.
    """
    return openai_client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )

@traceable(run_type="chain")
def langsmith_rag(question: str):
    """Answer a question with retrieval-augmented generation.

    Calls ``retrieve_documents`` to fetch documents for the question, then
    ``generate_response`` to produce a model response grounded in them.

    Args:
        question: The user's question.

    Returns:
        The message content of the first choice in the model response.
    """
    documents = retrieve_documents(question)
    response = generate_response(question, documents)
    return response.choices[0].message.content


In [30]:
question = "What are some recipes where tomatoes are pertinent?"
# `langsmith_rag` itself only declares `question`; `langsmith_extra` is passed alongside it.
# NOTE(review): presumably the `@traceable` wrapper consumes `langsmith_extra` and attaches
# the metadata to the traced run — confirm against the LangSmith documentation.
ai_answer = langsmith_rag(question, langsmith_extra={"metadata": {"website": "www.google.com"}})
print(ai_answer)

Two recipes where tomatoes are key ingredients are the Heirloom Tomato & Endive Salad and the Moroccan-Style Lentil, Chickpea and Kale Soup. The salad features medium heirloom tomatoes and grape heirloom tomatoes, while the soup includes cherry tomatoes and canned chopped tomatoes. Both recipes highlight the flavor and versatility of tomatoes in vegan dishes.
