In [1]:
import os
import json
from typing import List, Dict
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_community.llms import Ollama
from tqdm import tqdm


In [2]:

class CookBookAI:
   """Recipe assistant that combines PDF cookbook retrieval with two Ollama models.

   PDFs found in ``pdf_directory`` are split into chunks, embedded and stored in
   a FAISS index saved under ``db_path``. ``llm_instruct`` (temperature 0.0) is
   used for planning/formatting prompts and as the QA-chain model;
   ``llm_creative`` (temperature 0.7) is used for the recipe-improvement,
   modification and recommendation prompts.

   Every prompt asks the model for JSON only; responses are decoded with
   ``_parse_llm_json``, which raises ``json.JSONDecodeError`` when no JSON
   document can be recovered.
   """

   def __init__(self, pdf_directory: str, db_path: str = "cookbook_vectordb"):
       """Create the embedding model and LLM clients; no index is loaded yet.

       Args:
           pdf_directory: Directory scanned for PDF files by ``load_and_process_pdfs``.
           db_path: Location where the FAISS index is saved to and loaded from.
       """
       self.pdf_directory = pdf_directory
       self.db_path = db_path
       self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
       self.vectorstore = None
       self.llm_instruct = Ollama(model="llama3.1:8b-instruct-q8_0", temperature=0.0)
       self.llm_creative = Ollama(model="llama3.1:latest", temperature=0.7)
       self.qa_chain = None

   @staticmethod
   def _parse_llm_json(raw: str):
       """Decode a JSON document from an LLM response.

       The response is first parsed as-is. If that fails (for example because
       the model wrapped the JSON in a Markdown code fence or added a sentence
       around it), the span from the first ``{``/``[`` to the last ``}``/``]``
       is parsed instead.

       Raises:
           json.JSONDecodeError: If neither attempt yields valid JSON.
       """
       text = raw.strip()
       try:
           return json.loads(text)
       except json.JSONDecodeError:
           starts = [pos for pos in (text.find("{"), text.find("[")) if pos != -1]
           end = max(text.rfind("}"), text.rfind("]"))
           if not starts or end < min(starts):
               raise
           return json.loads(text[min(starts):end + 1])

   @staticmethod
   def _source_records(docs) -> List[Dict]:
       """Convert retrieved documents into JSON-serialisable source records.

       Falls back to the ``source`` metadata key (and ``None`` for the page)
       when a document lacks the ``source_pdf``/``page`` keys that
       ``load_and_process_pdfs`` writes.
       """
       return [
           {
               "content": doc.page_content,
               "source_pdf": doc.metadata.get("source_pdf", doc.metadata.get("source")),
               "page": doc.metadata.get("page")
           }
           for doc in docs
       ]

   def load_and_process_pdfs(self):
       """Build the FAISS index from every PDF in ``self.pdf_directory`` and save it.

       Each page's metadata gets ``source`` (the file name) and ``page``; each
       500-character chunk (50 overlap) additionally gets ``source_pdf`` and a
       ``chunk_id`` of the form ``<file>_p<page>_<chunk index>``. If no text
       chunks are produced, a message is printed and nothing is created.
       """
       # Sorted and case-insensitive so the result does not depend on the
       # directory listing order or on how the extension is capitalised.
       pdf_files = sorted(
           f for f in os.listdir(self.pdf_directory) if f.lower().endswith('.pdf')
       )

       documents = []
       for filename in tqdm(pdf_files, desc="Processing PDFs"):
           file_path = os.path.join(self.pdf_directory, filename)
           pdf_documents = PyPDFLoader(file_path).load()

           for doc in pdf_documents:
               doc.metadata.update({
                   'source': filename,
                   'page': doc.metadata.get('page', 1)
               })
           documents.extend(pdf_documents)

       text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
       chunks = text_splitter.split_documents(documents)

       if not chunks:
           # Nothing to embed; avoid handing an empty list to the index builder.
           print(f"No PDF text found in {self.pdf_directory}. Vector store was not created.")
           return

       # enumerate() supplies the chunk position directly instead of searching
       # the list for every chunk.
       for index, chunk in enumerate(chunks):
           chunk.metadata.update({
               'source_pdf': chunk.metadata['source'],
               'chunk_id': f"{chunk.metadata['source']}_p{chunk.metadata['page']}_{index}"
           })

       print(f"Creating vector store from {len(chunks)} chunks...")
       self.vectorstore = FAISS.from_documents(chunks, self.embeddings)
       self.vectorstore.save_local(self.db_path)
       print(f"Vector store created and saved to {self.db_path}")

   def load_vectorstore(self):
       """Load a previously saved FAISS index from ``self.db_path`` if it exists."""
       if os.path.exists(self.db_path):
           # SECURITY: allow_dangerous_deserialization=True makes FAISS unpickle
           # the stored docstore. Only load indexes this application created;
           # a tampered file can execute arbitrary code.
           self.vectorstore = FAISS.load_local(self.db_path, self.embeddings, allow_dangerous_deserialization=True)
           print("Vector database loaded successfully.")
       else:
           print("No existing vector database found. Please process PDFs first.")

   def setup_qa_chain(self):
       """Create the ``RetrievalQA`` chain (top-4 retrieval, "stuff" chain type).

       Prints a hint and returns without creating the chain when no vector
       store has been loaded or built.
       """
       if self.vectorstore is None:
           print("Please load or process documents before setting up the QA chain.")
           return

       retriever = self.vectorstore.as_retriever(search_kwargs={"k": 4})
       self.qa_chain = RetrievalQA.from_chain_type(
           llm=self.llm_instruct,
           chain_type="stuff",
           retriever=retriever,
           return_source_documents=True
       )

   def rat_process(self, user_input: str, max_iterations: int = 3) -> Dict:
       """Draft a recipe plan and refine it with retrieved cookbook passages.

       An initial plan is requested from ``llm_instruct``. Then, for each of
       ``max_iterations`` rounds, the three chunks most similar to the dish
       name plus the latest plan are retrieved and ``llm_creative`` is asked
       for an improved plan.

       Args:
           user_input: Name of the dish.
           max_iterations: Number of retrieve-and-improve rounds.

       Returns:
           ``{"dish": ..., "thoughts": [plan, improved, ...], "sources": [...]}``.

       Raises:
           RuntimeError: If no vector store is available.
           json.JSONDecodeError: If a model response contains no valid JSON.
       """
       if self.vectorstore is None:
           # Fail before spending an LLM call that cannot be followed by retrieval.
           raise RuntimeError(
               "No vector store available. Load the vector database or process PDFs first."
           )

       instruct_prompt = f"""
       # Task: Generate a step-by-step plan to create a recipe for {user_input}

## Think through this step-by-step:
1. Consider the main ingredients needed for {user_input}
2. Think about the cooking techniques typically used for this dish
3. Estimate the preparation and cooking time
4. Consider any special equipment that might be needed

## Example:
For a "Chocolate Chip Cookie" recipe:
{{
    "ingredients": ["flour", "butter", "sugar", "chocolate chips", "eggs", "vanilla extract"],
    "techniques": ["creaming", "mixing", "baking"],
    "prep_time": "15 minutes",
    "cook_time": "10-12 minutes",
    "equipment": ["mixing bowl", "baking sheet", "oven"],
    "initial_thoughts": "Start by creaming butter and sugar, then add eggs and vanilla. Mix in dry ingredients, fold in chocolate chips, and bake in preheated oven."
}}

## Your turn:
Now, create a similar JSON object for {user_input}. Pay close **attention** to the specific requirements of this dish.

Format your response as a JSON object with the following structure:
{{
    "ingredients": ["list of main ingredients"],
    "techniques": ["list of cooking techniques"],
    "prep_time": "estimated preparation time",
    "cook_time": "estimated cooking time",
    "equipment": ["list of special equipment, if any"],
    "initial_thoughts": "Your initial thoughts on how to approach this recipe"
}}

**IMPORTANT**: Remeber to **ONLY AND ONLY** output the requested JSON in the requiered without saying **ANY** additiional words.
       """

       initial_thoughts = self._parse_llm_json(self.llm_instruct(instruct_prompt))

       recipe_info = {
           "dish": user_input,
           "thoughts": [initial_thoughts],
           "sources": []
       }

       for i in range(max_iterations):
           # Retrieve on the dish name plus the most recent plan.
           retrieval_query = f"{user_input}\n{json.dumps(recipe_info['thoughts'][-1])}"
           relevant_docs = self.vectorstore.similarity_search(retrieval_query, k=3)
           context = "\n".join([doc.page_content for doc in relevant_docs])

           recipe_info["sources"].extend(self._source_records(relevant_docs))

           creative_prompt = f"""
           # Task: Improve and expand the recipe for {user_input}

Given the following initial recipe plan:
{json.dumps(recipe_info['thoughts'][-1])}

And this additional context:
{context}

## Think through this step-by-step:
1. Analyze the initial plan and the new context
2. Identify any new ingredients or techniques that could enhance the recipe
3. Consider how to make the recipe more unique or interesting
4. Think about potential variations or substitutions

## Example:
For an improved "Chocolate Chip Cookie" recipe:
{{
    "ingredients": ["2 1/4 cups all-purpose flour", "1 cup unsalted butter, softened", "3/4 cup granulated sugar", "3/4 cup brown sugar", "2 large eggs", "1 tsp vanilla extract", "1 tsp baking soda", "1/2 tsp salt", "2 cups semi-sweet chocolate chips"],
    "instructions": [
        "Preheat oven to 375°F (190°C)",
        "Cream butter and sugars until light and fluffy",
        "Beat in eggs one at a time, then stir in vanilla",
        "Combine flour, baking soda, and salt; gradually stir into the creamed mixture",
        "Fold in chocolate chips",
        "Drop by rounded tablespoons onto ungreased baking sheets",
        "Bake for 9 to 11 minutes or until golden brown",
        "Let stand for 2 minutes before removing to cool on wire racks"
    ],
    "tips": [
        "Chill the dough for at least 1 hour for thicker cookies",
        "Use a mix of chocolate chips and chunks for texture variety"
    ],
    "variations": [
        "Add 1 cup of chopped nuts for extra crunch",
        "Use white chocolate chips and dried cranberries for a festive twist",
        "Sprinkle sea salt on top before baking for a sweet-salty flavor"
    ],
    "reasoning": "Added specific measurements and detailed instructions. Included chilling tip for texture improvement and suggested variations for versatility."
}}

## Your turn:
Now, create a similar JSON object for the improved {user_input} recipe. Be creative and detailed in your approach.

Format your response as a JSON object with the following structure:
{{
    "ingredients": ["updated list of ingredients with quantities"],
    "instructions": ["detailed step-by-step instructions"],
    "tips": ["list of helpful tips or tricks"],
    "variations": ["list of possible variations"],
    "reasoning": "Your thought process for these improvements"
}}

**IMPORTANT**: Remeber to **ONLY AND ONLY** output the requested JSON in the requiered without saying **ANY** additiional words.

           """

           improved_thoughts = self._parse_llm_json(self.llm_creative(creative_prompt))
           recipe_info["thoughts"].append(improved_thoughts)

       return recipe_info

   def generate_recipe(self, dish: str) -> Dict:
       """Run ``rat_process`` for ``dish`` and format the final plan as a recipe.

       Args:
           dish: Name of the dish to generate a recipe for.

       Returns:
           The ``rat_process`` result with an added ``"formatted_recipe"`` entry
           holding the JSON produced by ``llm_instruct``.

       Raises:
           RuntimeError: If no vector store is available.
           json.JSONDecodeError: If a model response contains no valid JSON.
       """
       recipe_info = self.rat_process(dish)
       final_thoughts = recipe_info["thoughts"][-1]

       # The prompt must interpolate this method's own parameter, `dish`.
       format_prompt = f"""
       # Task: Format the recipe for {dish} into a structured output

Given the following recipe information:
{json.dumps(final_thoughts)}

## Think through this step-by-step:
1. Organize the ingredients list clearly
2. Structure the instructions in a logical order
3. Include any tips or variations
4. Consider adding estimated cooking time and serving size

## Example:
{{
    "title": "Ultimate Chocolate Chip Cookies",
    "ingredients": [
        "2 1/4 cups all-purpose flour",
        "1 cup unsalted butter, softened",
        "3/4 cup granulated sugar",
        "3/4 cup brown sugar",
        "2 large eggs",
        "1 tsp vanilla extract",
        "1 tsp baking soda",
        "1/2 tsp salt",
        "2 cups semi-sweet chocolate chips"
    ],
    "instructions": [
        "1. Preheat oven to 375°F (190°C)",
        "2. In a large bowl, cream together butter and sugars until light and fluffy",
        "3. Beat in eggs one at a time, then stir in vanilla",
        "4. In a separate bowl, combine flour, baking soda, and salt",
        "5. Gradually stir the dry ingredients into the creamed mixture",
        "6. Fold in chocolate chips",
        "7. Drop by rounded tablespoons onto ungreased baking sheets",
        "8. Bake for 9 to 11 minutes or until golden brown",
        "9. Let stand for 2 minutes before removing to cool on wire racks"
    ],
    "tips": [
        "For thicker cookies, chill the dough for at least 1 hour before baking",
        "Use a mix of chocolate chips and chunks for varied texture"
    ],
    "variations": [
        "Add 1 cup of chopped nuts for extra crunch",
        "Use white chocolate chips and dried cranberries for a festive twist",
        "Sprinkle sea salt on top before baking for a sweet-salty flavor"
    ],
    "cook_time": "25 minutes (15 minutes prep + 10 minutes baking)",
    "servings": "24 cookies"
}}

## Your turn:
Now, create a similar JSON object for the {dish} recipe. Ensure all information is clearly structured and easy to follow.

Format your response as a JSON object with the following structure:
{{
    "title": "Recipe title",
    "ingredients": ["list of ingredients with quantities"],
    "instructions": ["numbered list of instructions"],
    "tips": ["list of helpful tips"],
    "variations": ["list of possible variations"],
    "cook_time": "estimated total cooking time",
    "servings": "number of servings"
}}

**IMPORTANT**: Remeber to **ONLY AND ONLY** output the requested JSON in the requiered without saying **ANY** additiional words.

       """

       formatted_recipe = self._parse_llm_json(self.llm_instruct(format_prompt))
       recipe_info["formatted_recipe"] = formatted_recipe
       return recipe_info

   def modify_recipe(self, recipe: str, modification: str) -> Dict:
       """Ask ``llm_creative`` to apply ``modification`` to ``recipe``.

       Args:
           recipe: Text of the original recipe.
           modification: Description of the requested change.

       Returns:
           The decoded JSON object returned by the model.

       Raises:
           json.JSONDecodeError: If the model response contains no valid JSON.
       """
       modify_prompt = f"""
       # Task: Modify the recipe as requested

Original recipe:
{recipe}

Requested modification: {modification}

## Think through this step-by-step:
1. Understand the original recipe and the requested modification
2. Identify which parts of the recipe need to be changed
3. Make the necessary adjustments to ingredients, quantities, or instructions
4. Ensure the modified recipe is still balanced and feasible

## Example:
For modifying a chocolate chip cookie recipe to be gluten-free:
{{
    "original_recipe": "Standard chocolate chip cookie recipe with wheat flour",
    "modification": "Make the recipe gluten-free",
    "modified_recipe": {{
        "ingredients": [
            "2 1/4 cups gluten-free all-purpose flour blend",
            "1 tsp xanthan gum (if not included in the flour blend)",
            "1 cup unsalted butter, softened",
            "3/4 cup granulated sugar",
            "3/4 cup brown sugar",
            "2 large eggs",
            "1 tsp vanilla extract",
            "1 tsp baking soda",
            "1/2 tsp salt",
            "2 cups semi-sweet chocolate chips (ensure they're gluten-free)"
        ],
        "instructions": [
            "1. Preheat oven to 375°F (190°C)",
            "2. In a medium bowl, whisk together the gluten-free flour blend and xanthan gum (if using)",
            "3. In a large bowl, cream together butter and sugars until light and fluffy",
            "4. Beat in eggs one at a time, then stir in vanilla",
            "5. Gradually stir the dry ingredients into the creamed mixture",
            "6. Fold in chocolate chips",
            "7. Drop by rounded tablespoons onto ungreased baking sheets",
            "8. Bake for 10 to 12 minutes or until golden brown",
            "9. Let stand for 2 minutes before removing to cool on wire racks"
        ],
        "notes": "Gluten-free cookies may spread less than regular cookies. If needed, slightly flatten the dough balls before baking."
    }},
    "reasoning": "Replaced wheat flour with a gluten-free blend and added xanthan gum for proper texture. Ensured all other ingredients are gluten-free. Adjusted baking time slightly as gluten-free cookies may bake differently."
}}

## Your turn:
Now, create a similar JSON object for modifying the given recipe according to the requested modification. Be thorough in your modifications and explain your reasoning.

Format your response as a JSON object with the following structure:
{{
    "original_recipe": "Brief summary of the original recipe",
    "modification": "The requested modification",
    "modified_recipe": {{
        "ingredients": ["updated list of ingredients"],
        "instructions": ["updated list of instructions"],
        "notes": "Any important notes about the modifications"
    }},
    "reasoning": "Your thought process for these modifications"
}}

**IMPORTANT**: Remeber to **ONLY AND ONLY** output the requested JSON in the requiered without saying **ANY** additiional words.

       """

       return self._parse_llm_json(self.llm_creative(modify_prompt))

   def get_recommendations(self, recipe: str) -> List[Dict]:
       """Ask ``llm_creative`` for three dishes related to ``recipe``.

       Args:
           recipe: Text of the recipe to base the recommendations on.

       Returns:
           The decoded JSON returned by the model (the prompt requests an array
           of objects with ``dish``, ``description`` and ``similarity``).

       Raises:
           json.JSONDecodeError: If the model response contains no valid JSON.
       """
       recommend_prompt = f"""
       # Task: Suggest 3 related dishes or variations based on the given recipe

Given recipe:
{recipe}

## Think through this step-by-step:
1. Identify the key ingredients and flavors in the original recipe
2. Consider dishes with similar ingredients or cooking methods
3. Think about variations that could appeal to different tastes or dietary needs
4. Ensure the recommendations are diverse and interesting

## Example:
For recommendations based on a classic margherita pizza recipe:
[
    {{
        "dish": "Caprese Salad",
        "description": "A fresh salad made with sliced mozzarella, tomatoes, and basil, drizzled with olive oil and balsamic glaze",
        "similarity": "Uses the same key ingredients as margherita pizza but in a cold, uncooked preparation"
    }},
    {{
        "dish": "Tomato Basil Soup",
        "description": "A creamy soup made with ripe tomatoes, fresh basil, and a touch of cream, served with mozzarella crostini",
        "similarity": "Incorporates the flavors of margherita pizza into a comforting soup"
    }},
    {{
        "dish": "Eggplant Parmesan",
        "description": "Layers of breaded and fried eggplant slices with tomato sauce, mozzarella, and basil, baked until bubbly",
        "similarity": "Uses similar ingredients and Italian flavors, but with eggplant as the star instead of pizza crust"
    }}
]

## Your turn:
Now, create a similar JSON array with 3 recommendations based on the given recipe. Be creative and consider various culinary directions you could take.

Format your response as a JSON array with the following structure for each recommendation:
[
    {{
        "dish": "Name of the recommended dish",
        "description": "Brief description of the dish",
        "similarity": "How it relates to the original recipe"
    }},
    ...
]

**IMPORTANT**: Remeber to **ONLY AND ONLY** output the requested JSON in the requiered without saying **ANY** additiional words.

       """

       return self._parse_llm_json(self.llm_creative(recommend_prompt))

   def ask_cooking_question(self, question: str) -> Dict:
       """Answer ``question`` through the retrieval QA chain.

       Args:
           question: The cooking question to answer.

       Returns:
           ``{"error": ...}`` when the QA chain has not been set up; otherwise a
           dict with ``question``, ``answer``, ``tips`` and ``sources`` (one
           record per source document returned by the chain).

       Raises:
           json.JSONDecodeError: If the chain's result contains no valid JSON.
           KeyError: If the decoded result has no ``"answer"`` key.
       """
       if self.qa_chain is None:
           return {"error": "Please set up the QA chain first."}

       qa_prompt = f"""
       # Task: Answer the following cooking question

Question: {question}

## Think through this step-by-step:
1. Understand the key elements of the question
2. Recall relevant cooking knowledge and techniques
3. Consider any potential variations or alternatives
4. Formulate a clear and informative answer

## Example:
For the question "What's the best way to cook a steak to medium-rare?":
{{
    "answer": "To cook a steak to medium-rare, follow these steps:\n1. Bring the steak to room temperature before cooking\n2. Season generously with salt and pepper\n3. Preheat a heavy-based pan or grill to high heat\n4. Cook the steak for 3-4 minutes on each side for a 1-inch thick steak\n5. Use a meat thermometer to check for an internal temperature of 135°F (57°C)\n6. Let the steak rest for 5 minutes before serving\n\nThe steak should have a warm red center when cut into.",
    "tips": [
        "Use tongs to flip the steak, not a fork, to avoid piercing the meat and losing juices",
        "For extra flavor, add butter, garlic, and herbs to the pan in the last minute of cooking",
        "Allow the pan to reheat between batches if cooking multiple steaks"
    ],
    "sources": [
        "The Art of Cooking Meats, Chapter 3: Grilling and Pan-Searing",
        "Professional Chef's Handbook, Section 2.4: Temperature Control in Meat Cookery"
    ]
}}

## Your turn:
Now, answer the given cooking question in a similar format. Provide a detailed answer, helpful tips, and reference any relevant cookbook sources if applicable.

Format your response as a JSON object with the following structure:
{{
    "answer": "Your detailed answer to the question",
    "tips": ["Any helpful tips related to the question"],
    "sources": ["References to specific cookbook pages or sections, if applicable"]
}}

**IMPORTANT**: Remeber to **ONLY AND ONLY** output the requested JSON in the requiered without saying **ANY** additiional words.

       """

       # NOTE(review): the whole prompt, worked example included, is sent as the
       # chain's "query", so it is presumably also what the retriever matches
       # against the index. Consider retrieving on `question` alone.
       result = self.qa_chain({"query": qa_prompt})
       answer_data = self._parse_llm_json(result['result'])

       return {
           "question": question,
           "answer": answer_data['answer'],
           "tips": answer_data.get('tips', []),
           "sources": self._source_records(result['source_documents'])
       }


In [None]:

def run_cookbook_ai(pdf_directory: str = "path/to/cookbook/pdfs"):
   """Run the interactive text menu for ``CookBookAI`` until the user quits.

   Args:
       pdf_directory: Directory containing the cookbook PDFs; passed to
           ``CookBookAI``. Defaults to the placeholder path used previously.

   A failing action (for example an unparseable model response, a missing
   PDF directory, or a vector store that has not been loaded) is reported and
   the menu is shown again instead of ending the session.
   """
   cookbook = CookBookAI(pdf_directory)

   while True:
       print("\nCookBook AI - Main Menu")
       print("1. Process PDF Cookbooks")
       print("2. Load Vector Database")
       print("3. Setup QA Chain")
       print("4. Generate Recipe")
       print("5. Modify Recipe")
       print("6. Get Recommendations")
       print("7. Ask a Cooking Question")
       print("8. Quit")

       # Surrounding whitespace is not meaningful for a menu selection.
       choice = input("Enter your choice (1-8): ").strip()

       if choice == "8":
           print("Thank you for using CookBook AI. Goodbye!")
           break

       try:
           if choice == "1":
               cookbook.load_and_process_pdfs()
           elif choice == "2":
               cookbook.load_vectorstore()
           elif choice == "3":
               cookbook.setup_qa_chain()
           elif choice == "4":
               dish = input("Enter the dish you want to cook: ")
               result = cookbook.generate_recipe(dish)
               print(json.dumps(result, indent=2))
           elif choice == "5":
               recipe = input("Enter the current recipe: ")
               modification = input("Enter the modification: ")
               result = cookbook.modify_recipe(recipe, modification)
               print(json.dumps(result, indent=2))
           elif choice == "6":
               recipe = input("Enter a recipe to get recommendations: ")
               recommendations = cookbook.get_recommendations(recipe)
               print(json.dumps(recommendations, indent=2))
           elif choice == "7":
               question = input("Enter your cooking question: ")
               result = cookbook.ask_cooking_question(question)
               print(json.dumps(result, indent=2))
           else:
               print("Invalid choice. Please try again.")
       except Exception as exc:
           # Report the failure and keep the session alive; KeyboardInterrupt
           # is not an Exception subclass and still interrupts the loop.
           print(f"Action failed ({type(exc).__name__}): {exc}")

# Start the interactive menu when this code is executed directly (as a script,
# or as a notebook cell, where __name__ is "__main__"); importing it as a
# module does not start the loop.
if __name__ == "__main__":
   run_cookbook_ai()