In [2]:
pip install PyPDF2

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
Installing collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
Note: you may need to restart the kernel to use updated packages.


# Task 1: PDF Text Extraction and Document Creation

In [None]:
# Step 1: Import the necessary libraries for PDF processing and document handling
import PyPDF2
from langchain.docstore.document import Document
from IPython.display import display, Markdown
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores.faiss import FAISS
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI
import os

print("✅ All necessary libraries imported successfully.")

print("\n" + "="*60)
print("EXTRACTING TEXT FROM PDF")
print("="*60)

# Step 2: Open and read the PDF file
pdf_text = []
with open('Famous old receipts - bread.pdf', "rb") as file:
    reader = PyPDF2.PdfReader(file)
    for page in reader.pages:
        pdf_text.append(page.extract_text())

# Confirm successful extraction by printing the total number of pages
print(f"✅ Text extracted from {len(pdf_text)} pages.")

print("\n" + "="*60)
print("CREATING DOCUMENT OBJECTS")
print("="*60)

# Step 3: Create document objects from the extracted text
documents = []
for i in range(len(pdf_text)):
    documents.append(Document(page_content=pdf_text[i]))

# Print the number of documents created
print(f"✅ Number of documents created: {len(documents)}")

# Optional: Display first document preview
if documents and documents[0].page_content:
    print(f"📄 First document preview:")
    print(f"'{documents[0].page_content[:200]}...'")
    print(f"📊 Document length: {len(documents[0].page_content)} characters")

✅ All necessary libraries imported successfully.

EXTRACTING TEXT FROM PDF
✅ Text extracted from 7 pages.

CREATING DOCUMENT OBJECTS
✅ Number of documents created: 7
📄 First document preview:
'Famous Old Receipts
 
Traditional Bread Recipes from Yesteryear
A Collection of Time-Honored Bread Making Traditions
Compiled from Historic Recipe Collections
...'
📊 Document length: 159 characters


# Task 2: Document Embedding with FAISS Vector Database

In [None]:
print("\n" + "="*60)
print("LOADING EMBEDDING MODEL")
print("="*60)

# Step 1: Load Hugging Face's Sentence Transformers embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Confirm successful model loading
print("✅ Embedding model loaded successfully.")

print("\n" + "="*60)
print("CREATING FAISS VECTOR DATABASE")
print("="*60)

# Step 2: Embed the document chunks and store them in a FAISS vector database
db_faiss = FAISS.from_documents(documents, embedding_model)

# Confirm that the embeddings have been successfully stored
print("✅ Document chunks embedded and stored in FAISS vector database.")

# Optional: Display vector database information
print(f"📊 Number of vectors in FAISS database: {db_faiss.index.ntotal}")
print(f"📐 Vector dimension: {embedding_model.client[1].get_sentence_embedding_dimension()}")

print("\n" + "="*60)
print("VECTOR DATABASE READY FOR RETRIEVAL")
print("="*60)


LOADING EMBEDDING MODEL
✅ Embedding model loaded successfully.

CREATING FAISS VECTOR DATABASE
✅ Document chunks embedded and stored in FAISS vector database.
📊 Number of vectors in FAISS database: 7
📐 Vector dimension: 384

VECTOR DATABASE READY FOR RETRIEVAL


# Task 3: Document Retrieval Function

In [None]:
print("\n" + "="*60)
print("CREATING DOCUMENT RETRIEVAL FUNCTION")
print("="*60)

# Step 1: Define a function to retrieve relevant documents based on a query
def retrieve_docs(query, k):
    # Perform similarity search on the FAISS database
    docs_faiss = db_faiss.similarity_search(query, k=k)
    
    # Return the most relevant document chunks
    return docs_faiss

print("✅ Document retrieval function created successfully.")

print("\n" + "="*60)
print("TESTING RETRIEVAL FUNCTION")
print("="*60)

# Step 2: Test the function by retrieving context based on a sample query
context = retrieve_docs("Most unique bread", 5)

# Display the first retrieved chunk to verify correct retrieval
print(f"📄 Query: 'Most unique bread'")
print(f"📊 Number of chunks retrieved: {len(context)}")
print(f"\n🔍 First retrieved chunk:")
print(context[0])

print("\n" + "="*60)
print("DISPLAYING ALL RETRIEVED CHUNKS")
print("="*60)

# Optional: Display all retrieved chunks with their content
for i, doc in enumerate(context, 1):
    print(f"\n📄 Chunk {i}:")
    print(f"Content: {doc.page_content[:300]}...")  # First 300 characters
    print(f"Metadata: {doc.metadata}")
    print("-" * 40)


CREATING DOCUMENT RETRIEVAL FUNCTION
✅ Document retrieval function created successfully.

TESTING RETRIEVAL FUNCTION
📄 Query: 'Most unique bread'
📊 Number of chunks retrieved: 5

🔍 First retrieved chunk:
page_content='Famous Old Receipts
 
Traditional Bread Recipes from Yesteryear
A Collection of Time-Honored Bread Making Traditions
Compiled from Historic Recipe Collections
'

DISPLAYING ALL RETRIEVED CHUNKS

📄 Chunk 1:
Content: Famous Old Receipts
 
Traditional Bread Recipes from Yesteryear
A Collection of Time-Honored Bread Making Traditions
Compiled from Historic Recipe Collections
...
Metadata: {}
----------------------------------------

📄 Chunk 2:
Content: Ancient Grain Artisan Bread
This most unique bread combines ancient grains with modern techniques, creating a
nutritious loaf with complex flavors and textures. The combination of spelt, quinoa, and seeds
makes this bread both distinctive and healthy.
Ingredients:
 2 cups wheat flour (whole wheat)
...
Metadata: {}
-----------

# Task 4: Recipe Recommendations with Azure OpenAI

In [None]:
print("\n" + "="*60)
print("SETTING UP AZURE OPENAI CLIENT")
print("="*60)

# Step 1: Load environment variables and initialize Azure OpenAI client
load_dotenv()

llm = AzureChatOpenAI(
    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    temperature=0
)

print("✅ Azure OpenAI client initialized successfully.")

# Step 2: Define the user query
query = "a bread that uses wheat flour and is suitable for a dinner party"

# Retrieve relevant context from the FAISS database
context = retrieve_docs(query, 10)

print(f"✅ Retrieved {len(context)} relevant document chunks for context.")

# Step 3: Define the system prompt for the assistant
system_message = f"""
    You are an assistant chef. 
    Your role is to recommend the most suitable recipe based on the context provided and the specific requirements given by the user, 
    such as available ingredients, dietary preferences, skill level, and desired baking time. 
    Make sure your recommendations are clear, practical, and tailored to meet the user's needs.
    You answer the {query} with the {context}
"""

print("✅ System message and query defined.")

print("\n" + "="*60)
print("STRUCTURING MESSAGES WITH CONTEXT")
print("="*60)

# Step 4: Combine the context into a single string
context_text = "\n\n".join([doc.page_content for doc in context])

# Structure the messages for the assistant
messages = [
    ("system", system_message),
    ("human", f"Context from bread recipes:\n{context_text}\n\nUser question: {query}")
]

print("✅ Messages structured with retrieved context.")

print("\n" + "="*60)
print("GENERATING RECIPE RECOMMENDATIONS")
print("="*60)

# Step 5: Generate and display the response from the assistant
response = llm.invoke(messages)  # Call the API with the messages

# Display the response in markdown format
display(Markdown(response.content))  # Display the response in markdown format

print("\n" + "="*60)
print("RECIPE RECOMMENDATIONS GENERATED SUCCESSFULLY")
print("="*60)



SETTING UP AZURE OPENAI CLIENT
✅ Azure OpenAI client initialized successfully.
✅ Retrieved 7 relevant document chunks for context.
✅ System message and query defined.

STRUCTURING MESSAGES WITH CONTEXT
✅ Messages structured with retrieved context.

GENERATING RECIPE RECOMMENDATIONS


For a dinner party, I recommend making **Herb Focaccia**. This impressive bread is not only delicious but also visually appealing, making it perfect for entertaining. The aromatic herbs and olive oil create a wonderful flavor that pairs beautifully with Italian meals or can be served as an appetizer.

### Herb Focaccia Recipe

**Ingredients:**
- 3½ cups wheat flour (bread flour)
- 1 packet instant yeast
- 2 teaspoons salt
- 1½ cups warm water
- ¼ cup extra virgin olive oil, divided
- 2 sprigs fresh rosemary
- Coarse sea salt for topping
- Cherry tomatoes (optional)

**Instructions:**
1. Combine flour, yeast, and salt in a large bowl.
2. Add warm water and 2 tablespoons of olive oil. Mix until a shaggy dough forms.
3. Knead briefly, then place the dough in an oiled bowl. Let it rise for 1 hour.
4. Transfer the dough to an oiled 9x13 inch pan, stretching it to fit the pan. Let it rise for an additional 30 minutes.
5. Dimple the surface with your fingers, drizzle with the remaining olive oil, and add rosemary and sea salt. You can also add cherry tomatoes if desired.
6. Bake at 425°F for 25-30 minutes until golden.

This focaccia is sure to impress your guests and is easy to make, making it a great choice for your dinner party!


RECIPE RECOMMENDATIONS GENERATED SUCCESSFULLY
