In [1]:
# Import the python modules
from dotenv import load_dotenv
from PIL import Image, ImageOps

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.vectorstores import Chroma
from langchain_core.messages import HumanMessage, SystemMessage
from langchain.prompts.prompt import PromptTemplate
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain

from unstructured_client import UnstructuredClient
from unstructured_client.models import shared
from unstructured_client.models.errors import SDKError

from unstructured.chunking.title import chunk_by_title
from unstructured.staging.base import dict_to_elements

import chromadb

load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [14]:
# Open the image file
file = Image.open("images/lays.jpg")
image1 = ImageOps.grayscale(file)
image1.show()


file2 = Image.open("images/cadbury.jpg")
image2 = ImageOps.grayscale(file2)
image2.show()

file3 = Image.open("images/lays_magic.jpg")
image3 = ImageOps.grayscale(file3)
image3.show()


In [15]:
# Open the file in read mode
with open("src/prompts/label_prompt.txt", "r") as f:
    # Read the contents of the file
    file_contents = f.read()


print(file_contents)

label_extraction_prompt = file_contents

Parse the nutritional information and the list of ingredients of the product shown in the image. 

Use the nutritional information/nutritional fact label to identify the quantity of the following items:
Calories/Energy, Total Fat, Saturated Fat, Trans Fat, Cholesterol, Sodium, Total Carbohydrates, Dietary Fiber, Sugars, Protein, Vitamin A, Vitamin C, Calcium, and Iron.        
If the item is not present in the image, please mention that the item is not present in the image.

The output should contain only the nutritional information and the list of ingredients that is present on the product shown in the image. 
The output should be in a JSON format populated ONLY with the information that is available in the image. Make sure the JSON is valid and
do not add any additional information that is not present in the image.

Example of the output format:
{
    Nutritional information: {},
    Ingredients: ['items1', 'item2', 'item3']
}



## Zero Shot Food Label Info Extraction

In [16]:
llm = ChatGoogleGenerativeAI(model="gemini-pro-vision")

# example
message = HumanMessage(
    content=[
        {
            "type": "text", # Text part of the prompt
            "text": label_extraction_prompt,
        },
        {"type": "image_url", "image_url": image2}, # image part of the prompt
    ]
)

response = llm.invoke([message])

In [17]:
generated_text = response.content
print(generated_text)

 ```json
{
  "Nutritional information": {
    "Energy": "654 kCal",
    "Total Fat": "8.9 g",
    "Saturated Fat": "5.3 g",
    "Trans Fat": "Not Present",
    "Cholesterol": "Not Present",
    "Sodium": "0.07 g",
    "Total Carbohydrates": "17 g",
    "Dietary Fiber": "0.6 g",
    "Sugars": "16 g",
    "Protein": "2.1 g",
    "Vitamin A": "Not Present",
    "Vitamin C": "Not Present",
    "Calcium": "Not Present",
    "Iron": "Not Present"
  },
  "Ingredients": [
    "Not Present"
  ]
}
```


## Few Shot Prompt 

In [18]:
prompt1 = """nutritional information per 100 g: 
            {
                "Energy": "537 kCal",
                "Macronutrients": { 
                    "Protein": "6.9 g",
                    "Carbohydrates": { 
                        "Total": "52.9 g",
                        "of which Sugars": "2.5 g",
                        },
                    "Dietary Fiber": "Not Present",
                    "Total Fat": {
                        "Total": "33.1 g",
                        "Saturated Fat": "12.5 g",
                        "Trans Fat": "0.1 g",
                        },
                    "Linoleic Acid": "Not Present",
                    "Linolenic acid": "Not Present"
                },
                "Minerals": { 
                    "Calcium": "Not Present",
                    "Iron": "Not Present",
                    "Magnesium": "Not Present",
                    "Phosphorus": "Not Present",
                    "Potassium": "Not Present",
                    "Sodium": "993 mg",
                    "Zinc": "Not Present",
                    "Cooper": "Not Present",
                    "Manganese": "Not Present",
                    "Selenium": "Not Present"
                },
                "Vitamins": {                
                    "Vitamin A": "Not Present",
                    "Vitamic E": "Not Present",
                    "Vitamin D": "Not Present",
                    "Vitamin C": "Not Present",
                    "Thiamin": "Not Present",
                    "Riboflavin": "Not Present",
                    "Niacin": "Not Present",
                    "Vitamin B6": "Not Present",
                    "Vitamin B12": "Not Present",
                    "Choline": "Not Present",
                    "Vitamin K": "Not Present",
                    "Folate": "Not Present",              
            }"""

print(prompt1)

nutritional information per 100 g: 
            {
                "Energy": "537 kCal",
                "Macronutrients": { 
                    "Protein": "6.9 g",
                    "Carbohydrates": { 
                        "Total": "52.9 g",
                        "of which Sugars": "2.5 g",
                        },
                    "Dietary Fiber": "Not Present",
                    "Total Fat": {
                        "Total": "33.1 g",
                        "Saturated Fat": "12.5 g",
                        "Trans Fat": "0.1 g",
                        },
                    "Linoleic Acid": "Not Present",
                    "Linolenic acid": "Not Present"
                },
                "Minerals": { 
                    "Calcium": "Not Present",
                    "Iron": "Not Present",
                    "Magnesium": "Not Present",
                    "Phosphorus": "Not Present",
                    "Potassium": "Not Present",
                    "Sodium": "

In [19]:
prompt2 = """nutritional information per 100 g: 
            {
                "Energy": "534 kCal",
                "Macronutrients": { 
                    "Protein": "7.3 g",
                    "Carbohydrates": {
                        "Total": "57 g",
                        "of which Sugars": "56 g",
                        },
                    },
                    "Dietary Fiber": "2.1 g",
                    "Total Fat": {
                        "Total": "30 g",
                        "Saturated Fat": "18 g",
                        "Trans Fat": "Not Present",
                    },
                    "Linoleic Acid": "Not Present",
                    "Linolenic acid": "Not Present"
                },
                "Minerals": { 
                    "Calcium": "Not Present",
                    "Iron": "Not Present",
                    "Magnesium": "Not Present",
                    "Phosphorus": "Not Present",
                    "Potassium": "Not Present",
                    "Sodium": "240 mg",
                    "Zinc": "Not Present",
                    "Cooper": "Not Present",
                    "Manganese": "Not Present",
                    "Selenium": "Not Present"
                },
                "Vitamins": {                
                    "Vitamin A": "Not Present",
                    "Vitamic E": "Not Present",
                    "Vitamin D": "Not Present",
                    "Vitamin C": "Not Present",
                    "Thiamin": "Not Present",
                    "Riboflavin": "Not Present",
                    "Niacin": "Not Present",
                    "Vitamin B6": "Not Present",
                    "Vitamin B12": "Not Present",
                    "Choline": "Not Present",
                    "Vitamin K": "Not Present",
                    "Folate": "Not Present",              
            }"""

In [20]:
image_message1 = {
    "type": "image_url",
    "image_url": image1
}

image_message2 = {
    "type": "image_url",
    "image_url": image2
}

image_message3 = {
    "type": "image_url",
    "image_url": image3
}

# prompt2 = """Parse the nurtitional information from the food label image."""

message = HumanMessage(content=[image_message1, prompt1, image_message2, prompt2, image_message3])

In [21]:
response = llm.invoke([message])
print(response.content)

 nutritional information per 100 g: 
            {
                "Energy": "547 kCal",
                "Macronutrients": { 
                    "Protein": "9 g",
                    "Carbohydrates": { 
                        "Total": "47.6 g",
                        "of which Sugars": "4.1 g",
                        },
                    "Dietary Fiber": "9 g",
                    "Total Fat": {
                        "Total": "35.7 g",
                        "Saturated Fat": "14.1 g",
                        "Trans Fat": "0.1 g",
                        },
                    "Linoleic Acid": "Not Present",
                    "Linolenic acid": "Not Present"
                },
                "Minerals": { 
                    "Calcium": "Not Present",
                    "Iron": "Not Present",
                    "Magnesium": "Not Present",
                    "Phosphorus": "Not Present",
                    "Potassium": "Not Present",
                    "Sodium": "942 mg",


## RAG based Retrieval

In [41]:
#Load the models
llm = ChatGoogleGenerativeAI(model="gemini-pro")
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", task_type="retrieval_document")

### Process the pdf file

In [None]:
# Setup the Unstructured Client
s = UnstructuredClient(
    api_key_auth=DLAI_API_KEY,
    server_url=DLAI_API_URL,
)

In [None]:
filename = "example_files/nutritional_tables_info.pdf"

with open(filename, "rb") as f:
    files=shared.Files(
        content=f.read(),
        file_name=filename,
    )

req = shared.PartitionParameters(
    files=files,
    strategy="hi_res",
    hi_res_model_name="yolox",
    pdf_infer_table_structure=True,
    skip_infer_table_types=[],
)

try:
    resp = s.general.partition(req)
    pdf_elements = dict_to_elements(resp.elements)
except SDKError as e:
    print(e)

In [None]:
for pdf_el in pdf_elements:
    print(pdf_el.to_dict())

In [None]:
elements = chunk_by_title(pdf_elements)

In [None]:
documents = []
for element in elements:
    metadata = element.metadata.to_dict()
    del metadata["languages"]
    metadata["source"] = metadata["filename"]
    documents.append(Document(page_content=element.text, metadata=metadata))

In [None]:
vectorstore = Chroma.from_documents(documents, embeddings)

In [None]:
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2}
)

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
rag_chain.invoke({
    "question": "What are the macronutrient requirements for a male in the age group between 14-18?",
    "chat_history": []
})["answer"]