In [None]:
#————————————————————

# Name: Azure OpenAI API, GPT 4o mini, Image to Text

# Purpose: This notebook uses Azure OpenAI GPT 4o mini to turn the contents of one or more images into a list of ingredients, which in turn is used to output a recipe.

# Company: Allgeier Schweiz AG
# Author: Nicolas Rehder (nrehder@allgeier.ch)
# Created for: SDSC 2024 & ZHAW 2025
# Date Created: 22.01.2024
# Last Updated: 19.01.2025
# Python Version: 3.12.1

# Troubleshooting:
# https://community.openai.com/t/issue-with-useage-of-json-output-an-citation/584189
# https://alexholmeset.blog/2024/05/22/use-the-azure-openai-gpt-4o-all-in-one-model-with-powershell/

# Additionals:

# If necessary, download Python packages (run the below command in terminal if packages have not yet been installed)
# pip install -r C:\Python\openai-lab\support\requirements\requirements.txt

#————————————————————

In [1]:
# Import Python packages
import os
import io
import time
import base64
from io import StringIO
import json
from dotenv import load_dotenv, find_dotenv
from pathlib import Path
import pandas as pd
from openai import AzureOpenAI
import sys

In [2]:
# Read the lab's settings from its .env file.
# Windows alternative: "C:\\Python\\azure-openai-lab\\.venv\\.env" (loading sometimes errors due to \ or \\; try one or the other).
env_file = Path("/workspaces/azure-openai-lab/.venv/.env")
load_dotenv(dotenv_path=env_file)

# Azure OpenAI key and endpoint. Both values are listed under "Keys and Endpoint"
# of the Azure OpenAI Service resource in portal.azure.com.
azure_oai_key = os.environ['AZURE_OPENAI_KEY_P34']
azure_oai_endpoint = os.environ['AZURE_OPENAI_ENDPOINT_P34']

In [3]:
# Build the Azure OpenAI client from the endpoint and key read from the environment
client = AzureOpenAI(
    api_version="2024-10-21",  # alternatives noted by the author: "2024-04-01-preview", "2024-05-13"
    api_key=azure_oai_key,
    azure_endpoint=azure_oai_endpoint,
)

In [4]:
# Prepare image for Azure OpenAI model
def encode_image(image_path):
  """Return the contents of the file at `image_path` as a base64 text string.

  The file is read in binary mode, base64-encoded, and the encoded bytes are
  decoded as UTF-8 so the result is a `str`.
  """
  with open(image_path, "rb") as source:
    raw_bytes = source.read()
  encoded_bytes = base64.b64encode(raw_bytes)
  return encoded_bytes.decode('utf-8')
  

# Root folder holding the lab's images. Every image path below is built from it,
# so running the notebook from another checkout only requires changing this line,
# e.g. on Windows: Path("C:\\Python\\azure-openai-lab\\images")
IMAGE_DIR = Path("/workspaces/azure-openai-lab/images")
PRODUCT_DIR = IMAGE_DIR / "products"

# Images of individual food products (each variable holds the base64 text of one JPEG)
avocado = encode_image(PRODUCT_DIR / "avocado.jpg")
tofu = encode_image(PRODUCT_DIR / "tofu.jpg")
broccoli = encode_image(PRODUCT_DIR / "broccoli.jpg")
chili = encode_image(PRODUCT_DIR / "chili.jpg")
coconut_milk = encode_image(PRODUCT_DIR / "coconut_milk.jpg")
soy_sauce = encode_image(PRODUCT_DIR / "soy_sauce.jpg")

# Image of a refrigerator with foods
refrigerator = encode_image(PRODUCT_DIR / "refrigerator.jpg")

# Image of a recipe book page (stored under images/recipes, not images/products)
recipe = encode_image(IMAGE_DIR / "recipes" / "biscotti.jpg")

In [5]:
# Ask the model to name the food product in each individual product image.

# One user message: the instruction text first, followed by one image part per product.
response = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "You are a helpful cook. Analyze the provided images, determine the food product being depicted and create a list of all products"},
                *(
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}}
                    for encoded in (avocado, tofu, broccoli, chili, coconut_milk, soy_sauce)
                ),
            ],
        }
    ],
    model="gpt-4o-mini",
    temperature=0.7,
    #max_tokens=120,
)

# Text of the first (and only requested) completion
result = response.choices[0].message.content
print(result + "\n")

Based on the provided images, here is the analysis of the food products depicted:

1. **Avocado**
   - Product: Tropical Avocado
   - Brand: Primagusto

2. **Tofu**
   - Product: Tofu (Nature)
   - Brand: Coop (PRIX Garantie)

3. **Broccoli**
   - Product: Swiss Broccoli
   - Brand: Coop (Qualité & Prix)

4. **Chili Peppers**
   - Product: Hot Chili
   - Brand: Coop (Betty Bossi)

5. **Coconut Milk**
   - Product: Coconut Milk
   - Brand: Thai Kitchen

6. **Soy Sauce**
   - Product: Soy Sauce
   - Brand: Kikkoman

### List of All Products:
- Tropical Avocado (Primagusto)
- Tofu (PRIX Garantie)
- Swiss Broccoli (Qualité & Prix)
- Hot Chili (Betty Bossi)
- Coconut Milk (Thai Kitchen)
- Soy Sauce (Kikkoman)



In [6]:
# Extract a structured ingredient list (name, amount, units, shelf life) from the individual product images.

# Prompt describing the task and the expected JSON layout.
# It is sent as the text part of the user message below.
systemcontent = """
### INSTRUCTIONS
1. Analyze the provided images.
2. Determine the food product being depicted.
3. Count the numbers of invidual food product items in bowls or vessels.

---

### OUTPUT FORMAT
Return a JSON array with the following format:
{
  "name": "",
  "amount": "",
  "units": "",
  "expiration_days": null
}


The variables should contain the following information:
- name: the name of the product in each image.
- amount: the number of products in each image.
- units: the unit of the product in each image using the metric system.
- expiration_days: the expiration date of the product in each image in average number of days.
"""

# One user message: the prompt text first, followed by one image part per product.
response = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": systemcontent},
                *(
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}}
                    for encoded in (avocado, tofu, broccoli, chili, coconut_milk, soy_sauce)
                ),
            ],
        }
    ],
    model="gpt-4o-mini",
    temperature=0.7,
    #max_tokens=120,
)

# Text of the first completion
result = response.choices[0].message.content
print(result + "\n")

```json
[
    {"name":"Avocado","amount":"1","units":"piece","expiration_days":"5"},
    {"name":"Tofu","amount":"1","units":"310 g","expiration_days":"5"},
    {"name":"Broccoli","amount":"1","units":"750 g","expiration_days":"14"},
    {"name":"Chili","amount":"3","units":"pieces","expiration_days":"7"},
    {"name":"Coconut Milk","amount":"1","units":"500 ml","expiration_days":"365"},
    {"name":"Soy Sauce","amount":"1","units":"150 ml","expiration_days":"365"}
]
```



In [9]:
# Extract a structured ingredient list from one photo that shows several food products (the refrigerator image).

# Prompt describing the task and the expected JSON layout.
# It is sent as the text part of the user message below.
systemcontent = """
### INSTRUCTIONS
1. Analyze the provided images.
2. Determine the food product being depicted.
3. Count the numbers of invidual food product items in bowls or vessels.

---

### OUTPUT FORMAT
Return a JSON array with the following format:
{
  "name": "",
  "amount": "",
  "units": "",
  "expiration_days": null
}


The variables should contain the following information:
- name: the name of the product in each image.
- amount: the number of products in each image.
- units: the unit of the product in each image using the metric system.
- expiration_days: the expiration date of the product in each image in average number of days.
"""

# One user message: the prompt text followed by the single refrigerator image.
response = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": systemcontent},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{refrigerator}"}},
            ],
        }
    ],
    model="gpt-4o-mini",
    temperature=0.7,
    #max_tokens=120,
)

# Text of the first completion
result = response.choices[0].message.content
print(result + "\n")

```json
[
  {
    "name": "Cauliflower",
    "amount": 1,
    "units": "piece",
    "expiration_days": 7
  },
  {
    "name": "Radishes",
    "amount": 6,
    "units": "pieces",
    "expiration_days": 10
  },
  {
    "name": "Tomatoes",
    "amount": 8,
    "units": "pieces",
    "expiration_days": 7
  },
  {
    "name": "Mushrooms",
    "amount": 2,
    "units": "pieces",
    "expiration_days": 5
  },
  {
    "name": "Cucumbers",
    "amount": 5,
    "units": "pieces",
    "expiration_days": 7
  },
  {
    "name": "Green Onions",
    "amount": 1,
    "units": "bunch",
    "expiration_days": 10
  },
  {
    "name": "Red Cabbage",
    "amount": 1,
    "units": "piece",
    "expiration_days": 14
  },
  {
    "name": "Yellow Bell Pepper",
    "amount": 1,
    "units": "piece",
    "expiration_days": 7
  },
  {
    "name": "Lettuce",
    "amount": 1,
    "units": "head",
    "expiration_days": 5
  }
]
```



In [8]:
# Extract the recipe (name, description, ingredients, steps, nutrition) from a photographed recipe book page.

# Prompt describing where each field sits on the page and the expected JSON layout.
# It is sent as the text part of the user message below.
systemcontent = """
### INSTRUCTIONS
1. Analyze the provided recipe image.
2. Extract the recipe name located at the top of the image with the largest orange font.
3. Extract the recipe description located at the top of the image in quotes.
4. Extract the recipe ingredients located under the "INGREDIENTS" header.
5. Extract the recipe steps located under the "INSTRUCTIONS" header.
6. Extract the recipe nutritions at the bottom of the image starting after "Nutrition Tip" in the grey font.

---

### OUTPUT FORMAT
Return a JSON array with the following format:
{
  "name": "",
  "description": "",
  "ingredients": [],
  "amount": [],
  "units": [],
  "steps": [],
  "nutritions": "",
  "expiration_days": null
}

The variables should contain the following information:
- name: the name of the recipe.
- description: the description of the recipe.
- ingredients: a list of the ingredients of the recipe.
- amount: a list of the number of each ingredient used in the recipe in numeric form. If no number is visible, define the value as "variable".
- units: a list of the units of each ingredient using the metric system. If no unit is visible, define the value as "null".
- steps: a list of the preparation instructions of the recipe.
- nutritions: a summary of the nutritional information of the recipe. If no nutritional information is found, define the value as "null".
- expiration_days: the expiration date of each ingredient in average number of days. If no expiration date is known, define the value as "null".

"""

# One user message: the prompt text followed by the single recipe page image.
response = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": systemcontent},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{recipe}"}},
            ],
        }
    ],
    model="gpt-4o-mini",
    temperature=0.7,
    #max_tokens=120,
)

# Text of the first completion
result = response.choices[0].message.content
print(result + "\n")


```json
{
  "name": "Carmela's Biscotti",
  "description": "Italians use the term biscotti as a generic word for any type of cookies. Around the world however, \"biscotti\" is synonymous with the crunchy almond cookies that get the name from their original method of baking \"Bis-Cotti\" or \"Twice-Cooked.\" As its name implies, these cookies are in fact baked twice, first in the form of a log and then after they are sliced. These delicious, crisp cookies can be stored for long periods of time and come in a variety of flavors. The following recipe, from my Molise Region, is just one of the many traditional recipes.",
  "ingredients": [
    "flour",
    "sugar",
    "unblanched whole almonds (ground fine in a blender or food processor)",
    "baking powder",
    "baking soda",
    "cinnamon",
    "salt (optional)",
    "unblanched whole almonds (roasted)",
    "honey",
    "lukewarm water",
    "almond extract (optional) or vanilla (optional)"
  ],
  "amount": [
    2,
    0.75,
    0.75

In [40]:
# Transform output to pandas dataframe and (optionally) save as CSV file

def _parse_model_json(reply):
    """Parse the JSON document contained in a model reply.

    The model normally answers with a Markdown code fence (```json ... ```), but
    with a non-zero temperature it may add prose before or after the fence.

    Two candidates are tried in order:
      1. the whole reply (the original clean-up, so replies that parsed before
         still parse to the same value);
      2. the text inside the first fenced block, if the reply contains one.

    For each candidate, surrounding backticks, spaces and newlines are stripped
    and a leading "json" language tag (any letter case) is dropped.

    Args:
        reply: the text returned by the model.

    Returns:
        A tuple `(json_text, parsed)`: the cleaned text that was parsed and the
        resulting Python object.

    Raises:
        json.JSONDecodeError: if no candidate is valid JSON. The error raised is
            the one from the first candidate.
    """
    fence = "```"
    candidates = [reply]
    opening = reply.find(fence)
    if opening != -1:
        body_start = opening + len(fence)
        closing = reply.find(fence, body_start)
        # An unterminated fence runs to the end of the reply
        candidates.append(reply[body_start:] if closing == -1 else reply[body_start:closing])

    first_error = None
    for candidate in candidates:
        text = candidate.strip('` \n')
        if text[:4].lower() == 'json':
            text = text[4:]  # Remove the fence's language tag
        try:
            return text, json.loads(text)
        except json.JSONDecodeError as error:
            if first_error is None:
                first_error = error
    raise first_error


# Clean up Azure OpenAI Output
json_data, omni_ingredients_from_json = _parse_model_json(result)
omni_ingredients = pd.json_normalize(omni_ingredients_from_json)
# path_output = r"/workspaces/azure-openai-lab/data/omni-ingredients.csv"
# omni_ingredients.to_csv(path_output, sep='\t', encoding='utf-8', index=False)