In [None]:
#————————————————————

# Name: Azure OpenAI API, GPT4o, Image to Text

# Purpose: This notebook uses Azure OpenAI GPT-4o to turn the contents of one or more images into a list of ingredients, which in turn will be used to output a recipe.

# Company: Allgeier Schweiz AG
# Author: Nicolas Rehder (nrehder@allgeier.ch), Alex Dean (adean@allgeier.ch)
# Created for: SDSC 2024
# Date Created: 22.01.2024
# Last Updated: 24.05.2024
# Python Version: 3.10.4

# Troubleshooting:
# https://community.openai.com/t/issue-with-useage-of-json-output-an-citation/584189
# https://alexholmeset.blog/2024/05/22/use-the-azure-openai-gpt-4o-all-in-one-model-with-powershell/

# Additionals:

# If necessary, download Python packages (run the below command in terminal if packages have not yet been installed)
# pip install -r C:\Python\openai-lab\support\requirements\requirements.txt

#————————————————————

In [1]:
# Import Python packages
import os
import io
import time
import base64
from io import StringIO
import json
from dotenv import load_dotenv, find_dotenv
from pathlib import Path
import pandas as pd
from openai import AzureOpenAI
import sys

In [2]:
# Load required variables from a .env file.
# Preferred location (dev container). On Windows use e.g.
# Path("C:\\Python\\azure-openai-lab\\.venv\\.env"); if "\" causes errors try "\\" (or the reverse).
dotenv_path = Path("/workspaces/azure-openai-lab/.venv/.env")
if not dotenv_path.is_file():
    # The preferred file is absent: let python-dotenv look for a ".env" file itself
    # instead of silently loading nothing and failing later with a bare KeyError.
    dotenv_path = find_dotenv()
load_dotenv(dotenv_path=dotenv_path)

# Load Azure OpenAI Key and Endpoint. These values can be found within the Azure OpenAI Service resource in portal.azure.com under Keys and Endpoint
# A KeyError here means the variable is defined neither in the .env file nor in the process environment.
azure_oai_key = os.environ['AZURE_OPENAI_KEY']
azure_oai_endpoint = os.environ['AZURE_OPENAI_ENDPOINT']

In [3]:
# Create the Azure OpenAI client from the endpoint and key loaded above.
client = AzureOpenAI(
    api_key=azure_oai_key,
    azure_endpoint=azure_oai_endpoint,
    api_version="2024-04-01-preview",  # alternative noted by the authors: "2024-05-13"
)

In [35]:
# Prepare image for Azure OpenAI model
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 text string.

    The file is opened in binary mode, read completely, base64-encoded and the
    resulting bytes are decoded as UTF-8 so the value can be embedded in a string.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')
  
# Directory that holds the sample product images. Change this single path when running
# outside the dev container, e.g. on Windows: Path("C:\\Python\\azure-openai-lab\\images\\products")
PRODUCT_IMAGES_DIR = Path("/workspaces/azure-openai-lab/images/products")

# Image of a refrigerator with foods
refrigerator = encode_image(PRODUCT_IMAGES_DIR / "refrigerator.jpg")

# Images of individual food products
avocado = encode_image(PRODUCT_IMAGES_DIR / "avocado.jpg")
tofu = encode_image(PRODUCT_IMAGES_DIR / "tofu.jpg")
broccoli = encode_image(PRODUCT_IMAGES_DIR / "broccoli.jpg")
chili = encode_image(PRODUCT_IMAGES_DIR / "chili.jpg")
coconut_milk = encode_image(PRODUCT_IMAGES_DIR / "coconut_milk.jpg")
soy_sauce = encode_image(PRODUCT_IMAGES_DIR / "soy_sauce.jpg")

In [29]:
# Ask the model for a plain-text list of ingredients based on the individual product images.

# One user message: the instruction text followed by one image part per product.
# Each image is passed inline as a base64 "data:" URL.
response = client.chat.completions.create(
    model="gpt-4o",
    temperature=0.7,
    #max_tokens=120,
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "You are a helpful cook. Analyze the provided images, determine the food product being depicted and create a list of all products"},
                *(
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
                    }
                    for encoded_image in (avocado, tofu, broccoli, chili, coconut_milk, soy_sauce)
                ),
            ],
        }
    ],
)

# Text of the first (and only requested) completion choice
result = response.choices[0].message.content
print(result + "\n")

'The provided images depict the following food products:\n\n1. Tropical Avocado\n2. Tofu (Nature)\n3. Broccoli (Swiss)\n4. Hot Chili\n5. Coconut Milk\n6. Soy Sauce\n\nHere is the list of all the products:\n\n1. Tropical Avocado\n2. Tofu (Nature)\n3. Broccoli (Swiss)\n4. Hot Chili\n5. Coconut Milk\n6. Soy Sauce'

In [32]:
# Generate a structured (JSON) list of ingredients from individual food product images.

# Instruction prompt. NOTE: despite the variable name, this text is sent as the text part
# of the user message below, not as a separate "system" role message.
# The example object in the prompt is itself valid JSON (quoted keys, a value for every
# key) because a later cell passes the model's reply to json.loads.
systemcontent = \
"""
### Instructions
1. Analyze the provided images.
2. Determine the food product being depicted.
3. Count the number of individual food product items in bowls or vessels.

### Output format
Return a JSON array in which every element has the following format:
{"name": "", "amount": "", "units": "", "expiration_days": 0}

The variables should contain the following information:
- name: the name of the product in each image.
- amount: the number of products in each image.
- units: the unit of the product in each image using the metric system.
- expiration_days: the expiration date of the product in each image in average number of days.

"""

# Send request to Azure OpenAI model: the instruction text followed by one image part
# per product, each passed inline as a base64 "data:" URL.
response = client.chat.completions.create(
    model="gpt-4o",
    temperature=0.7,
    #max_tokens=120,
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": systemcontent},
                *(
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
                    }
                    for encoded_image in (avocado, tofu, broccoli, chili, coconut_milk, soy_sauce)
                ),
            ],
        }
    ],
)

# message.content is optional in the API response; fall back to an empty string so that
# `result` is always a str and the concatenation below cannot raise a TypeError.
result = response.choices[0].message.content or ""
print(result + "\n")

In [39]:
# Generate a structured (JSON) list of ingredients from a single image containing multiple food products.

# Instruction prompt. NOTE: despite the variable name, this text is sent as the text part
# of the user message below, not as a separate "system" role message.
# The example object in the prompt is itself valid JSON (quoted keys, a value for every
# key) because the next cell passes the model's reply to json.loads.
systemcontent = \
"""
### Instructions
1. Analyze the provided images.
2. Determine the food product being depicted.
3. Count the number of individual food product items in bowls or vessels.

### Output format
Return a JSON array in which every element has the following format:
{"name": "", "amount": "", "units": "", "expiration_days": 0}

The variables should contain the following information:
- name: the name of the product in each image.
- amount: the number of products in each image.
- units: the unit of the product in each image using the metric system.
- expiration_days: the expiration date of the product in each image in average number of days.

"""

# Send request to Azure OpenAI model: the instruction text plus the refrigerator photo,
# passed inline as a base64 "data:" URL.
response = client.chat.completions.create(
    model="gpt-4o",
    temperature=0.7,
    #max_tokens=120,
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": systemcontent},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{refrigerator}"},
                },
            ],
        }
    ],
)

# message.content is optional in the API response; fall back to an empty string so that
# `result` is always a str and the concatenation below cannot raise a TypeError.
result = response.choices[0].message.content or ""
print(result + "\n")

In [40]:
# Transform the model output into a pandas DataFrame (saving to a file is optional, see the end of the cell)

def _extract_json_text(text):
    """Return the JSON document contained in a model reply as a string.

    The reply is first cleaned by removing surrounding backticks, spaces and
    newlines and a leading ``json`` code-fence tag. If the cleaned text is valid
    JSON it is returned unchanged. Otherwise (for example when the model wraps
    the fenced block in explanatory sentences) the first JSON array or object
    found in the reply is extracted.

    Args:
        text: Reply text returned by the model.

    Returns:
        A string that ``json.loads`` can parse.

    Raises:
        ValueError: If ``text`` is empty or ``None``.
        json.JSONDecodeError: If no single JSON array/object can be extracted.
    """
    if not text:
        raise ValueError("The model returned no text to parse as JSON.")

    # Strict path: reply is just a (possibly fenced) JSON document
    cleaned = text.strip('` \n')
    if cleaned.startswith('json'):
        cleaned = cleaned[4:]  # Remove the first 4 characters 'json'
    try:
        json.loads(cleaned)
    except json.JSONDecodeError:
        pass  # fall through to the tolerant extraction below
    else:
        return cleaned

    # Tolerant path: decode the first JSON value that starts at the first '[' or '{'
    starts = [pos for pos in (text.find('['), text.find('{')) if pos != -1]
    if not starts:
        raise json.JSONDecodeError("No JSON array or object found in the model output", text, 0)
    candidate = text[min(starts):]
    _, end = json.JSONDecoder().raw_decode(candidate)

    # Refuse to silently drop data when several JSON values follow each other
    # (e.g. objects that were not wrapped in an array).
    trailing = candidate[end:].lstrip(' \t\r\n`')
    if trailing.startswith((',', '{', '[')):
        raise json.JSONDecodeError("Model output contains more than one top-level JSON value", candidate, end)
    return candidate[:end]


# Clean up Azure OpenAI Output
json_data = _extract_json_text(result)

omni_ingredients_from_json = json.loads(json_data)
omni_ingredients = pd.json_normalize(omni_ingredients_from_json)
# path_output = r"/workspaces/azure-openai-lab/data/omni-ingredients.csv"
# omni_ingredients.to_csv(path_output, sep='\t', encoding='utf-8', index=False)