In [5]:
import io
import os
import base64
import numpy as np
from langchain.chat_models import ChatOpenAI
from langchain.schema.messages import HumanMessage, AIMessage
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

True

In [6]:
def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

In [3]:
chain = ChatOpenAI(model="gpt-4-vision-preview", max_tokens=1024)


In [7]:
image = encode_image("./food.jpeg")


msg = chain.invoke(
    [   AIMessage(
        content="You are a useful bot that is especially good at OCR from images"
    ),
        HumanMessage(
            content=[
                {"type": "text", "text": "Identify all items on the this image which are food related and provide a list of what you see"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{image}"
                    },
                },
            ]
        )
    ]
)
print(msg.content)

In the image, I can identify the following food-related items:

1. A carton of eggs with several eggs inside.
2. A package of cheese (appears to be a wheel or wedge of cheese in plastic wrap).
3. Two zucchinis.
4. A mesh bag containing what looks like shallots or small red onions.
5. A package of sliced salami.
6. A pack of Greek olives (labeled "CHALKIDIKI" which refers to Chalkidiki olives).

These items suggest that someone may be preparing to cook a meal that involves these ingredients.


In [10]:
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.schema import StrOutputParser


chain = ChatOpenAI(model="gpt-3.5-turbo", max_tokens=1024)
prompt = PromptTemplate.from_template(
"""
You see the following foods in a product list:
{food}
Create a dish using only these ingredients and say what it is called. Only return the name of the dish.
No additional explanation or anything related. Just the name

Example:
'Pizza'
'Sushi'

Output:
"""
)
runnable = prompt | chain | StrOutputParser()

In [11]:
dish = runnable.invoke({"food": msg.content})
print(dish)

'Zucchini and Cheese Frittata'


In [13]:
from openai import OpenAI
client = OpenAI()

response = client.images.generate(
  model="dall-e-3",
  prompt=f"A nice candlelight dinner with {dish} for two persons",
  size="1024x1024",
  quality="standard",
  n=1,
)

image_url = response.data[0].url


In [14]:
print(image_url)

https://oaidalleapiprodscus.blob.core.windows.net/private/org-Q6tXelh9xR8sTrUZQ3cWF1c8/user-kFDQy0aIrkJp1d1FoXtSudK6/img-eUWNfkB6dttRfmgY0tYIfMIs.png?st=2023-11-09T17%3A42%3A30Z&se=2023-11-09T19%3A42%3A30Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-11-09T08%3A28%3A46Z&ske=2023-11-10T08%3A28%3A46Z&sks=b&skv=2021-08-06&sig=tXRoKb6BEifXEBEYQkkq%2B5UnCtcFjN%2BtvsAPt9Jcxi4%3D
