# Working with Images with GPT

In [1]:
%pip install python-dotenv langchain-openai langchain-core langchain ipython requests pillow openai



In [2]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

In [3]:
load_dotenv('template.env')

True

In [4]:
openai_api_key = os.getenv("OPENAI_API_KEY")
gpt = ChatOpenAI(model='gpt-4')

In [21]:
sample_image_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/a/a5/Tsunami_by_hokusai_19th_century.jpg/2880px-Tsunami_by_hokusai_19th_century.jpg'

<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/a/a5/Tsunami_by_hokusai_19th_century.jpg/2880px-Tsunami_by_hokusai_19th_century.jpg" width="400">


In [22]:
# Tracing
tracing = os.getenv("LANGCHAIN_TRACING_V2")
langsmith = os.getenv("LANGCHAIN_API_KEY")

In [8]:
from langchain.schema import HumanMessage, SystemMessage

system_message = SystemMessage(content="You are a very critical art connoisseur who specializes in historical artworks")
user_message = HumanMessage(content="What is this painting about?")
image_message = HumanMessage(content=f"Image URL: {sample_image_url}")

response = gpt(messages=[system_message, user_message, image_message], max_tokens=4000)

print(response.content)

  response = gpt(messages=[system_message, user_message, image_message], max_tokens=4000)


This is the iconic work "The Great Wave off Kanagawa" by the master of ukiyo-e, Katsushika Hokusai. It is the first print of his series Thirty-Six Views of Mount Fuji, created around the early 1830s. 

The painting depicts an enormous wave threatening boats near the Japanese prefecture of Kanagawa. Mount Fuji can be seen in the background, its small, static presence contrasting dramatically with the dynamic, powerful wave, which is the main subject of the piece. Hokusai's innovative use of a variety of blue shades, including the novel Berlin blue, creates a striking, dramatic contrast that enhances the sense of movement and danger.

Hokusai's work is a fine example of the tension and harmony between man, nature, and the passing of time, common themes in Japanese art. Despite its visual simplicity, the painting offers various layers of interpretation, from the struggle of man against the overwhelming forces of nature to the aesthetic beauty of the wave, which, despite its destructive po

In [9]:
print(response)

content='This is the iconic work "The Great Wave off Kanagawa" by the master of ukiyo-e, Katsushika Hokusai. It is the first print of his series Thirty-Six Views of Mount Fuji, created around the early 1830s. \n\nThe painting depicts an enormous wave threatening boats near the Japanese prefecture of Kanagawa. Mount Fuji can be seen in the background, its small, static presence contrasting dramatically with the dynamic, powerful wave, which is the main subject of the piece. Hokusai\'s innovative use of a variety of blue shades, including the novel Berlin blue, creates a striking, dramatic contrast that enhances the sense of movement and danger.\n\nHokusai\'s work is a fine example of the tension and harmony between man, nature, and the passing of time, common themes in Japanese art. Despite its visual simplicity, the painting offers various layers of interpretation, from the struggle of man against the overwhelming forces of nature to the aesthetic beauty of the wave, which, despite its

In [11]:
print(response.usage_metadata)

{'input_tokens': 81, 'output_tokens': 293, 'total_tokens': 374, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}



### Example Response

`-- Your response may be different`

This is a classic piece of Japanese art known as "The Great Wave off Kanagawa," which is part of the series "Thirty-six Views of Mount Fuji" by the esteemed ukiyo-e artist Katsushika Hokusai. As a critical connoisseur, I must note the remarkable composition and bold use of color that makes this woodblock print an iconic masterpiece of art history.

The painting captures an enormous wave threatening boats near the Japanese prefecture of Kanagawa. Although it appears to be a menacing wave, the composition cleverly situates Mount Fuji in the background, which is dwarfed in size by the wave but stands resolute and immovable, symbolizing the enduring power of nature and possibly also the spirit of Japan.

Hokusai's genius is evident in the dynamic curvature of the wave and the foamy tendrils that almost resemble clawed fingers, conjuring a visceral response from the viewer as if one can almost feel the drop in the stomach that accompanies a ship at the mercy of the ocean's might.

The intricacy with which he has depicted the waves and the delicate details of the foam against the simple elegance of Mount Fuji contributes to a contrast that is both dramatic and tranquil, a duality that defines the relationship between humanity and nature.


-----------------
You can also send a local image to the API, it requires some transformations

Generating Images with Dall-E over the API




In [12]:
import os
import requests
from PIL import Image
import json

In [13]:
%pip install openai



In [14]:
import os
from openai import OpenAI

# Set your OpenAI API key from environment variables
openai_api_key = os.getenv("OPENAI_API_KEY")

# Initialize the OpenAI client (directly, as image generation is outside the scope of LangChain's chat model)
client = OpenAI(api_key=openai_api_key)

# Define parameters for the image generation
model = "dall-e-3"
prompt = "shinto shrine in the middle of the lake with fuji in the background in autumn"
n = 1
size = "1024x1024"
quality = "standard"
style = "vivid"

# Generate image using the OpenAI API
response = client.images.generate(
    model=model,
    prompt=prompt,
    n=n,
    size=size,
    quality=quality,
    style=style
)

response

ImagesResponse(created=1730970045, data=[Image(b64_json=None, revised_prompt="Visually depict a serene autumn landscape where a traditional Shinto shrine resides in the middle of a calm, mirror-like lake. The majestic Mount Fuji, partially shrouded in low mist, forms a grand backdrop. Sunlight filters through colorful fall foliage, casting a magical glow on the scene. The shrine appears tranquil and pristine, while the vibrant autumn colors reflected on the lake's surface enhance its sacred ambiance. The image represents a peaceful coexistence of man-made structures and the stunning natural environment.", url='https://oaidalleapiprodscus.blob.core.windows.net/private/org-IVoRW5oM0GIt2nR5ZPAIBLr5/user-SV9GulA67H25pVFgyav0y8ia/img-GcsNArhvmynNEgXkfKBgIeHR.png?st=2024-11-07T08%3A00%3A45Z&se=2024-11-07T10%3A00%3A45Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=d505667d-d6c1-4a0a-bac7-5c84a87759f8&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-11-06T20%3A01%3A51Z&ske=2024-1

In [15]:
# Extract the generated image URL from the response
json_response = json.loads(response.model_dump_json())

In [16]:
json_response

{'created': 1730970045,
 'data': [{'b64_json': None,
   'revised_prompt': "Visually depict a serene autumn landscape where a traditional Shinto shrine resides in the middle of a calm, mirror-like lake. The majestic Mount Fuji, partially shrouded in low mist, forms a grand backdrop. Sunlight filters through colorful fall foliage, casting a magical glow on the scene. The shrine appears tranquil and pristine, while the vibrant autumn colors reflected on the lake's surface enhance its sacred ambiance. The image represents a peaceful coexistence of man-made structures and the stunning natural environment.",
   'url': 'https://oaidalleapiprodscus.blob.core.windows.net/private/org-IVoRW5oM0GIt2nR5ZPAIBLr5/user-SV9GulA67H25pVFgyav0y8ia/img-GcsNArhvmynNEgXkfKBgIeHR.png?st=2024-11-07T08%3A00%3A45Z&se=2024-11-07T10%3A00%3A45Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=d505667d-d6c1-4a0a-bac7-5c84a87759f8&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-11-06T20%3A01%3A51Z&ske=202

In [17]:
# Set the directory for the stored image
image_dir = os.path.join(os.curdir, 'media')

# Initialize the image path (note the filetype should be png)
image_path = os.path.join(image_dir, 'generated_image.png')

# Retrieve the generated image
image_url = json_response["data"][0]["url"]  # extract image URL from response
generated_image = requests.get(image_url).content  # download the image
with open(image_path, "wb") as image_file:
    image_file.write(generated_image)

FileNotFoundError: [Errno 2] No such file or directory: './media/generated_image.png'

In [None]:
# Display the image in the default image viewer
image = Image.open(image_path)
image.show()

In [18]:
from IPython.display import Image
Image(filename=image_path)

FileNotFoundError: [Errno 2] No such file or directory: './media/generated_image.png'

In [None]:
from IPython.display import Image

# Now let's generate some our avatar
model = "dall-e-3"
n = 1
size = "1024x1024"
quality = "standard"
style = "vivid"
prompt = """
Generate a cartoon image with the following description of myslef: The image features a female with
brown long hair with curtain bangs, smiling broadly at the camera.

I wear transparent wide frame glasses.

I have a rather light Asian complex and appear to be cheerful. The individual is wearing a black
shirt.

The photo's plain and white background accentuates the subject's features.
The overall appearance suggests a casual or professional setting, perhaps intended for an
identification document or a profile picture.
"""

response = client.images.generate(
    model=model,
    prompt=prompt,
    n=n,
    size=size,
    quality=quality,
    style=style
)

response.image.show()

ImagesResponse(created=1730970382, data=[Image(b64_json=None, revised_prompt='Generate a cartoon image of a cheerful East Asian woman with long, brown hair styled with curtain bangs. She is grinning widely and wearing translucent broad frame glasses. The focus of the image is her light complexion and the black shirt she wears, contrasting with a simple white background. The setting could indicate a casual or professional ambiance, as it might be used for identification documents or profile pictures.', url='https://oaidalleapiprodscus.blob.core.windows.net/private/org-IVoRW5oM0GIt2nR5ZPAIBLr5/user-SV9GulA67H25pVFgyav0y8ia/img-6DPLWC5g7vi27D8R3qKc2hNJ.png?st=2024-11-07T08%3A06%3A22Z&se=2024-11-07T10%3A06%3A22Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=d505667d-d6c1-4a0a-bac7-5c84a87759f8&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-11-06T20%3A23%3A08Z&ske=2024-11-07T20%3A23%3A08Z&sks=b&skv=2024-08-04&sig=GyhQu6ppm%2BnS08WRvDqA4IQFZKNsXF7Kr3cdE8sTgIA%3D')])

In [None]:
json_response = json.loads(response.model_dump_json())

In [None]:
json_response