In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Visual Question Answering (VQA) with Imagen on Vertex AI

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/vision/getting-started/visual_question_answering.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Run in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/vision/getting-started/visual_question_answering.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/vision/getting-started/visual_question_answering.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
</table>


| | |
|-|-|
|Author(s) | [Thu Ya Kyaw](https://github.com/iamthuya) |

## Overview

[Imagen on Vertex AI](https://cloud.google.com/vertex-ai/docs/generative-ai/image/overview) (image Generative AI) offers a variety of features:
- Image generation
- Image editing
- Visual captioning
- Visual question answering

This notebook focuses on **visual question answering** only.

[Visual question answering (VQA) with Imagen](https://cloud.google.com/vertex-ai/docs/generative-ai/image/visual-question-answering) can understand the content of an image and answer questions about it. The model takes an image and a question as input, and then uses the image as context to produce one or more answers to the question.

Visual question answering (VQA) can be used for a variety of use cases, including:
- assisting the visually impaired to gain more information about the images
- answering customer questions about products or services in the image
- creating interactive learning environments and providing interactive learning experiences

### Objectives

In this notebook, you will learn how to use the Vertex AI Python SDK to:

- Answer questions about images using Imagen's visual question answering feature

- Experiment with different parameters, such as:
    - number of answers to be provided by the model

### Costs

This tutorial uses billable components of Google Cloud:
- Vertex AI (Imagen)

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing) and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Getting Started

### Install Vertex AI SDK, other packages and their dependencies

In [1]:
%pip install --upgrade --user google-cloud-aiplatform

Collecting google-cloud-aiplatform
  Downloading google_cloud_aiplatform-1.71.1-py2.py3-none-any.whl.metadata (32 kB)
Downloading google_cloud_aiplatform-1.71.1-py2.py3-none-any.whl (6.2 MB)
   ---------------------------------------- 0.0/6.2 MB ? eta -:--:--
   -------------------------------------- - 6.0/6.2 MB 30.7 MB/s eta 0:00:01
   ---------------------------------------- 6.2/6.2 MB 21.0 MB/s eta 0:00:00
Installing collected packages: google-cloud-aiplatform
  Attempting uninstall: google-cloud-aiplatform
    Found existing installation: google-cloud-aiplatform 1.70.0
    Uninstalling google-cloud-aiplatform-1.70.0:
      Successfully uninstalled google-cloud-aiplatform-1.70.0
Successfully installed google-cloud-aiplatform-1.71.1
Note: you may need to restart the kernel to use updated packages.




### Restart current runtime

To use the newly installed packages in this Jupyter runtime, you must restart the runtime. The restart code in the cell below is commented out; uncomment it and run the cell to restart the current kernel.

In [2]:
# # Restart kernel after installs so that your environment can access the new packages
# import IPython

# app = IPython.Application.instance()
# app.kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Please wait until it is finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

If you are running this notebook on Google Colab, you will need to authenticate your environment. To do this, uncomment the code in the cell below and run it. This step is not required if you are using [Vertex AI Workbench](https://cloud.google.com/vertex-ai-workbench).

In [None]:
# import sys

# if "google.colab" in sys.modules:
#     # Authenticate user to Google Cloud
#     from google.colab import auth

#     auth.authenticate_user()

### Define Google Cloud project information and initialize Vertex AI

Initialize the Vertex AI SDK for Python for your project:

In [1]:
import vertexai

# Google Cloud project and region used for every Vertex AI call in this notebook
PROJECT_ID = "sesac-24-101"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

# Point the Vertex AI SDK at that project and region
vertexai.init(location=LOCATION, project=PROJECT_ID)

### Load the image question answering model

The model names from Vertex AI Imagen have two components: model name and version number. The naming convention follows this format: `<model-name>@<version-number>`.

For example, `imagetext@001` represents the version **001** of the **imagetext** model.

In [2]:
from vertexai.preview.vision_models import ImageQnAModel

# Load version 001 of the "imagetext" model; every question below is sent to it
image_qna_model = ImageQnAModel.from_pretrained("imagetext@001")

### Load the image file

To use the image question answering model, you first need to create an `Image` object from the image file. The model only accepts `Image` objects, so this is a necessary step before you can ask questions.

Additionally, [Visual Question Answering with Imagen](https://cloud.google.com/vertex-ai/docs/generative-ai/image/visual-question-answering) only accepts specific image file formats (e.g. PNG, JPEG), and may have file size limitations (e.g. 10 MB). You can find out specific details from the [official documentation](https://cloud.google.com/vertex-ai/docs/generative-ai/image/visual-question-answering#img-vqa-rest).

In [3]:
# from vertexai.preview.vision_models import Image

# # Use either a Google Cloud Storage URI or a local file path.
# file_path = (
#     "gs://github-repo/img/vision/google-cloud-next.jpeg"  # @param {type:"string"}
# )

# # Load the image file as Image object
# cloud_next_image = Image.load_from_file(file_path)
# cloud_next_image.show()

In [7]:
from PIL import Image
from PIL.ExifTags import TAGS
import os

# 이미지 파일 경로
# image_path = 'C:/Users/park0/Downloads/IMG_5008.JPG'
# output_path = 'image_metadata.txt'

def get_image_metadata(image_path: str) -> dict:
    """Read the EXIF metadata of an image file.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        A dict mapping each EXIF tag to its value. Tag IDs listed in ``TAGS``
        are replaced by their readable names; unknown IDs are kept as-is.
        The dict is empty when the image carries no EXIF data.
    """
    # The context manager closes the underlying file even if reading fails.
    with Image.open(image_path) as image:
        # Keep using `_getexif()` where the image type provides it so existing
        # results do not change; fall back to the public `getexif()` for image
        # types that lack the private helper instead of raising AttributeError.
        read_exif = getattr(image, "_getexif", None) or getattr(image, "getexif", None)
        exif_data = read_exif() if read_exif is not None else None

    if not exif_data:
        return {}

    # Translate numeric tag IDs into human-readable names where known.
    return {TAGS.get(tag_id, tag_id): value for tag_id, value in exif_data.items()}

# Extract the EXIF metadata of every file in the sample folder and save each
# result as "<image name>.txt" in the current working directory.
SAMPLE_DIR = 'sample_data'
for file_name in sorted(os.listdir(SAMPLE_DIR)):
    source_path = os.path.join(SAMPLE_DIR, file_name)
    if not os.path.isfile(source_path):
        # Sub-directories cannot be opened as images; skip them.
        continue

    metadata = get_image_metadata(source_path)
    output_path = os.path.splitext(file_name)[0] + '.txt'
    # Explicit UTF-8 keeps the export independent of the platform's default
    # encoding, which may be unable to represent some metadata values.
    with open(output_path, 'w', encoding='utf-8') as output_file:
        for key, value in metadata.items():
            output_file.write(f"{key}: {value}\n")

    # Report the file that was actually written (the original message used an
    # undefined `output_path` and raised NameError).
    print(f"메타데이터가 '{output_path}'에 저장되었습니다.")

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\SBA\\img_content\\NIA_dataset02_000000778638.jpg'

In [None]:
from vertexai.preview.vision_models import Image

# Image location: either a Google Cloud Storage URI or a local file path.
file_path = "gs://github-repo/img/vision/google-cloud-next.jpeg"  # @param {type:"string"}

# Wrap the file in an Image object and display it
cloud_next_image = Image.load_from_file(file_path)
cloud_next_image.show()

### Ask questions about the image

Now ask questions about the image using the model:

In [4]:
# Ask the model for a general description of the scene; the answers it
# returns are shown as this cell's output
image_qna_model.ask_question(
    question="What is happening in this image?",
    image=cloud_next_image,
)

['google cloud next']

In [None]:
# Follow up with a more specific question about the same image
image_qna_model.ask_question(
    question="What are the people in the image doing?",
    image=cloud_next_image,
)

You can get up to three answers from a single image by changing the `number_of_results` parameter from 1 to 3.

In [None]:
# Request three candidate answers for the same follow-up question
image_qna_model.ask_question(
    number_of_results=3,
    question="What are the people in the image doing?",
    image=cloud_next_image,
)

## Try it yourself

You can also try using the visual question answering model with images of your choice. If you need to download an image file, you can use the provided auxiliary function `download_image`.

Feel free to experiment with different images and model parameters to see how the results change.

In [None]:
import os

import requests


def download_image(url: str, timeout: float = 30.0) -> str:
    """Download a PNG or JPEG image into the current working directory.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait for the server before the request is aborted.

    Returns:
        The name of the local file the image was written to, taken from the
        last segment of the URL path.

    Raises:
        Exception: If the server responds with a status other than 200.
        ValueError: If no file name can be derived from the URL, or if the
            response is not declared as ``image/png`` or ``image/jpeg``.
    """
    from urllib.parse import urlparse

    # A timeout prevents the notebook from hanging on an unresponsive server.
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"Failed to download image from {url}")

    # Use only the URL path so query strings and fragments never end up in the
    # local file name.
    image_path = os.path.basename(urlparse(url).path)
    if not image_path:
        raise ValueError(f"Cannot derive a file name from {url}")

    # Content-Type may be absent or carry parameters such as "; charset=...";
    # compare only the bare, lower-cased media type.
    image_type = response.headers.get("Content-Type", "").split(";")[0].strip().lower()

    # Check for image type, currently only PNG or JPEG format are supported
    if image_type not in {"image/png", "image/jpeg"}:
        raise ValueError("Image can only be in PNG or JPEG format")

    # Write image data to a file
    with open(image_path, "wb") as f:
        f.write(response.content)
    return image_path

In [None]:
# Download a sample photo; download_image returns the local path it saved the file to
url = "https://storage.googleapis.com/gweb-cloudblog-publish/images/GettyImages-871168786.max-2600x2600.jpg"
image_path = download_image(url)

In [None]:
# Load the downloaded file as an Image object and display it
user_image = Image.load_from_file(image_path)
user_image.show()

In [None]:
# Ask for up to three descriptions of what the downloaded photo shows
image_qna_model.ask_question(
    number_of_results=3,
    question="What is happening in this photo?",
    image=user_image,
)

In [None]:
# Ask an open-ended marketing question about the downloaded photo
image_qna_model.ask_question(
    number_of_results=3,
    question="What advertising channels would this image be suitable for?",
    image=user_image,
)

In [None]:
# Ask a question whose answer is not directly visible in the photo
image_qna_model.ask_question(
    number_of_results=3,
    question="What type of insects could live in this area?",
    image=user_image,
)