In [None]:
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Gemini 3.1 Flash Image (Nano Banana 🍌) Generation on Vertex AI

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_3_1_flash_image_gen.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fgetting-started%2Fintro_gemini_3_1_flash_image_gen.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/getting-started/intro_gemini_3_1_flash_image_gen.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_3_1_flash_image_gen.ipynb">
      <img width="32px" src="https://raw.githubusercontent.com/primer/octicons/refs/heads/main/icons/mark-github-24.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

<div style="clear: both;"></div>

<b>Share to:</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_3_1_flash_image_gen.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_3_1_flash_image_gen.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_3_1_flash_image_gen.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_3_1_flash_image_gen.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_3_1_flash_image_gen.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>

| Author |
| --- |
| [Katie Nguyen](https://github.com/katiemn) |

## Overview
This notebook will show you how to use the Gemini 3.1 Flash Image (Nano Banana) image model. This model is a powerful, generalist multimodal model that offers state-of-the-art image generation and conversational image editing capabilities. It's also able to show its work, allowing you to set the thinking level and see the 'thought process' behind the generated output.

In this tutorial, you'll learn how to use gemini-3.1-flash-image-preview in Vertex AI using the Google Gen AI SDK to try out the following scenarios:

- Image generation:
  - Text-to-image generation
  - Model thoughts
  - Grounding with web search
  - Image sizing
- Image editing:
  - Multi-turn image editing (chat)
  - Editing with reference images

## Get started

### Install Google Gen AI SDK for Python


In [None]:
# Install (or upgrade to the newest) Google Gen AI SDK; --quiet trims pip's output.
%pip install --upgrade --quiet google-genai

### Authenticate your notebook environment (Colab only)

If you are running this notebook on Google Colab, run the following cell to authenticate your environment.

In [2]:
import sys

# Only act when the google.colab package is already loaded in this kernel,
# so the cell does nothing outside of Colab.
if "google.colab" in sys.modules:
    from google.colab import auth

    # Authenticate the current Colab user for the Google Cloud calls made later.
    auth.authenticate_user()

### Import libraries

In [3]:
from IPython.display import Image, Markdown, display
from google import genai
from google.genai import types

import warnings
# Ignore every warning for the rest of the kernel session (presumably to keep
# cell output uncluttered).
# NOTE(review): this is a blanket filter and also hides deprecation warnings.
warnings.filterwarnings("ignore")

### Set Google Cloud project information and create client

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [4]:
import os

PROJECT_ID = "[your-project-id]"  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    # Fall back to the environment. Do NOT wrap this in str(): an unset variable
    # must stay None rather than become the literal project ID "None", so the
    # SDK can resolve the project itself or fail with a clear configuration error.
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")

# Location passed to the client for every request in this notebook.
LOCATION = "global"

# Vertex AI client shared by all following cells.
client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

### Load the model

In [5]:
# Identifier of the image model passed as `model=` in the requests below.
MODEL_ID = "gemini-3.1-flash-image-preview"

## Image generation

### Text-to-image

In the cell below, you'll call the `generate_content` method and modify the following arguments:

  - `contents`: The `prompt`, a text-only user message describing the image to be generated.
  - `config`: A config for specifying content settings.
    - `response_modalities`: To generate an image, you must include `IMAGE` in the `response_modalities` list. To get both text and images, specify `IMAGE` and `TEXT`.
    - `ImageConfig`: Set the `aspect_ratio`. Valid ratios are: 1:1, 3:2, 2:3, 3:4, 4:3, 1:4, 4:1, 4:5, 5:4, 1:8, 8:1, 9:16, 16:9, 21:9
    - `ThinkingConfig`: Set the `thinking_level` to `HIGH` or `MINIMAL`, and if you'd like to view the model thoughts, set `include_thoughts` to `True`.
      - `HIGH`: Allows the model to use more tokens for thinking and is suitable for complex prompts requiring deep reasoning.
      - `MINIMAL`: Constrains the model to use as few tokens as possible for thinking and is best used for low-complexity tasks.


All generated images include a [SynthID watermark](https://deepmind.google/technologies/synthid/), which can be verified via the Media Studio in [Vertex AI Studio](https://cloud.google.com/generative-ai-studio?hl=en).

In [None]:
# Text prompt describing the image, and the aspect ratio requested for it.
prompt = """
A high-contrast, grainy black and white street photography shot. A woman in dark sunglasses is captured in mid-stride with elegant motion blur. Overlaid on the image are large, white, pillowy bubble lines that curve around her to trace her silhouette. A word is added to the top of the image in the same white, bubble font: STYLE.
"""
aspect_ratio = "3:2"

# Request both modalities, a PNG result, and high-effort thinking with the
# thoughts included in the response so a later cell can display them.
generation_config = types.GenerateContentConfig(
    response_modalities=["IMAGE", "TEXT"],
    image_config=types.ImageConfig(
        aspect_ratio=aspect_ratio,
        output_mime_type="image/png",
    ),
    thinking_config=types.ThinkingConfig(
        include_thoughts=True,
        thinking_level=types.ThinkingLevel.HIGH,
    ),
)

response = client.models.generate_content(
    model=MODEL_ID,
    contents=prompt,
    config=generation_config,
)

# Any finish reason other than STOP means no usable image was produced.
candidate = response.candidates[0]
if candidate.finish_reason != types.FinishReason.STOP:
    raise ValueError(f"Prompt Content Error: {candidate.finish_reason}")

# Show only the final image(s); thought parts are skipped here.
for part in candidate.content.parts:
    if not part.thought and part.inline_data:
        display(Image(data=part.inline_data.data, width=500))

### See the thoughts

Since this is a thinking model, you can check the thoughts that led to the image being produced.

In [None]:
# Walk the parts of the previous response and render only the thought parts:
# text thoughts as Markdown, image thoughts as inline images.
for part in response.parts:
    if not part.thought:
        continue
    if part.text:
        display(Markdown(part.text))
    elif part.inline_data:
        display(Image(data=part.inline_data.data, width=500))

### Grounding with web search results

With this model, you can also generate responses that are grounded in the results of a Google Search.

To display the grounding data, use the helper function in the following cell.

In [14]:
def print_search_grounding_data(response: types.GenerateContentResponse) -> None:
    """Displays a Gemini response with grounding citations in Markdown format.

    The response text is split at the byte offset where each grounding support
    ends, and footnote markers such as "[1][2]" are inserted after the supported
    segment. A "Grounding Sources" section listing the grounding chunks, followed
    by the web search or retrieval queries, is appended after the text.

    Args:
        response: The response to render. A response without candidates or
            without grounding metadata is rendered without citations instead of
            raising.

    Returns:
        None. The Markdown is rendered with `display`.
    """
    candidates = response.candidates
    grounding_metadata = candidates[0].grounding_metadata if candidates else None
    lines: list[str] = []

    if response.text:
        # Citation indexes are in bytes
        ENCODING = "utf-8"
        text_bytes = response.text.encode(ENCODING)
        last_byte_index = 0

        supports = (
            grounding_metadata.grounding_supports if grounding_metadata else None
        ) or []
        # Keep only supports that carry an end offset, and walk them in text
        # order so an out-of-order list cannot repeat or drop text.
        located_supports = sorted(
            (s for s in supports if s.segment and s.segment.end_index is not None),
            key=lambda s: s.segment.end_index,
        )
        for support in located_supports:
            end_index = support.segment.end_index
            lines.append(text_bytes[last_byte_index:end_index].decode(ENCODING))

            # Generate and append citation footnotes (e.g., "[1][2]")
            footnotes = "".join(
                f"[{i + 1}]" for i in (support.grounding_chunk_indices or [])
            )
            lines.append(f" {footnotes}")

            # Update index for the next segment
            last_byte_index = end_index

        # Append any remaining text after the last citation
        if last_byte_index < len(text_bytes):
            lines.append(text_bytes[last_byte_index:].decode(ENCODING))

    lines.append("\n\n----\n## Grounding Sources\n")

    if grounding_metadata is None:
        # Nothing to cite: show the text (if any) and say so explicitly.
        lines.append("_No grounding metadata was returned for this response._\n")
        display(Markdown("".join(lines)))
        return

    if grounding_metadata.grounding_chunks:
        # Build Grounding Sources Section
        lines.append("### Grounding Chunks\n")
        # Numbering starts at 1 and follows the chunk position, so it lines up
        # with the "[i + 1]" footnotes even when a chunk is skipped.
        for i, chunk in enumerate(grounding_metadata.grounding_chunks, start=1):
            context = chunk.web or chunk.retrieved_context or chunk.maps
            if not context:
                continue
            uri = context.uri
            title = context.title or "Source"

            # Convert GCS URIs to public HTTPS URLs
            if uri:
                uri = uri.replace(" ", "%20")
                if uri.startswith("gs://"):
                    uri = uri.replace(
                        "gs://", "https://storage.googleapis.com/", 1
                    )

            lines.append(f"{i}. [{title}]({uri})\n")
            if hasattr(context, "place_id") and context.place_id:
                lines.append(f"    - Place ID: `{context.place_id}`\n\n")
            if hasattr(context, "text") and context.text:
                lines.append(f"{context.text}\n\n")

    # Add Search/Retrieval Queries
    if grounding_metadata.web_search_queries:
        lines.append(
            f"\n**Web Search Queries:** {grounding_metadata.web_search_queries}\n"
        )
        if grounding_metadata.search_entry_point:
            lines.append(
                f"\n**Search Entry Point:**\n{grounding_metadata.search_entry_point.rendered_content}\n"
            )
    elif grounding_metadata.retrieval_queries:
        lines.append(
            f"\n**Retrieval Queries:** {grounding_metadata.retrieval_queries}\n"
        )

    display(Markdown("".join(lines)))

Next, you'll create a Google Search tool and include it in the `tools` parameter of the following request to perform a web search.

In [None]:
prompt = """
A cinematic, wide-angle illustration of the stadium of the NFL team that won the 2026 Super Bowl. Digital art style with high-detail architecture.
"""

# Google Search tool configured with the web search type.
web_search_types = types.SearchTypes(web_search=types.WebSearch())
google_search = types.Tool(
    google_search=types.GoogleSearch(search_types=web_search_types)
)

# Same request shape as before, plus the search tool in `tools`.
grounded_config = types.GenerateContentConfig(
    response_modalities=["TEXT", "IMAGE"],
    image_config=types.ImageConfig(aspect_ratio="21:9"),
    thinking_config=types.ThinkingConfig(
        include_thoughts=True,
        thinking_level=types.ThinkingLevel.HIGH,
    ),
    tools=[google_search],
)

response = client.models.generate_content(
    model=MODEL_ID,
    contents=prompt,
    config=grounded_config,
)

# Render every part in order: text as Markdown, image data inline.
for part in response.candidates[0].content.parts:
    if part.text:
        display(Markdown(part.text))
    if part.inline_data:
        display(Image(data=part.inline_data.data, width=500))

# Finish with the cited text and the list of grounding sources.
print_search_grounding_data(response)

### Image sizes

You can have Nano Banana generate images with an `image_size` of `512`, `1K`, `2K`, or `4K`.

In [None]:
prompt = """
Macro photography of a vibrant teal and lime green chameleon perched on a mossy branch.
"""

# Square image at the "4K" size setting.
sized_config = types.GenerateContentConfig(
    response_modalities=["TEXT", "IMAGE"],
    image_config=types.ImageConfig(
        aspect_ratio="1:1",
        image_size="4K",
    ),
)

response = client.models.generate_content(
    model=MODEL_ID,
    contents=prompt,
    config=sized_config,
)

# Display the generated image(s), leaving out any thought parts.
for part in response.candidates[0].content.parts:
    if not part.thought and part.inline_data:
        display(Image(data=part.inline_data.data, width=500))

## Image editing

You can also edit images with this model; simply pass the original image as part of the prompt.

### Multi-turn image editing (chat)

In this next section, you'll generate a starting image and iteratively alter certain aspects of the image by chatting with the model.

In [None]:
# Open a chat session so later messages can refer back to earlier images.
chat = client.chats.create(
    model=MODEL_ID,
    config=types.GenerateContentConfig(
        response_modalities=['TEXT', 'IMAGE']
    )
)

# First turn: generate the starting image. The apostrophe in "baby's" is plain
# ASCII so the prompt reaches the model without encoding artifacts.
message = "A minimalist overhead shot of two dense florist buckets on a smooth grey pavement. One contains a bouquet of pink baby's breath, and the other contains a cluster of pale blush poppies."
response = chat.send_message(message)

# Display the generated image(s), skipping thought parts.
for part in response.candidates[0].content.parts:
    if part.thought:
        continue
    if part.inline_data:
        display(Image(data=part.inline_data.data, width=500))

Now, you'll send a new message in the existing chat to update the previously generated image.

In [None]:
# Second turn in the same chat: describe the edits to apply to the previous image.
message = "Change the poppies to white dahlias and add a third bucket of pink hydrangeas in the top right corner."
response = chat.send_message(message)

# Display the edited image(s), leaving out any thought parts.
for part in response.candidates[0].content.parts:
    if not part.thought and part.inline_data:
        display(Image(data=part.inline_data.data, width=500))

### Multiple reference images

With Nano Banana, you can include up to 14 reference images in a request to generate a new image that preserves the content of the original images.

Run the following cell to visualize the starting images stored in Cloud Storage.

In [None]:
import requests
from PIL import Image as PIL_Image
from io import BytesIO
import matplotlib.pyplot as plt

# Public HTTPS URLs of the reference images used in the next request.
image_urls = [
    "https://storage.googleapis.com/cloud-samples-data/generative-ai/image/woman.jpeg",
    "https://storage.googleapis.com/cloud-samples-data/generative-ai/image/black-boots.jpg",
    "https://storage.googleapis.com/cloud-samples-data/generative-ai/image/black-bag.jpg",
    "https://storage.googleapis.com/cloud-samples-data/generative-ai/image/shirt.jpg",
    "https://storage.googleapis.com/cloud-samples-data/generative-ai/image/jacket.jpg",
    "https://storage.googleapis.com/cloud-samples-data/generative-ai/image/white-pants.jpg",
]

fig, axes = plt.subplots(2, 3, figsize=(12, 8))

# Hide the axes on every panel first so a panel without an image stays blank.
for ax in axes.flatten():
    ax.axis("off")

# zip pairs each panel with a URL and stops at the shorter sequence, so editing
# the URL list cannot raise an IndexError.
for ax, url in zip(axes.flatten(), image_urls):
    # A timeout keeps the cell from hanging, and raise_for_status surfaces HTTP
    # errors instead of handing an error page to the image decoder.
    image_response = requests.get(url, timeout=30)
    image_response.raise_for_status()
    ax.imshow(PIL_Image.open(BytesIO(image_response.content)))
plt.show()

The process for sending the request is similar to previous image editing calls. The main difference is that you will provide multiple `Part.from_uri` instances, one for each reference image.

In [None]:
# Cloud Storage URIs of the six reference images, one entry per image.
# Each image is listed exactly once so no reference is sent twice.
reference_image_uris = [
    "gs://cloud-samples-data/generative-ai/image/woman.jpeg",
    "gs://cloud-samples-data/generative-ai/image/black-boots.jpg",
    "gs://cloud-samples-data/generative-ai/image/black-bag.jpg",
    "gs://cloud-samples-data/generative-ai/image/shirt.jpg",
    "gs://cloud-samples-data/generative-ai/image/jacket.jpg",
    "gs://cloud-samples-data/generative-ai/image/white-pants.jpg",
]

response = client.models.generate_content(
    model=MODEL_ID,
    contents=[
        # One `Part.from_uri` per reference image. All files have a .jpg/.jpeg
        # extension, so they are declared with the JPEG MIME type.
        *[
            types.Part.from_uri(file_uri=uri, mime_type="image/jpeg")
            for uri in reference_image_uris
        ],
        "Generate an image of a woman wearing white pants, a navy blue shirt, a cropped trench coat, and black boots. She's carrying a black bag. The outline of the woman is cropped against a piece of cardstock with a giant paperclip attached at the top.",
    ],
    config=types.GenerateContentConfig(
        response_modalities=["TEXT", "IMAGE"],
        image_config=types.ImageConfig(
            aspect_ratio="4:3",
        ),
    ),
)

# Render every returned part in order: text as Markdown, image data inline.
for part in response.candidates[0].content.parts:
    if part.text:
        display(Markdown(part.text))
    if part.inline_data:
        display(Image(data=part.inline_data.data, width=500))