Google Cloud > VertexAI > Workbench

* Gemini 1.0 Pro --- text-only (NLP) tasks
* Gemini 1.0 Pro Vision --- multimodal (vision) tasks: text combined with images or video

In [2]:
# Google Cloud project and region passed to the Vertex AI SDK below.
# NOTE(review): the project ID looks like a temporary lab project — replace it with your own before running.
PROJECT_ID = "qwiklabs-gcp-01-b1a82f306fd5"
LOCATION = "us-central1"

import vertexai

# Point the Vertex AI SDK at the project and region defined above.
vertexai.init(project=PROJECT_ID, location=LOCATION)

from vertexai.generative_models import GenerationConfig, GenerativeModel, Image, Part

### Gemini Pro Model 

In [None]:
# Text model handle; `model` is reused by the later text and chat cells.
model = GenerativeModel("gemini-1.0-pro")

# stream=True: the result is iterated chunk by chunk as the answer is generated.
responses = model.generate_content("Why is the sky blue?", stream=True)

# end="" keeps the streamed chunks on one continuous line of output.
for response in responses:
    print(response.text, end="")

In [None]:
# Multi-line instruction prompt sent to the same text model as above.
prompt = """Create a numbered list of 10 items. Each item in the list should be a trend in the tech industry.

Each trend should be less than 5 words."""

# Stream the answer instead of waiting for the full response.
responses = model.generate_content(prompt, stream=True)

# Print each streamed chunk without inserting extra newlines.
for response in responses:
    print(response.text, end="")

Model Parameters

In [None]:
# Sampling / decoding parameters applied to the request below.
generation_config = GenerationConfig(
    temperature=0.9,  # sampling temperature: higher values make the output more random, lower values more deterministic
    top_p = 1.0, # nucleus sampling: sample only from the most probable tokens whose cumulative probability reaches top_p
    top_k = 32, # sample only from the k most probable tokens; a smaller k makes the output less random
    candidate_count = 1, # number of response candidates to generate
    max_output_tokens = 8192,  # upper bound on the length of the generated response, in tokens
)

# Same question as the first cell, but with the explicit generation config.
responses = model.generate_content(
    "Why is the sky blue?",
    generation_config=generation_config,
    stream=True,
)

# Print each streamed chunk without inserting extra newlines.
for response in responses:
    print(response.text, end="")

### Test Chat prompts

In [None]:
# Open a chat session on the text model; `chat` is reused by the follow-up
# cell and by the history cell.
chat = model.start_chat()

# First turn: gives the assistant a persona and the user's preferences.
prompt = """My name is Ned. You are my personal assistant. My favorite movies are Lord of the Rings and Hobbit.

Suggest another movie I might like.
"""

# Send the turn through the chat session (not model.generate_content) and stream the reply.
responses = chat.send_message(prompt, stream=True)

for response in responses:
    print(response.text, end="")

In [None]:
# Follow-up turn on the same chat session. The question only makes sense
# together with the previous turn ("my favorite movies"), so it must go
# through `chat`, not a fresh generate_content call.

prompt = "Are my favorite movies based on a book series?"

responses = chat.send_message(prompt, stream=True)

for response in responses:
    print(response.text, end="")

To view the chat history (all turns exchanged so far in this session):

In [None]:
# Dump the history attribute of the chat session created above.
print(chat.history)

### Gemini 1.0 Pro vision model 

In [None]:
# Vision-capable model handle; used by every image and video cell below.
multimodal_model = GenerativeModel("gemini-1.0-pro-vision")

### helper functions

In [10]:
import http.client  # for http request to interact with web servers
import typing # defining types for variables and functions, improving code readability and maintability 
import urllib.request # tools for working with URLS, potentially for constucting requests to access APIs. 

import IPython.display  # specific to IPYthon notebooks
from PIL import Image as PIL_Image 
from PIL import ImageOps as PIL_ImageOps # various Image operations and manipulations that might be usefull for processing images generated by the model. 

# Type-annotation convention used by the helpers below: `name: type = default`

def display_images(
    images: typing.Iterable[Image],
    max_width: int = 600,
    max_height: int = 350,
) -> None:
    """Show each image inline in the notebook, shrinking oversized ones.

    Args:
        images: Vertex AI ``Image`` objects to display.
        max_width: Widest an image may be (in pixels) before it is scaled down.
        max_height: Tallest an image may be (in pixels) before it is scaled down.
    """
    bounding_box = (max_width, max_height)

    for image in images:
        # NOTE(review): relies on the private `_pil_image` attribute of the
        # SDK's Image class to reach the underlying PIL image.
        picture = typing.cast(PIL_Image.Image, image._pil_image)

        # Normalise every non-RGB mode to RGB before display.
        if picture.mode != "RGB":
            picture = picture.convert("RGB")

        # Only scale when the picture exceeds the box in at least one dimension.
        width, height = picture.size
        if width > max_width or height > max_height:
            picture = PIL_ImageOps.contain(picture, bounding_box)

        IPython.display.display(picture)


# Fetch the raw image data from a URL and return it as bytes.
def get_image_bytes_from_url(image_url: str) -> bytes:
    """Download the resource at ``image_url`` and return its full body.

    Args:
        image_url: URL to open with ``urllib.request.urlopen``.

    Returns:
        The complete response body as bytes.
    """
    with urllib.request.urlopen(image_url) as raw_response:
        # The cast only informs type checkers; it does nothing at runtime.
        http_response = typing.cast(http.client.HTTPResponse, raw_response)
        return http_response.read()

# Load the image at a URL and convert it to a Vertex AI `Image` object.
def load_image_from_url(image_url: str) -> Image:
    """Fetch ``image_url`` and wrap the downloaded bytes in an ``Image``.

    Args:
        image_url: URL of the image to download.

    Returns:
        The ``Image`` built from the downloaded bytes via ``Image.from_bytes``.
    """
    return Image.from_bytes(get_image_bytes_from_url(image_url))

# Convert a Google Cloud Storage (GCS) URI into an HTTPS URL that can be used to fetch the object.
def get_url_from_gcs(gcs_uri: str) -> str:
    """Convert a Cloud Storage URI into an HTTPS URL for fetching the object.

    ``gs://bucket/path/my file.jpg`` becomes
    ``https://storage.googleapis.com/bucket/path/my%20file.jpg``.

    Args:
        gcs_uri: URI of the form ``gs://<bucket>/<object>``. A string without
            the ``gs://`` prefix is treated as ``<bucket>/<object>`` already.

    Returns:
        The ``https://storage.googleapis.com/...`` URL with the bucket/object
        path percent-encoded.
    """
    # Function-scope import keeps this helper self-contained in the notebook.
    from urllib.parse import quote

    scheme = "gs://"
    # Strip the scheme only when it is the leading prefix, so an object name
    # that happens to contain "gs://" is left intact.
    path = gcs_uri[len(scheme):] if gcs_uri.startswith(scheme) else gcs_uri

    # Encode every character that is not URL-safe (spaces, '#', '?', '%',
    # non-ASCII, ...), not just spaces; '/' stays literal as the separator.
    return "https://storage.googleapis.com/" + quote(path, safe="/")


def print_multimodal_prompt(contents: list):
    """Render a Gemini prompt in the notebook, one element at a time.

    Each element of ``contents`` is shown in order:

    * ``Image`` -> displayed through ``display_images``.
    * ``Part``  -> its ``file_data.file_uri`` is converted to an HTTPS URL,
      downloaded as an image and displayed.
      NOTE(review): this assumes the Part references an image in Cloud
      Storage; confirm before passing video or text Parts.
    * anything else -> printed as-is.
    """
    for item in contents:
        if isinstance(item, Image):
            display_images([item])
            continue

        if isinstance(item, Part):
            public_url = get_url_from_gcs(item.file_data.file_uri)
            fetched_image = load_image_from_url(public_url)
            IPython.display.display(fetched_image)
            continue

        print(item)

In [None]:
# Copy a sample image from Cloud Storage to the notebook's working directory
# (IPython shell magic: the line is run by the shell, not by Python).
! gsutil cp "gs://cloud-samples-data/generative-ai/image/320px-Felis_catus-cat_on_snow.jpg" ./image.jpg

# Load the downloaded file as a Vertex AI Image.
image = Image.load_from_file("image.jpg")

# A multimodal prompt is a list mixing images and text, in order.
prompt = "Describe this image?"
contents = [image, prompt]

# Request is issued here; the reply is consumed by the loop at the bottom.
responses = multimodal_model.generate_content(contents, stream=True)

print("-------Prompt--------")
print_multimodal_prompt(contents)

print("\n-------Response--------")
for response in responses:
    print(response.text, end="")

In [None]:
# Reference the image by its Cloud Storage URI instead of loading it locally.
gcs_uri = "gs://cloud-samples-data/generative-ai/image/boats.jpeg"

# Part.from_uri wraps the URI together with its MIME type; `image` is a Part
# here, not an Image, so print_multimodal_prompt takes its Part branch.
image = Part.from_uri(gcs_uri, mime_type="image/jpeg")
prompt = "Describe the scene?"
contents = [image, prompt]

# Request is issued here; the reply is consumed by the loop at the bottom.
responses = multimodal_model.generate_content(contents, stream=True)

print("-------Prompt--------")
print_multimodal_prompt(contents)

print("\n-------Response--------")
for response in responses:
    print(response.text, end="")

In [None]:
# Load the image over HTTPS from its storage.googleapis.com URL
# (as opposed to the gs:// URI used in the previous cell).
image_url = (
    "https://storage.googleapis.com/cloud-samples-data/generative-ai/image/boats.jpeg"
)
image = load_image_from_url(image_url)  # downloads the bytes and wraps them in an Image

# Multimodal prompt: the image followed by the text instruction.
prompt = "Describe the scene?"
contents = [image, prompt]

# Request is issued here; the reply is consumed by the loop at the bottom.
responses = multimodal_model.generate_content(contents, stream=True)

print("-------Prompt--------")
print_multimodal_prompt(contents)

print("\n-------Response--------")
for response in responses:
    print(response.text, end="")

In [None]:
# Load the three landmark images over HTTPS from their storage.googleapis.com URLs.
image1_url = "https://storage.googleapis.com/github-repo/img/gemini/intro/landmark1.jpg"
image2_url = "https://storage.googleapis.com/github-repo/img/gemini/intro/landmark2.jpg"
image3_url = "https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg"
image1 = load_image_from_url(image1_url)
image2 = load_image_from_url(image2_url)
image3 = load_image_from_url(image3_url)

# Few-shot examples: the expected answer for image1 and image2.
# Each example must itself be well-formed JSON ("key": "value" pairs), because
# it is the pattern the model is being asked to continue for the third image.
prompt1 = """{"city": "London", "Landmark": "Big Ben"}"""
prompt2 = """{"city": "Paris", "Landmark": "Eiffel Tower"}"""

# Interleave image -> answer pairs, then end with the unanswered third image.
contents = [image1, prompt1, image2, prompt2, image3]

# Request is issued here; the reply is consumed by the loop at the bottom.
responses = multimodal_model.generate_content(contents, stream=True)

print("-------Prompt--------")
print_multimodal_prompt(contents)

print("\n-------Response--------")
for response in responses:
    print(response.text, end="")

In [None]:
# One bucket/object path, expressed two ways:
#   video_uri (gs://...)    -> used to build the Part in the next cell
#   video_url (https://...) -> used by the notebook's inline video player
file_path = "github-repo/img/gemini/multimodality_usecases_overview/pixel8.mp4"
video_uri = f"gs://{file_path}"
video_url = f"https://storage.googleapis.com/{file_path}"

# Last expression of the cell: the notebook renders it as an embedded player.
IPython.display.Video(video_url, width=450)

In [None]:
# Questions to be answered from the video content alone.
prompt = """
Answer the following questions using the video only:
What is the profession of the main person?
What are the main features of the phone highlighted?
Which city was this recorded in?
Provide the answer JSON.
"""

# Reference the video by its gs:// URI (video_uri comes from the previous cell).
video = Part.from_uri(video_uri, mime_type="video/mp4")
contents = [prompt, video]

responses = multimodal_model.generate_content(contents, stream=True)

# Print each streamed chunk without inserting extra newlines.
for response in responses:
    print(response.text, end="")