In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Text Embedding New API

<table align="left">

  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/generative_ai/text_embedding_new_api.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/generative_ai/text_embedding_new_api.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/generative_ai/text_embedding_new_api.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
      Open in Vertex AI Workbench
    </a>
  </td>                                                                                               
</table>

**_NOTE_**: This notebook has been tested in the following environment:

* Python version = 3.9

## Overview

This notebook is a code example that shows how to call the newly released text embedding models (`textembedding-gecko@latest` and `textembedding-gecko-multilingual@latest`).

Learn more about the [text embedding API](https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings#api_changes_to_models_released_in_or_after_august_2023).

### Objective

In this tutorial, you learn how to call the latest text embedding APIs on two
new models, `textembedding-gecko@latest` and `textembedding-gecko-multilingual@latest`.

This tutorial uses the following Google Cloud ML services and resources:

- Vertex LLM SDK


The steps performed include:

- Installation and imports
- Generate embeddings

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing),
and use the [Pricing Calculator](https://cloud.google.com/products/calculator/)
to generate a cost estimate based on your projected usage.

## Installation

Install the following packages required to execute this notebook.

In [1]:
# Install the packages
! pip3 install --upgrade --quiet google-cloud-aiplatform

### Colab only: Uncomment the following cell to restart the kernel.

In [None]:
# Automatically restart kernel after installs so that your environment can access the new packages
# import IPython

# app = IPython.Application.instance()
# app.kernel.do_shutdown(True)

## Before you begin

### Set up your Google Cloud project

**The following steps are required, regardless of your notebook environment.**

1. [Select or create a Google Cloud project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.

2. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).

3. [Enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

4. If you are running this notebook locally, you need to install the [Cloud SDK](https://cloud.google.com/sdk).

#### Set your project ID

**If you don't know your project ID**, try the following:
* Run `gcloud config list`.
* Run `gcloud projects list`.
* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)

In [2]:
# ID of the Google Cloud project used by this notebook. It is passed to
# `gcloud config set project` here and to `vertexai.init` later on.
# Replace the placeholder with your own project ID before running this cell.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

# Set this project as the gcloud CLI's `core/project` property.
! gcloud config set project {PROJECT_ID}

Updated property [core/project].


#### Region

You can also change the `REGION` variable used by Vertex AI. Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations).

In [3]:
# Location passed to `vertexai.init` when the SDK is initialized later in this notebook.
REGION = "us-central1"  # @param {type: "string"}

### Authenticate your Google Cloud account

Depending on your Jupyter environment, you may have to manually authenticate. Follow the relevant instructions below.

**1. Vertex AI Workbench**
* Do nothing as you are already authenticated.

**2. Local JupyterLab instance, uncomment and run:**

In [None]:
# ! gcloud auth login

**3. Colab, uncomment and run:**

In [4]:
# from google.colab import auth
# auth.authenticate_user()

### Import libraries

In [7]:
import vertexai
from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel

### Initialize Vertex AI SDK for Python

Initialize the Vertex AI SDK for Python for your project.

In [8]:
# Initialize the SDK with the project and region configured earlier in the notebook.
vertexai.init(
    project=PROJECT_ID,
    location=REGION,
)

### Generate embeddings

1.   Set the model name. The latest models are
     1. "textembedding-gecko@latest" for English.
     2. "textembedding-gecko-multilingual@latest" for i18n. See [Language coverage](https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings#language_coverage_for_textembedding-gecko-multilinguallatest) for the list of supported languages.
2.   Set the task_type, text, and (optional) title as the model inputs. Available task types are:
     1. "RETRIEVAL_QUERY"
     2. "RETRIEVAL_DOCUMENT"
     3. "SEMANTIC_SIMILARITY"
     4. "CLASSIFICATION"
     5. "CLUSTERING"

> *Note: title field is only allowed for model inputs with task_type "RETRIEVAL_DOCUMENT".*


In [9]:
# Name of the embedding model to call.
MODEL_NAME = "textembedding-gecko@latest"  # @param ["textembedding-gecko@latest", "textembedding-gecko-multilingual@latest"]

# Model inputs: the task type, the text to embed and an optional title.
# Available task types:
#     1. "RETRIEVAL_QUERY"
#     2. "RETRIEVAL_DOCUMENT"
#     3. "SEMANTIC_SIMILARITY"
#     4. "CLASSIFICATION"
#     5. "CLUSTERING"
TASK_TYPE = "RETRIEVAL_DOCUMENT"  # @param ["RETRIEVAL_QUERY", "RETRIEVAL_DOCUMENT", "SEMANTIC_SIMILARITY", "CLASSIFICATION", "CLUSTERING"]
TITLE = "Google"  # @param {type:"string"}
TEXT = "Embed text."  # @param {type:"string"}

# Fail fast on missing required settings. They are checked in a fixed order
# (model, task type, text) so the first missing one is the one reported.
for _name, _value in (
    ("MODEL_NAME", MODEL_NAME),
    ("TASK_TYPE", TASK_TYPE),
    ("TEXT", TEXT),
):
    if not _value:
        raise ValueError(f"Please set {_name}.")
del _name, _value  # keep the loop helpers out of the notebook namespace

# A title may only accompany the RETRIEVAL_DOCUMENT task type.
if TASK_TYPE != "RETRIEVAL_DOCUMENT" and TITLE:
    raise ValueError("Title can only be provided if the task_type is RETRIEVAL_DOCUMENT")

In [None]:
# Helper that embeds a single piece of text.
def text_embedding(
    model_name: str, task_type: str, text: str, title: str = ""
) -> list:
    """Embed one text with a pretrained text embedding model.

    Args:
        model_name: Name passed to ``TextEmbeddingModel.from_pretrained``.
        task_type: Task type set on the ``TextEmbeddingInput``.
        text: The text to embed.
        title: Title set on the ``TextEmbeddingInput``. Defaults to an empty
            string.

    Returns:
        The ``values`` of the first (and only requested) embedding returned
        by ``get_embeddings``.
    """
    embedder = TextEmbeddingModel.from_pretrained(model_name)
    request = TextEmbeddingInput(text=text, title=title, task_type=task_type)
    # A single input is sent, so the result of interest is at index 0.
    return embedder.get_embeddings([request])[0].values

# Embed the configured text and print the length of the returned embedding.
embedding = text_embedding(
    model_name=MODEL_NAME,
    task_type=TASK_TYPE,
    text=TEXT,
    title=TITLE,
)
print(len(embedding))

## Cleaning up

To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud
project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.