# Google Cloud Translation API

In [None]:
!pip install google-cloud-translate

## Translate without Glossary

### Translate with Cloud Translation API

Two models are commonly used:
1. Neural machine translation (NMT): `general/nmt`.
2. Translation LLM: `general/translation-llm`.

In [None]:
import os
from google.cloud import translate_v3

PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")


def translate_text(
    text: str = "sample text",
    source_language_code: str = "en",
    target_language_code: str = "id",
    model_id: str = "general/translation-llm",
):
    """Translate a single plain-text string with the Cloud Translation API.

    Every translation in the response is printed, then the full response
    object is handed back to the caller.

    Args:
        text: The content to translate.
        source_language_code: Language code of ``text``,
            e.g. "en" for English, "es" for Spanish.
        target_language_code: Language code to translate into,
            e.g. "id" for Indonesian, "es" for Spanish.
            Available languages: https://cloud.google.com/translate/docs/languages#neural_machine_translation_model
        model_id: Model suffix appended to the location path,
            e.g. "general/nmt" or "general/translation-llm".

    Returns:
        The response returned by ``TranslationServiceClient.translate_text``.
    """
    # Both the request parent and the model resource name live under the
    # project's us-central1 location.
    location_path = f"projects/{PROJECT_ID}/locations/us-central1"

    # Supported mime types: https://cloud.google.com/translate/docs/supported-formats
    call_kwargs = {
        "parent": location_path,
        "model": f"{location_path}/models/{model_id}",
        "contents": [text],
        "mime_type": "text/plain",
        "source_language_code": source_language_code,
        "target_language_code": target_language_code,
    }

    api = translate_v3.TranslationServiceClient()
    result = api.translate_text(**call_kwargs)

    # One translation is returned per entry in "contents".
    for item in result.translations:
        print(f"{item.translated_text}")

    return result

### Translate with Gemini API

In [None]:
import base64
import vertexai
from vertexai.generative_models import GenerativeModel, Part, SafetySetting

PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")

# Generation parameters shared by every Gemini request made in this notebook.
generation_config = dict(
    candidate_count=1,
    max_output_tokens=8192,
    temperature=0,
    top_p=0.95,
    top_k=1,
)

# One SafetySetting per harm category, all using the same
# BLOCK_MEDIUM_AND_ABOVE threshold.
safety_settings = [
    SafetySetting(
        category=harm_category,
        threshold=SafetySetting.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    )
    for harm_category in (
        SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        SafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        SafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT,
    )
]

def translate_text_gemini(
    text: str = "sample text",
    language_code: str = "id",
    model_id: str = "gemini-1.5-flash-002",
    source_language_code: str = "en",
) -> str:
    """Translate text by prompting a Gemini model on Vertex AI.

    The translation is printed with surrounding whitespace stripped; the
    returned string is the model's text exactly as received (not stripped).

    Args:
        text: The content to translate.
        language_code: The language code for the translation.
            E.g. "id" for Indonesian, "es" for Spanish, etc.
        model_id: The Gemini model to use for translation.
        source_language_code: The language code of ``text``. Defaults to
            "en", which reproduces the previously hard-coded prompt.

    Returns:
        The text of the model response.
    """

    vertexai.init(project=PROJECT_ID, location="us-central1")
    model = GenerativeModel(model_id)

    # The source language is interpolated rather than fixed to "en" so the
    # same prompt can be used for any language pair.
    prompt = f"""
    You are an expert Translator.
    You are tasked to translate documents from {source_language_code} to {language_code}.
    Please provide an accurate translation of this document and return translation text only: {text}"""

    response = model.generate_content(
        [prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
    )

    # Read the text once and reuse it for both printing and returning.
    translated = response.text
    print(translated.strip())
    return translated

### Try Translation

In [None]:
# Try the NMT model on a single word. Positional arguments are
# (text, source_language_code, target_language_code, model_id).
# The commented-out calls are alternative inputs to experiment with.
# translate_text("youth", "en", 'id', 'general/nmt')
# translate_text("malang", "id", 'en', 'general/nmt')
translate_text("dewasa", "id", 'en', 'general/nmt')

## Translate with Glossary

### Create Glossary

In [None]:
from google.cloud import translate_v3 as translate

PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")

def create_glossary(
    input_uri: str = "YOUR_INPUT_URI",
    glossary_id: str = "YOUR_GLOSSARY_ID",
    timeout: int = 180,
    source_language_code: str = "id",
    target_language_code: str = "en",
) -> translate.Glossary:
    """Create an equivalent-term-sets glossary from a file in Cloud Storage.

    Glossary entries can be words or short phrases (usually fewer than
    five words).
    https://cloud.google.com/translate/docs/advanced/glossary#format-glossary

    Args:
        input_uri: The gsutil URI of the glossary file (usually in CSV) stored in Google Cloud Storage.
        glossary_id: The ID of the glossary to create.
        timeout: The polling timeout passed to the long-running operation.
        source_language_code: First language code of the glossary's language
            set. Defaults to "id", the previously hard-coded value.
        target_language_code: Second language code of the glossary's language
            set. Defaults to "en", the previously hard-coded value.
            Supported language codes: https://cloud.google.com/translate/docs/languages

    Returns:
        The created glossary, as returned by the completed operation.
    """
    client = translate.TranslationServiceClient()

    location = "us-central1"  # The location of the glossary

    name = client.glossary_path(PROJECT_ID, location, glossary_id)
    language_codes_set = translate.types.Glossary.LanguageCodesSet(
        language_codes=[source_language_code, target_language_code]
    )

    gcs_source = translate.types.GcsSource(input_uri=input_uri)

    input_config = translate.types.GlossaryInputConfig(gcs_source=gcs_source)

    glossary = translate.types.Glossary(
        name=name, language_codes_set=language_codes_set, input_config=input_config
    )

    parent = f"projects/{PROJECT_ID}/locations/{location}"
    # glossary is a custom dictionary Translation API uses
    # to translate the domain-specific terminology.
    operation = client.create_glossary(parent=parent, glossary=glossary)

    # Block until the operation completes or the timeout elapses.
    result = operation.result(timeout=timeout)
    print(f"Created: {result.name}")
    print(f"Input Uri: {result.input_config.gcs_source.input_uri}")

    return result

In [None]:
# Create the glossary from a file in Cloud Storage.
# Both arguments below are placeholders: replace them with the real
# gs:// URI of the glossary file and the glossary ID to create.
create_glossary(
    input_uri="gs://your-input-uri.csv",
    glossary_id="YOUR_GLOSSARY_ID",
)

### List Glossaries

In [None]:
from google.cloud import translate

PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")

def list_glossaries() -> list:
    """Print and return every glossary in the project's us-central1 location.

    Returns:
        A list of the ``Glossary`` objects yielded by the API, in the order
        they were received.
    """
    client = translate.TranslationServiceClient()

    location = "us-central1"
    parent = f"projects/{PROJECT_ID}/locations/{location}"

    # Materialize the results once. The client returns a lazily-paged
    # iterator; returning it after looping over it would hand the caller an
    # already-iterated pager rather than the full set of glossaries.
    glossaries = list(client.list_glossaries(parent=parent))
    for glossary in glossaries:
        print(f"Name: {glossary.name}")
        print(f"Entry count: {glossary.entry_count}")
        print(f"Input uri: {glossary.input_config.gcs_source.input_uri}")

        # Note: You can create a glossary using one of two modes:
        # language_code_set or language_pair. When listing the information for
        # a glossary, you can only get information for the mode you used
        # when creating the glossary.
        for language_code in glossary.language_codes_set.language_codes:
            print(f"Language code: {language_code}")

    return glossaries

In [None]:
# Print the details of every glossary found under the project.
list_glossaries()

### Create Translation Function

In [None]:
from google.cloud import translate

PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")

def translate_text_with_glossary(
    text: str = "sample text",
    glossary_id: str = "YOUR_GLOSSARY_ID",
    source_language_code: str = "id",
    target_language_code: str = "en",
) -> translate.TranslateTextResponse:
    """Translate text using a glossary and print the glossary translations.

    Args:
        text: The text to translate.
        glossary_id: The ID of the glossary to use.
        source_language_code: Language code of ``text``. Defaults to "id",
            the previously hard-coded value.
        target_language_code: Language code to translate into. Defaults to
            "en", the previously hard-coded value.
            Supported language codes: https://cloud.google.com/translate/docs/languages

    Returns:
        The full response from ``TranslationServiceClient.translate_text``.
    """
    client = translate.TranslationServiceClient()
    location = "us-central1"  # The location of the glossary
    parent = f"projects/{PROJECT_ID}/locations/{location}"

    # Build the glossary resource name from the same location as the request.
    glossary = client.glossary_path(PROJECT_ID, location, glossary_id)

    glossary_config = translate.TranslateTextGlossaryConfig(glossary=glossary)

    response = client.translate_text(
        request={
            "contents": [text],
            "target_language_code": target_language_code,
            "source_language_code": source_language_code,
            "parent": parent,
            "glossary_config": glossary_config,
        }
    )

    # Print only the translations that had the glossary applied.
    for translation in response.glossary_translations:
        print(f"{translation.translated_text}")

    # The signature and docstring promise the response, so return it
    # (previously the function fell off the end and returned None).
    return response

## Compare Translations

In [None]:
# Plain NMT translation of "dewasa"; the author's noted output is "mature".
translate_text("dewasa", source_language_code="id", target_language_code="en", model_id="general/nmt")
print("===================")
# Glossary-backed translation of the same word; the author's noted output is "adult".
translate_text_with_glossary("dewasa", glossary_id="YOUR_GLOSSARY_ID")

In [None]:
# Plain NMT translation of "Malang"; the author's noted output is "Poor".
translate_text("Malang", source_language_code="id", target_language_code="en", model_id="general/nmt")
print("===================")
# Glossary-backed translation; the author's noted output keeps "Malang" unchanged.
translate_text_with_glossary("Malang", glossary_id="YOUR_GLOSSARY_ID")

In [None]:
# Plain NMT translation of "Kota Batu"; the author's noted output is "Stone City".
translate_text("Kota Batu", source_language_code="id", target_language_code="en", model_id="general/nmt")
print("===================")
# Glossary-backed translation; the author's noted output is "Batu City".
translate_text_with_glossary("Kota Batu", glossary_id="YOUR_GLOSSARY_ID")
# A different word run through the glossary path; the author's noted output is "rocks".
translate_text_with_glossary("bebatuan", glossary_id="YOUR_GLOSSARY_ID")