# Preparation

In [10]:
# Install environment packages first
!pip install -q -U immutabledict sentencepiece
!pip install -q -U sacremoses transformers
!git clone https://github.com/google/gemma_pytorch.git
!mkdir /kaggle/working/gemma/
!mv /kaggle/working/gemma_pytorch/gemma/* /kaggle/working/gemma/

# Install JupyterLab widgets module
!pip uninstall jupyterlab_widgets -y
!pip install jupyterlab_widgets

fatal: destination path 'gemma_pytorch' already exists and is not an empty directory.
mkdir: cannot create directory '/kaggle/working/gemma/': File exists
mv: cannot stat '/kaggle/working/gemma_pytorch/gemma/*': No such file or directory
Found existing installation: jupyterlab_widgets 3.0.13
Uninstalling jupyterlab_widgets-3.0.13:
  Successfully uninstalled jupyterlab_widgets-3.0.13
Collecting jupyterlab_widgets
  Using cached jupyterlab_widgets-3.0.13-py3-none-any.whl.metadata (4.1 kB)
Using cached jupyterlab_widgets-3.0.13-py3-none-any.whl (214 kB)
Installing collected packages: jupyterlab_widgets
Successfully installed jupyterlab_widgets-3.0.13


In [None]:
# Import whole modules
import contextlib
import gemma
import os
import sys
import torch
import transformers

# Import the specific classes and functions used below
from transformers import MarianMTModel, MarianTokenizer
from gemma.config import GemmaConfig, get_model_config
from gemma.model import GemmaForCausalLM
from gemma.tokenizer import Tokenizer

# Configure and load the model

In [11]:
# Model variant passed to get_model_config()
variant = "2b-v2"

# Device type: use the GPU when one is visible to torch, otherwise fall back to the CPU
# so the notebook does not fail on a session without an accelerator.
machine_type = "cuda" if torch.cuda.is_available() else "cpu"

# Location of the model files (Kaggle input dataset) and of the checkpoint inside it
model_path = '/kaggle/input/gemma-2-2b-jpn-it/pytorch/gemma-2-2b-jpn-it/1/'
weights_file = os.path.join(model_path, "model.ckpt")

# Set the default Tensor type
@contextlib.contextmanager
def _set_default_tensor_type(dtype: torch.dtype):
    torch.set_default_dtype(dtype)
    yield
    torch.set_default_dtype(torch.float)

# Select the torch device (e.g. GPU or CPU) named by machine_type
device = torch.device(machine_type)

# Build the configuration for the chosen variant and point it at the tokenizer file
# that sits next to the checkpoint
model_config = get_model_config(variant)
model_config.tokenizer = os.path.join(model_path, "tokenizer.model")

# Construct the model and load its weights while the configured dtype is the torch
# default, then move it to the device and switch it to evaluation mode
with _set_default_tensor_type(model_config.get_dtype()):
    model = GemmaForCausalLM(model_config)
    model.load_weights(weights_file)
    model = model.to(device)
    model = model.eval()

# Generate lyric translation

In [12]:
# Chat template wrapping a single user turn, and the marker that closes a turn
user_chat_template = "<start_of_turn>user\n{prompt}<end_of_turn>\n"
end_of_turn_marker = "<end_of_turn>"

# Input the lyric from the source of a song ("Don't Get On My Way" by Hiroshi Kakizaki & r-Project)
japanese_lyric = "けして誰にも邪魔はさせない"

# Build the model input: the user turn followed by the opening of the model turn.
# NOTE(review): the prompt contains only the lyric and no instruction to translate it;
# the recorded output of this cell is a conversational reply in Japanese rather than a
# translation -- confirm whether that is the intended behaviour.
prompt = (
    user_chat_template.format(prompt=japanese_lyric)
    + "<start_of_turn>model\n"
)

# Generate the model output for the prompt
raw_result = model.generate(
    prompt,
    device=device,
    output_len=256
)

# Keep only the text before the end-of-turn marker and trim surrounding whitespace,
# so the control token and trailing blank lines are not printed.
result = raw_result.split(end_of_turn_marker, 1)[0].strip()

# Print the result
print("Generated output (Japanese): ", result)

Generated output (Japanese):  その気持ち、よく分かります！ 😊 

「自分のペースで、自分のやりたいことを追求したい」という気持ちって、とても素晴らしいですね。 

でも、時には周囲の人と協力したり、助け合うことも大切です。 
周りの人に「邪魔」だと感じられるようなことは、他に良い方法はないかな？ 😊

もし、何か困っていることがあれば、気軽に相談してくださいね！  




<end_of_turn>


# Multilingual translations

In [13]:
# Use MarianMT model as a function to translate text
def translate_text(text: str, target_language: str, source_language: str = "ja") -> str:
    """Translate ``text`` with a Helsinki-NLP MarianMT checkpoint.

    The checkpoint name is built as
    ``Helsinki-NLP/opus-mt-{source_language}-{target_language}`` and both the
    tokenizer and the model are loaded with ``from_pretrained`` on every call.

    Args:
        text: The text to translate.
        target_language: Language code used as the target part of the checkpoint
            name, e.g. ``'es'``, ``'it'``, ``'fr'`` or ``'en'``.
        source_language: Language code used as the source part of the checkpoint
            name. Defaults to ``'ja'`` (Japanese).

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    # Step 1: Checkpoint name for the requested language pair
    model_name = f'Helsinki-NLP/opus-mt-{source_language}-{target_language}'

    # Step 2: Tokenizer and model for translation. The model is kept in a local name
    # that does not shadow the notebook-level Gemma `model`.
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    translation_model = MarianMTModel.from_pretrained(model_name)

    # Step 3: Input text tokenization (PyTorch tensors)
    tokenized_text = tokenizer(text, return_tensors='pt', padding=True)

    # Step 4: Generate translation
    translated = translation_model.generate(**tokenized_text)

    # Step 5: Decode the first generated sequence into plain text
    return tokenizer.decode(translated[0], skip_special_tokens=True)

# Retrieve the lyric example
japanese_lyric = "けして誰にも邪魔はさせない"

# Target languages in display order: printed label -> language code for translate_text
target_languages = {
    "Spanish": "es",
    "Portuguese": "pt",
    "Italian": "it",
    "French": "fr",
    "Danish": "da",
    "Swedish": "sv",
    "Hungarian": "hu",
    "Finnish": "fi",
    "German": "de",
    "Dutch": "nl",
    "English": "en",
}

# Perform the translation once per target language, in the order listed above
translations = {
    language: translate_text(japanese_lyric, code)
    for language, code in target_languages.items()
}

# Keep the per-language variables available for any cell that reads them by name
(
    spanish_translation,
    portuguese_translation,
    italian_translation,
    french_translation,
    danish_translation,
    swedish_translation,
    hungarian_translation,
    finnish_translation,
    german_translation,
    dutch_translation,
    english_translation,
) = translations.values()

# Print the input followed by one line per translation
print("Japanese input: ", japanese_lyric)
for language, translated_lyric in translations.items():
    print(f"Translation in {language}: ", translated_lyric)

Japanese input:  けして誰にも邪魔はさせない
Translation in Spanish:  No voy a dejar que nadie se meta en mi camino.
Translation in Portuguese:  Não vou deixar que ninguém se meta no meu caminho.
Translation in Italian:  Non permettero' a nessuno di interferire.
Translation in French:  Je ne laisserai personne m'en empêcher.
Translation in Danish:  Jeg vil ikke lade nogen komme i vejen for dig.
Translation in Swedish:  Jag tänker inte låta någon störa mig.
Translation in Hungarian:  Senki sem állhat az utamba.
Translation in Finnish:  En anna kenenkään häiritä sinua.
Translation in German:  Ich lasse niemanden aus dem Weg gehen.
Translation in Dutch:  Ik laat niemand je in de weg staan.
Translation in English:  I'm not letting anyone get in my way.
