# Preparation

In [1]:
# Install environment packages first
!pip install -q -U immutabledict sentencepiece
!pip install -q -U sacremoses transformers
!git clone https://github.com/google/gemma_pytorch.git
!mkdir /kaggle/working/gemma/
!mv /kaggle/working/gemma_pytorch/gemma/* /kaggle/working/gemma/

Cloning into 'gemma_pytorch'...
remote: Enumerating objects: 239, done.[K
remote: Counting objects: 100% (123/123), done.[K
remote: Compressing objects: 100% (70/70), done.[K
remote: Total 239 (delta 87), reused 58 (delta 53), pack-reused 116 (from 1)[K
Receiving objects: 100% (239/239), 2.18 MiB | 19.54 MiB/s, done.
Resolving deltas: 100% (136/136), done.


In [2]:
# Import whole modules (standard library, PyTorch, Transformers, Gemma)
import contextlib
import gemma
import os
import sys
import torch
import transformers

# Import the specific classes and helper functions used below
from transformers import MarianMTModel, MarianTokenizer
from gemma.config import GemmaConfig, get_model_config
from gemma.model import GemmaForCausalLM
from gemma.tokenizer import Tokenizer

# Configure and load the Gemma model

In [3]:
# Model variant and checkpoint location
variant = "2b-v2"
model_path = '/kaggle/input/gemma-2-2b-jpn-it/pytorch/gemma-2-2b-jpn-it/1/'
weights_file = os.path.join(model_path, "model.ckpt")

# Machine classification: use the GPU when CUDA is available, otherwise fall
# back to the CPU so the notebook still runs on a CPU-only session instead of
# failing when the model is moved to the device.
machine_type = "cuda" if torch.cuda.is_available() else "cpu"

# Set the default Tensor type
@contextlib.contextmanager
def _set_default_tensor_type(dtype: torch.dtype):
    torch.set_default_dtype(dtype)
    yield
    torch.set_default_dtype(torch.float)

# Look up the configuration for the selected variant and point it at the
# tokenizer file stored under model_path
model_config = get_model_config(variant)
model_config.tokenizer = os.path.join(model_path, "tokenizer.model")

# Resolve the torch device named by machine_type
device = torch.device(machine_type)

# Build the model and read the checkpoint while the configuration's dtype is
# torch's default dtype, then move it to the device and switch to eval mode
with _set_default_tensor_type(model_config.get_dtype()):
    model = GemmaForCausalLM(model_config)
    model.load_weights(weights_file)
    model = model.to(device)
    model = model.eval()

# Generate lyric translation

In [4]:
# Template that wraps a user message in <start_of_turn>/<end_of_turn> markers
user_chat_template = "<start_of_turn>user\n{prompt}<end_of_turn>\n"

# Lyric taken from the song "Don't Get On My Way" by Hiroshi Kakizaki & r-Project
japanese_lyric = "けして誰にも邪魔はさせない"

# Build the model input: the lyric as a user turn, followed by the opening of
# the model turn.
# NOTE(review): the prompt holds only the lyric and no explicit instruction to
# translate it -- confirm this is intended.
prompt = "".join((
    user_chat_template.format(prompt=japanese_lyric),
    "<start_of_turn>model\n",
))

# Run generation on the selected device, limited to 256 output tokens
result = model.generate(prompt, device=device, output_len=256)

# Show what the model produced
print("Generated output (Japanese): ", result)

Generated output (Japanese):  その言葉、とても強い意志を感じます！ 

どんなことがあって、誰かを邪魔しないために、強い思いで行動しているのでしょうか？


 
 


<end_of_turn>


# Multilingual translations

In [5]:
# Translation helper built on the MarianMT model family
def translate_text(text, target_language):
    """Translate ``text`` with the Helsinki-NLP ``opus-mt-ja-<target>`` model.

    Args:
        text: The source text handed to the tokenizer.
        target_language: Language code appended to the model name,
            e.g. 'es', 'it', 'fr' or 'en'.

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    # The checkpoint name is derived from the requested target language
    checkpoint = f'Helsinki-NLP/opus-mt-ja-{target_language}'

    # Load the matching tokenizer and model; both are loaded again on every call
    marian_tokenizer = MarianTokenizer.from_pretrained(checkpoint)
    marian_model = MarianMTModel.from_pretrained(checkpoint)

    # Encode the input as PyTorch tensors, then generate the output token ids
    encoded = marian_tokenizer(text, return_tensors='pt', padding=True)
    generated_ids = marian_model.generate(**encoded)

    # Only the first generated sequence is decoded and returned
    first_sequence = generated_ids[0]
    return marian_tokenizer.decode(first_sequence, skip_special_tokens=True)

# The lyric to translate
japanese_lyric = "けして誰にも邪魔はさせない"

# Translate into Spanish, Italian, French and English, in that order
(
    spanish_translation,
    italian_translation,
    french_translation,
    english_translation,
) = [
    translate_text(japanese_lyric, language_code)
    for language_code in ('es', 'it', 'fr', 'en')
]

# Print the input followed by each translation
for heading, line in (
    ("Japanese input: ", japanese_lyric),
    ("Translation in Spanish: ", spanish_translation),
    ("Translation in Italian: ", italian_translation),
    ("Translation in French: ", french_translation),
    ("Translation in English: ", english_translation),
):
    print(heading, line)

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/788k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/829k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.58M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/306M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/834k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/832k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.72M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/312M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/788k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/828k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.59M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/306M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/782k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.50M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/303M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Japanese input:  けして誰にも邪魔はさせない
Translation in Spanish:  No voy a dejar que nadie se meta en mi camino.
Translation in Italian:  Non permettero' a nessuno di interferire.
Translation in French:  Je ne laisserai personne m'en empêcher.
Translation in English:  I'm not letting anyone get in my way.
