In [24]:
import time
from transformers import MarianMTModel, MarianTokenizer

# Start time for loading model and tokenizer
start_load_time = time.time()

model_name = 'Helsinki-NLP/opus-mt-ja-en'
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# End time for loading model and tokenizer
end_load_time = time.time()




In [25]:
# Sentence to translate
sentence = "日本語では、友達と話すときと、会社で話すときは、かなり違う言い回しを使うことが一般的です。"

tokens = tokenizer.tokenize(sentence)
print(tokens)

['▁日本', '語', 'では', '、', '友達', 'と', '話す', 'とき', 'と', '、', '会社', 'で', '話す', 'とき', 'は', '、', 'かなり', '違う', '言い', '回し', 'を', '使う', 'ことが', '一般的', 'です', '。']


In [26]:
# Start time for tokenization
start_tokenization_time = time.time()

# Time for setting up data for model

prepared_input = tokenizer(sentence, return_tensors="pt", truncation=True)

# End time for tokenization
end_tokenization_time = time.time()

# Start time for translation
start_translation_time = time.time()

# Translation
translated = model.generate(**tokenizer(sentence, return_tensors="pt", truncation=True))

# End time for translation
end_translation_time = time.time()

# Decoding the translation
translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
print(translated_text)

# Printing time taken for each step
print("Loading Time: {:.4f} seconds".format(end_load_time - start_load_time))
print("Tokenization Time: {:.4f} seconds".format(end_tokenization_time - start_tokenization_time))
print("Translation Time: {:.4f} seconds".format(end_translation_time - start_translation_time))
print("Total Runtime: {:.4f} seconds".format(end_translation_time - start_load_time))


In Japanese, it is common to use very different words when talking to friends and when talking to them at work.
Loading Time: 6.9182 seconds
Tokenization Time: 0.0009 seconds
Translation Time: 1.6699 seconds
Total Runtime: 25.2373 seconds


In [27]:
import time
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

# Start time for loading model and tokenizer
start_load_time = time.time()

model_name_fb = "facebook/mbart-large-50-many-to-many-mmt"
tokenizer_fb = MBart50TokenizerFast.from_pretrained(model_name_fb)
model_fb = MBartForConditionalGeneration.from_pretrained(model_name_fb)

# End time for loading model and tokenizer
end_load_time = time.time()



In [28]:
sentence = "日本語では、友達と話すときと、会社で話すときは、かなり違う言い回しを使うことが一般的です。"

tokens_fb = tokenizer_fb.tokenize(sentence)
print(tokens_fb)


['▁', '日本語', 'では', '、', '友達', 'と', '話す', 'とき', 'と', '、', '会社', 'で', '話す', 'ときは', '、', 'かなり', '違う', '言い', '回', 'し', 'を使う', 'ことが', '一般的', 'です', '。']


In [29]:

source_lang = "ja_XX"
target_lang = "en_XX"
tokenizer_fb.src_lang = source_lang

# Start time for tokenization
start_tokenization_time = time.time()

# Tokenize the input text
inputs = tokenizer_fb(sentence, return_tensors="pt")

# End time for tokenization
end_tokenization_time = time.time()

# Start time for translation
start_translation_time = time.time()

# Generate translation tokens
translated_tokens = model_fb.generate(**inputs, forced_bos_token_id=tokenizer_fb.lang_code_to_id[target_lang])

# End time for translation
end_translation_time = time.time()

# Decode the translated tokens to text
translated_text = tokenizer_fb.decode(translated_tokens[0], skip_special_tokens=True)
print(translated_text)

# Printing time taken for each step
print("Loading Time: {:.4f} seconds".format(end_load_time - start_load_time))
print("Tokenization Time: {:.4f} seconds".format(end_tokenization_time - start_tokenization_time))
print("Translation Time: {:.4f} seconds".format(end_translation_time - start_translation_time))
print("Total Runtime: {:.4f} seconds".format(end_translation_time - start_load_time))


In Japanese, it is common to use quite different phrases when talking with friends and when talking at a company.
Loading Time: 10.8278 seconds
Tokenization Time: 0.0008 seconds
Translation Time: 13.5508 seconds
Total Runtime: 33.4388 seconds
