In [8]:
import os
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from langdetect import detect

In [12]:
HOME = os.getcwd()
while os.path.basename(HOME) != "app":
	HOME = os.path.dirname(HOME)
print(HOME)

/Users/macbook/Projects/detect_image_product/app


In [18]:
MODELS_FOLDER = os.path.join(HOME, "models")
os.makedirs(MODELS_FOLDER, exist_ok=True)
MODEL_NAME = "facebook/nllb-200-distilled-600M"
MODEL_FOLDER = os.path.join(MODELS_FOLDER, "nllb200_model")

In [19]:
if not os.path.exists(MODEL_FOLDER):
	print(f"Dowloading model {MODEL_NAME} ...")
	
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

	tokenizer.save_pretrained(MODEL_FOLDER)
	model.save_pretrained(MODEL_FOLDER)

	print(f"Download successful!")
else:
	print(f"Model existed !")

Dowloading model facebook/nllb-200-distilled-600M ...




Download successful!


In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from langdetect import detect

SUPPORTED_LANGS = {
	"vi": "vie_Latn",
	"en": "eng_Latn",
	"fr": "fra_Latn",
	"de": "deu_Latn",
	"zh": "zho_Hans",
	"ja": "jpn_Jpan",
}

LOCAL_MODEL_PATH = os.path.join(MODELS_FOLDER, "nllb200_model")
tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_PATH)
model = AutoModelForSeq2SeqLM.from_pretrained(LOCAL_MODEL_PATH)

def translate(text, tgt_lang="eng_Latn"):
	detected_lang = detect(text)
	src_lang = SUPPORTED_LANGS.get(detected_lang, "vie_Latn")

	print(f"Phát hiện ngôn ngữ: {detected_lang} -> {src_lang}")

	inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
	inputs["input_ids"][:, 0] = tokenizer.convert_tokens_to_ids([src_lang])[0] 
	output = model.generate(**inputs, forced_bos_token_id=tokenizer.convert_tokens_to_ids([tgt_lang])[0])
	
	return tokenizer.decode(output[0], skip_special_tokens=True)

texts = [
	"Kính mắt chống chói",
	"Bonjour, comment ça va ?",
	"你好，你好吗？",
]

for text in texts:
	translated_text = translate(text)
	print(f"Gốc: {text}\nDịch: {translated_text}\n")

Phát hiện ngôn ngữ: vi -> vie_Latn
Gốc: Kính mắt chống chói
Dịch: Anti-glare glasses

Phát hiện ngôn ngữ: fr -> fra_Latn
Gốc: Bonjour, comment ça va ?
Dịch: Hey, how you doing?

Phát hiện ngôn ngữ: zh-cn -> vie_Latn
Gốc: 你好，你好吗？
Dịch: You are good, you are good?

