In [1]:
import torch
import subtitler
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

In [None]:
# Method 1: high-level pipeline using the "small" model.
model = subtitler.load_model("small")

# get_avpath receives a list of locations; presumably it resolves them to the
# media files to process -- confirm against the subtitler package.
avpath = subtitler.get_avpath(["data/test/audios"])

# Run subtitle generation with the "translate" task passed as a keyword option.
audio_path = subtitler.generate_subtitle(model, avpath, task="translate")

In [None]:
# Method 2: step-by-step decoding of one audio file with the "medium" model.
model = subtitler.load_model("medium")

# Read the audio file, then pad/trim it to fit 30 seconds.
audio = subtitler.pad_or_trim(subtitler.load_audio("data/test/audios/Mojito.mp3"))

# Build the log-Mel spectrogram and place it on the model's device.
mel = subtitler.log_mel_spectrogram(audio).to(model.device)

# Language detection: `probs` is a mapping keyed by language; report the key
# with the largest value.
_, probs = model.detect_language(mel)
print(f"Detected language: {max(probs, key=probs.get)}")

# Decode the audio, enabling fp16 only when CUDA is available.
options = subtitler.DecodingOptions(fp16=torch.cuda.is_available())
result = subtitler.decode(model, mel, options)

# Show the recognized text.
print(result.text)

In [None]:
# Method 3: step-by-step decoding of a video's audio track with the "large" model.
model = subtitler.load_model("large")

# Pull the audio stream out of the video, load it, and pad/trim it to fit
# 30 seconds.
audio_path = subtitler.extract_audio("data/test/videos/接触.mp4")
audio = subtitler.pad_or_trim(subtitler.load_audio(audio_path))

# Build the log-Mel spectrogram and place it on the model's device.
mel = subtitler.log_mel_spectrogram(audio).to(model.device)

# Language detection: `probs` is a mapping keyed by language; report the key
# with the largest value.
_, probs = model.detect_language(mel)
print(f"Detected language: {max(probs, key=probs.get)}")

# Decode the audio, enabling fp16 only when CUDA is available.
options = subtitler.DecodingOptions(fp16=torch.cuda.is_available())
result = subtitler.decode(model, mel, options)

# Show the recognized text.
print(result.text)

In [None]:
# X -> Chinese translation with the mBART-50 many-to-many checkpoint.
article_en = "actions speak louder than words."
article_ja = "虎穴に入らずんば虎子を得ず."
article_ko = "지혜로운 자는 비극에서도 긍정적인 것을 찾는다."


model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")


def translate_to_chinese(text, src_lang):
    """Translate ``text`` into Chinese using the cell's ``model`` and ``tokenizer``.

    Args:
        text: The source sentence to translate.
        src_lang: Language code assigned to ``tokenizer.src_lang`` before
            encoding, e.g. ``"en_XX"``, ``"ja_XX"`` or ``"ko_KR"``.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated tokens,
        with special tokens skipped.

    Note:
        ``tokenizer.src_lang`` is left set to ``src_lang`` after the call.
    """
    # The source language must be set on the tokenizer before encoding.
    tokenizer.src_lang = src_lang
    encoded = tokenizer(text, return_tensors="pt")
    generated_tokens = model.generate(
        **encoded,
        # Force generation to begin with the zh_CN language token so the
        # output is Chinese.
        forced_bos_token_id=tokenizer.lang_code_to_id["zh_CN"],
    )
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)


# translate English to Chinese
result_en_zh = translate_to_chinese(article_en, "en_XX")
print(result_en_zh)


# translate Japanese to Chinese
result_ja_zh = translate_to_chinese(article_ja, "ja_XX")
print(result_ja_zh)


# translate Korean to Chinese
result_ko_zh = translate_to_chinese(article_ko, "ko_KR")
print(result_ko_zh)