In [1]:
from faster_whisper import WhisperModel

In [2]:
from transformers import MarianMTModel, MarianTokenizer

In [3]:
model_name ='Helsinki-NLP/opus-mt-ja-en'

model_translator = MarianMTModel.from_pretrained(model_name)
tokenizer = MarianTokenizer.from_pretrained(model_name)

def translate_text(input_text):
    input_ids = tokenizer.encode(input_text, return_tensors = 'pt', padding = True, truncation = True)

    translation_ids = model_translator.generate(
        input_ids,
        max_length = 200,
        num_beams = 5,
        length_penalty = 1.0,
        no_repeat_ngram_size = 5,
        top_k = 5,
        top_p = 0.95,
        early_stopping = False,
        do_sample = True
    )

    translated_text = tokenizer.decode(translation_ids[0], skip_special_tokens = True)

    return translated_text


In [4]:
model_size = "small"

# Run on GPU with FP16
model = WhisperModel(model_size, device="cuda", compute_type="float16")

segments, info = model.transcribe("song5.mp3", beam_size=5)

print("Detected language '%s' with probability %f" % (info.language, info.language_probability))

for segment in segments:
    print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))


Detected language 'ja' with probability 0.979980
[0.00s -> 5.00s] 夜の空を飾る 綺麗な花
[5.00s -> 11.00s] 街の声をキュッと 光が包み込む
[11.00s -> 19.00s] 音のナイフ二人だけの 世界で聞こえた言葉は好きだよ
[31.00s -> 37.00s] 夢の中で見えた 未来のこと
[37.00s -> 43.00s] 夏の夜 君と 並ぶ影が二つ
[43.00s -> 49.00s] 最後の花火が 空に乗って消えたら
[49.00s -> 52.00s] それを愛する
[52.00s -> 56.00s] いつも通りの 朝に
[56.00s -> 59.00s] いつも通りの 朝に
[59.00s -> 62.00s] いつも通りの 君の姿
[62.00s -> 67.00s] 思わず目をそらしてしまったのは
[67.00s -> 71.00s] どうやったって 忘れられない
[71.00s -> 77.00s] 君の言葉 今もずっと響いてるから
[79.00s -> 85.00s] 夜を抜けて 夢の先
[85.00s -> 89.00s] 辿り着きたい 未来
[89.00s -> 92.00s] 本当にあの夢に
[92.00s -> 100.00s] 本当にって 今も不安になってしまうけどきっと
[100.00s -> 106.00s] ああ 今を抜けて 明日の先
[106.00s -> 110.00s] 二人だけの場所へ
[110.00s -> 113.00s] もうちょっと どうか変わらないで
[113.00s -> 116.00s] もうちょっと 君からの言葉
[116.00s -> 118.00s] あの未来で待っているよ
[135.00s -> 141.00s] 二人だけの夜
[141.00s -> 146.00s] 待ちこがれていた景色と重なる
[146.00s -> 150.00s] 夏の空に 未来と今 繋がるように
[150.00s -> 153.00s] 開く花火 君とここで
[153.00s -> 157.00s] ほら あの夢をなぞる
[157.00s -> 162.00s] 見上げた空を飾る
[162.00s -> 168.00s] 光が今 照ら

In [5]:
segments, info = model.transcribe("song5.mp3", beam_size=5)

print("Detected language '%s' with probability %f" % (info.language, info.language_probability))

for segment in segments:
    print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
    translated_text = translate_text(segment.text)
    print(translated_text)


Detected language 'ja' with probability 0.979980
[0.00s -> 5.00s] 夜の空を飾る 綺麗な花
Beautiful flowers in the night sky.
[5.00s -> 11.00s] 街の声をキュッと 光が包み込む
The city's voice is wrapped in light.
[11.00s -> 19.00s] 音のナイフ二人だけの 世界で聞こえた言葉は好きだよ
I like the words you heard in the world, just two sound knives.
[31.00s -> 37.00s] 夢の中で見えた 未来のこと
The future I saw in my dreams.
[37.00s -> 43.00s] 夏の夜 君と 並ぶ影が二つ
It's summer night. There's two shadows that line you.
[43.00s -> 49.00s] 最後の花火が 空に乗って消えたら
When the last fireworks go out in the sky,
[49.00s -> 52.00s] それを愛する
I love it.
[52.00s -> 56.00s] いつも通りの 朝に
The usual morning.
[56.00s -> 59.00s] いつも通りの 朝に
The usual morning.
[59.00s -> 62.00s] いつも通りの 君の姿
Just like you always do.
[62.00s -> 67.00s] 思わず目をそらしてしまったのは
(Laughter)
[67.00s -> 71.00s] どうやったって 忘れられない
I can't help but forget how I did it.
[71.00s -> 77.00s] 君の言葉 今もずっと響いてるから
Your words are still ringing.
[79.00s -> 85.00s] 夜を抜けて 夢の先
Through the night, beyond the dreams.
[85.00s -> 89.00s] 辿り着きたい 未来
I want 