In [6]:
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

ko_text = "안녕하세요, 감사해요, 잘 있어요, 다시 만나요"


model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")


def _translate_batch(texts, src_lang, tgt_lang):
    """Translate `texts` from `src_lang` to `tgt_lang` with the M2M100 model loaded above.

    Args:
        texts: A single string or a list of strings to translate.
        src_lang: Language code of the input (e.g. "ko").
        tgt_lang: Language code of the desired output (e.g. "ja").

    Returns:
        The list of decoded strings produced by `tokenizer.batch_decode`.
    """
    # The tokenizer reads the source language from this attribute when encoding.
    tokenizer.src_lang = src_lang
    encoded = tokenizer(texts, return_tensors="pt")
    # Forcing the first generated token to the target-language id selects the output language.
    generated_tokens = model.generate(
        **encoded, forced_bos_token_id=tokenizer.get_lang_id(tgt_lang)
    )
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)


# Forward translations from Korean.
kor_to_jap = _translate_batch(ko_text, "ko", "ja")
print('한국어 -> 일본어 :', kor_to_jap)

kor_to_eng = _translate_batch(ko_text, "ko", "en")
print('한국어 -> 영어 :', kor_to_eng)

# Round-trip back to Korean; kept under their own names so the forward
# results above are not overwritten.
jap_to_kor = _translate_batch(kor_to_jap, "ja", "ko")
print('일본어 -> 한국어 :', jap_to_kor)

eng_to_kor = _translate_batch(kor_to_eng, "en", "ko")
print('영어 -> 한국어 :', eng_to_kor)


한국어 -> 일본어 : ['こんにちは、ありがとうございます、元気です、また会いましょう。']
한국어 -> 영어 : ['Hello, thank you, it’s good, we’ll see you again.']
일본어 -> 한국어 : ['안녕하세요, 고마워요, 잘 지내고 다시 만나겠습니다.']
영어 -> 한국어 : ['안녕하세요, 감사합니다, 좋은 일입니다, 우리는 다시 당신을 볼 것입니다.']


In [8]:
# -*- coding: utf-8 -*-
"""
📖 다국어 번역기 (Gradio + M2M100)
- transformers의 facebook/m2m100_418M 모델 사용
- Gradio로 웹 UI 구성
"""

from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
import gradio as gr

def load_model(model_name: str = "facebook/m2m100_418M"):
    """Load an M2M100 model together with its matching tokenizer.

    Args:
        model_name: Checkpoint identifier passed to `from_pretrained` for both
            the model and the tokenizer. Defaults to "facebook/m2m100_418M".

    Returns:
        A `(model, tokenizer)` tuple.
    """
    # Load both pieces from the same checkpoint so the vocabularies always match.
    model = M2M100ForConditionalGeneration.from_pretrained(model_name)
    tokenizer = M2M100Tokenizer.from_pretrained(model_name)
    return model, tokenizer

# 1. Initialize the model and tokenizer
model, tokenizer = load_model()

# 2. Supported-language mapping: display label shown in the UI -> language code
LANGS = {
    "한국어": "ko",
    "영어": "en",
    "일본어": "ja",
    "중국어(간체)": "zh",
    "스페인어": "es",
    "프랑스어": "fr"
}

# 3. 번역 함수 정의

def translate(text: str, src_lang: str, tgt_lang: str) -> str:
    """Translate `text` with the module-level M2M100 `model` and `tokenizer`.

    Args:
        text: Sentence(s) to translate.
        src_lang: Input language code (e.g. 'ko').
        tgt_lang: Output language code (e.g. 'en').

    Returns:
        The translated string, or an empty string when `text` is empty or
        contains only whitespace.
    """
    # Nothing to translate: skip the model call instead of generating from an empty prompt.
    if not text or not text.strip():
        return ""

    # The tokenizer reads the source language from this attribute when encoding.
    tokenizer.src_lang = src_lang
    encoded = tokenizer(text, return_tensors="pt")
    # Forcing the first generated token to the target-language id selects the output language.
    generated = model.generate(
        **encoded,
        forced_bos_token_id=tokenizer.get_lang_id(tgt_lang)
    )
    # A single input string yields a single decoded sequence.
    return tokenizer.batch_decode(generated, skip_special_tokens=True)[0]

# 4. Gradio 인터페이스

def build_interface():
    """Assemble the Gradio interface for the translator.

    Inputs are a source-text box plus two language dropdowns populated from
    `LANGS`; the single output is a text box with the translation. The
    dropdown labels are mapped to language codes before calling `translate`.

    Returns:
        The configured `gr.Interface` (not yet launched).
    """
    return gr.Interface(
        # Dropdowns deliver display labels; convert them to language codes here.
        fn=lambda text, s, t: translate(text, LANGS[s], LANGS[t]),
        inputs=[
            gr.Textbox(
                lines=3,
                placeholder="번역할 텍스트를 입력하세요",
                label="원문",
            ),
            gr.Dropdown(choices=list(LANGS), label="원문 언어", value="한국어"),
            gr.Dropdown(choices=list(LANGS), label="목표 언어", value="영어"),
        ],
        outputs=gr.Textbox(label="번역 결과"),
        title="📖 다국어 번역기",
        description="facebook/m2m100_418M 모델 기반 간편 다국어 번역 서비스",
    )

# 5. Run the demo
if __name__ == "__main__":
    demo = build_interface()
    # Serve locally and also enable external sharing: share=True requests a
    # public URL in addition to the local one.
    demo.launch(share=True)



* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://4b533e89781be58a0a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [20]:
# !pip install datasets
# !pip install soundfile

In [21]:
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
from datasets import load_dataset
import torch
import soundfile as sf
from datasets import load_dataset

# Index of the x-vector used as the speaker voice, and the sample rate used
# when writing the generated waveform to disk.
SPEAKER_INDEX = 7306
SAMPLE_RATE = 16000

processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
# Bound to `tts_model` rather than `model` so the M2M100 translation model that
# earlier cells (and their `translate` function) read from the global `model`
# is not replaced by a text-to-speech model.
tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

inputs = processor(text="Hello, my dog is cute.", return_tensors="pt")

# load xvector containing speaker's voice characteristics from a dataset
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
# unsqueeze(0) adds a leading dimension to the stored x-vector.
speaker_embeddings = torch.tensor(embeddings_dataset[SPEAKER_INDEX]["xvector"]).unsqueeze(0)

speech = tts_model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)

sf.write("speech.wav", speech.numpy(), samplerate=SAMPLE_RATE)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip i

In [23]:
# -*- coding: utf-8 -*-
"""
📖 다국어 번역기 + 읽어주기 기능 (Gradio + M2M100 + SpeechT5)
- transformers의 facebook/m2m100_418M 모델로 번역
- microsoft/speecht5_tts 모델로 영어 TTS
- Gradio로 웹 UI 구성
"""

import torch
import soundfile as sf
import gradio as gr
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
from datasets import load_dataset

# 1. Initialize models and tokenizers (M2M100 for translation, SpeechT5 + HiFi-GAN vocoder for TTS)
model_mt = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
tokenizer_mt = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
processor_tts = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model_tts = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder_tts = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# 2. Supported-language mapping: display label shown in the UI -> language code
LANGS = {
    "한국어": "ko",
    "영어": "en",
    "일본어": "ja",
    "중국어(간체)": "zh",
    "스페인어": "es",
    "프랑스어": "fr"
}

# 3. Load a speaker embedding (for demonstration purposes)
# NOTE(review): index 7306 is a fixed example entry of the x-vector dataset;
# which speaker it corresponds to is not visible here — confirm before changing.
emb_ds = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embedding = torch.tensor(emb_ds[7306]["xvector"]).unsqueeze(0)

# 4. 번역 및 TTS 함수 정의

def translate_and_tts(text: str, src_lang_label: str, tgt_lang_label: str):
    """Translate `text` and, when the target language is English, synthesize speech for it.

    Args:
        text: Text to translate.
        src_lang_label: Display label of the source language (a key of `LANGS`).
        tgt_lang_label: Display label of the target language (a key of `LANGS`).

    Returns:
        A `(translated, audio)` tuple. `audio` is `(sample_rate, samples)` when
        the target language is English and the translation is non-empty,
        otherwise `None`. Empty or whitespace-only input yields `("", None)`
        without running either model.

    Raises:
        KeyError: If either label is not a key of `LANGS`.
    """
    sample_rate = 16000  # sample rate reported alongside the generated waveform

    # Nothing to translate or read aloud: skip both models.
    if not text or not text.strip():
        return "", None

    # Convert display labels to language codes.
    src_code = LANGS[src_lang_label]
    tgt_code = LANGS[tgt_lang_label]

    # Translation: the tokenizer reads the source language from this attribute,
    # and forcing the first generated token selects the output language.
    tokenizer_mt.src_lang = src_code
    encoded = tokenizer_mt(text, return_tensors="pt")
    gen = model_mt.generate(
        **encoded,
        forced_bos_token_id=tokenizer_mt.get_lang_id(tgt_code)
    )
    translated = tokenizer_mt.batch_decode(gen, skip_special_tokens=True)[0]

    # Speech is only generated for English output; also skip it when the
    # translation came back empty so the TTS model never gets an empty prompt.
    if tgt_code != "en" or not translated.strip():
        return translated, None

    inputs_tts = processor_tts(text=translated, return_tensors="pt")
    speech = model_tts.generate_speech(
        inputs_tts["input_ids"], speaker_embedding, vocoder=vocoder_tts
    )
    return translated, (sample_rate, speech.cpu().numpy())

# 5. Gradio 인터페이스 구축

def build_interface():
    """Assemble the Gradio interface for translation with optional read-aloud.

    Inputs are a source-text box plus two language dropdowns populated from
    `LANGS`; outputs are the translated text and an audio player. The
    callback is `translate_and_tts`.

    Returns:
        The configured `gr.Interface` (not yet launched).
    """
    return gr.Interface(
        fn=translate_and_tts,
        inputs=[
            gr.Textbox(lines=3, placeholder="번역할 텍스트 입력", label="원문"),
            gr.Dropdown(choices=list(LANGS), label="원문 언어", value="한국어"),
            gr.Dropdown(choices=list(LANGS), label="목표 언어", value="영어"),
        ],
        outputs=[
            gr.Textbox(label="번역 결과"),
            gr.Audio(label="읽어주기 (영어)"),
        ],
        title="📖 다국어 번역기 + 읽어주기",
        description="번역 및 영어에 한해 TTS(읽어주기) 기능을 제공합니다.",
    )

# 6. Run the demo
if __name__ == "__main__":
    demo = build_interface()
    demo.launch(share=True)


* Running on local URL:  http://127.0.0.1:7862
* Running on public URL: https://7f7fecc1c5668ee1ba.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


