In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import gradio as gr

In [2]:
# Locations of the tokenizer files and of the model weights.
TOKENIZER_DIR = "./local_model/opus-mt-en-zh"
MODEL_DIR = "saved_model"

# Load the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR)

# Load the model. To load the weights from the tokenizer's directory instead,
# pass TOKENIZER_DIR here.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_DIR)

In [3]:
def translate_texts(texts, model, tokenizer, device):
    """Translate a batch of texts with a seq2seq model.

    Args:
        texts: A single string or a sequence of strings to translate.
        model: Model object exposing ``to(device)`` and ``generate(...)``.
        tokenizer: Callable tokenizer whose result supports ``.to(device)``
            and item access for ``input_ids`` / ``attention_mask``; it must
            also provide ``batch_decode``.
        device: Device on which both the model and the encoded inputs are
            placed before generation.

    Returns:
        A list with one decoded translation per input text (special tokens
        stripped). An empty input sequence yields an empty list.
    """
    # A bare string is treated as a batch of one, as the tokenizer would.
    batch = [texts] if isinstance(texts, str) else list(texts)
    if not batch:
        # Nothing to tokenize; avoid handing an empty batch to the tokenizer.
        return []

    # The inputs are moved to `device` below, so the model has to live there
    # too; otherwise generate() fails with a device mismatch whenever
    # `device` is not where the model was loaded.
    model.to(device)

    # Tokenize and encode the inputs (padded, truncated to 128 tokens).
    inputs = tokenizer(
        batch,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=128,
    ).to(device)

    # Generate the translations.
    outputs = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
    )

    # Decode the generated token ids back into text.
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

In [4]:
# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Sample English inputs.
texts = [
    "My god, that theme melody playing at the beginning... so nostalgic!",
    "My PC is sweating.",
    "Honestly using your best special forces in the slog of the Frontline in ukraine doesn't seem smart. In this conflict it doesn't matter how skilled you are as a soldier. You are just as likely to die as the freshly trained conscript next to you.  Special forces are going to be safer and more effective in places where they can play to their advantages.",
    "I've always been writing. Ever since I was three. I think we are all writing our own stories with every thought, word and action.",
    "As a lifelong sufferer of PTSD from an early childhood event...I just paused at 7:33 (with tears) because casting off old misperceptions of rejection is like another rejection in and of itself.  I have indeed been editing my story for several years now.   I know what I want my story to be.  You have just given me a window into a future that I can actually get to.  No.  More like an open door.  Thanks.",
    "I usually do something else while listening to ted talks or podcasts, but this one had me glued to the end",
    "I think investors should always put their cash to work, especially In 2024, we'll start to see more market diversification. I'm hoping to invest about $350k of my savings in stocks against next year. Hope to make millions in 2024",
]

# Run the whole batch through the model.
translations = translate_texts(texts, model, tokenizer, device)

# Show each source text next to its translation, followed by a blank line.
for text, translation in zip(texts, translations):
    print(f"原始文本: {text}\n翻译: {translation}\n")


原始文本: My god, that theme melody playing at the beginning... so nostalgic!
翻译: 天哪,那个主题从一开始就在演奏... 如此怀旧!

原始文本: My PC is sweating.
翻译: 我的个人电脑在出汗。

原始文本: Honestly using your best special forces in the slog of the Frontline in ukraine doesn't seem smart. In this conflict it doesn't matter how skilled you are as a soldier. You are just as likely to die as the freshly trained conscript next to you.  Special forces are going to be safer and more effective in places where they can play to their advantages.
翻译: 在这场冲突中,你作为士兵的技巧并不重要。 你和旁边受过训练的应征士兵一样可能死去。

原始文本: I've always been writing. Ever since I was three. I think we are all writing our own stories with every thought, word and action.
翻译: 我从三岁起就一直在写作,我觉得我们都在用一切思想、语言和行动写故事。

原始文本: As a lifelong sufferer of PTSD from an early childhood event...I just paused at 7:33 (with tears) because casting off old misperceptions of rejection is like another rejection in and of itself.  I have indeed been editing my story for several years now.   I know what

In [6]:
def translate_text(text):
    """Translate a single text; used as the Gradio callback.

    Args:
        text: The text to translate. ``None`` or a blank/whitespace-only
            string is accepted.

    Returns:
        The translated string, or ``""`` when there is nothing to translate.
    """
    # Do not send blank input to the model: it would still generate output
    # for an empty source, which the UI would show as a "translation".
    if text is None or not text.strip():
        return ""

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # translate_texts works on batches, so wrap the text and unwrap the result.
    translations = translate_texts([text], model, tokenizer, device)
    return translations[0]


# Build the Gradio interface: plain text in, plain text out, backed by
# translate_text.
interface_options = dict(
    fn=translate_text,
    inputs="text",
    outputs="text",
    title="文本翻译",
    description="输入英文文本，获取中文翻译结果",
)
iface = gr.Interface(**interface_options)

# Start serving the interface.
iface.launch()

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


