In [1]:
!pip install requests



In [2]:
import re

def split_text(text, chunk_size=500):
    """Split ``text`` into chunks of at most ``chunk_size`` characters.

    The text is cut into sentences after any of ``。！？.!?;；`` or a newline.
    Consecutive sentences are packed into the same chunk while they fit, and
    the whitespace that separated them in ``text`` is kept inside the chunk so
    that neighbouring words and lines are not glued together. Whitespace at
    chunk boundaries is dropped. A single sentence longer than ``chunk_size``
    is hard-cut into consecutive slices of ``chunk_size`` characters.

    Args:
        text: The text to split.
        chunk_size: Maximum chunk length in characters; must be positive.

    Returns:
        A list of non-empty strings in their original order. Empty or
        whitespace-only input yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        # A zero step makes range() fail and a negative one silently yields
        # nothing (dropping text), so reject both up front.
        raise ValueError("chunk_size must be a positive integer")

    # The capturing group makes re.split also return the whitespace found
    # between sentences, so it can be restored when sentences are re-joined.
    pieces = re.split(r"(?<=[。！？.!?;；\n])(\s*)", text)

    # Pair every stripped sentence with the whitespace that preceded it.
    sentences = []
    gap = ""
    for piece in pieces:
        core = piece.strip()
        if not core:
            # Pure whitespace (or empty): part of the gap before the next sentence.
            gap += piece
            continue
        leading = piece[: len(piece) - len(piece.lstrip())]
        sentences.append((gap + leading, core))
        # A sentence ending in "\n" carries its separator as trailing whitespace.
        gap = piece[len(piece.rstrip()):]

    chunks = []
    current_chunk = ""
    for separator, sentence in sentences:
        if len(sentence) > chunk_size:
            # Oversized sentence: flush what we have, then hard-cut it.
            if current_chunk:
                chunks.append(current_chunk)
            chunks.extend(
                sentence[i:i + chunk_size]
                for i in range(0, len(sentence), chunk_size)
            )
            current_chunk = ""
        elif not current_chunk:
            # First sentence of a chunk: the preceding whitespace is dropped.
            current_chunk = sentence
        elif len(current_chunk) + len(separator) + len(sentence) <= chunk_size:
            current_chunk += separator + sentence
        else:
            chunks.append(current_chunk)
            current_chunk = sentence

    if current_chunk:
        chunks.append(current_chunk)

    return chunks

In [3]:
import requests

def translate_with_gemma3_4b(
    text,
    target_language="zh",
    url="http://yourhost:11434/api/generate",
    model="gemma3:4b",
    timeout=300,
):
    """Ask a text-generation HTTP endpoint to translate ``text``.

    A JSON body with the model name, a Chinese-language prompt asking for a
    translation of ``text`` into ``target_language``, and ``stream=False`` is
    POSTed to ``url``. The endpoint shape (port 11434, ``/api/generate``,
    ``response`` field) looks like an Ollama server -- confirm against your
    deployment.

    Args:
        text: The text to translate.
        target_language: Language label interpolated into the prompt.
        url: Endpoint that receives the generate request.
        model: Model name sent in the request body.
        timeout: Seconds passed to ``requests.post`` as its timeout; ``None``
            waits indefinitely.

    Returns:
        The ``"response"`` field of the JSON reply (``""`` if the field is
        missing), or ``""`` when the server answers with a non-200 status.

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            the timeout elapses (raised by ``requests``; not caught here).
    """
    payload = {
        "model": model,
        "prompt": f"请将以下文本翻译为{target_language}文：{text}",
        "stream": False,
    }
    # json= serializes the payload and sets the Content-Type header itself,
    # so no explicit headers are needed. The timeout keeps a stalled server
    # from blocking the notebook forever.
    response = requests.post(url, json=payload, timeout=timeout)
    if response.status_code != 200:
        # Best-effort: report the failure and let the caller continue.
        print(f"翻译请求失败，状态码：{response.status_code}")
        return ""
    return response.json().get("response", "")

In [4]:
def batch_translate_file(input_file_path, output_file_path, target_language="zh"):
    """Translate a UTF-8 text file chunk by chunk and write the result.

    The input file is read in full, split with ``split_text`` (default chunk
    size), each chunk is passed to ``translate_with_gemma3_4b``, and the
    returned strings are concatenated without any separator and written to
    the output file, which is overwritten.

    Args:
        input_file_path: Path of the UTF-8 text file to translate.
        output_file_path: Path of the UTF-8 file to write.
        target_language: Language label forwarded to the translator.
    """
    with open(input_file_path, "r", encoding="utf-8") as source:
        source_text = source.read()

    # Every chunk is translated before the output file is opened, so a
    # failure during translation leaves any existing output file untouched.
    translated_parts = [
        translate_with_gemma3_4b(piece, target_language)
        for piece in split_text(source_text)
    ]

    with open(output_file_path, "w", encoding="utf-8") as sink:
        sink.write("".join(translated_parts))

In [None]:
# Replace with the path of the file to translate.
input_file = "01.note.txt"
# Replace with the path where the translation should be written.
output_file = "output_file.txt"
batch_translate_file(input_file_path=input_file, output_file_path=output_file)