### 使用DeepL__翻译__ipynb文件

[DeepL API key redacted — supply it via the DEEPL_AUTH_KEY environment variable]

In [3]:
# ====== KISS: Translate Chinese in ipynb using DeepL (Markdown format preserved) ======
# Run this cell in your notebook. Only change INPUT/OUTPUT paths & CODE_MODE.

import os
import re
from pathlib import Path
import nbformat
import deepl

# --------- USER PATHS (edit only this section) ---------
# INPUT_IPYNB is the notebook that gets read; OUTPUT_IPYNB is where the
# translated copy is written (its parent directory is created if missing).
INPUT_IPYNB  = Path("/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/00_pipeline/01_stage1/10_ty_fo_tools.ipynb")
OUTPUT_IPYNB = Path("/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/00_pipeline/01_stage1/10_ty_fo_tools_en.ipynb")

# Code cell translation (consumed by translate_code; any other value makes it
# raise ValueError):
#   "none"     -> don't translate code cells
#   "comments" -> translate only Chinese inside comments (# or //)  (recommended)
#   "all"      -> translate any line containing Chinese (may affect strings)
CODE_MODE = "comments"
# ----------------------------------------

# --------- DeepL client ---------
# Required:
#   export DEEPL_AUTH_KEY="your_key"
# Optional for API Free:
#   export DEEPL_SERVER_URL="https://api-free.deepl.com"
#
# The key is read from the environment so that it is never stored in the
# notebook file. Any key that was ever pasted into this cell should be
# treated as exposed and rotated in the DeepL account settings.
auth_key = os.environ.get("DEEPL_AUTH_KEY", "").strip()
if not auth_key:
    raise RuntimeError("Missing DEEPL_AUTH_KEY env var. Set it before running.")

# An empty or unset DEEPL_SERVER_URL becomes None, i.e. no explicit server
# URL is passed to the client.
server_url = os.environ.get("DEEPL_SERVER_URL", "").strip() or None
translator = deepl.DeepLClient(auth_key=auth_key, server_url=server_url)

def deepl_translate(text: str) -> str:
    """Return `text` translated to US English by the module-level DeepL client.

    Falsy input, and input in which `has_zh` finds no Chinese characters, is
    handed back untouched without contacting the API.
    """
    if not text:
        return text
    if has_zh(text):
        result = translator.translate_text(text, target_lang="EN-US")
        return result.text
    return text

# --------- Chinese detection ---------
# One or more consecutive characters from these Unicode ranges:
#   U+3400-U+4DBF  CJK Unified Ideographs Extension A
#   U+4E00-U+9FFF  CJK Unified Ideographs
#   U+F900-U+FAFF  CJK Compatibility Ideographs
# Full-width punctuation lies outside these ranges, so it splits a run.
RE_ZH = re.compile(r"[\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF]+")


def has_zh(s: str) -> bool:
    """Tell whether `s` contains at least one character matched by RE_ZH."""
    return RE_ZH.search(s) is not None

# --------- Line translation strategies ---------
def translate_line_md_safe(line: str) -> str:
    """
    Markdown-preserving translation of a single line.

    Each run of Chinese characters matched by RE_ZH is sent to
    `deepl_translate` on its own and substituted in place; every other
    character (heading markers, bullets, emphasis, link syntax, ...) is left
    exactly as it was. Lines without Chinese are returned unchanged.
    """
    if has_zh(line):
        def _translate_span(match):
            # Replace just the matched Chinese run with its translation.
            return deepl_translate(match.group(0))

        return RE_ZH.sub(_translate_span, line)
    return line

def translate_line_general(line: str) -> str:
    """
    General-purpose line translation (used for raw cells and code comments).

    The whole line is translated first. Only when that comes back identical
    to the input does the function fall back to translating each Chinese run
    matched by RE_ZH separately. Lines without Chinese are returned unchanged.
    """
    if not has_zh(line):
        return line

    translated = deepl_translate(line)
    if translated == line:
        # Whole-line attempt changed nothing: translate span by span instead.
        translated = RE_ZH.sub(lambda span: deepl_translate(span.group(0)), line)
    return translated

# --------- Markdown: keep fenced code blocks unchanged ---------
def translate_markdown(md: str) -> str:
    """
    Translate the Chinese text of a markdown cell, skipping fenced code.

    The source is processed line by line. Lines that open or close a fenced
    code block, and every line between them, are copied verbatim; all other
    lines go through `translate_line_md_safe`. Both backtick (```) and tilde
    (~~~) fences are recognised, and a block is only closed by the same kind
    of fence that opened it, so a ``` line inside a ~~~ block (or vice versa)
    does not end the block.

    Args:
        md: Markdown source of one cell.

    Returns:
        The translated source; `md` itself when it is empty or contains no
        Chinese characters.
    """
    if not md or not has_zh(md):
        return md

    out = []
    open_fence = None  # "```" or "~~~" while inside a fenced block, else None
    for ln in md.splitlines(keepends=True):
        stripped = ln.lstrip()

        if open_fence is not None:
            # Inside fenced code: never translate; close on a matching fence.
            if stripped.startswith(open_fence):
                open_fence = None
            out.append(ln)
            continue

        if stripped.startswith("```"):
            open_fence = "```"
        elif stripped.startswith("~~~"):
            open_fence = "~~~"

        if open_fence is not None:
            # The opening fence line (with its info string) is kept verbatim.
            out.append(ln)
        else:
            out.append(translate_line_md_safe(ln))

    return "".join(out)

# --------- Raw cells ---------
def translate_raw(text: str) -> str:
    """Translate a raw cell line by line with `translate_line_general`.

    Empty input and input without Chinese characters are returned as-is;
    otherwise only the lines that contain Chinese are translated and line
    endings are preserved.
    """
    if not text:
        return text
    if not has_zh(text):
        return text

    pieces = []
    for raw_line in text.splitlines(keepends=True):
        if has_zh(raw_line):
            pieces.append(translate_line_general(raw_line))
        else:
            pieces.append(raw_line)
    return "".join(pieces)

# --------- Code cells ---------
def _find_comment_start(line: str, marker: str) -> int:
    """Return the index of the first `marker` outside a quoted string, or -1.

    Single- and double-quoted literals are skipped, honouring backslash
    escapes, so markers inside e.g. "http://host" or "#tag" are ignored.
    A quote left open at the end of the line hides the rest of that line.
    """
    quote = None  # the quote character of the literal we are inside, if any
    i = 0
    n = len(line)
    while i < n:
        ch = line[i]
        if quote is not None:
            if ch == "\\":
                i += 2  # skip the escaped character as well
                continue
            if ch == quote:
                quote = None
        elif ch in "\"'":
            quote = ch
        elif line.startswith(marker, i):
            return i
        i += 1
    return -1


def translate_code_comments(code: str) -> str:
    """
    Translate Chinese only inside comments (# or //), keep code intact.

    For each line the comment start is the first `//` found outside a string
    literal, or failing that the first `#` outside a string literal. The text
    before it is copied verbatim; the comment part is passed to
    `translate_line_general` when it contains Chinese. Markers that occur
    inside quoted strings (URLs, "#标签", ...) are not treated as comments, so
    string contents are left alone.

    NOTE: `//` is still tried before `#`, so a Python floor division followed
    by a `#` comment on the same line is split at the `//`. Lines inside a
    multi-line string are not recognised as such.

    Args:
        code: Source of one code cell.

    Returns:
        The cell source with Chinese comment text translated; `code` itself
        when it is empty or contains no Chinese characters.
    """
    if not code or not has_zh(code):
        return code

    out = []
    for ln in code.splitlines(keepends=True):
        # Nothing to translate on this line: skip the scan entirely.
        if not has_zh(ln):
            out.append(ln)
            continue

        start = _find_comment_start(ln, "//")
        if start == -1:
            start = _find_comment_start(ln, "#")
        if start == -1:
            # Chinese appears only in code/strings: leave the line untouched.
            out.append(ln)
            continue

        prefix, comment = ln[:start], ln[start:]
        out.append(prefix + (translate_line_general(comment) if has_zh(comment) else comment))

    return "".join(out)

def translate_code_all(code: str) -> str:
    """
    Aggressive mode: run every line that contains Chinese through
    `translate_line_general`, whether the Chinese sits in a comment, a string
    or anywhere else. Empty or Chinese-free input is returned unchanged.
    """
    if not (code and has_zh(code)):
        return code

    translated = [
        translate_line_general(src_line) if has_zh(src_line) else src_line
        for src_line in code.splitlines(keepends=True)
    ]
    return "".join(translated)

def translate_code(code: str) -> str:
    """Translate a code cell according to the module-level CODE_MODE.

    "none" returns the source untouched, "comments" delegates to
    `translate_code_comments`, "all" delegates to `translate_code_all`.

    Raises:
        ValueError: if CODE_MODE is none of the three supported values.
    """
    mode = CODE_MODE
    if mode == "none":
        return code

    handlers = (
        ("comments", translate_code_comments),
        ("all", translate_code_all),
    )
    for name, handler in handlers:
        if mode == name:
            return handler(code)

    raise ValueError(f"Unknown CODE_MODE: {CODE_MODE}")

# --------- Process notebook (structure preserved) ---------
# Load the input notebook as nbformat v4, rewrite only each cell's `source`
# with the translator for its cell type, then save to the output path.
nb = nbformat.read(str(INPUT_IPYNB), as_version=4)

_CELL_TRANSLATORS = {
    "markdown": translate_markdown,
    "raw": translate_raw,
    "code": translate_code,
}

for cell in nb.cells:
    _translate = _CELL_TRANSLATORS.get(cell.cell_type)
    if _translate is None:
        # Unknown cell type: leave it exactly as it is.
        continue
    # Outputs and metadata are left untouched; only the source text changes.
    cell.source = _translate(cell.source)

# Make sure the destination directory exists before writing.
OUTPUT_IPYNB.parent.mkdir(parents=True, exist_ok=True)
nbformat.write(nb, str(OUTPUT_IPYNB))

print(f"✅ Saved to: {OUTPUT_IPYNB}")


✅ Saved to: /home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/00_pipeline/01_stage1/10_ty_fo_tools_en.ipynb
