In [1]:
!pip install argostranslate

Collecting argostranslate
  Downloading argostranslate-1.11.0-py3-none-any.whl.metadata (9.7 kB)
Collecting ctranslate2<5,>=4.0 (from argostranslate)
  Downloading ctranslate2-4.7.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (10 kB)
Collecting minisbd (from argostranslate)
  Downloading minisbd-0.9.3-py3-none-any.whl.metadata (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.2/47.2 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Collecting sacremoses<0.2,>=0.0.53 (from argostranslate)
  Downloading sacremoses-0.1.1-py3-none-any.whl.metadata (8.3 kB)
Collecting stanza==1.10.1 (from argostranslate)
  Downloading stanza-1.10.1-py3-none-any.whl.metadata (13 kB)
Collecting emoji (from stanza==1.10.1->argostranslate)
  Downloading emoji-2.15.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.10.0 (from minisbd->argostranslate)
  Downloading onnxruntime-1.24.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.0

In [2]:
import re
from pathlib import Path

import argostranslate.package
import argostranslate.translate


def _protect_markdown(text: str):
    """
    Protege segmentos que NO deben traducirse (bloques de código, inline code,
    URLs crudas, etc.) reemplazándolos por placeholders.
    """
    patterns = [
        # Bloques de código triple backtick
        (r"```[\s\S]*?```", "CODEBLOCK"),
        # Inline code `...`
        (r"`[^`\n]+`", "INLINECODE"),
        # URLs crudas
        (r"https?://[^\s)]+", "URL"),
    ]

    placeholders = []
    protected = text

    for pattern, tag in patterns:
        def repl(m):
            placeholders.append(m.group(0))
            return f"@@{tag}{len(placeholders)-1}@@"
        protected = re.sub(pattern, repl, protected)

    return protected, placeholders


def _unprotect_markdown(text: str, placeholders):
    """Restaura los placeholders a su contenido original."""
    def repl(m):
        idx = int(m.group(1))
        return placeholders[idx]
    return re.sub(r"@@[A-Z]+(\d+)@@", repl, text)


def ensure_argos_package(from_code="en", to_code="es"):
    """
    Descarga/instala el paquete de traducción Argos si no está instalado.
    (Requiere internet sólo para descargar el paquete la primera vez)
    """
    installed = argostranslate.translate.get_installed_languages()
    installed_codes = {lang.code for lang in installed}

    if from_code in installed_codes and to_code in installed_codes:
        # Podrían estar instalados pero no necesariamente el par; validamos luego.
        pass

    # Descarga el índice de paquetes y busca el par deseado
    argostranslate.package.update_package_index()
    available_packages = argostranslate.package.get_available_packages()

    pkg = next(
        (p for p in available_packages if p.from_code == from_code and p.to_code == to_code),
        None
    )
    if pkg is None:
        raise RuntimeError(f"No encontré paquete Argos para {from_code}->{to_code}.")

    # Instala
    download_path = pkg.download()
    argostranslate.package.install_from_path(download_path)


def translate_markdown(text: str, from_code="en", to_code="es") -> str:
    protected, placeholders = _protect_markdown(text)

    # Traduce todo lo protegido (con placeholders)
    translated = argostranslate.translate.translate(protected, from_code, to_code)

    # Restaura segmentos no traducibles
    restored = _unprotect_markdown(translated, placeholders)
    return restored


def main(
    input_md: str,
    output_md: str,
    from_code="en",
    to_code="es",
):
    in_path = Path(input_md)
    out_path = Path(output_md)

    md_text = in_path.read_text(encoding="utf-8")

    # Asegura modelo Argos
    ensure_argos_package(from_code=from_code, to_code=to_code)

    translated = translate_markdown(md_text, from_code=from_code, to_code=to_code)

    out_path.write_text(translated, encoding="utf-8")
    print(f"✅ Traducción guardada en: {out_path}")


if __name__ == "__main__":
    # Ejemplo:
    # main("input.md", "output_es.md", from_code="en", to_code="es")
    main("/content/L1_A4_English (1).md", "L1_A4_English_es.md", from_code="en", to_code="es")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
INFO:argostranslate.utils:('tokenized', [['▁in', 'de', 'b', 'ted', 'ness', '.'], ['▁The', '▁attempt', '▁to', '▁imita', 'te', '▁the', '▁US', '▁banking', '▁system', '▁favor', 'ed', '▁an']])
INFO:argostranslate.utils:('translated_batches', [TranslationResult(hypotheses=[['▁en', 'de', 'ud', 'amiento', '.']], scores=[-2.1035478115081787], attention=[], logits=[]), TranslationResult(hypotheses=[['▁El', '▁intento', '▁de', '▁imita', 'r', '▁el', '▁sistema', '▁bancario', '▁estadounidense', '▁favor', 'e', 'ció', '▁a', '▁un']], scores=[-2.980151653289795], attention=[], logits=[])])
INFO:argostranslate.utils:('value_hypotheses:', [('endeudamiento. El intento de imitar el sistema bancario estadounidense favoreció a un', -5.083699464797974)])
INFO:argostranslate.utils:('translated_paragraphs:', [[('endeudamiento. El intento de imitar el sistema bancario estadounidense favoreció a un', -5.083699464797974)]])
INFO:argostranslate.utils:('

✅ Traducción guardada en: L1_A4_English_es.md
