In [None]:
# Multilingual Content Translation use case


In [None]:
# multilingual_translation.py

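# Install dependencies (run this once, separately, before executing the script).
# MarianMTModel is a PyTorch model, so torch must be installed alongside transformers.
# !pip install transformers sentencepiece torch --quiet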

In [None]:
# save as multilingual_translation.py

import json
import os
import re

from transformers import MarianMTModel, MarianTokenizer, pipeline

# -------------------------------
# 1. CONFIGURATION
# -------------------------------
# Language setup: documents are translated from one source language into each target.
SOURCE_LANG = "en"
TARGET_LANGS = [
    "fr",
    "de",
    "es",
    "it",
    "pt",
]  # Add more as needed

# Folders the script reads from and writes to (relative to the working directory).
INPUT_DIR, OUTPUT_DIR = "business_docs/", "translated_docs/"

# Create the output folder up front; this is a no-op when it already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------------------------------
# 2. LOAD TRANSLATION MODELS
# -------------------------------
# Maps each target language code to its ready-to-call translation pipeline.
# A language whose checkpoint cannot be loaded is reported and left out of the mapping.
translation_pipelines = {}

for target_code in TARGET_LANGS:
    # One Helsinki-NLP OPUS-MT checkpoint per source/target pair.
    checkpoint = f"Helsinki-NLP/opus-mt-{SOURCE_LANG}-{target_code}"
    try:
        print(f"Loading model for: {SOURCE_LANG} → {target_code}")
        marian_tokenizer = MarianTokenizer.from_pretrained(checkpoint)
        marian_model = MarianMTModel.from_pretrained(checkpoint)
        loaded_pipeline = pipeline(
            "translation",
            model=marian_model,
            tokenizer=marian_tokenizer,
        )
    except Exception as load_error:
        # Best effort: report the failure and carry on with the remaining languages.
        print(f"❌ Could not load model for {SOURCE_LANG}-{target_code}: {load_error}")
    else:
        translation_pipelines[target_code] = loaded_pipeline

# -------------------------------
# 3. PROCESS DOCUMENTS
# -------------------------------
# Rough character budget for a single model call. Sending a whole document in one
# call means long documents cannot be translated in full, so each line is broken
# into sentence-aligned chunks instead.
# NOTE(review): 1000 characters is a conservative guess meant to stay below the
# model's input limit — tune if needed. truncation=True in the translation call
# covers any chunk that is still too long.
MAX_CHUNK_CHARS = 1000


def _chunk_line(line, max_chars=MAX_CHUNK_CHARS):
    """Split one line of text into chunks of roughly at most ``max_chars`` characters.

    A line that already fits is returned unchanged as a single chunk. Longer
    lines are split after sentence-ending punctuation (., ! or ?) and the
    sentences are packed greedily into chunks. A single sentence longer than
    ``max_chars`` is kept whole as its own chunk.

    Args:
        line: A non-empty line of source text.
        max_chars: Character budget per chunk.

    Returns:
        A list of chunk strings, in reading order.
    """
    if len(line) <= max_chars:
        return [line]

    sentences = [s for s in re.split(r"(?<=[.!?])\s+", line.strip()) if s]
    chunks = []
    current = ""
    for sentence in sentences:
        candidate = f"{current} {sentence}" if current else sentence
        if current and len(candidate) > max_chars:
            # Adding this sentence would exceed the budget: close the chunk.
            chunks.append(current)
            current = sentence
        else:
            current = candidate
    if current:
        chunks.append(current)
    return chunks


def _translate_text(translator, text):
    """Translate ``text`` line by line, keeping line breaks and blank lines.

    Args:
        translator: A translation pipeline; called as
            ``translator(chunk, max_length=512, truncation=True)`` and expected to
            return a list whose first item has a ``"translation_text"`` key.
        text: The full document text. Empty text yields an empty string without
            calling the model.

    Returns:
        The translated text, with one output line per input line.
    """
    translated_lines = []
    for line in text.splitlines():
        if not line.strip():
            # Preserve paragraph breaks without sending whitespace to the model.
            translated_lines.append("")
            continue
        pieces = [
            translator(chunk, max_length=512, truncation=True)[0]["translation_text"]
            for chunk in _chunk_line(line)
        ]
        translated_lines.append(" ".join(pieces))
    return "\n".join(translated_lines)


# Translate every .txt file in INPUT_DIR into each loaded target language and
# write one JSON file per document to OUTPUT_DIR. Sorting makes the run order
# deterministic across platforms.
for file_name in sorted(os.listdir(INPUT_DIR)):
    input_path = os.path.join(INPUT_DIR, file_name)
    # Skip anything that is not a regular .txt file (e.g. a folder named "x.txt").
    if not (file_name.endswith(".txt") and os.path.isfile(input_path)):
        continue

    with open(input_path, "r", encoding="utf-8") as f:
        text = f.read().strip()

    # The source text is stored under its own language code alongside the translations.
    translations = {SOURCE_LANG: text}

    for lang_code, translator in translation_pipelines.items():
        try:
            print(f"Translating {file_name} to {lang_code}...")
            translations[lang_code] = _translate_text(translator, text)
        except Exception as e:
            # Best effort: record a placeholder so the other languages are still saved.
            print(f"⚠️ Translation error for {file_name} [{lang_code}]: {e}")
            translations[lang_code] = "[Translation Error]"

    # Save to JSON. Only the trailing ".txt" is stripped, so names that contain
    # ".txt" elsewhere keep the rest of their name intact.
    base_name = file_name[: -len(".txt")]
    output_file = os.path.join(OUTPUT_DIR, base_name + "_translations.json")
    with open(output_file, "w", encoding="utf-8") as out_f:
        json.dump(translations, out_f, ensure_ascii=False, indent=2)

    print(f"✅ Saved: {output_file}")

In [None]:
# Sample project folder structure, showing the input documents and the translated output

project_root/
├── multilingual_translation.py
├── business_docs/
│   ├── example1.txt
│   └── example2.txt
└── translated_docs/
    └── example1_translations.json



In [None]:
# Sample Input: business_docs/example1.txt

Welcome to our product support portal. We are here to help you with any issues you may encounter.




In [None]:
# Sample Output: translated_docs/example1_translations.json

{
  "en": "Welcome to our product support portal. We are here to help you with any issues you may encounter.",
  "fr": "Bienvenue sur notre portail d'assistance produit. Nous sommes là pour vous aider en cas de problème.",
  "de": "Willkommen auf unserem Produkt-Support-Portal. Wir helfen Ihnen bei allen Problemen weiter.",
  "es": "Bienvenido a nuestro portal de soporte de productos. Estamos aquí para ayudarte con cualquier problema.",
  "it": "Benvenuto nel nostro portale di supporto prodotto. Siamo qui per aiutarti con qualsiasi problema.",
  "pt": "Bem-vindo ao nosso portal de suporte ao produto. Estamos aqui para ajudá-lo com qualquer problema."
}



In [None]:
# Optional enhancements
- Preserve Markdown/HTML markup during translation (e.g., parse with BeautifulSoup and translate only the text nodes).
- Connect the output to a CMS (Contentful, WordPress).
- Add logging or database storage for large-scale use.
