# 🇩🇪→🇬🇧 PDF Translator

Translate German PDF documents into English using a pre-trained Transformer model (MarianMT) from Hugging Face.

## 📦 Install Dependencies

In [None]:
!pip install transformers pdfplumber fpdf

## 📚 Import Libraries

In [None]:
from transformers import MarianMTModel, MarianTokenizer
import pdfplumber
from fpdf import FPDF
from google.colab import files

## 📤 Upload a German PDF File

In [None]:
print("⬆️ Please upload a German PDF file")
uploaded = files.upload()
# Iterating `uploaded` yields the uploaded file names. Match the extension
# case-insensitively so names such as "SCAN.PDF" are accepted as well.
pdf_names = [name for name in uploaded if name.lower().endswith('.pdf')]
if not pdf_names:
    # Fail with an actionable message instead of a bare IndexError from [0].
    raise ValueError(
        "No PDF file was uploaded; re-run this cell and choose a .pdf file."
    )
# Only the first PDF is translated, even if several were uploaded.
input_pdf = pdf_names[0]

## 🔁 Load Pre-trained Translation Model (German to English)

In [None]:
# Identifier of the pre-trained German -> English MarianMT checkpoint.
model_name = 'Helsinki-NLP/opus-mt-de-en'
# Load the tokenizer and model for that checkpoint; both are kept as
# module-level globals and used by translate_de_to_en.
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

## 🔤 Define Translation Function

In [None]:
def translate_de_to_en(text):
    """Translate a single German string into English.

    Relies on the module-level ``tokenizer`` and ``model`` loaded earlier
    in the notebook.

    Args:
        text: German text to translate.

    Returns:
        The English translation with special tokens stripped, or an empty
        string when ``text`` is empty or whitespace-only.
    """
    # Nothing to translate; skip the model call entirely.
    if not text or not text.strip():
        return ""
    # Call the tokenizer directly: ``prepare_seq2seq_batch`` is deprecated and
    # slated for removal in newer Transformers releases. Padding and
    # truncation are requested explicitly so over-long input is cut to the
    # model's maximum input length instead of failing inside ``generate``.
    batch = tokenizer([text], return_tensors="pt", padding=True, truncation=True)
    gen = model.generate(**batch)
    # A single input was passed, so the first generated sequence is the result.
    return tokenizer.decode(gen[0], skip_special_tokens=True)

## 📄 Translate the PDF and Save Output

In [None]:
def _latin1_safe(text):
    """Return ``text`` with every non-Latin-1 character replaced by ``?``."""
    return text.encode("latin-1", "replace").decode("latin-1")


def translate_pdf(input_pdf_path, output_pdf_path):
    """Translate a German PDF line by line and write the English text to a new PDF.

    Each non-empty text line extracted from the input is translated with
    ``translate_de_to_en``, echoed to stdout next to its source line, and
    appended to the output document. Only text is carried over; pages with
    no extractable text are skipped.

    Args:
        input_pdf_path: Path of the German PDF to read.
        output_pdf_path: Path the translated PDF is written to.
    """
    pdf_out = FPDF()
    pdf_out.set_auto_page_break(auto=True, margin=15)
    pdf_out.add_page()
    pdf_out.set_font("Arial", size=12)

    with pdfplumber.open(input_pdf_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            # Skip pages where no text could be extracted.
            if not text:
                continue
            for line in text.split("\n"):
                line = line.strip()
                if not line:
                    continue
                translated = translate_de_to_en(line)
                print(f"🇩🇪 {line}\n🇬🇧 {translated}\n")
                # The built-in "Arial" font only covers Latin-1. Characters
                # outside it (curly quotes, dashes, the euro sign, ...) would
                # raise UnicodeEncodeError and abort the whole run, so they
                # are replaced with "?" before being written.
                pdf_out.multi_cell(0, 10, _latin1_safe(translated))

    pdf_out.output(output_pdf_path)
    print(f"✅ Translated PDF saved as: {output_pdf_path}")

## ▶️ Run the Translation

In [None]:
# File name (relative path) of the translated PDF to produce.
output_pdf = "translated_output.pdf"
# Translate the uploaded PDF (input_pdf) and write the result to output_pdf.
translate_pdf(input_pdf, output_pdf)

## 📥 Download Translated PDF

In [None]:
# Hand the translated PDF to the google.colab `files` helper for download.
files.download(output_pdf)