In [32]:
import getpass
import os
import textwrap

import fitz  # PyMuPDF for PDF processing
import openai  # For text translation
from docx import Document  # For Word document manipulation
from PyPDF2 import PdfWriter  # To convert Word to PDF

In [None]:

# Set up OpenAI API key
# The key is never written into the notebook: it is taken from the
# OPENAI_API_KEY environment variable, and only when that variable is missing
# (or empty) is the user prompted for it. getpass keeps the typed value out of
# the cell output, so saving or sharing the notebook does not leak the secret.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Mirror the value onto the module-level setting, as the original cell did.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [34]:

# Remove quotation marks the model wrapped around its reply (helper for translate_text).
def _strip_wrapping_quotes(translated, source):
    quote_chars = "\"'“”‘’«»"
    original = source.strip()
    # If the source itself opens with a quotation mark, quotes in the reply are
    # most likely part of the content, so leave the reply alone.
    if original and original[0] in quote_chars:
        return translated

    candidate = translated.strip()
    if len(candidate) >= 2 and candidate[0] == '"' and candidate[-1] == '"':
        return candidate[1:-1]
    return translated


# Translate text using OpenAI
def translate_text(text, target_language="French", model="gpt-4", max_tokens=None):
    """Translate ``text`` into ``target_language`` with an OpenAI chat model.

    Args:
        text: The text to translate. Blank or whitespace-only input is
            returned unchanged without calling the API.
        target_language: Name of the language to translate into.
        model: Chat model to use, e.g. "gpt-4" or "gpt-3.5-turbo".
        max_tokens: Optional cap on the length of the reply. Left unset by
            default, because callers pass whole pages of text and a small
            fixed cap silently cuts the translation short.

    Returns:
        The translated text. The original ``text`` is returned instead when
        the input is blank, when the request raises, or when the model sends
        back an empty reply, so callers always receive a string.
    """
    if not text.strip():
        return text  # Skip empty strings

    # The source text is delimited with tags rather than quotation marks:
    # quoting it made the model echo the quotes around every translation.
    prompt = (
        f"Translate the text between the <text> tags into {target_language}, "
        "maintaining formatting and tone. Reply with the translation only: "
        "no tags, no surrounding quotation marks, no commentary.\n\n"
        f"<text>\n{text}\n</text>"
    )

    request = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are an expert translator."},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.7,
    }
    if max_tokens is not None:
        request["max_tokens"] = max_tokens

    try:
        client = openai.Client()
        response = client.chat.completions.create(**request)
        translated_text = response.choices[0].message.content
    except Exception as e:
        # Best effort: a failed request must not abort a whole document, so
        # report the problem and fall back to the untranslated text.
        print(f"Error translating text: {e}")
        return text

    if not translated_text or not translated_text.strip():
        print("Error translating text: empty response")
        return text

    # Defensive clean-up in case the model still wraps its reply in quotes.
    translated_text = _strip_wrapping_quotes(translated_text, text)
    print("Coversions: ", translated_text)
    return translated_text

In [35]:

# Step 1: Convert PDF to Word
def convert_pdf_to_word(input_pdf, output_word):
    """Copy the plain text of every PDF page into a new Word document.

    Each page that contains non-blank text becomes one paragraph, followed by
    an empty paragraph that acts as a spacer between pages. Pages without any
    extractable text are skipped. Only plain text is extracted, so nothing
    else from the PDF is written to the Word file.

    Args:
        input_pdf: Path of the PDF to read.
        output_word: Path of the .docx file to write. Its parent directory is
            created if it does not exist yet.
    """
    doc = Document()

    # The context manager closes the PDF even when extraction raises.
    with fitz.open(input_pdf) as pdf_document:
        for page in pdf_document:
            text = page.get_text("text")  # Extract plain text
            if text.strip():  # Avoid empty pages
                doc.add_paragraph(text)  # Add text to Word document
                doc.add_paragraph("")  # Add spacing between pages

    # Saving into a missing directory would fail, so make sure it exists.
    os.makedirs(os.path.dirname(os.path.abspath(output_word)), exist_ok=True)
    doc.save(output_word)
    print(f"Converted PDF to Word: {output_word}")

In [36]:

# Step 2: Translate Text in Word
def translate_word(input_word, output_word, target_language="French"):
    """Translate every non-blank paragraph of a Word file and save a copy.

    Args:
        input_word: Path of the .docx file to read.
        output_word: Path the translated .docx file is written to.
        target_language: Language passed on to ``translate_text``.
    """
    document = Document(input_word)

    # Blank paragraphs are left exactly as they are; the filter is lazy, so
    # each paragraph is checked right before it is translated.
    paragraphs_with_text = (para for para in document.paragraphs if para.text.strip())
    for para in paragraphs_with_text:
        para.text = translate_text(para.text, target_language)

    document.save(output_word)
    print(f"Translated Word document saved to: {output_word}")


In [37]:

# Step 3: Convert Word back to PDF
def convert_word_to_pdf(input_word, output_pdf):
    """Export a Word document to PDF by automating Microsoft Word over COM.

    Args:
        input_word: Path of the .docx file to convert.
        output_pdf: Path of the PDF to write. Its parent directory is created
            if it does not exist yet.

    The document and the Word application are always closed, even when
    opening or exporting fails, so no Word process is left running.
    """
    # Imported here rather than at the top of the notebook so the other cells
    # still work where comtypes is not installed.
    from comtypes.client import CreateObject

    wd_format_pdf = 17  # Word's WdSaveFormat value for PDF

    source_path = os.path.abspath(input_word)
    target_path = os.path.abspath(output_pdf)
    os.makedirs(os.path.dirname(target_path), exist_ok=True)

    word = CreateObject("Word.Application")
    try:
        doc = word.Documents.Open(source_path)
        try:
            doc.SaveAs(target_path, FileFormat=wd_format_pdf)
        finally:
            doc.Close()
    finally:
        word.Quit()
    print(f"Converted Word to PDF: {output_pdf}")

In [38]:

# Or alternative: using PyMuPDF if no Word application is available
def alternative_word_to_pdf(input_word, output_pdf):
    """Render the text of a Word document into a plain PDF without Word.

    The previous PyPDF2-based version could not run: ``PdfWriter`` has no
    ``add_text`` method and cannot add an unsized blank page to an empty
    writer. This version lays the text out with PyMuPDF instead.

    Args:
        input_word: Path of the .docx file to read.
        output_pdf: Path of the PDF to write.

    Only paragraph text is carried over, as plain lines of Courier on A4
    pages. NOTE(review): characters the built-in Courier font cannot encode
    may not render correctly - use ``convert_word_to_pdf`` when full fidelity
    is needed.
    """
    page_width, page_height = 595, 842  # A4 in points
    margin = 72  # one inch
    font_name, font_size = "cour", 11
    line_height = font_size * 1.4
    # Courier is monospaced with glyphs 0.6 em wide, so a fixed character
    # budget per line keeps the text inside the margins.
    chars_per_line = int((page_width - 2 * margin) / (font_size * 0.6))

    # Flatten the document into display lines, wrapping long ones.
    lines = []
    for paragraph in Document(input_word).paragraphs:
        for raw_line in paragraph.text.splitlines() or [""]:
            lines.extend(textwrap.wrap(raw_line, chars_per_line) or [""])

    pdf = fitz.open()  # new, empty PDF
    try:
        page = None
        baseline = 0
        for line in lines:
            # Start a new page at the beginning and whenever the current one is full.
            if page is None or baseline > page_height - margin:
                page = pdf.new_page(width=page_width, height=page_height)
                baseline = margin + font_size
            if line:
                page.insert_text(
                    (margin, baseline), line, fontname=font_name, fontsize=font_size
                )
            baseline += line_height

        if page is None:
            # A PDF without pages cannot be saved; emit a single blank page.
            pdf.new_page(width=page_width, height=page_height)
        pdf.save(output_pdf)
    finally:
        pdf.close()
    print(f"Converted Word to PDF (Alternative): {output_pdf}")


In [41]:

# Full Workflow
def pdf_to_translated_pdf(input_pdf, output_pdf, target_language="French"):
    """Translate a PDF end to end: PDF -> Word -> translated Word -> PDF.

    Args:
        input_pdf: Path of the PDF to translate.
        output_pdf: Path of the translated PDF to write.
        target_language: Language to translate the text into.

    The intermediate Word files are written to ``translated_files/temp.docx``
    and ``translated_files/translated.docx`` and are left in place afterwards.
    """
    work_dir = "translated_files"
    temp_word = f"{work_dir}/temp.docx"
    translated_word = f"{work_dir}/translated.docx"

    # On a fresh checkout neither directory may exist yet, and every step
    # below fails when asked to write into a missing directory.
    os.makedirs(work_dir, exist_ok=True)
    os.makedirs(os.path.dirname(os.path.abspath(output_pdf)), exist_ok=True)

    # Step 1: Convert PDF to Word
    convert_pdf_to_word(input_pdf, temp_word)

    # Step 2: Translate the Word document
    translate_word(temp_word, translated_word, target_language)

    # Step 3: Convert Translated Word back to PDF
    convert_word_to_pdf(translated_word, output_pdf)


In [42]:

# Example Usage
# Both paths are relative, so they resolve against the current working directory.
input_pdf = "sample_files/example.pdf"
output_pdf = "translated_files/translated_example.pdf"
target_language = "French"

# Run the whole pipeline (extract, translate, export) in a single call.
pdf_to_translated_pdf(
    input_pdf=input_pdf,
    output_pdf=output_pdf,
    target_language=target_language,
)


Converted PDF to Word: translated_files/temp.docx
Coversions:  "Bonjour monde
Python est le meilleur langage de programmation
"
Coversions:  "Mon nom est Anand Vishwakarma
• Le développeur d'IA"
Coversions:  "Il n'est plus nécessaire d'apprendre des langues étrangères. 
Grâce à l'IA, vous parlez dans votre téléphone, et il traduit.
Alors, où que vous soyez dans le monde, les serveurs savent comment vous voulez votre 
café."
Translated Word document saved to: translated_files/translated.docx
Converted Word to PDF: translated_files/translated_example.pdf
