# Install

In [1]:
!pip install -q --upgrade requests
!pip install -q --upgrade PyPDF2
!pip install -q --upgrade python-dotenv

# Imports

In [None]:
import os
import uuid

import requests
from docx import Document
from dotenv import load_dotenv
from PyPDF2 import PdfReader

# Functions

In [45]:
# Read the settings file first so the lookups below can see its values.
load_dotenv("data.env")

# Credentials and service location used by translator_text; each one is None
# when the corresponding environment variable is not set.
subscription_key = os.environ.get("SUBSCRIPTION_KEY")
endpoint = os.environ.get("ENDPOINT")
location = os.environ.get("LOCATION")

# Values passed to translate_all_pdfs in the "Use case" cell.
target_language = "pt-br"  # language code the text is translated into
folder_path = "."  # folder scanned for PDF files

def translator_text(text, target_language, source_language='en', timeout=30):
    """Translate ``text`` through the Translator REST API (api-version 3.0).

    Uses the module-level ``endpoint``, ``subscription_key`` and ``location``
    settings to build and authenticate the request.

    Args:
        text: Text to translate.
        target_language: Language code sent as the ``to`` query parameter.
        source_language: Language code sent as the ``from`` query parameter.
            Defaults to ``'en'``, the value previously hard-coded here.
        timeout: Seconds to wait for the service before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The ``text`` field of the first translation in the response.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.Timeout: If the service does not answer within ``timeout``.
    """
    # Tolerate an endpoint that was configured with a trailing slash.
    constructed_url = endpoint.rstrip('/') + '/translate'

    headers = {
        'Ocp-Apim-Subscription-Key': subscription_key,
        'Ocp-Apim-Subscription-Region': location,
        'Content-Type': 'application/json',
        # The trace id must be a GUID; str(os.urandom(16)) produced the repr
        # of a bytes object (e.g. "b'\\x8f...'") instead.
        'X-ClientTraceId': str(uuid.uuid4()),
    }

    params = {
        'api-version': '3.0',
        'from': source_language,
        'to': target_language,
    }

    body = [{'text': text}]

    response = requests.post(
        constructed_url,
        params=params,
        headers=headers,
        json=body,
        timeout=timeout,
    )
    # Surface the HTTP failure itself; otherwise an error payload (a dict, not
    # a list) makes the indexing below fail with an unhelpful KeyError.
    response.raise_for_status()

    return response.json()[0]['translations'][0]['text']

In [46]:
def translate_document(path, target_language):
    """Translate the text of a PDF and save it as a .docx next to the source.

    Each page's extracted text is translated with ``translator_text`` and
    written as one paragraph of the output document. Pages for which no text
    is extracted are skipped.

    Args:
        path: Path of the PDF file to translate.
        target_language: Language code to translate into; it is also appended
            to the output file name.

    Returns:
        The path of the saved ``<name>_<target_language>.docx`` file.
    """
    reader = PdfReader(path)
    translated_doc = Document()

    for page in reader.pages:
        page_text = page.extract_text()
        if not page_text:
            continue
        # The original called the undefined name `translator_document`, which
        # raised NameError on the first page that had text.
        translated_doc.add_paragraph(translator_text(page_text, target_language))

    # Swap only the real extension, whatever its case. str.replace('.pdf', ...)
    # left "X.PDF" unchanged (so the save below overwrote the source PDF) and
    # also rewrote any ".pdf" appearing earlier in the path.
    base_path, _extension = os.path.splitext(path)
    path_translated = f"{base_path}_{target_language}.docx"
    translated_doc.save(path_translated)

    print(f"Translated file saved: {path_translated}")
    return path_translated


def translate_all_pdfs(folder_path, target_language):
    """Translate every PDF located directly inside ``folder_path``.

    Entries are selected by a case-insensitive ``.pdf`` suffix, listed on
    standard output, and handed one by one to ``translate_document``.

    Args:
        folder_path: Directory whose entries are scanned.
        target_language: Language code forwarded to ``translate_document``.

    Returns:
        A list with the path returned by ``translate_document`` for each PDF,
        in the order the directory listing produced them.
    """
    pdf_files = []
    for entry in os.listdir(folder_path):
        if entry.lower().endswith('.pdf'):
            pdf_files.append(entry)
    print("PDFs found:", pdf_files)

    return [
        translate_document(os.path.join(folder_path, name), target_language)
        for name in pdf_files
    ]

# Use case

In [None]:
# Translate every PDF found in `folder_path` into `target_language`.
translate_all_pdfs(folder_path, target_language)