In [1]:
import sys
sys.path.append('../modules/') # Import local modules

from IPython.display import display, Markdown
import os
from dotenv import dotenv_values
import json
from azure.storage.blob import BlobServiceClient
from azure.identity import DefaultAzureCredential
from pdf2image import convert_from_bytes

from samples.app_settings import AppSettings
from samples.utils.stopwatch import Stopwatch
from samples.utils.storage_utils import create_data_file

from samples.language.language_native_translator_client import LanguageNativeTranslatorClient

In [None]:
# Resolve the repository root (three levels above the notebook's working directory)
# and load the sample settings from the .env file stored there
working_dir = os.path.abspath('../../../')
env_file_values = dotenv_values(f"{working_dir}/.env")
settings = AppSettings(env_file_values)
sample_path = f"{working_dir}/samples/python/translation"
sample_name = "document-translation-language-native-document"

# Switch off every credential source named below so that DefaultAzureCredential
# authenticates with the Azure CLI login
excluded_credential_flags = dict.fromkeys(
    (
        "exclude_workload_identity_credential",
        "exclude_developer_cli_credential",
        "exclude_environment_credential",
        "exclude_managed_identity_credential",
        "exclude_powershell_credential",
        "exclude_shared_token_cache_credential",
        "exclude_interactive_browser_credential",
    ),
    True,
)
credential = DefaultAzureCredential(**excluded_credential_flags)

# Translator client, authenticated with the shared credential
language_translator_client = LanguageNativeTranslatorClient(
    endpoint=settings.azure_ai_services_endpoint, credential=credential
)

# Blob service client for the storage account named in the settings
storage_account_name = settings.azure_storage_account_name
storage_account_url = f"https://{storage_account_name}.blob.core.windows.net"
blob_service_client = BlobServiceClient(account_url=storage_account_url, credential=credential)

In [3]:
path = f"{working_dir}/samples/assets/invoices/"
metadata_fname = "invoice_6.json"  # Change this to the file you want to evaluate
metadata_fpath = f"{path}{metadata_fname}"

# Load the metadata from the JSON file.
# The encoding is passed explicitly so the read does not depend on the platform's
# locale default (JSON files are UTF-8 by specification).
with open(metadata_fpath, "r", encoding="utf-8") as f:
    data = json.load(f)

# Extract the PDF file name and path from the metadata
# (the PDF is expected to sit in the same folder as its metadata file)
pdf_fname = data['fname']
pdf_fpath = f"{path}{pdf_fname}"

In [4]:
# Container and virtual folder names used for the translation sample
blob_container_name = "translation-samples"
input_blob_container_folder = "raw"
output_blob_container_folder = "processed"

# Create the sample blob container if it doesn't exist
blob_container_client = blob_service_client.get_container_client(blob_container_name)
if not blob_container_client.exists():
    blob_container_client.create_container()

# Upload the sample PDF file to the blob container in the input folder
input_blob_client = blob_container_client.get_blob_client(f"{input_blob_container_folder}/{pdf_fname}")

# Use a dedicated name for the file handle: `data` already holds the invoice
# metadata dict, and rebinding it here would leave it pointing at a closed file
# once this cell has run.
with open(pdf_fpath, "rb") as pdf_file:
    input_blob_client.upload_blob(pdf_file, overwrite=True)

In [8]:
# Submit a translation request for the uploaded PDF, timed with a Stopwatch.
# NOTE(review): the name `pii_stopwatch` looks carried over from a PII sample — confirm
# before renaming, since later cells may reference it.
# NOTE(review): `begin_analyze_document` presumably starts a long-running operation;
# what `result` holds depends on LanguageNativeTranslatorClient — confirm.
with Stopwatch() as pii_stopwatch:
    result = language_translator_client.begin_analyze_document(
        analyze_request={ 
            "inputs": [
                {
                    "storageType": "File",
                    "source": {
                        # URL of the blob at <input folder>/<pdf_fname> in the sample container
                        "sourceUrl": f"https://{storage_account_name}.blob.core.windows.net/{blob_container_name}/{input_blob_container_folder}/{pdf_fname}"
                    },
                    "targets": [
                        {
                            # Output blob goes to the output folder, with the file name prefixed by "en3_"
                            "targetUrl": f"https://{storage_account_name}.blob.core.windows.net/{blob_container_name}/{output_blob_container_folder}/en3_{pdf_fname}",
                            # Target language code for the translated document
                            "language": "en"
                        }
                    ]
                }
            ]
        })