In [None]:
# Load the `rich` IPython extension.
%load_ext rich

In [None]:
import json
import os

import requests
from rich.console import Console
from tqdm import tqdm

# Shared rich console, fixed at 100 columns; used below to print the extraction result.
console = Console(width=100)

# Base URL that every request in this notebook is sent to.
API_URL = "http://localhost:8999"  # URL used when debugging locally; change it to your own

In [None]:
# Show which API base URL this run is configured to use.
print(f"API URL: {API_URL}")

In [None]:
# Document uploaded for extraction; its base name is also reused for the saved output file.
doc_path = "/resources/data/sample/document-03.docx"

## `/document/extract` endpoint output

In [None]:
def extract_document(file_path: str) -> dict:
    """Upload a file to the ``/document/extract`` endpoint and return the JSON reply.

    Args:
        file_path: Path of the file to upload; it is opened in binary mode.

    Returns:
        The response body decoded from JSON.

    Raises:
        OSError: If ``file_path`` cannot be opened.
        requests.HTTPError: If the server answers with an error status.
    """
    endpoint = f"{API_URL}/document/extract"
    # The POST has to happen while the handle is open, so it stays inside the `with`.
    with open(file_path, "rb") as document:
        response = requests.post(url=endpoint, files={"file": document})
    # Turn 4xx/5xx replies into exceptions before trying to decode the body.
    response.raise_for_status()
    return response.json()

In [None]:
# Send the sample document to /document/extract and print the response with the rich console
extracted_document = extract_document(doc_path)
console.print(extracted_document)

In [None]:
# Number of entries under the "paragraphs" key of the extraction response
len(extracted_document["paragraphs"])

## Inference

In [None]:
import uuid


def get_predictions(paragraph_id: uuid.UUID, use_cache: bool = True) -> dict:
    """Fetch the anonymization-pipeline prediction for a single paragraph.

    Args:
        paragraph_id: Identifier of the paragraph. It is formatted directly into
            the URL path, so the server receives the value's string form.
        use_cache: Value sent as the ``use_cache`` query parameter. Defaults to
            ``True``, the value this function previously hard-coded.
            NOTE(review): presumably ``False`` forces a fresh prediction on the
            server side — confirm against the API.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(
        url=f"{API_URL}/pipeline/anonymization/paragraph/{paragraph_id}/predict",
        params={"use_cache": use_cache},
    )
    # Turn 4xx/5xx replies into exceptions before trying to decode the body.
    response.raise_for_status()
    return response.json()

In [None]:
# Ask the prediction endpoint about each extracted paragraph in turn,
# with tqdm reporting progress over the paragraph list.
predictions = []
for paragraph in tqdm(extracted_document["paragraphs"]):
    predictions.append(get_predictions(paragraph["id"]))
predictions

In [None]:
# Identifier of the extracted document, taken from the extraction response.
document_id = extracted_document["id"]

# Call the document-level `compile` endpoint of the anonymization pipeline.
response = requests.get(
    url=f"{API_URL}/pipeline/anonymization/document/{document_id}/compile",
)
response.raise_for_status()


# Create the output directory (relative to the current working directory) if it is missing.
output_dir = "output"
os.makedirs(output_dir, exist_ok=True)

# Reuse the input's base name without its extension; `ext` is not used again in this cell.
filename = os.path.basename(doc_path)
filename, ext = os.path.splitext(filename)
# Save the raw response bytes; the output file always gets an ".odt" suffix.
# NOTE(review): presumably the compile endpoint returns ODT content — confirm.
with open(f"{output_dir}/{filename}.odt", "wb") as file:
    file.write(response.content)