In [None]:
import pprint
import json

In [None]:
import os
# Resolve the directory one level above the current working directory and show it.
working_directory = os.getcwd()
parent_directory = os.path.abspath(os.path.join(working_directory, os.pardir))
print(f"Parent directory: {parent_directory}")

In [None]:
# Send one PDF to the Unstructured partition API and save the returned elements as JSON.
# Before calling the API, replace filename and ensure the SDK is installed: "pip install unstructured-client"
# See https://docs.unstructured.io/api-reference/api-services/sdk for more details

import unstructured_client
from unstructured_client.models import operations, shared
from collections import Counter

# The API key is read from the environment so it never appears in the notebook.
client = unstructured_client.UnstructuredClient(
    api_key_auth= os.getenv("UNSTRUCTURED_API_KEY"),
    server_url="https://api.unstructuredapp.io",
)

# Input PDF lives under the parent directory; output goes below the current working directory.
filename = parent_directory + "/Doc_Panthera/Gestionale/VEN_Contratti_Vendita_Ordini_Aperti.pdf"
# NOTE(review): the file written below contains JSON although its name ends in ".pdf" —
# consider renaming to ".json" after checking whether anything else reads this path.
output_file = os.getcwd() + "/Unstructured_Output/Unstruct_VEN_Contratti_Vendita_Ordini_Aperti.pdf"
with open(filename, "rb") as f:
    data = f.read()

req = operations.PartitionRequest(
    partition_parameters=shared.PartitionParameters(
        files=shared.Files(
            content=data,
            file_name=filename,
        ),
        # --- Other partition parameters ---
        # Note: Defining 'strategy', 'chunking_strategy', and 'output_format'
        # parameters as strings is accepted, but will not pass strict type checking. It is
        # advised to use the defined enum classes as shown below.
        strategy=shared.Strategy.HI_RES,
        languages=['ita'],
    ),
)

try:
    res = client.general.partition(request=req)
    element_dicts = list(res.elements)

    # Write the processed data to a local file.
    json_elements = json.dumps(element_dicts, indent=2)

    # Create the output folder on first use so the write cannot fail on a fresh checkout.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, "w") as file:
        file.write(json_elements)
except Exception as e:
    print(e)
    # Re-raise so the run stops here: the following cells read `res` and
    # `element_dicts`, which would otherwise be undefined or left over from an
    # earlier run of this cell.
    raise


In [None]:
# Show how many elements the partition response contains.
element_count = len(res.elements)
pprint.pprint(element_count)

In [None]:
# Tally the elements by their 'type' field and render the counts.
type_counts = Counter(item['type'] for item in element_dicts)
display(type_counts)
print("")

In [None]:
# Collect the text of every element typed "Table" and print it as one blank-line-separated string.
table_texts = [item['text'] for item in element_dicts if item['type'] == "Table"]
pprint.pprint("\n\n".join(table_texts))