In [None]:
!pip show azure-ai-formrecognizer

In [None]:
# import libraries
import re
import os
import json
import pandas as pd
from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.core.credentials import AzureKeyCredential

In [None]:
# Names of the documents to analyze (joined with the input folder in the driver loop).
file_names = [
    "example1.pdf",
    "example2.pdf",
]

In [None]:
# Azure Form Recognizer endpoint and key (both values come from the Azure portal).
# They are read from the AZURE_FORM_RECOGNIZER_ENDPOINT / AZURE_FORM_RECOGNIZER_KEY
# environment variables so that secrets do not have to be saved in the notebook.
# When a variable is not set the value falls back to "" (the previous placeholder).
endpoint = os.environ.get("AZURE_FORM_RECOGNIZER_ENDPOINT", "")
key = os.environ.get("AZURE_FORM_RECOGNIZER_KEY", "")

## LAYOUT Model

In [None]:
def format_polygon(polygon):
    """Render a polygon as a comma-separated list of ``[x, y]`` pairs.

    Args:
        polygon: Iterable of points exposing ``x`` and ``y`` attributes, or a
            falsy value (``None`` / empty sequence).

    Returns:
        A string such as ``"[1, 2], [3, 4]"``, or ``"N/A"`` when ``polygon``
        is falsy.
    """
    if polygon:
        return ", ".join(f"[{point.x}, {point.y}]" for point in polygon)
    return "N/A"

def analyze_layout(docpath):
    """Analyze a local document with the "prebuilt-layout" model and print a report.

    The service endpoint and API key are taken from the module-level
    ``endpoint`` and ``key`` variables, so they only need to be configured
    once for the whole notebook.

    Args:
        docpath: Path of the document to analyze; the file is opened in
            binary mode and its full content is sent to the service.

    Returns:
        The object returned by ``poller.result()`` for the layout analysis.

    Raises:
        ValueError: If the module-level ``endpoint`` or ``key`` is empty.
    """
    # Rely on the notebook-level configuration. Re-assigning empty placeholders
    # here would shadow it and always build a client with blank credentials.
    if not endpoint or not key:
        raise ValueError(
            "Set the module-level `endpoint` and `key` variables before "
            "calling analyze_layout()."
        )

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )

    with open(docpath, "rb") as file:
        file_content = file.read()

    poller = document_analysis_client.begin_analyze_document(
        "prebuilt-layout", file_content
    )
    result = poller.result()

    # NOTE(review): the `or []` guards assume the SDK may hand back None for
    # absent collections in some versions; they are no-ops for real lists.
    for style in result.styles or []:
        print(
            "Document contains {} content".format(
                "handwritten" if style.is_handwritten else "no handwritten"
            )
        )

    for page in result.pages or []:
        _print_page_layout(page)

    for table_idx, table in enumerate(result.tables or []):
        _print_table_layout(table_idx, table)

    print("----------------------------------------")
    return result


def _print_page_layout(page):
    """Print one page's dimensions, text lines with their words, and selection marks."""
    print("----Analyzing layout from page #{}----".format(page.page_number))
    print(
        "Page has width: {} and height: {}, measured with unit: {}".format(
            page.width, page.height, page.unit
        )
    )

    for line_idx, line in enumerate(page.lines or []):
        words = line.get_words()
        print(
            "...Line # {} has word count {} and text '{}' within bounding polygon '{}'".format(
                line_idx,
                len(words),
                line.content,
                format_polygon(line.polygon),
            )
        )

        for word in words:
            print(
                "......Word '{}' has a confidence of {}".format(
                    word.content, word.confidence
                )
            )

    for selection_mark in page.selection_marks or []:
        print(
            "...Selection mark is '{}' within bounding polygon '{}' and has a confidence of {}".format(
                selection_mark.state,
                format_polygon(selection_mark.polygon),
                selection_mark.confidence,
            )
        )


def _print_table_layout(table_idx, table):
    """Print one table's size, its location(s), and every cell with its location(s)."""
    print(
        "Table # {} has {} rows and {} columns".format(
            table_idx, table.row_count, table.column_count
        )
    )
    for region in table.bounding_regions or []:
        print(
            "Table # {} location on page: {} is {}".format(
                table_idx,
                region.page_number,
                format_polygon(region.polygon),
            )
        )
    for cell in table.cells or []:
        print(
            "...Cell[{}][{}] has content '{}'".format(
                cell.row_index,
                cell.column_index,
                cell.content,
            )
        )
        for region in cell.bounding_regions or []:
            print(
                "...content on page {} is within bounding polygon '{}'".format(
                    region.page_number,
                    format_polygon(region.polygon),
                )
            )

In [None]:
# Analyze every configured document and save each raw result as pretty-printed JSON.
for doc in file_names:
    docpath = f'path/to/inputfiles/folder/{doc}'
    # Model result
    result = analyze_layout(docpath)
    result_dict = result.to_dict()
    # Convert the dictionary to a JSON string (indented for readability)
    result_json = json.dumps(result_dict, indent=4)
    # Strip only the final extension: splitting on the first '.' would turn
    # "report.v2.pdf" into "report" and could make distinct inputs collide.
    name = os.path.splitext(doc)[0]
    file_path = f"path/to/api_results_outputfiles/folder/layout_model_{name}.json"
    # Create the output folder on first use so the write below cannot fail
    # just because the directory does not exist yet.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    # Open the file in write mode and write results
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(result_json)