In [10]:
"""
This code sample shows Prebuilt Read operations with the Azure Form Recognizer client library. 
The async versions of the samples require Python 3.6 or later.

To learn more, please visit the documentation - Quickstart: Document Intelligence (formerly Form Recognizer) SDKs
https://learn.microsoft.com/azure/ai-services/document-intelligence/quickstarts/get-started-sdks-rest-api?pivots=programming-language-python
"""

import os

from azure.ai.formrecognizer import DocumentAnalysisClient
from azure.core.credentials import AzureKeyCredential

"""
Remember to remove the key from your code when you're done, and never post it publicly. For production, use
secure methods to store and access your credentials. For more information, see 
https://docs.microsoft.com/en-us/azure/cognitive-services/cognitive-services-security?tabs=command-line%2Ccsharp#environment-variables-and-application-configuration
"""
# Credentials are read from the environment so that no secret is stored in the
# source. Set AZURE_FORM_RECOGNIZER_KEY (and, optionally,
# AZURE_FORM_RECOGNIZER_ENDPOINT) before running this sample; with an empty key
# the calls to the service are expected to fail authentication.
# NOTE(review): the key that used to be hard-coded here has been exposed in
# source and should be rotated.
endpoint = os.environ.get(
    "AZURE_FORM_RECOGNIZER_ENDPOINT",
    "https://1507docuintelli.cognitiveservices.azure.com/",
)
key = os.environ.get("AZURE_FORM_RECOGNIZER_KEY", "")

def format_bounding_box(bounding_box):
    """Render a polygon as a comma-separated list of ``[x, y]`` pairs.

    Args:
        bounding_box: Iterable of point-like objects exposing ``x`` and ``y``
            attributes, or a falsy value (``None`` or empty) when no polygon
            is available.

    Returns:
        A string such as ``"[1.0, 2.0], [3.0, 4.0]"``, or ``"N/A"`` when
        ``bounding_box`` is falsy.
    """
    if bounding_box:
        template = "[{}, {}]"
        corners = (template.format(point.x, point.y) for point in bounding_box)
        return ", ".join(corners)
    return "N/A"

def analyze_read(paths=None):
    """Run the ``prebuilt-read`` model on local documents and print the results.

    For every document this prints the full extracted content, one line per
    detected style stating whether it is handwritten, and then, for each page,
    its dimensions, every line with its bounding polygon, and every word with
    its confidence. A separator line is printed after each document.

    The client is built from the module-level ``endpoint`` and ``key``.

    Args:
        paths: Optional iterable of local file paths to analyze. When omitted
            (``None``), the three default sample images under ``./File/`` are
            used.
    """
    if paths is None:
        paths = (
            "./File/korecipe.png",
            "./File/korid.png",
            "./File/korsentence.png",
        )

    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )

    for path in paths:
        # The file only needs to stay open while the analysis is submitted;
        # the result is fetched from the poller afterwards.
        with open(path, "rb") as document:
            poller = document_analysis_client.begin_analyze_document(
                "prebuilt-read", document=document
            )
        result = poller.result()

        print("Document contains content: ", result.content)

        for style in result.styles:
            print(
                "Document contains {} content".format(
                    "handwritten" if style.is_handwritten else "no handwritten"
                )
            )

        for page in result.pages:
            print("----Analyzing Read from page #{}----".format(page.page_number))
            print(
                "Page has width: {} and height: {}, measured with unit: {}".format(
                    page.width, page.height, page.unit
                )
            )

            for line_idx, line in enumerate(page.lines):
                print(
                    "...Line # {} has text content '{}' within bounding box '{}'".format(
                        line_idx,
                        line.content,
                        format_bounding_box(line.polygon),
                    )
                )

            for word in page.words:
                print(
                    "...Word '{}' has a confidence of {}".format(
                        word.content, word.confidence
                    )
                )

        print("----------------------------------------")


# Run the sample only when this file is executed directly, not when imported.
if __name__ == "__main__":
    analyze_read()


Document contains content:  STARBUCKS
현금(소득공제) 화성시청점 T:1522-3232 경기 화성 시청로 113 대표 : 충데이비드호선 201-81-21515 [매장#3794. POS 01] 2021-08-11 13:02:33
주문번호 A-01
T)콜드 브루 4,500 1 4,500
I-T) 아메리카노 4. 100 2 8,200
합계
12,700
결제금액
12, 700
(부가세포함)
(1, 154)
카카오페이
12, 700
결제 수단 :
카카오페이머니(머니)
바코드 번호 : 281006 ************** 0800
승인 번호 :
29508878
현금영수증 발급 500
010 -****- 2995
승인번호: 177263830 현금영수증 문의 : 126-1-1
결제수단 변경은 구입하신 매장에서 가능하며, 반드시 구매 영수증과 원거래 결제수단을 지참하셔야 합니다. (변경 가능 기간 : ~ 2021-08-25)
www. starbucks. co. kr
*20130482101130749* 본 영수증은 BPA Free 종이를 사용합니다.
고객요청에의해 일회용컵이 제공되었습니다
Document contains handwritten content
----Analyzing Read from page #1----
Page has width: 420.0 and height: 560.0, measured with unit: pixel
...Line # 0 has text content 'STARBUCKS' within bounding box '[109.0, 61.0], [272.0, 59.0], [272.0, 82.0], [109.0, 84.0]'
...Line # 1 has text content '현금(소득공제)' within bounding box '[121.0, 93.0], [260.0, 90.0], [261.0, 105.0], [121.0, 108.0]'
...Line # 2 has text content '화성시청점' within bo