In [1]:
import requests
import gradio as gr
from PIL import Image, ImageDraw, ImageFont
import json
import platform


def request_vision(image_path):
    """Send an image file to the Azure AI Vision Image Analysis endpoint.

    The ``read`` (OCR) and ``caption`` features are requested for the image
    stored at ``image_path``.

    Configuration comes from the environment so that no secret is stored in
    the notebook:

    * ``AZURE_VISION_KEY`` (required): subscription key.
    * ``AZURE_VISION_ENDPOINT`` (optional): full ``imageanalysis:analyze``
      URL. Defaults to the resource this notebook was written against.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        The decoded JSON body of the service response.

    Raises:
        RuntimeError: If ``AZURE_VISION_KEY`` is not set.
        OSError: If the image file cannot be opened or read.
        requests.HTTPError: If the service answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    import os  # function-scope import keeps the cell runnable on its own

    # Never keep the subscription key in the notebook: anything committed or
    # shared together with the notebook must be treated as leaked and rotated.
    api_key = os.environ.get("AZURE_VISION_KEY")
    if not api_key:
        raise RuntimeError(
            "Environment variable AZURE_VISION_KEY is not set; "
            "export your Azure AI Vision subscription key before running."
        )

    endpoint = os.environ.get(
        "AZURE_VISION_ENDPOINT",
        "https://computervision06.cognitiveservices.azure.com/computervision/imageanalysis:analyze",
    )

    headers = {
        "Content-Type": "application/octet-stream",
        "Ocp-Apim-Subscription-Key": api_key
    }

    params = {
        'api-version': '2024-02-01',
        'features': 'read,caption'
    }

    with open(image_path, 'rb') as image_file:
        image_data = image_file.read()

    response = requests.post(endpoint,
                             headers=headers,
                             params=params,
                             data=image_data,
                             timeout=30)  # do not hang the UI forever

    # Surface HTTP failures instead of handing an error payload to callers
    # that only look for 'readResult'.
    response.raise_for_status()
    return response.json()


def change_image(image_path):
    """Run OCR on an image file and draw the recognised lines onto it.

    For every text line in the response's ``readResult.blocks[].lines[]`` a
    red outline is drawn around its ``boundingPolygon`` and the recognised
    text is written in green just below the polygon's fourth point. The
    recognised lines are also dumped to ``ocr_output.json`` in the current
    working directory (overwritten on every call).

    Args:
        image_path: Path of the image to process. May be ``None`` or empty
            (e.g. when the UI input has been cleared), in which case nothing
            is processed.

    Returns:
        The annotated ``PIL.Image.Image``, or ``None`` when no path was given.
    """
    # Nothing selected: skip the OCR round trip instead of crashing in
    # Image.open(None).
    if not image_path:
        return None

    image = Image.open(image_path)
    # Grayscale / palette images cannot show the red and green annotations
    # faithfully, so promote them to RGB first.
    if image.mode not in ('RGB', 'RGBA'):
        image = image.convert('RGB')
    draw = ImageDraw.Draw(image)

    font_size = 20
    # Platform fonts that contain Korean glyphs; other systems use Pillow's
    # built-in font.
    font_name = {
        'Darwin': 'AppleGothic.ttf',
        'Windows': 'malgun.ttf',
    }.get(platform.system())
    try:
        if font_name:
            font = ImageFont.truetype(font_name, size=font_size)
        else:
            font = ImageFont.load_default(size=font_size)
    except OSError:
        # The expected system font is not installed: degrade gracefully.
        font = ImageFont.load_default(size=font_size)

    response_json = request_vision(image_path=image_path)

    output_data = []

    read_result = response_json.get('readResult') or {}
    for block in read_result.get('blocks', []):
        for line in block['lines']:
            text = line['text']
            bounding_polygon = line['boundingPolygon']
            polygon = [(point['x'], point['y']) for point in bounding_polygon]
            draw.polygon(polygon, outline='red', fill=None, width=3)

            # Label is anchored 3 px below the polygon's fourth point.
            anchor = bounding_polygon[3]
            draw.text((anchor['x'], anchor['y'] + 3), text=text, fill='green', font=font)

            output_data.append({
                "text": text,
                "bounding_polygon": bounding_polygon
            })

    with open('ocr_output.json', 'w') as json_file:
        json.dump(output_data, json_file, indent=4)

    return image

# Minimal Gradio UI: pick an image, see the OCR-annotated copy next to it.
with gr.Blocks() as demo:

    # Source picture; the callback receives it as a file path.
    input_image = gr.Image(type='filepath', label='이미지 선택', width=800)

    # Read-only view of the annotated PIL image returned by the callback.
    output_image = gr.Image(type='pil', label='결과 이미지', width=800, interactive=False)

    # Re-run OCR every time the selected image changes.
    input_image.change(change_image, inputs=input_image, outputs=output_image)

demo.launch()


  from .autonotebook import tqdm as notebook_tqdm


Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




In [7]:
## PDF를 OCR로 인식하는 법

In [4]:
pip install PyMuPDF

Collecting PyMuPDF
  Downloading PyMuPDF-1.24.10-cp312-none-win_amd64.whl.metadata (3.4 kB)
Collecting PyMuPDFb==1.24.10 (from PyMuPDF)
  Downloading PyMuPDFb-1.24.10-py3-none-win_amd64.whl.metadata (1.4 kB)
Downloading PyMuPDF-1.24.10-cp312-none-win_amd64.whl (3.2 MB)
   ---------------------------------------- 0.0/3.2 MB ? eta -:--:--
   ---------------------------------------- 3.2/3.2 MB 31.9 MB/s eta 0:00:00
Downloading PyMuPDFb-1.24.10-py3-none-win_amd64.whl (13.2 MB)
   ---------------------------------------- 0.0/13.2 MB ? eta -:--:--
   ------------------------ --------------- 8.1/13.2 MB 41.8 MB/s eta 0:00:01
   ---------------------------------------- 13.2/13.2 MB 36.0 MB/s eta 0:00:00
Installing collected packages: PyMuPDFb, PyMuPDF
Successfully installed PyMuPDF-1.24.10 PyMuPDFb-1.24.10
Note: you may need to restart the kernel to use updated packages.




In [1]:
import requests
import gradio as gr
from PIL import Image, ImageDraw, ImageFont
import json
import platform
import fitz  # PyMuPDF

def pdf_to_images(pdf_path):
    """Render every page of a PDF into a PIL image.

    Pages are rasterised with PyMuPDF's default pixmap settings and converted
    to ``RGB`` images in page order.

    Args:
        pdf_path: Path of the PDF document to open.

    Returns:
        A list with one ``PIL.Image.Image`` per page (empty for a document
        without pages).
    """
    images = []
    # The context manager closes the document (and its file handle) even if
    # rendering a page fails; the original never closed it.
    with fitz.open(pdf_path) as doc:
        for page in doc:
            # alpha=False is spelled out because the buffer is decoded as
            # 3-channel "RGB" below.
            pix = page.get_pixmap(alpha=False)
            images.append(
                Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            )
    return images

def request_vision(image):
    """Send an in-memory PIL image to the Azure AI Vision Image Analysis endpoint.

    The image is encoded as PNG before upload and the ``read`` (OCR) and
    ``caption`` features are requested.

    Configuration comes from the environment so that no secret is stored in
    the notebook:

    * ``AZURE_VISION_KEY`` (required): subscription key.
    * ``AZURE_VISION_ENDPOINT`` (optional): full ``imageanalysis:analyze``
      URL. Defaults to the resource this notebook was written against.

    Args:
        image: ``PIL.Image.Image`` to analyse.

    Returns:
        The decoded JSON body of the service response.

    Raises:
        RuntimeError: If ``AZURE_VISION_KEY`` is not set.
        requests.HTTPError: If the service answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    # Function-scope imports keep the cell runnable on its own.
    import io
    import os

    # Never keep the subscription key in the notebook: anything committed or
    # shared together with the notebook must be treated as leaked and rotated.
    api_key = os.environ.get("AZURE_VISION_KEY")
    if not api_key:
        raise RuntimeError(
            "Environment variable AZURE_VISION_KEY is not set; "
            "export your Azure AI Vision subscription key before running."
        )

    endpoint = os.environ.get(
        "AZURE_VISION_ENDPOINT",
        "https://computervision06.cognitiveservices.azure.com/computervision/imageanalysis:analyze",
    )

    headers = {
        "Content-Type": "application/octet-stream",
        "Ocp-Apim-Subscription-Key": api_key
    }

    params = {
        'api-version': '2024-02-01',
        'features': 'read,caption'
    }

    # Image.tobytes() yields raw pixel data with no file header, which the
    # service cannot decode. Upload a real image file (PNG, lossless) instead.
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    image_data = buffer.getvalue()

    response = requests.post(endpoint,
                             headers=headers,
                             params=params,
                             data=image_data,
                             timeout=30)  # do not hang the UI forever

    # Surface HTTP failures instead of handing an error payload to callers
    # that only look for 'readResult'.
    response.raise_for_status()
    return response.json()

def _load_label_font(font_size=20):
    """Return a font for the text labels, preferring a Korean-capable system font."""
    font_name = {
        'Darwin': 'AppleGothic.ttf',
        'Windows': 'malgun.ttf',
    }.get(platform.system())
    try:
        if font_name:
            return ImageFont.truetype(font_name, size=font_size)
    except OSError:
        # The expected system font is not installed: fall through to default.
        pass
    return ImageFont.load_default(size=font_size)


def _annotate_image(image, font=None):
    """Run OCR on ``image``, draw the results onto it in place, return the lines.

    Each returned entry is ``{"text": ..., "bounding_polygon": ...}`` taken
    from the response's ``readResult.blocks[].lines[]``.
    """
    if font is None:
        font = _load_label_font()

    draw = ImageDraw.Draw(image)
    response_json = request_vision(image)

    entries = []
    read_result = response_json.get('readResult') or {}
    for block in read_result.get('blocks', []):
        for line in block['lines']:
            text = line['text']
            bounding_polygon = line['boundingPolygon']
            polygon = [(point['x'], point['y']) for point in bounding_polygon]
            draw.polygon(polygon, outline='red', fill=None, width=3)

            # Label is anchored 3 px below the polygon's fourth point.
            anchor = bounding_polygon[3]
            draw.text((anchor['x'], anchor['y'] + 3), text=text, fill='green', font=font)

            entries.append({
                "text": text,
                "bounding_polygon": bounding_polygon
            })
    return entries


def _write_ocr_output(entries):
    """Dump OCR entries to ``ocr_output.json`` in the working directory."""
    with open('ocr_output.json', 'w') as json_file:
        json.dump(entries, json_file, indent=4)


def change_image(image):
    """Annotate a single PIL image with its OCR result.

    A red outline is drawn around every recognised line and its text is
    written in green below it. The image is modified in place and the
    recognised lines are written to ``ocr_output.json`` (overwritten on every
    call).

    Args:
        image: ``PIL.Image.Image`` to process.

    Returns:
        The same image object, now annotated.
    """
    _write_ocr_output(_annotate_image(image))
    return image


def process_pdf(pdf_path):
    """Run OCR on every page of a PDF and return the annotated page images.

    Results of all pages are collected into one ``ocr_output.json``; each
    entry carries a 1-based ``page`` number next to ``text`` and
    ``bounding_polygon``. Calling ``change_image`` once per page would leave
    only the last page's lines in the file, because every call overwrites it.

    Args:
        pdf_path: Path of the PDF to process. May be ``None`` or empty (e.g.
            when the UI input has been cleared).

    Returns:
        A list of annotated ``PIL.Image.Image`` objects, one per page; an
        empty list when no path was given.
    """
    # Nothing selected: nothing to render, and no output file is written.
    if not pdf_path:
        return []

    images = pdf_to_images(pdf_path)
    font = _load_label_font()  # loaded once, shared by all pages

    all_entries = []
    for page_number, image in enumerate(images, start=1):
        for entry in _annotate_image(image, font=font):
            all_entries.append({"page": page_number, **entry})

    _write_ocr_output(all_entries)
    return images

# Minimal Gradio UI: upload a PDF, browse the OCR-annotated pages in a gallery.
with gr.Blocks() as demo:

    # Uploaded document; the callback receives it as a file path.
    input_pdf = gr.File(type='filepath', label='PDF 파일 선택')

    # Read-only gallery of the annotated PIL page images.
    output_images = gr.Gallery(type='pil', label='결과 이미지', interactive=False)

    # Re-run the PDF pipeline every time the selected file changes.
    input_pdf.change(process_pdf, inputs=input_pdf, outputs=output_images)

demo.launch()


  from .autonotebook import tqdm as notebook_tqdm


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


