In [13]:
import gradio as gr
from pdf2image import convert_from_bytes
from PIL import Image, ImageDraw
import pytesseract

# Tesseract command-line flags forwarded to pytesseract on every OCR call.
custom_oem_psm_config = (
    "--oem 3 "  # OCR engine mode (3 is Tesseract's default selection)
    "--psm 6"   # page segmentation mode (6 treats the page as one uniform text block)
)

# Function to draw OCR segments
def draw_ocr_boxes(image, min_conf=60):
    """Outline confidently recognised OCR segments on ``image`` and label them.

    Runs ``pytesseract.image_to_data`` with ``custom_oem_psm_config`` and, for
    every non-blank segment whose confidence is strictly greater than
    ``min_conf``, draws a red rectangle around it plus the first 15 characters
    of its text in blue just above the box.

    Args:
        image: Image to annotate. It is drawn on in place, so pass a copy if
            the original must stay untouched.
        min_conf: Confidence threshold; segments at or below it are skipped.

    Returns:
        The same ``image`` object, now annotated.
    """
    draw = ImageDraw.Draw(image)
    data = pytesseract.image_to_data(
        image,
        config=custom_oem_psm_config,
        output_type=pytesseract.Output.DICT,
    )

    # .get() tolerates a result dict that lacks the expected columns.
    columns = (data.get(key, []) for key in ("text", "conf", "left", "top", "width", "height"))

    for text, conf, x, y, w_box, h_box in zip(*columns):
        if not text.strip():
            continue

        # Confidence may arrive as an int, a float, or a float-formatted string
        # such as "96.5" depending on the pytesseract/Tesseract versions, so
        # int() is not safe here.
        try:
            confidence = float(conf)
        except (TypeError, ValueError):
            continue
        if confidence <= min_conf:
            continue

        draw.rectangle([x, y, x + w_box, y + h_box], outline="red", width=2)
        # Clamp so labels for segments near the top edge stay on the canvas.
        draw.text((x, max(y - 10, 0)), text[:15], fill="blue")

    return image

# Main function: take PDF and output annotated pages
def process_pdf(pdf_file):
    """Render the first pages of an uploaded PDF and annotate each with OCR boxes.

    Args:
        pdf_file: Raw PDF bytes. ``None`` or empty bytes (for example, the form
            was submitted without a file) yields an empty result instead of an
            error from the PDF converter.

    Returns:
        A list of annotated page images, one per rendered page (pages 1-4 at
        200 DPI); empty when no file was supplied.
    """
    # Nothing uploaded: return an empty gallery rather than crashing.
    if not pdf_file:
        return []

    # Only the first four pages are rendered.
    pages = convert_from_bytes(pdf_file, dpi=200, first_page=1, last_page=4)

    # draw_ocr_boxes draws in place, so annotate a copy of each rendered page.
    return [draw_ocr_boxes(page.copy()) for page in pages]

In [14]:
# Single-input demo: an uploaded PDF (delivered as raw bytes) goes through
# process_pdf and the annotated pages are shown in a two-column gallery.
demo = gr.Interface(
    fn=process_pdf,
    inputs=gr.File(type="binary", label="Upload a PDF (Max 4 pages for demo)"),
    outputs=gr.Gallery(label="Annotated Pages", columns=2, height="auto"),
    title="PDF Segment Viewer",
    description="Upload a short PDF (3–4 pages). The app will convert pages to images, run OCR, and display bounding boxes.",
)

# Start the local web server for the demo.
demo.launch()

* Running on local URL:  http://127.0.0.1:7863
* To create a public link, set `share=True` in `launch()`.


