Extract Text Coordinates using OCR (pytesseract)
We will use pytesseract (Tesseract OCR) to detect text and get bounding boxes.

In [7]:
import os
import json
import numpy as np
import cv2
from PIL import Image, ImageDraw
import pytesseract

In [8]:


# Input location (augmented images) and the two output locations
# (binary masks and OCR coordinate JSON files).
augmented_folder = "../artifacts/augmented_outputs"
mask_folder = "../artifacts/mask_folder"
coords_folder = "../artifacts/outputs"

# Make sure both output locations exist before anything is written to them.
for _output_dir in (mask_folder, coords_folder):
    os.makedirs(_output_dir, exist_ok=True)

def extract_text_coordinates(image_path):
    """Extract bounding boxes of text using OCR (Tesseract).

    Args:
        image_path: Path of the image file to load with OpenCV.

    Returns:
        A list of dicts, one per OCR entry whose text is not blank. Each dict
        has the keys "x", "y", "width" and "height" (copied from Tesseract's
        "left", "top", "width" and "height" fields) plus "text".

    Raises:
        ValueError: If OpenCV could not load an image from ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing/undecodable file by returning None;
        # fail here with the path instead of an opaque cvtColor assertion.
        raise ValueError("Could not read image: {}".format(image_path))
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Use Tesseract to detect text and get bounding boxes
    data = pytesseract.image_to_data(gray, output_type=pytesseract.Output.DICT)

    # The DICT output holds parallel lists; walk them in lockstep and keep
    # only entries whose recognised text is non-blank.
    return [
        {"x": x, "y": y, "width": w, "height": h, "text": text}
        for x, y, w, h, text in zip(
            data["left"], data["top"], data["width"], data["height"], data["text"]
        )
        if text.strip()
    ]

def create_mask(image_path, text_coordinates, mask_path):
    """Generate binary mask from text bounding boxes.

    Args:
        image_path: Path of the source image; only its size is used.
        text_coordinates: Iterable of dicts with "x", "y", "width" and
            "height" keys describing the boxes to paint.
        mask_path: Destination path the mask image is saved to.

    The mask is a single-channel ("L") image of the same size as the source,
    black (0) everywhere except the boxes, which are filled white (255).
    """
    # Only the dimensions are needed, so open the source inside a context
    # manager to release its file handle right away.
    with Image.open(image_path) as image:
        mask_size = image.size

    mask = Image.new("L", mask_size, 0)  # Black background (0)

    draw = ImageDraw.Draw(mask)
    for box in text_coordinates:
        x, y, w, h = box["x"], box["y"], box["width"], box["height"]
        # NOTE(review): if Pillow treats the second corner as inclusive, each
        # box is painted one pixel wider/taller than width/height - confirm
        # whether that margin is intended.
        draw.rectangle([x, y, x + w, y + h], fill=255)  # White text (255)

    mask.save(mask_path)

def process_images(folder):
    """Process all images in the augmented folder.

    For every file in ``folder`` whose name ends with ".png" or ".jpg":
    run OCR, write the detected boxes to ``<stem>.json`` in ``coords_folder``
    and write the corresponding binary mask to ``<stem>_mask.png`` in
    ``mask_folder``. Other files are skipped.

    Args:
        folder: Directory containing the images to process.
    """
    image_extensions = (".png", ".jpg")

    # Sorted so the processing order is reproducible across runs.
    for image_file in sorted(os.listdir(folder)):
        matched_ext = next(
            (ext for ext in image_extensions if image_file.endswith(ext)), None
        )
        if matched_ext is None:
            continue

        image_path = os.path.join(folder, image_file)
        # Strip only the trailing extension; a substring replace would also
        # rewrite ".png"/".jpg" occurring earlier in the file name.
        stem = image_file[: -len(matched_ext)]

        # Extract text coordinates
        text_coords = extract_text_coordinates(image_path)

        # Save coordinates
        coords_path = os.path.join(coords_folder, stem + ".json")
        with open(coords_path, "w", encoding="utf-8") as f:
            json.dump(text_coords, f, indent=4)

        # Generate and save mask
        mask_path = os.path.join(mask_folder, stem + "_mask.png")
        create_mask(image_path, text_coords, mask_path)

    print("Segmentation mask generation complete. Masks and coordinates saved.")

# Run processing on the augmented images: every .png/.jpg found in
# augmented_folder gets a coordinates JSON and a mask image written out.
process_images(augmented_folder)


Segmentation mask generation complete. Masks and coordinates saved.
