In [None]:
import easyocr

# Build an EasyOCR reader for English text only.
reader = easyocr.Reader(['en'])

# Run OCR on one image; every entry unpacks below as
# (bounding box, recognised text, confidence).
results = reader.readtext(
    r"C:\Users\mthil\local\DE\Data-Engineering\ocr\ocr-images\20241223_095814.jpg"
)
for detection in results:
    bbox, text, confidence = detection
    print(f"Detected text: {text} (Confidence: {confidence})")

In [103]:
def group_items(results, vertical_threshold=430, horizontal_separation=300):
    """
    Cluster OCR detections into groups of texts that sit close together.

    The grouping runs in three passes:

    1. Detections are ordered by left edge (then top edge) and bucketed into
       columns: a detection joins the first column whose most recently added
       item has a left edge within ``horizontal_separation`` of its own,
       otherwise it opens a new column.
    2. Each column is ordered top to bottom and split wherever the gap between
       the previous item's bottom edge and the next item's top edge exceeds
       ``vertical_threshold``.
    3. Every single-item group is folded into the group that precedes it in
       the output. Because groups from all columns share one output list,
       this can merge an isolated item into a group from another column.

    The input is never modified: sorting works on a copy, so the caller's
    ``results`` keeps its original order.

    Args:
        results (iterable): OCR detections, each ``(bbox, text, confidence)``.
            ``bbox`` is a sequence of corner points; index 0 is read as the
            top-left ``(x, y)`` corner and index 2 as the bottom-right one.
        vertical_threshold (int): Maximum vertical gap (in bbox coordinate
            units) between consecutive items of the same group.
        horizontal_separation (int): Maximum difference between left edges
            (in bbox coordinate units) for two items to share a column.

    Returns:
        list: List of groups, where each group is a list of text strings.
        An empty input yields an empty list.
    """
    def left_edge(bbox):
        """Return the x-coordinate of the bounding box's top-left corner."""
        return bbox[0][0]

    def top_edge(bbox):
        """Return the y-coordinate of the bounding box's top-left corner."""
        return bbox[0][1]

    def bottom_edge(bbox):
        """Return the y-coordinate of the bounding box's bottom-right corner."""
        return bbox[2][1]

    # Pass 1: bucket into columns. `sorted` (not `list.sort`) keeps the
    # caller's list untouched and also accepts tuples or other iterables.
    ordered = sorted(
        results,
        key=lambda item: (left_edge(item[0]), top_edge(item[0])),
    )

    columns = []
    for item in ordered:
        item_left = left_edge(item[0])
        for column in columns:
            # Only the column's most recently added item is compared.
            if abs(item_left - left_edge(column[-1][0])) <= horizontal_separation:
                column.append(item)
                break
        else:
            # No existing column was close enough: start a new one.
            columns.append([item])

    # Pass 2: split each column into runs of vertically adjacent items.
    column_groups = []
    for column in columns:
        column.sort(key=lambda item: top_edge(item[0]))
        current_group = []
        for item in column:
            if (
                not current_group
                or top_edge(item[0]) - bottom_edge(current_group[-1][0]) <= vertical_threshold
            ):
                current_group.append(item)
            else:
                column_groups.append(current_group)
                current_group = [item]
        # Columns are never empty, so there is always a trailing group.
        column_groups.append(current_group)

    # Pass 3: fold isolated (single-item) groups into the preceding group.
    merged_groups = []
    for group in column_groups:
        if len(group) <= 1 and merged_groups:
            merged_groups[-1].extend(group)
        else:
            merged_groups.append(group)

    # Keep only the recognised text of every detection.
    return [[text for _, text, _ in group] for group in merged_groups]


# Single File

In [None]:
import easyocr

# English-only EasyOCR reader used by the single-file section.
reader = easyocr.Reader(['en'])

# OCR a single image and list each recognised text with its confidence.
results = reader.readtext(
    r"C:\Users\mthil\local\DE\Data-Engineering\ignore\ocr-images\20241223_123132.jpg"
)
for detection in results:
    bbox, text, confidence = detection
    print(f"Detected text: {text} (Confidence: {confidence})")

In [8]:
# NOTE(review): there are no parentheses here, so this stores the bound method
# `Reader.getChar` itself, not the result of calling it — confirm whether a
# call (with whatever arguments `getChar` expects) was intended.
reader_model_lang = reader.getChar

In [9]:
# Echo the object bound to `reader_model_lang` as this cell's output.
reader_model_lang

<bound method Reader.getChar of <easyocr.easyocr.Reader object at 0x000001AF8E5716D0>>

In [None]:
# Cluster the detections currently held in `results`.
grouped_text = group_items(results)

# Print one line per cluster, numbered from 1.
for group_number, members in enumerate(grouped_text, 1):
    print(f"Group {group_number}: {members}")

# Multi File

In [None]:
import os
import easyocr

# Directory containing the images
input_folder = r"C:\Users\mthil\local\DE\Data-Engineering\ocr\ocr-images"
# Directory where the results will be stored
output_folder = r"C:\Users\mthil\local\DE\Data-Engineering\ocr\ocr-results"

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Initialize EasyOCR reader
reader = easyocr.Reader(['en'])  # Add other languages if needed


def _to_builtin(value):
    """Recursively convert NumPy scalars/arrays into plain Python numbers and lists.

    Anything exposing ``tolist()`` is converted through it; lists and tuples
    are rebuilt element by element; every other value is returned unchanged.
    """
    if hasattr(value, "tolist"):
        return value.tolist()
    if isinstance(value, (list, tuple)):
        return type(value)(_to_builtin(part) for part in value)
    return value


# Loop through each image in the input folder
for image_file in os.listdir(input_folder):
    # Only image files are processed; everything else is ignored.
    if not image_file.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    # Full path of the image
    image_path = os.path.join(input_folder, image_file)

    # Read text from the image
    results = reader.readtext(image_path)

    # File to store results (one text file per image)
    result_file_path = os.path.join(
        output_folder, f"{os.path.splitext(image_file)[0]}_ocr_results.txt"
    )

    # Write one Python-literal tuple per line, followed by a comma.
    # Values are converted to built-in types first: if the OCR output holds
    # NumPy scalars (TODO confirm for the installed EasyOCR/NumPy versions),
    # NumPy >= 2 would render them as e.g. `np.int32(5)`, which a loader that
    # evaluates these lines cannot parse unless `np` happens to be in scope.
    # For values that are already plain Python the written text is unchanged.
    with open(result_file_path, 'w', encoding='utf-8') as f:
        for result in results:
            f.write(f"{_to_builtin(result)},\n")

    print(f"Results saved for {image_file} in {result_file_path}")

print("OCR processing complete!")





In [None]:
# Process each OCR results file
GROUPED_PREFIX = "grouped_"

for txt_file in os.listdir(output_folder):
    # This cell writes "grouped_<name>_ocr_results.txt" into the same folder,
    # and those names also end with "_ocr_results.txt". Without this guard a
    # re-run would try to parse its own "Group 1: [...]" output and crash.
    # Assumes no raw OCR result file is itself named "grouped_...".
    if txt_file.startswith(GROUPED_PREFIX) or not txt_file.endswith("_ocr_results.txt"):
        continue

    txt_file_path = os.path.join(output_folder, txt_file)

    # Read OCR results from the file
    with open(txt_file_path, 'r', encoding='utf-8') as file:
        raw_results = file.readlines()

    # Convert raw string results to Python objects, ignoring blank lines.
    # NOTE(review): `eval` executes whatever expression the file contains, so
    # only run this on result files produced by this notebook. Once the files
    # are known to hold plain literals only, `ast.literal_eval` is the safer
    # drop-in replacement.
    stripped_entries = (line.strip().rstrip(',') for line in raw_results)
    results = [eval(entry) for entry in stripped_entries if entry]

    # Apply grouping function
    grouped_text = group_items(results)

    # Save grouped results
    grouped_file_path = os.path.join(output_folder, f"{GROUPED_PREFIX}{txt_file}")
    with open(grouped_file_path, 'w', encoding='utf-8') as file:
        for i, group in enumerate(grouped_text, start=1):
            file.write(f"Group {i}: {group}\n")

    print(f"Grouped results saved for {txt_file} as {grouped_file_path}")

print("Grouping process complete!")