In [7]:
import easyocr as eo
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import re
import pandas as pd
import os

In [2]:
from PIL import Image, ExifTags
import numpy as np

def get_oriented_image_array(image_path):
    """
    Load an image and return its pixels as a NumPy array, upright per EXIF.

    The EXIF ``Orientation`` tag (if present) is applied so that photos taken
    with a rotated or mirrored camera come out the right way up. All eight
    EXIF orientation values are handled, not only the pure rotations.

    Parameters
    ----------
    image_path : str or path-like
        Path passed to ``PIL.Image.open``.

    Returns
    -------
    numpy.ndarray
        Result of ``np.array`` on the (possibly transposed) PIL image; channel
        layout is whatever PIL decoded (RGB for a typical JPEG).

    Notes
    -----
    Orientation handling is best-effort: if it fails, a message is printed and
    the image is returned as stored in the file.
    """
    # Local import keeps this cell runnable regardless of which import cell
    # was executed; ImageOps.exif_transpose requires Pillow >= 6.0.
    from PIL import ImageOps

    # The context manager guarantees the underlying file is closed even if
    # decoding fails part-way.
    with Image.open(image_path) as image:
        try:
            # Public API replacement for the private ``_getexif()`` lookup:
            # works for any format carrying EXIF and also covers the mirrored
            # orientations (2, 4, 5, 7). Returns an upright copy.
            oriented = ImageOps.exif_transpose(image)
        except Exception as e:
            print("❗ Orientation adjustment skipped:", e)
            oriented = image

        # Convert while the file is still open, since PIL decodes lazily.
        return np.array(oriented)


In [3]:
def preprocess_for_ocr(image_np, min_height=1000, upscale=2.0,
                       blur_ksize=(5, 5), block_size=31, c=15):
    """
    Turn an image into a binarised, denoised single-channel image for OCR.

    Steps: grayscale conversion, optional upscaling of short images,
    Gaussian blur, then Gaussian adaptive thresholding.

    Parameters
    ----------
    image_np : numpy.ndarray
        Either a colour image in RGB channel order (H, W, 3/4) or an image
        that is already single-channel (H, W) or (H, W, 1).
    min_height : int, optional
        Images whose height is below this are enlarged by ``upscale``.
    upscale : float, optional
        Scale factor applied on both axes when the image is enlarged.
    blur_ksize : tuple of int, optional
        Gaussian kernel size passed to ``cv2.GaussianBlur``.
    block_size : int, optional
        Neighbourhood size for ``cv2.adaptiveThreshold`` (must be odd, > 1).
    c : float, optional
        Constant subtracted from the local weighted mean by the threshold.

    Returns
    -------
    numpy.ndarray
        Binary (0/255) single-channel image. NOTE: when upscaling is applied
        the result is larger than ``image_np``, so coordinates found on it
        must be divided by the scale before being used on the original.
    """
    # Accept already-grayscale input instead of crashing in cvtColor, which
    # requires a 3- or 4-channel source for RGB2GRAY.
    if image_np.ndim == 2:
        gray = image_np
    elif image_np.shape[2] == 1:
        gray = image_np[:, :, 0]
    else:
        gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)

    # Resize if needed (EasyOCR prefers readable font size)
    if gray.shape[0] < min_height:
        gray = cv2.resize(gray, None, fx=upscale, fy=upscale,
                          interpolation=cv2.INTER_LINEAR)

    # Slight denoising so the threshold does not latch onto sensor noise
    blurred = cv2.GaussianBlur(gray, blur_ksize, 0)

    # Adaptive thresholding to make text pop under uneven lighting
    thresh = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, block_size, c
    )

    return thresh


In [11]:
# === STEP 3: Cut into Slices ===
def slice_image(image_np, slice_height_ratio=0.3, overlap_ratio=0.3):
    """
    Cut an image into overlapping horizontal strips covering its full height.

    Parameters
    ----------
    image_np : numpy.ndarray
        Image with rows on axis 0 and columns on axis 1.
    slice_height_ratio : float, optional
        Strip height as a fraction of the image height.
    overlap_ratio : float, optional
        Fraction of a strip's height shared with the next strip.

    Returns
    -------
    (list of numpy.ndarray, list of (int, int))
        The strips (views into ``image_np``) and, for each, its
        ``(y_start, y_end)`` row range in the full image. Both lists are
        empty for an image of height 0.
    """
    h = image_np.shape[0]

    # Clamp to at least one row: for very short images int(h * ratio) is 0,
    # which previously produced empty slices and a step of 0.
    slice_h = max(1, int(h * slice_height_ratio))
    overlap = int(slice_h * overlap_ratio)

    # The cursor must always advance, otherwise the loop below never ends
    # (e.g. overlap_ratio >= 1).
    step = max(1, slice_h - overlap)

    slices = []
    positions = []
    y = 0
    while y < h:
        y_end = min(y + slice_h, h)
        slices.append(image_np[y:y_end, :])
        positions.append((y, y_end))
        if y_end >= h:
            # Bottom reached: any further strip would lie entirely inside
            # this one and only yield duplicate detections.
            break
        y += step  # move down with overlap
    return slices, positions

# === STEP 4: Run OCR on All Slices ===
def run_ocr_on_slices(reader, slices, positions):
    """
    OCR every strip and report detections in full-image coordinates.

    Parameters
    ----------
    reader : object
        Anything exposing ``readtext(image)`` that returns an iterable of
        ``(bbox, text, confidence)`` triples, with ``bbox`` a sequence of
        ``(x, y)`` points.
    slices : sequence
        Image strips, in the same order as ``positions``.
    positions : sequence of (int, int)
        ``(y_start, y_end)`` of each strip within the full image.

    Returns
    -------
    list of (list of (x, y), text, confidence)
        All detections from all strips, in strip order, with each box's
        y-coordinates offset by its strip's ``y_start``.
    """
    detections = []
    for strip, (top, _bottom) in zip(slices, positions):
        # Only the strip's top edge is needed to undo the vertical crop;
        # x-coordinates are untouched because strips span the full width.
        detections.extend(
            ([(px, py + top) for px, py in box], label, score)
            for box, label, score in reader.readtext(strip)
        )
    return detections

In [12]:
# === STEP 5: Display Results ===
def display_results(results, image_np, result_path):
    """
    Draw OCR detections on a copy of the image, show it, and save it to disk.

    Parameters
    ----------
    results : iterable of (bbox, text, confidence)
        ``bbox`` is a sequence of ``(x, y)`` points in the coordinate system
        of ``image_np``; ``confidence`` is not drawn.
    image_np : numpy.ndarray
        Image to annotate, expected in RGB channel order (as returned by
        ``get_oriented_image_array``) or single-channel grayscale. It is not
        modified.
    result_path : str
        Base name of the output; the file is written to
        ``results/<result_path>.jpg``.

    Notes
    -----
    ``cv2.putText`` uses Hershey fonts; per the OpenCV documentation, symbols
    the font cannot render are replaced by question marks, so non-Latin text
    (e.g. Cyrillic) will not be legible in the overlay.
    """
    image = image_np.copy()

    # Promote grayscale to RGB so the coloured annotations are visible and the
    # rest of the function can assume three channels.
    if image.ndim == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

    for bbox, text, _confidence in results:
        pts = [tuple(map(int, point)) for point in bbox]
        # cv2.polylines requires int32 points; a bare np.array() of Python
        # ints is int64 on most platforms and is rejected.
        cv2.polylines(image, [np.array(pts, dtype=np.int32)], isClosed=True,
                      color=(0, 255, 0), thickness=3)
        # Colours are given in RGB because the image is RGB: red text.
        cv2.putText(image, text, pts[0], cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                    (255, 0, 0), 2)

    # The image is already RGB, which is what matplotlib expects; converting
    # it "from BGR" here would swap the red and blue channels on screen.
    plt.figure(figsize=(12, 12))
    plt.imshow(image)
    plt.axis('off')
    plt.title("OCR Results")
    plt.show()

    os.makedirs('results', exist_ok=True)
    out_file = f'results/{result_path}.jpg'
    # cv2.imwrite expects BGR channel order, so convert just for saving.
    if not cv2.imwrite(out_file, cv2.cvtColor(image, cv2.COLOR_RGB2BGR)):
        print(f"❗ Could not write {out_file}")


In [14]:
# === MAIN EXECUTION ===
img_path = 'fantastiko/fantastiko_1.jpeg'
image_np = get_oriented_image_array(img_path)
preprocessed_img = preprocess_for_ocr(image_np)

# Loading the models is slow; on a CPU-only machine OCR itself is slow too.
reader = eo.Reader(['en', 'bg'])

slices, positions = slice_image(preprocessed_img)
raw_results = run_ocr_on_slices(reader, slices, positions)

# preprocess_for_ocr may enlarge small images, so the OCR boxes live in the
# preprocessed image's coordinate system. Map them back onto the original
# image before drawing; both factors are 1.0 when no resize happened.
scale_x = image_np.shape[1] / preprocessed_img.shape[1]
scale_y = image_np.shape[0] / preprocessed_img.shape[0]
results = [
    ([(x * scale_x, y * scale_y) for (x, y) in bbox], text, conf)
    for bbox, text, conf in raw_results
]

# Name the output after the input file so results from different images do
# not overwrite or get mislabelled as one another.
result_name = os.path.splitext(os.path.basename(img_path))[0] + '_sliced'
display_results(results, image_np, result_name)

# Print text results
for bbox, text, conf in results:
    pts = [tuple(map(int, point)) for point in bbox]
    print(f'{pts} - {text} (conf: {conf:.2f})')

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


KeyboardInterrupt: 