<a href="https://colab.research.google.com/github/SunSlick2/booktrade/blob/main/Test_EASYOCR_v2debug.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import mss
import mss.tools
from PIL import Image
import easyocr
import re
import os
import sys
import numpy as np
import cv2

# --- Set the OpenMP KMP_DUPLICATE_LIB_OK environment variable ---
# NOTE(review): this assignment executes after the imports above. If the
# OpenMP duplicate-library complaint still appears, the variable probably has
# to be set before those imports run -- confirm and move it if so.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# --- Configuration ---
# Snip rectangle given as (top_left_x, top_left_y) and (bottom_right_x, bottom_right_y).
top_left_x = 331
top_left_y = 383
bottom_right_x = 418
bottom_right_y = 406

# Region dict passed to capture_screen_region(): origin plus a width/height
# derived from the two corners above.
capture_region = {
    "left": top_left_x,
    "top": top_left_y,
    "width": bottom_right_x - top_left_x,
    "height": bottom_right_y - top_left_y,
}

# --- Output File Path Configuration ---
# The extracted date is written to "<script_dir>/Book Trade/<output_file_name>".
# __file__ does not exist when this code runs as a notebook cell or in an
# interactive interpreter, so fall back to the current working directory
# instead of crashing with NameError.
try:
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    script_dir = os.getcwd()
output_dir = os.path.join(script_dir, "Book Trade")
output_file_name = "extracted_date_easyocr.txt"
output_file_path = os.path.join(output_dir, output_file_name)

# If 'Book Trade' is *not* a subfolder of the script directory, set a fixed absolute path instead:
# output_file_path = "C:\\Users\\abc\\OneDrive\\Documents\\Tools\\PythonScripts\\Book Trade\\extracted_date_easyocr.txt"


# Directory holding locally stored EasyOCR models (e.g. after a manual download).
easyocr_model_dir = os.path.join(os.path.expanduser('~'), '.EasyOCR', 'model')

# Build the module-wide EasyOCR reader exactly once; ocr_image_for_date()
# reuses this `reader` object on every call.
try:
    sys.stdout.write("Attempting to initialize EasyOCR reader...\n")
    sys.stdout.flush()
    reader = easyocr.Reader(
        download_enabled=False,  # never fetch models from the internet
        model_storage_directory=easyocr_model_dir,  # load models from the local directory
        lang_list=['en'],  # English only
    )
    sys.stdout.write("✅ EasyOCR reader initialized successfully from local models.\n")
    sys.stdout.flush()
except Exception as e:
    # Without a reader nothing else in the script can run, so report and stop.
    sys.stderr.write(f"❌ Error initializing EasyOCR reader. Please ensure models are in '{easyocr_model_dir}' and are correct: {e}\n")
    sys.stderr.flush()
    sys.exit(1)


def capture_screen_region(region):
    """Grab one rectangle of the screen and return it as an RGB PIL image.

    Args:
        region: Rectangle handed straight to ``mss``'s ``grab``; the caller in
            this file passes a dict with ``left``/``top``/``width``/``height``.

    Returns:
        The captured ``PIL.Image.Image``, or ``None`` if anything raised while
        capturing (the error is reported on stderr).
    """
    try:
        sys.stdout.write("Capturing screen region...\n")
        sys.stdout.flush()
        with mss.mss() as grabber:
            raw_shot = grabber.grab(region)
            # Rebuild the raw RGB bytes as a PIL image for later processing.
            return Image.frombytes("RGB", raw_shot.size, raw_shot.rgb)
    except Exception as e:
        sys.stderr.write(f"❌ Error capturing screen: {e}\n")
        sys.stderr.flush()
    return None

# dd/mm/yyyy with '/', '-' or '.' separators; compiled once for reuse.
_DATE_PATTERN = re.compile(r'\b(\d{1,2})[/\-.](0[1-9]|1[0-2])[/\-.]((?:19|20)\d{2})\b')


def _find_valid_date(text):
    """Return the first real calendar date in *text* as ``dd/mm/yyyy``, else ``None``."""
    # Local import: the file's import header does not bring in datetime.
    import datetime

    for match in _DATE_PATTERN.finditer(text):
        day, month, year = match.groups()
        try:
            # Reject candidates the regex lets through but the calendar does
            # not (day 00, day 32+, 31/02, ...); these are OCR misreads.
            datetime.date(int(year), int(month), int(day))
        except ValueError:
            continue
        return f"{day.zfill(2)}/{month}/{year}"
    return None


def ocr_image_for_date(image):
    """Run EasyOCR on *image* and return the first valid date it contains.

    The image is converted to grayscale and binarised with Otsu thresholding
    before being passed to the module-level ``reader``. Each recognised text
    fragment is searched on its own first; the space-joined text of all
    fragments is searched last as a fallback.

    Args:
        image: Image to read (``main`` passes a PIL image), or ``None``.

    Returns:
        The date as a zero-padded ``dd/mm/yyyy`` string, or ``None`` when
        *image* is ``None``, no valid date is found, or OCR/parsing raised
        (the error is reported on stderr).
    """
    if image is None:
        return None

    try:
        sys.stdout.write("Performing OCR with EasyOCR...\n")
        sys.stdout.flush()

        # EasyOCR and OpenCV operate on NumPy arrays, not PIL images.
        image_np = np.array(image)

        # --- Pre-processing for better OCR accuracy ---
        # 1. Grayscale.
        gray_image = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
        # 2. Otsu's thresholding picks the black/white cut-off automatically.
        _, binary_image = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        results = reader.readtext(binary_image)

        fragments = [text for (_bbox, text, _prob) in results]
        full_ocr_text = " ".join(fragments)

        sys.stdout.write(f"DEBUG: EasyOCR Raw Text (concatenated): '{full_ocr_text.strip()}'\n")
        sys.stdout.flush()

        # Individual fragments take priority; the joined text is the fallback.
        for candidate in (*fragments, full_ocr_text):
            found = _find_valid_date(candidate)
            if found is not None:
                return found

        return None

    except Exception as e:
        sys.stderr.write(f"❌ Error during EasyOCR processing or date parsing: {e}\n")
        sys.stderr.flush()
        return None

def write_result_to_file(value, file_path):
    """Write *value* to *file_path*, replacing any existing content.

    The parent directory is created when *file_path* names one. A bare file
    name (no directory component) is written to the current working directory;
    ``os.makedirs("")`` would raise, so directory creation is skipped then.

    Args:
        value: Text to write; ``None`` produces an empty file.
        file_path: Destination path.

    Returns:
        None. Failures are reported on stderr and deliberately not re-raised,
        so a write problem never masks the caller's own outcome.
    """
    try:
        sys.stdout.write(f"Attempting to write result to file: {file_path}\n")
        sys.stdout.flush()

        target_dir = os.path.dirname(file_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
            sys.stdout.write(f"Directory '{target_dir}' ensured to exist.\n")
            sys.stdout.flush()

        with open(file_path, "w", encoding="utf-8") as f:
            f.write(value if value is not None else "")
        sys.stdout.write(f"✔️ File '{file_path}' written successfully.\n")
        sys.stdout.flush()
    except Exception as e:
        sys.stderr.write(f"❌ CRITICAL ERROR: Could not write to file '{file_path}': {e}\n")
        sys.stderr.flush()
        # Re-raise here instead if a failed write must abort the run.

def main():
    """Capture the configured region, OCR it for a date, and record the result.

    A debug copy of the capture is saved as ``captured_date_region_debug.png``.
    Whatever happens, the ``finally`` clause writes the outcome (the date, or
    an empty string when none was obtained) to ``output_file_path`` and prints
    a summary line.
    """
    sys.stdout.write("Entering main function...\n")
    sys.stdout.flush()
    extracted_date = None
    try:
        snapshot = capture_screen_region(capture_region)
        if not snapshot:
            # Nothing captured; the finally clause still records the empty result.
            return

        screenshot_debug_path = "captured_date_region_debug.png"
        snapshot.save(screenshot_debug_path)
        sys.stdout.write(f"Captured region saved to {screenshot_debug_path}\n")
        sys.stdout.flush()

        extracted_date = ocr_image_for_date(snapshot)
    except Exception as e:
        sys.stderr.write(f"❌ An unexpected error occurred in main: {e}\n")
        sys.stderr.flush()
    finally:
        sys.stdout.write("Exiting main function (finally block)...\n")
        sys.stdout.flush()
        write_result_to_file(extracted_date, output_file_path)
        summary = (
            f"✔️ Successfully extracted and processed date: '{extracted_date}'\n"
            if extracted_date
            else f"⚠️ No date found or error occurred in OCR. Wrote empty string to output file.\n"
        )
        sys.stdout.write(summary)
        sys.stdout.flush()


# Script entry point: run the capture -> OCR -> write pipeline, then print a
# final marker line once main() has returned.
if __name__ == "__main__":
    main()
    sys.stdout.write("--- Script finished ---\n")
    sys.stdout.flush()