In [1]:
# Record the Python version of the environment used for this benchmark run.
# NOTE(review): `!python` runs whichever `python` the shell finds first on PATH,
# which may not be the interpreter backing this kernel - confirm if it matters.
!python --version

Python 3.9.23


In [2]:
import time
import tracemalloc #used to measure memory usage of your Python program.
from collections import Counter

import cv2
import easyocr

In [21]:
# --- Configuration -----------------------------------------------------------
# Raw string: the Windows path contains backslashes (\h, \T, \s) that would
# otherwise be parsed as (invalid) escape sequences.
IMAGE_PATH = r"C:\hope\Text Extraction Benchmarking\spanish-custom-text-watch-out-aluminum-sign-12-x-18.webp"
GROUND_TRUTH = ['CUIDADO', 'SU', 'MENSAJE', 'AQUI']  # Manually set based on actual image

# --- Load --------------------------------------------------------------------
img = cv2.imread(IMAGE_PATH)
# cv2.imread signals failure by returning None instead of raising, so fail
# here with a clear message rather than with an obscure error inside resize().
if img is None:
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")

# --- Preprocess: 2x upscale -> grayscale -> Otsu binarisation -----------------
resized = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
# With THRESH_OTSU the threshold is chosen automatically and the `thresh`
# argument is ignored, so pass 0 instead of a misleading hand-picked value.
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

# Save preprocessed image (the OCR step reads it back from disk).
# imwrite returns True on success; leaving it as the last expression shows it.
cv2.imwrite("resized_preprocessed.png", thresh)

True

In [22]:
# Start measuring time and memory.
# Everything executed between this cell and the cell that reads the clock again
# is included in the reported time, so run the cells back-to-back.
start_time = time.time()
tracemalloc.start() # Begin tracing memory allocations made from here on.

In [23]:
# The sign's text is Spanish ('CUIDADO SU MENSAJE AQUI'), so load EasyOCR's
# Spanish model ('es') rather than German ('de').
reader = easyocr.Reader(['es'])
results = reader.readtext("resized_preprocessed.png")

# Each result is a (bounding box, text, confidence) tuple. Keep only the text
# and split multi-word phrases so the list holds one word per entry.
predicted_text = [word for _, phrase, _ in results for word in phrase.split()]


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


In [24]:
# `results` is a list of tuples, each holding the bounding-box coordinates, the predicted text, and a confidence score (how confident the model is in its prediction).
# For `predicted_text` we only need the text element of each tuple.

In [25]:
# Display the list of words extracted by the OCR step.
predicted_text

['CUIDADO', 'SU', 'MENSAJE', 'AQUI']

In [26]:
# Stop measuring time and memory.
# The timed window runs from the cell that set `start_time` to this line, so it
# includes constructing the EasyOCR reader as well as the readtext() call.
end_time = time.time()
# get_traced_memory() returns (current, peak); `peak` is the highest traced
# usage since tracemalloc.start() and is the figure reported below.
# NOTE(review): tracemalloc only traces allocations made through Python's
# allocators; memory allocated natively by the OCR backend is presumably not
# counted, so treat the figure as a lower bound - confirm.
current, peak = tracemalloc.get_traced_memory()
tracemalloc.stop()

In [27]:
# Accuracy calculation
# Compare predictions and ground truth as multisets: Counter & Counter keeps the
# minimum count per word, so each ground-truth word can be credited at most as
# many times as it actually occurs. Counting every prediction that appears in
# GROUND_TRUTH would reward duplicate detections and could exceed 100%.
matched_words = Counter(predicted_text) & Counter(GROUND_TRUTH)
correct_words = sum(matched_words.values())
total_words = len(GROUND_TRUTH)
# Guard against an empty ground-truth list to avoid dividing by zero.
accuracy = (correct_words / total_words) * 100 if total_words else 0

In [28]:
# Collect the evaluation results into a single summary record.
elapsed_seconds = end_time - start_time
peak_megabytes = peak / 10**6  # bytes -> MB (decimal)

output = {
    "Tool": "EasyOCR",
    # NOTE(review): this label is hard-coded; verify it matches the language
    # list actually passed to easyocr.Reader.
    "Language(s)": "English, German",
    "Free or Paid": "Free (Open-source)",
    # Join the word list back into one space-separated string.
    "Text Extracted": " ".join(predicted_text),
    "Correct Words": f"{correct_words} / {total_words}",
    "Accuracy": f"{accuracy:.2f}%",
    "Time Taken": f"{elapsed_seconds:.2f} seconds",
    "Memory Used": f"{peak_megabytes:.2f} MB",
}


In [29]:
# Display the benchmark summary record.
output

{'Tool': 'EasyOCR',
 'Language(s)': 'English, German',
 'Free or Paid': 'Free (Open-source)',
 'Text Extracted': 'CUIDADO SU MENSAJE AQUI',
 'Correct Words': '4 / 4',
 'Accuracy': '100.00%',
 'Time Taken': '17.28 seconds',
 'Memory Used': '60.04 MB'}