# Introduction

In this notebook, we will perform text detection in images using OCR. We will start with image preprocessing and then conduct OCR using Tesseract, EasyOCR, and KerasOCR. By the end of this notebook, you will understand how to enhance image quality for OCR and extract text effectively from images.



# Install and Import Libraries

In [None]:
# Refresh the package index, then install the Tesseract OCR engine as a system package.
# `-y` answers the confirmation prompt automatically so the cell runs unattended.
# pytesseract (installed below) drives the `tesseract` executable, so it must be present.
!apt-get update
!apt-get install -y tesseract-ocr

In [None]:
!pip install pytesseract
!pip install easyocr
!pip install keras-ocr

In [None]:
import cv2
import pytesseract
import easyocr
import keras_ocr
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

# Image Preprocessing

In [None]:
# Grayscale conversion
def get_grayscale(image):
    """Return a single-channel grayscale version of ``image``.

    Parameters
    ----------
    image : numpy.ndarray
        Either a colour image in OpenCV's BGR channel order, or an image that
        is already 2-D (grayscale).

    Returns
    -------
    numpy.ndarray
        The grayscale image. A 2-D input is returned unchanged (same object).
    """
    # A 2-D array has no channel axis, so it is already grayscale. Passing it to
    # cvtColor with COLOR_BGR2GRAY would raise, so hand it back as-is instead.
    if image.ndim == 2:
        return image
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Noise removal
def remove_noise(image, kernel_size=5):
    """Reduce noise in ``image`` with a median blur.

    Parameters
    ----------
    image : numpy.ndarray
        Image to filter.
    kernel_size : int, optional
        Side length of the square median window. Must be an odd integer of at
        least 3, which is what ``cv2.medianBlur`` accepts. Defaults to 5, the
        value this notebook has always used.

    Returns
    -------
    numpy.ndarray
        The blurred image as returned by ``cv2.medianBlur``.

    Raises
    ------
    ValueError
        If ``kernel_size`` is even or smaller than 3.
    """
    # Validate up front so a bad size produces a readable message instead of an
    # OpenCV assertion failure.
    if kernel_size < 3 or kernel_size % 2 == 0:
        raise ValueError(
            "kernel_size must be an odd integer >= 3, got {!r}".format(kernel_size)
        )
    return cv2.medianBlur(image, kernel_size)

# Sharpening
def sharpen_image(image):
    """Sharpen ``image`` by convolving it with a fixed 3x3 kernel.

    The kernel weights the centre pixel by 5 and subtracts each of its four
    direct neighbours (the weights sum to 1).

    Parameters
    ----------
    image : numpy.ndarray
        Image to sharpen.

    Returns
    -------
    numpy.ndarray
        The filtered image returned by ``cv2.filter2D``.
    """
    sharpening_kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ])
    # The second argument (-1) is filter2D's ddepth; in OpenCV this asks for an
    # output with the same depth as the input.
    sharpened = cv2.filter2D(image, -1, sharpening_kernel)
    return sharpened

# Binarization
def binarize_image(image):
    """Binarize ``image`` with a binary threshold combined with the Otsu flag.

    Parameters
    ----------
    image : numpy.ndarray
        Image to threshold.
        NOTE(review): Otsu thresholding presumably needs a single-channel
        8-bit image - confirm against callers.

    Returns
    -------
    numpy.ndarray
        The thresholded image (second element of ``cv2.threshold``'s result),
        using 255 as the maximum value.
    """
    threshold_mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    # cv2.threshold returns a pair; only the image (second item) is needed here.
    _, binary = cv2.threshold(image, 0, 255, threshold_mode)
    return binary

# Helper display function
def display_image(image, title, is_rgb=False):
    """Show ``image`` with matplotlib, without axes, under the given title.

    Parameters
    ----------
    image : numpy.ndarray
        Image to show.
    title : str
        Text placed above the image.
    is_rgb : bool, optional
        When True, ``image`` is treated as a colour image in OpenCV's BGR
        channel order and converted to RGB before plotting. When False
        (default), the image is drawn as-is with the ``'gray'`` colormap.
    """
    if is_rgb:
        # OpenCV loads colour images as BGR while matplotlib expects RGB.
        pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        imshow_options = {}
    else:
        pixels = image
        imshow_options = {'cmap': 'gray'}

    plt.imshow(pixels, **imshow_options)
    plt.title(title)
    plt.axis('off')
    plt.show()

In [None]:
image_path = 'Basic_OCR.png'
image = cv2.imread(image_path)

# cv2.imread reports a missing or unreadable file by returning None rather than
# raising, so stop here with a clear message instead of failing inside a later
# OpenCV call with an opaque assertion error.
if image is None:
    raise FileNotFoundError("Could not read image file: {}".format(image_path))

display_image(image, "Original Image", is_rgb=True)

# Each stage consumes the previous stage's output:
# grayscale -> denoise -> sharpen -> binarize.
gray_image = get_grayscale(image)
display_image(gray_image, "Grayscale Image")

denoised_image = remove_noise(gray_image)
display_image(denoised_image, "Denoised Image")

sharpened_image = sharpen_image(denoised_image)
display_image(sharpened_image, "Sharpened Image")

binarized_image = binarize_image(sharpened_image)
display_image(binarized_image, "Binarized Image")

# Text Detection using Tesseract

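## Draw bounding boxes

`pytesseract.image_to_boxes` returns one line per recognised character with the box's left, bottom, right and top coordinates, measured from the bottom-left corner of the image. OpenCV measures from the top-left corner, so the y-coordinates are flipped before drawing.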

In [None]:
image_path = 'Basic_OCR.png'
image = cv2.imread(image_path)

# Derive the grayscale input from the image loaded in this cell, so the cell does
# not depend on a `gray_image` left over from the preprocessing section.
gray_image = get_grayscale(image)
boxes = pytesseract.image_to_boxes(gray_image)

# Draw on a copy so `image` itself stays unannotated.
boxes_image = image.copy()
image_height = boxes_image.shape[0]

for line in boxes.splitlines():
    # Each line reads "<char> <left> <bottom> <right> <top> <page>". Splitting from
    # the right keeps the five numeric fields intact whatever the character is.
    fields = line.rsplit(' ', 5)
    left, bottom, right, top = (int(value) for value in fields[1:5])
    # Tesseract's y axis starts at the bottom edge and OpenCV's at the top edge,
    # so subtract from the image height to convert.
    cv2.rectangle(
        boxes_image,
        (left, image_height - bottom),
        (right, image_height - top),
        (0, 255, 0),
        2,
    )

display_image(boxes_image, "Image with Bounding Boxes", is_rgb=True)

Extract text

In [None]:
# Apply any type of preprocessing if needed
# Here only the grayscale conversion is applied to `image` before recognition.
gray_image = get_grayscale(image)

# image_to_string is called with its default options; the returned text is printed as-is.
extracted_text = pytesseract.image_to_string(gray_image)
print(extracted_text)

# Text Detection using EasyOCR

In [None]:
# Initialize the EasyOCR reader once; the following cells reuse it through `reader`.
# NOTE(review): construction presumably loads (and may download) the models - confirm.
reader = easyocr.Reader(['en'])  # 'en' selects the English language

In [None]:
# Load the sample image with OpenCV and pass the resulting array to EasyOCR.
image_path = 'Basic_OCR.png'
image = cv2.imread(image_path)

# The cells below read each detection's bounding box at index 0 and its text at index 1.
result = reader.readtext(image)
print(result)

Print only the detected words

In [None]:
# The second item of every detection is the recognised string; the rest is ignored here.
for _box, text, *_rest in result:
    print(text)

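## Draw bounding boxes

The first element of each EasyOCR detection is its bounding box, given as a list of corner points.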

In [None]:
# Draw on a copy so `image` stays untouched and re-running this cell does not
# stack new boxes on top of previously drawn ones.
easyocr_boxes_image = image.copy()

for detection in result:
    # detection[0] is the outline of the text region as a list of (x, y) corner
    # points. Drawing the closed polygon through all of them also outlines tilted
    # text correctly, which a rectangle spanned by two opposite corners would not.
    # OpenCV needs integer pixel coordinates, hence the int32 conversion.
    corners = np.array(detection[0], dtype=np.int32).reshape(-1, 1, 2)
    cv2.polylines(easyocr_boxes_image, [corners], True, (0, 255, 0), 2)

display_image(easyocr_boxes_image, "Image with Bounding Boxes", is_rgb=True)

# Text Detection using KerasOCR

In [None]:
# Warning! Don't run this code unless you had an error when creating a pipeline for OCR using KerasOCR in the next code cell
#!pip install tensorflow==2.9.1 keras==2.9.0
# After running this code you will have to restart the session and import the libraries again

In [None]:
# Create a pipeline for OCR using KerasOCR
# The constructor is called without arguments, i.e. with the library's defaults.
# NOTE(review): presumably this loads (and on first use downloads) pretrained weights - confirm.
pipeline = keras_ocr.pipeline.Pipeline()

In [None]:
image_path = 'Basic_OCR.png'

# Read the file with KerasOCR's own loader.
image = keras_ocr.tools.read(image_path)

# recognize() takes a list of images, so the single image is wrapped in one.
images = [image]

# Run OCR; the result is iterated as one group of predictions per input image.
prediction_groups = pipeline.recognize(images)

# The first item of every prediction is the recognised text.
for image_predictions in prediction_groups:
    for word, *_ in image_predictions:
        print(word)

KerasOCR provides a built-in method `keras_ocr.tools.drawAnnotations()` that simplifies the process of drawing bounding boxes and displaying text annotations on images.

In [None]:
# Create the figure explicitly and let KerasOCR draw its annotations onto `ax`.
fig, ax = plt.subplots(figsize=(10, 10))
keras_ocr.tools.drawAnnotations(image=images[0], predictions=prediction_groups[0], ax=ax)

# Title and axis visibility are set on the same axes that holds the annotations.
ax.set_title("Image with KerasOCR Annotations")
ax.axis('off')
plt.show()