In [None]:
# Read an input image, binarize it to pure black and white so the text stands out, save the result, and run OCR on it

import cv2
import numpy as np
import pytesseract

# Load the image. cv2.imread does not raise on a missing or undecodable file;
# it returns None, which would otherwise surface below as an opaque cvtColor
# assertion failure, so fail early with a clear message instead.
img = cv2.imread('image.png')
if img is None:
    raise FileNotFoundError("Could not read 'image.png' (file missing or not a decodable image)")

# Convert the image to grayscale (imread's default mode yields a BGR image)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Apply thresholding. With THRESH_OTSU the cut-off is chosen automatically and
# the explicit threshold argument (0) is ignored; pixels above the cut-off
# become 255, all others become 0.
_, thresholded = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Persist the binarized image for the later cells. imwrite signals failure via
# its boolean return value, so check it explicitly.
# NOTE(review): JPEG is lossy, so the file on disk is no longer strictly
# two-valued; a lossless format (e.g. PNG) would preserve it, but the filename
# is kept because later cells read "binary_image.jpg".
if not cv2.imwrite("binary_image.jpg", thresholded):
    raise OSError("Could not write 'binary_image.jpg'")

# Run OCR on the in-memory binarized image (not on the lossy file written above)
text = pytesseract.image_to_string(thresholded, lang='eng')
print(text)

In [None]:
# Showing the binarized image and performing connected component analysis

import cv2
import numpy as np

# Load the binary image as a single-channel grayscale array (flag 0).
# cv2.imread returns None instead of raising when the file cannot be read.
binary_img = cv2.imread("binary_image.jpg", 0)
if binary_img is None:
    raise FileNotFoundError("Could not read 'binary_image.jpg' (file missing or not a decodable image)")

# JPEG compression is lossy, so the reloaded image contains near-black and
# near-white values rather than exactly 0/255. connectedComponentsWithStats
# treats every non-zero pixel as foreground, so those artifacts would create
# spurious components; snap the image back to two values first.
_, binary_img = cv2.threshold(binary_img, 127, 255, cv2.THRESH_BINARY)

# Perform connected component analysis (8-connectivity: diagonal neighbours join).
# NOTE(review): non-zero (white) pixels are the foreground here. If the source
# image is dark text on a light background, the components found are the light
# regions, not the glyphs — confirm the intended polarity.
num_components, labels, stats, centroids = cv2.connectedComponentsWithStats(binary_img, connectivity=8)

# Draw on a 3-channel copy: on a single-channel image only the first value of
# the colour tuple is used, so (255, 0, 0) would render as white, not blue.
annotated_img = cv2.cvtColor(binary_img, cv2.COLOR_GRAY2BGR)

# Display the results. Label 0 is the background component, so start at 1.
for i in range(1, num_components):
    # Each stats row is (left, top, width, height, area); cast to plain ints
    # for the drawing call.
    x, y, w, h, area = (int(v) for v in stats[i])
    cv2.rectangle(annotated_img, (x, y), (x + w, y + h), (255, 0, 0), 2)

# Show the image with connected components.
# cv2.imshow opens a native window and waitKey(0) blocks until a key is
# pressed; this needs an OpenCV build with GUI support and a display.
cv2.imshow("Connected Components", annotated_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
# Adding character bounding boxes to the binarized images

import cv2
import numpy as np

# Load the binary image as a single-channel grayscale array (flag 0).
# cv2.imread returns None instead of raising when the file cannot be read.
binary_img = cv2.imread("binary_image.jpg", 0)
if binary_img is None:
    raise FileNotFoundError("Could not read 'binary_image.jpg' (file missing or not a decodable image)")

# JPEG compression is lossy, so the reloaded image is not strictly 0/255.
# findContours treats every non-zero pixel as foreground, so re-binarize to
# keep compression artifacts from producing spurious contours.
_, binary_img = cv2.threshold(binary_img, 127, 255, cv2.THRESH_BINARY)

# Find the outer contours of the connected components.
# OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
# (contours, hierarchy); indexing with [-2] picks the contours in every case.
# NOTE(review): non-zero (white) pixels are the foreground here. If the source
# image is dark text on a light background, the outlines found are of the
# light regions, not the glyphs — confirm the intended polarity.
contours = cv2.findContours(binary_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

# Draw on a 3-channel copy: on a single-channel image only the first value of
# the colour tuple is used, so (255, 0, 0) would render as white, not blue.
annotated_img = cv2.cvtColor(binary_img, cv2.COLOR_GRAY2BGR)

# Loop over each contour and compute the bounding box
for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    cv2.rectangle(annotated_img, (x, y), (x + w, y + h), (255, 0, 0), 2)

# Save the image with the character bounding boxes. imwrite signals failure
# via its boolean return value, so check it explicitly.
if not cv2.imwrite("character_bounding_boxes.jpg", annotated_img):
    raise OSError("Could not write 'character_bounding_boxes.jpg'")