# In [2]:
import cv2
import os
import matplotlib.pyplot as plt
from skimage.io import imsave

# Root folder holding the raw images, and the folder that receives the
# preprocessed copies.
input_dir, output_dir = 'dataset', 'preprocessed'

# Function to apply reverse thresholding
def reverse_threshold(img, threshold):
    """Return a binary mask of *img* built by thresholding its inverse.

    The image is reduced to grayscale and inverted, then thresholded so
    that inverted pixels above *threshold* become 0 and all others 255.
    For 8-bit input this means: gray levels below ``255 - threshold``
    map to 0, everything else maps to 255.

    Args:
        img: Image array. Three-channel input is interpreted in OpenCV's
            BGR channel order (what ``cv2.imread`` produces); a
            two-dimensional array is used as grayscale as-is.
        threshold: Cut-off applied to the inverted grayscale image.

    Returns:
        Single-channel mask with the same height and width as *img*.
    """
    if img.ndim == 2:
        # Already single-channel; cvtColor would reject it.
        gray = img
    else:
        # cv2.imread yields BGR, so BGR2GRAY (not RGB2GRAY) applies the
        # luminance weights to the right channels.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    inv = cv2.bitwise_not(gray)
    # THRESH_BINARY_INV is THRESH_BINARY followed by a bitwise inversion
    # (with maxval 255), so the separate second inversion is not needed.
    return cv2.threshold(inv, threshold, 255, cv2.THRESH_BINARY_INV)[1]

# Function to crop image and resize it
def crop_and_resize(image_path, threshold=200, size=(7, 22)):
    """Crop an image to its largest thresholded region and resize it.

    The image is loaded with ``cv2.imread``, turned into a binary mask by
    ``reverse_threshold``, and the bounding rectangle of the external
    contour with the largest area is cut out of the original image and
    resized.

    Args:
        image_path: Path of the image file to load.
        threshold: Threshold forwarded to ``reverse_threshold``.
        size: Target size passed to ``cv2.resize`` as ``(width, height)``.

    Returns:
        The cropped and resized image, in the channel order produced by
        ``cv2.imread`` (BGR).

    Raises:
        OSError: If the file cannot be read or decoded as an image.
        ValueError: If the thresholded mask contains no contours.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f"Could not read image: {image_path}")

    mask = reverse_threshold(img, threshold)

    # findContours returns (contours, hierarchy) on OpenCV 4.x and
    # (image, contours, hierarchy) on 3.x; [-2] is the contour list in both.
    contours = cv2.findContours(
        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    if not contours:
        raise ValueError(f"No contours found in image: {image_path}")

    largest = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest)
    crop_img = img[y:y + h, x:x + w]
    return cv2.resize(crop_img, size)

# Walk the input tree, mirror its directory layout under output_dir, and
# write a preprocessed copy of every .png file found.
for root, dirs, files in os.walk(input_dir):
    # Create the output counterpart of the directory being visited. Since
    # os.walk visits every directory, this mirrors the whole tree (empty
    # folders included) and also creates output_dir itself, which is
    # needed when images sit directly in input_dir.
    target_dir = os.path.normpath(
        os.path.join(output_dir, os.path.relpath(root, input_dir))
    )
    os.makedirs(target_dir, exist_ok=True)

    for file in files:
        if not file.endswith(".png"):
            continue
        image_path = os.path.join(root, file)
        preprocessed_image = crop_and_resize(image_path)
        output_path = os.path.join(target_dir, file)
        # crop_and_resize returns OpenCV's BGR channel order, while
        # plt.imsave interprets 3-channel arrays as RGB; convert so the
        # saved colours are not swapped. (A cmap would be ignored for
        # RGB data, so none is passed.)
        plt.imsave(
            output_path,
            cv2.cvtColor(preprocessed_image, cv2.COLOR_BGR2RGB),
        )
