# In [14]:
from PIL import Image
import os
import matplotlib.pyplot as plt
# Morphological filtering
from skimage.morphology import opening
from skimage.morphology import disk

# Data handling
import numpy as np

# Connected component filtering
import cv2

# Batch clean-up of the character images in `folder_path`:
#   1. binarise each image with a fixed grey-level threshold,
#   2. remove tiny specks with a morphological opening,
#   3. drop connected ink components smaller than `minimum_size` pixels,
#   4. write the result back under the original file name.

black = 0
white = 255
# Grey levels strictly above this value become white, everything else
# (including a pixel exactly equal to the threshold) becomes black.
threshold = 170

# Define the minimum size (number of pixels) a component should consist of
minimum_size = 100

# Define the folder path where the images are located
folder_path = "Hiragana/か"
# Define the list of excluded image numbers (the <number> in "<character>_<number>.png")
excluded_files = [32,61]

# Morphological opening settings. They do not depend on the image, so they are built once.
blobSize = 1 # Select the maximum radius of the blobs you would like to remove
structureElement = disk(blobSize)  # you can define different shapes, here we take a disk shape

# Iterate over all the images in the folder.
# NOTE: each result is saved under the name it was loaded from, so the input files are overwritten.
for filename in os.listdir(folder_path):
    if not filename.endswith(".png"):
        continue

    # Split the filename into character and number parts. Splitting on the
    # last underscore keeps names working whose character part contains "_".
    character, number = filename.rsplit("_", 1)
    actual = int(number.split(".")[0])
    if actual in excluded_files:
        # Skip this image if it is in the list of excluded files
        continue

    # Load the image and keep only the luminance channel. The file is closed
    # explicitly because the same path is written to further down.
    with Image.open(os.path.join(folder_path, filename)) as source:
        img = source.convert("LA")
    pixels = np.array(img)[:,:,0]

    # Apply the thresholding in a single step so that every pixel ends up
    # either black or white (two strict comparisons would leave pixels equal
    # to the threshold untouched).
    pixels = np.where(pixels > threshold, white, black).astype(np.uint8)

    # Morphological opening.
    # We need to invert the image such that black is background and white foreground to perform the opening
    pixels = np.invert(opening(np.invert(pixels), structureElement))

    # Connected component filtering on the inverted image (ink = non-zero).
    nb_components, output, stats, centroids = cv2.connectedComponentsWithStats(np.invert(pixels), connectivity=8)
    keep = stats[:, cv2.CC_STAT_AREA] >= minimum_size
    keep[0] = False  # label 0 is the background, never draw it as ink
    # `output` holds one label per pixel; look up whether that label is kept.
    pixels = np.where(keep[output], black, white).astype(np.uint8)

    newImg = Image.fromarray(pixels).convert('RGB')

    # Create the new filename for the modified image
    new_filename = f"{character}_{number}"

    # Save the modified image with the new filename
    newImg.save(os.path.join(folder_path, new_filename))