In [None]:
# Importing of general libraries
import os
import matplotlib
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn as sk
from sklearn.metrics import confusion_matrix

# Image libraries
import cv2
from PIL import Image
from skimage.morphology import disk, opening

# Importing TensorFlow libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load the dataset.
# One generator rescales pixel values by 1/255 and reserves 20% of the images
# for validation; both subsets are read from the same directory with the same
# settings, so the shared options are stated once.
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

flow_options = dict(
    directory='Dataset/Hiragana/model1',
    target_size=(32, 32),
    batch_size=32,
    class_mode='categorical',
)

# The training iterator is created first, then the validation iterator.
train_dataset, val_dataset = (
    train_datagen.flow_from_directory(subset=subset_name, **flow_options)
    for subset_name in ('training', 'validation')
)

In [None]:
# Font used for the x tick labels (loaded from a local .ttf file).
fprop = fm.FontProperties(fname='NotoSansJP-Bold.ttf')
# Show which matplotlibrc file is active (needs the top-level `import matplotlib`).
print("hello", matplotlib.matplotlib_fname())

# Set the path to the parent folder containing the subfolders with images
parent_folder = "Images/alpha"

# Collect the name and the image count of every subfolder together, so the
# bar labels and the bar heights always have the same length and order even
# when the parent folder also contains plain files.
folder_names = []
image_counts = []

# sorted() makes the bar order deterministic (os.listdir order is arbitrary).
for folder_name in sorted(os.listdir(parent_folder)):
    folder_path = os.path.join(parent_folder, folder_name)

    # Only real subfolders are counted; plain files are skipped
    if os.path.isdir(folder_path):
        folder_names.append(folder_name)
        image_counts.append(len(os.listdir(folder_path)))

# Create the bar chart with subfolder names as labels
plt.figure(figsize=(10,6))
plt.bar(folder_names, image_counts)
plt.title("Number of Images in Each Hiragana Character Subfolder")
plt.xlabel("Subfolder Names")
plt.ylabel("Number of Images")
# Style the tick labels after the bars exist, so the font is applied to the
# category labels that are actually drawn.
plt.xticks(rotation=0, fontproperties=fprop ,fontsize=12)
plt.show()

In [None]:
# Build the CNN model
# The size of the softmax layer follows the number of class folders found by
# the training generator instead of being hard-coded, so the model always
# matches the one-hot labels produced by `train_dataset`.
num_classes = train_dataset.num_classes

model = keras.Sequential([
    # Input matches the generators' target_size of 32x32 with 3 colour channels
    Conv2D(32, (3,3), activation='relu', input_shape=(32, 32, 3)),
    MaxPooling2D((2,2)),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D((2,2)),
    Conv2D(64, (3,3), activation='relu'),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax')
])

# Compile the model
# categorical_crossentropy pairs with class_mode='categorical' (one-hot labels)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train the model
history = model.fit(train_dataset,
                    validation_data=val_dataset,
                    epochs=40)

# Evaluate the model on the held-out test directory
test_datagen = ImageDataGenerator(rescale=1./255)
test_dataset = test_datagen.flow_from_directory(
    'Hiragan_test',
    target_size=(32, 32),
    batch_size=32,
    class_mode='categorical',
    # Keep a fixed sample order: evaluation does not need shuffling, and a
    # stable order lets later predictions be matched to the generator's labels.
    shuffle=False)
test_loss, test_acc = model.evaluate(test_dataset)
print('Test accuracy:', test_acc)

In [None]:
# Confusion matrix on the test set.
# Labels and predictions are gathered batch by batch from the same batch, so
# they stay paired even if the generator shuffles its samples.
true_batches = []
prob_batches = []
for batch_index in range(len(test_dataset)):
    batch_images, batch_labels = test_dataset[batch_index]
    true_batches.append(np.argmax(batch_labels, axis=1))  # one-hot -> class index
    prob_batches.append(model.predict(batch_images, verbose=0))

pred = np.concatenate(prob_batches)
y_pred = np.argmax(pred, axis=1)
y_true = np.concatenate(true_batches)

# Class names ordered by the index the generator assigned to each folder.
# NOTE(review): assumes the test folders use the same class names as the
# training folders, so predicted and true indices mean the same class.
class_names = sorted(test_dataset.class_indices, key=test_dataset.class_indices.get)

# Passing `labels` keeps the matrix square and aligned with class_names even
# when a class never appears in y_true or y_pred.
cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(class_names)))

plt.figure(figsize = (5,9))
sns.heatmap(cm, annot = True, fmt = 'd', xticklabels = class_names, yticklabels = class_names)
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.show()

# Testing

In [None]:
# Predict the character shown in a single image.
# Uses `model` and `train_dataset` from earlier cells and the imports from the
# first cell (np, plt, cv2, Image, opening, disk).

# Load the image; convert() returns an independent RGB copy, so the file can
# be closed as soon as the `with` block ends.
with Image.open('Test_imagees/Hiragana_letter_A.png') as source_image:
    img = source_image.convert('RGB')
# Same size as the generators' target_size used for training
img = img.resize((32, 32))

black = 0
white = 255
threshold = 250

# Take the first (red) channel as the intensity image
red_channel = np.array(img)[:,:,0]

# Apply the thresholding: strictly above the threshold becomes white, every
# other pixel (including values equal to the threshold) becomes black, so the
# result only ever contains the two values.
pixels = np.where(red_channel > threshold, white, black).astype(np.uint8)

# Morphological opening
blobSize = 1 # Select the maximum radius of the blobs you would like to remove
structureElement = disk(blobSize)  # you can define different shapes, here we take a disk shape
# We need to invert the image such that black is background and white foreground to perform the opening
pixels = np.invert(opening(np.invert(pixels), structureElement))

newImg = Image.fromarray(pixels).convert('RGB')
# Connected components of the inverted image; the results are not used
# further in this cell.
nb_components, output, stats, centroids = cv2.connectedComponentsWithStats(np.invert(pixels), connectivity=8)

plt.imshow(newImg)
plt.show()

# Build a batch of one image and apply the same 1/255 rescaling that the
# data generators use.
img_array = np.expand_dims(np.array(newImg), axis=0)
img_array = img_array.astype('float32') * (1./255)

# Get the class names in the index order assigned by the training generator,
# which is the order of the model's output units.
class_names = sorted(train_dataset.class_indices, key=train_dataset.class_indices.get)
print("Class names:", class_names)

prediction = model.predict(img_array)
predicted_label = class_names[np.argmax(prediction)]

print("Predicted label:", predicted_label)