In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import precision_score, recall_score, f1_score
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pathlib
import tensorflow_datasets as tfds
from sklearn.metrics import f1_score
import sklearn
import seaborn as sn
import pandas as pd

In [None]:
# Root folder that contains the `train` and `test` sub-folders.
# Set the HIRAGANA_DATA_DIR environment variable to point at your own copy of the
# dataset; when it is not set, the original hard-coded location is used.
datasets_path = os.environ.get(
    'HIRAGANA_DATA_DIR',
    'C:/Users/andma/OneDrive/Documenti/hiragana images/hiragana_images',
)
train_data_path = os.path.join(datasets_path, 'train')
test_data_path = os.path.join(datasets_path, 'test')

In [None]:
# Build the image datasets from the class-per-folder directory layout.
# Options that are identical for every split are collected once.
_loader_options = dict(
    color_mode='grayscale',
    labels='inferred',
    image_size=(84, 84),
    seed=22,
)

# Training folder: shuffled and split 75% / 25% into training and validation sets
# (subset='both' returns the two datasets together).
train_data, val_data = tf.keras.preprocessing.image_dataset_from_directory(
    train_data_path,
    shuffle=True,
    validation_split=0.25,
    subset='both',
    **_loader_options,
)

# Test folder: loaded whole and without shuffling.
test_data = tf.keras.preprocessing.image_dataset_from_directory(
    test_data_path,
    shuffle=False,
    validation_split=None,
    subset=None,
    **_loader_options,
)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, Rescaling
from tensorflow.keras import regularizers

In [None]:
img_height = 84
img_width = 84

# Layer that multiplies every pixel value by 1/255
rescale_layer = Rescaling(1./255, input_shape=(img_height, img_width, 1))


def _rescale_batch(images, labels):
    """Apply ``rescale_layer`` to the images; the labels pass through unchanged."""
    return rescale_layer(images), labels


# Derive a rescaled copy of each split; the original datasets are left untouched
train_data_rescaled, val_data_rescaled, test_data_rescaled = (
    split.map(_rescale_batch) for split in (train_data, val_data, test_data)
)

In [None]:
filters = 32
num_classes = 50
dropout_coeff = 0.2

# Convolutional classifier: three conv/pool/dropout stages followed by two dense
# layers and a softmax over `num_classes` outputs.
# Only the first layer of a Sequential model needs `input_shape`; every later layer
# infers its input shape from the layer before it. The spatial sizes quoted below
# are for an 84x84 input with the Conv2D defaults (no padding, stride 1).
model = Sequential()

# First convolution layer: 84x84x1 -> conv 82x82x32 -> pool 41x41x32
model.add(Conv2D(filters, (3,3), input_shape=(img_height, img_width, 1)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Dropout(dropout_coeff))

# Second convolution layer: 41x41x32 -> conv 39x39x32 -> pool 19x19x32
model.add(Conv2D(filters, (3,3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Dropout(dropout_coeff))

# Third convolution layer: 19x19x32 -> conv 17x17x32 -> pool 8x8x32
model.add(Conv2D(filters, (3,3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Dropout(dropout_coeff))

# Fully Connected Layer #1 (the flattened feature map has 8 * 8 * 32 = 2048 values)
model.add(Flatten())
model.add(Dense(64))
model.add(Activation("tanh"))

# Fully Connected Layer #2
model.add(Dense(64))
model.add(Activation("tanh"))

# Last Fully Connected Layer: one output per class, turned into probabilities by softmax
model.add(Dense(num_classes))
model.add(Activation("softmax"))

In [None]:
# Display the layer-by-layer summary of the model built above
model.summary()

In [None]:
# Adam with an explicit learning rate. The optimizer *object* has to be handed to
# compile(): the string 'Adam' makes Keras build a separate optimizer with its
# default learning rate, so a configured instance that is not passed in has no effect.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

# Compile the Model. The sparse loss expects integer class indices as labels.
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [None]:
# Train on the training split and validate on the held-out validation split
epochs = 30

# Checkpoint callback: writes the whole model (not just the weights) to disk,
# but only on epochs where the validation loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    'weights.best.hdf5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
)

# Run the training loop; `history` keeps the per-epoch losses and metrics
history = model.fit(
    train_data_rescaled,
    epochs=epochs,
    validation_data=val_data_rescaled,
    callbacks=[checkpoint],
)

In [None]:
# Plot the training curves.
# The x-axis is derived from what was actually recorded in `history`, not from the
# `epochs` setting, so the plot stays valid if `epochs` is edited after training or
# training stopped before the last epoch.
epoch_num = list(range(len(history.history['loss'])))
for metric_name in ('loss', 'accuracy', 'val_loss', 'val_accuracy'):
    plt.plot(epoch_num, history.history[metric_name], label=metric_name)
plt.xlabel('epoch')
plt.ylabel('value')
plt.title('Training history')
plt.legend()
plt.show()

In [None]:
# Restore the parameters stored in the checkpoint file written during training
best_weights_path = 'weights.best.hdf5'
model.load_weights(best_weights_path)

# Run the restored model over the rescaled test set; one row of class scores per image
prediction = model.predict(test_data_rescaled)

In [None]:
# Ground-truth class indices, collected batch by batch from the test dataset.
# This relies on the test dataset not being shuffled, so that the order here
# matches the order in which model.predict() produced `prediction`.
y_true = np.concatenate([labels for _, labels in test_data_rescaled], axis=0)

# Predicted class index for each sample: the position of the largest model output
y_pred = np.argmax(prediction, axis=1)
prediction_labels = y_pred  # same array under the name used by the metric calls below

# Calculate precision, recall, and F1 score.
# Note: with micro averaging on single-label multiclass data these three values
# are all equal to plain accuracy.
precision = precision_score(y_true, prediction_labels, average='micro')
recall = recall_score(y_true, prediction_labels, average='micro')
f1 = f1_score(y_true, prediction_labels, average='micro')

# Print the results
print("Precision: {:.4f}".format(precision))
print("Recall: {:.4f}".format(recall))
print("F1 Score: {:.4f}".format(f1))

In [None]:
# Build the confusion matrix (rows: true labels, columns: predicted labels).
# Passing num_classes fixes the matrix size; without it the size is inferred from
# the largest label that happens to appear, which may be smaller than expected.
conf_matrix = tf.math.confusion_matrix(y_true, y_pred, num_classes=num_classes).numpy()

# Plot the confusion matrix
df_cm = pd.DataFrame(conf_matrix, range(num_classes), range(num_classes))
plt.figure(figsize=(10,7))
sn.set(font_scale=0.6) # for label size
# fmt='d' prints the integer counts in full instead of the default 2-significant-digit format
ax = sn.heatmap(df_cm, annot=True, fmt='d', annot_kws={"size": 6}) # font size
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')

plt.show()

In [None]:
# Evaluate the model on the rescaled test set.
# The dataset already yields batches, so no batch_size is given, and the remaining
# options are left at their defaults. The queue/worker options in particular are
# not accepted by newer Keras versions.
# The result is the loss followed by the compiled metrics (here: accuracy).
evaluation = model.evaluate(test_data_rescaled)

evaluation