In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score
import os
import tensorflow as tf
import numpy as np
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import pathlib
import tensorflow_datasets as tfds
from sklearn.metrics import f1_score
import sklearn
import seaborn as sns
import pandas as pd

In [None]:
# Change the path to the folder containing the folders train and set
%store -r dst_dir
datasets_path = dst_dir 
train_data_path = os.path.join(datasets_path, 'train')
test_data_path = os.path.join(datasets_path, 'test')

In [None]:
# Loader options shared by every split: single-channel 84x84 images whose
# labels are inferred from the directory structure.
common_loader_args = dict(
    color_mode='grayscale',
    labels='inferred',
    image_size=(84, 84),
    seed=22,
)

# Training / validation datasets: shuffled, with 25% of the train folder held
# out for validation (subset='both' returns the two datasets together).
train_data, val_data = tf.keras.preprocessing.image_dataset_from_directory(
    train_data_path,
    shuffle=True,
    validation_split=0.25,
    subset='both',
    **common_loader_args
)

# Test dataset: the whole test folder, not shuffled and not split.
test_data = tf.keras.preprocessing.image_dataset_from_directory(
    test_data_path,
    shuffle=False,
    validation_split=None,
    subset=None,
    **common_loader_args
)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, Rescaling
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
img_height, img_width = 84, 84

# Layer that multiplies every pixel value by 1/255
rescale_layer = Rescaling(1./255, input_shape=(img_height, img_width, 1))

@tf.function
def rescale_fn(x, y):
    """Apply `rescale_layer` to the images `x`; the labels `y` pass through unchanged."""
    scaled_images = rescale_layer(x)
    return scaled_images, y

# Rescaled view of each split (same mapping applied to all three)
train_data_rescaled, val_data_rescaled, test_data_rescaled = (
    split.map(rescale_fn) for split in (train_data, val_data, test_data)
)

In [None]:
filters = 32
num_classes = 50
dropout_coeff = 0.2

# CNN classifier: three conv stages (3x3 conv -> ReLU -> 2x2 max-pool -> dropout)
# followed by two tanh dense layers and a softmax output.
# Only the first layer declares an input shape; every later layer infers its
# input shape from the previous one, so no `input_shape` is passed to Dropout.
model = Sequential()

# First convolution layer
# 84x84 input -> 82x82 after the 3x3 conv (default 'valid' padding) -> 41x41 after pooling
model.add(Conv2D(filters, (3,3), input_shape=(img_height, img_width, 1)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Dropout(dropout_coeff))

# Second convolution layer
# 41x41 -> 39x39 after the conv -> 19x19 after pooling
model.add(Conv2D(filters, (3,3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Dropout(dropout_coeff))

# Third convolution layer
# 19x19 -> 17x17 after the conv -> 8x8 after pooling
model.add(Conv2D(filters, (3,3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Dropout(dropout_coeff))

# Fully Connected Layer #1
model.add(Flatten())
model.add(Dense(64))
model.add(Activation("tanh")) #relu

# Fully Connected Layer #2
model.add(Dense(64))
model.add(Activation("tanh")) #relu

# Last Fully Connected Layer, one output per class
model.add(Dense(num_classes))
model.add(Activation("softmax"))

In [None]:
# Print the layer-by-layer summary of the model built above
model.summary()

In [None]:
# Adam with an explicit learning rate. The instance itself has to be handed to
# compile(): the string 'Adam' would create a new optimizer with the default
# learning rate and this setting would be silently ignored.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

# Compile the Model
# (sparse categorical cross-entropy: targets are integer class ids, not one-hot vectors)
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [None]:
epochs = 30

# Checkpoint callback: overwrite the file only when the validation loss
# improves; the full model is written, not just its weights.
checkpoint = ModelCheckpoint(
    filepath='weights.best.hdf5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
)

# Fit on the rescaled training split, validating on the rescaled validation split
history = model.fit(
    train_data_rescaled,
    epochs=epochs,
    validation_data=val_data_rescaled,
    callbacks=[checkpoint],
)

In [None]:
# Training and validation loss/accuracy trend
# The x-axis is taken from the recorded history rather than from `epochs`, so
# the plot stays valid when the number of recorded epochs differs from that
# setting (e.g. `epochs` was edited after the last fit).
epoch_num = list(range(len(history.history['loss'])))
for metric in ('loss', 'accuracy', 'val_loss', 'val_accuracy'):
    plt.plot(epoch_num, history.history[metric], label = metric)
plt.xlabel('Epoch')
plt.legend()
plt.show()

In [None]:
# Restore the weights from the file written by the ModelCheckpoint callback
# during training (the epoch with the lowest validation loss)
model.load_weights('weights.best.hdf5')

# Predict on the test data
# (one row per test sample; each row is the softmax output over the classes)
prediction = model.predict(test_data_rescaled)

In [None]:
# Evaluate on test data
# The input is an already-batched dataset, so `batch_size` must not be given,
# and the generator/Sequence-only options (max_queue_size, workers,
# use_multiprocessing) do not apply; newer Keras releases reject them outright.
# With return_dict=False the result is a list: [loss, accuracy].
evaluation = model.evaluate(
    test_data_rescaled,
    verbose=0,
    return_dict=False,
)

print('Loss: ', evaluation[0])
print('Accuracy: ', evaluation[1])

In [None]:
# True labels of the test set, gathered batch by batch. The test dataset is
# created with shuffle=False, so this order matches the rows of `prediction`.
y_true = np.concatenate([batch_labels for _, batch_labels in test_data_rescaled], axis=0)
# Predicted labels: index of the highest class probability in each row
y_pred = np.argmax(prediction, axis=1)

# The dataset labels are already integer class ids, so they are the test labels
# as-is (np.argmax over them would collapse the whole vector to one index).
test_labels = y_true

# Calculate precision, recall, and F1 score
# NOTE(review): with one label per sample, micro-averaged precision, recall and
# F1 are all equal to the accuracy; consider average='macro' if per-class
# behaviour is what should be reported.
precision = precision_score(y_true, y_pred, average='micro')
recall = recall_score(y_true, y_pred, average='micro')
f1 = f1_score(y_true, y_pred, average='micro')

print("Precision: {:.4f}".format(precision))
print("Recall: {:.4f}".format(recall))
print("F1 Score: {:.4f}".format(f1))

In [None]:
# Build the confusion matrix (rows: true labels, columns: predicted labels).
# num_classes is passed explicitly so the matrix is always
# num_classes x num_classes, even if the highest class ids never occur in
# y_true or y_pred.
conf_matrix = tf.math.confusion_matrix(y_true, y_pred, num_classes=num_classes).numpy()

# Plot the confusion matrix
class_ids = range(num_classes)
df_cm = pd.DataFrame(conf_matrix, index=class_ids, columns=class_ids)
plt.figure(figsize=(10,7))
sns.set(font_scale=0.6) # for label size
sns.heatmap(df_cm, annot=True, annot_kws={"size": 6}) # font size
plt.xlabel("Predicted label")
plt.ylabel("True label")

plt.show()

In [None]:
# Confidence of each prediction: the highest class probability in its row
y_prob = np.max(prediction, axis=1)

# Show the confidence for the predictions in the test set
# (x-axis sized from the data instead of a hard-coded sample count)
plt.plot(range(len(y_prob)), y_prob)
plt.title("Prediction Confidence", fontsize=16)
plt.xlabel("Samples", fontsize=12)
plt.ylabel("Probability", fontsize=12)
plt.show()

In [None]:
# Plot the confidence for the predictions in the test set with a thicker line
# (x-axis sized from the data instead of a hard-coded sample count)
plt.plot(range(len(y_prob)), y_prob, linewidth=2)
plt.title("Prediction Confidence", fontsize=16)
plt.xlabel("Samples", fontsize=12)
plt.ylabel("Probability", fontsize=12)

plt.show()

In [None]:
# Count how many predictions fall below each confidence threshold
target_prob = [0.6, 0.7, 0.8, 0.9]
total_predictions = len(y_prob)  # reported instead of a hard-coded sample count
for prob in target_prob:
    y_low = [val for val in y_prob if val < prob]
    print('Prediction with a probability of less than ',prob ,': ', len(y_low), 'on', total_predictions, 'predictions')

In [None]:
# One [sample index, true label, predicted label] entry for every test sample
# whose predicted label differs from the true one.
wrong_predictions = [
    [sample_idx, actual, guessed]
    for sample_idx, (actual, guessed) in enumerate(zip(y_true, y_pred))
    if actual != guessed
]

In [None]:
# Restore `label_dict` from IPython's %store storage; it is indexed by class id
# to obtain the value printed as the label of a sample.
%store -r label_dict

def wrong_prediction(i, images_per_class=40):  # Prints the i-th wrong predicted image
    """Show the i-th misclassified test image with its true and predicted label.

    Args:
        i: Index into `wrong_predictions`.
        images_per_class: Number of test images in every class folder, used to
            turn a dataset position into (class folder, file within folder).
            NOTE(review): this assumes all class folders have the same size
            and contain only files the dataset loader picked up - confirm.

    Returns:
        None. The image is displayed and the labels are printed; if the entry
        or its image file cannot be found, a message is printed instead.
    """
    try:
        sample_idx, true_label, predicted_label = wrong_predictions[i]
    except IndexError:
        print(f"There are only {len(wrong_predictions)} wrong predictions; index {i} is out of range")
        return

    class_idx, image_idx = divmod(sample_idx, images_per_class)
    try:
        # os.listdir() returns entries in arbitrary order, while the dataset
        # loader orders class folders and files alphanumerically; sorting here
        # keeps the position -> file mapping aligned with the unshuffled test set.
        labels = sorted(
            entry for entry in os.listdir(test_data_path)
            if os.path.isdir(os.path.join(test_data_path, entry))
        )
        image_folder_path = os.path.join(test_data_path, labels[class_idx])
        images = sorted(os.listdir(image_folder_path))
        wrong_image_path = os.path.join(image_folder_path, images[image_idx])
        wrong_image = mpimg.imread(wrong_image_path)
    except (OSError, IndexError) as err:
        print(f"Could not load test image number {sample_idx}: {err}")
        return

    plt.imshow(wrong_image, cmap = plt.cm.binary)
    # grid(False) switches the grid off; grid(None) would toggle its current state
    plt.grid(False)
    plt.show()
    print('True label:', label_dict[true_label], '        Predicted label:', label_dict[predicted_label])

In [None]:
# Show the wrongly predicted image at index 12 of `wrong_predictions`,
# together with its true and predicted labels
wrong_prediction(12)