In [None]:
import tensorflow as tf
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.models import load_model

# **Load all Classification models**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# this is the first layer model which is used to recognise the digit of the input image
digit_recognition_model = load_model(os.path.join('drive/MyDrive/AI/models', 'digit_recognition_model.h5'))

In [None]:
# Load handwriting recognition model
# the 10 models are stored in a list
# the corresponding model will only be called based on the digit recognition model above
handwriting_model_list = []

for i in range(10):
  curr_model = load_model(os.path.join('drive/MyDrive/AI/models', f'handwriting_{i}_recognition_model.h5'))
  handwriting_model_list.append(curr_model)

# **Load Testing Dataset**

In [None]:
testing_dir = 'drive/MyDrive/AI/test_set_with_labels'
member_list = ['CY', 'YJ', 'YO', 'HT', 'ZY']

testing_files = []
filename_list = []

for i in range(10):
  temp = []
  filename_temp = []
  curr_dir = f'{testing_dir}/{i}'
  for filename in os.listdir(curr_dir):
    # Check if the file has an image extension
    if filename.endswith('.png'):
        # Create the full file path
        file_path = os.path.join(curr_dir, filename)
        # Append the file path to the list
        temp.append(file_path)
        filename_temp.append(filename)

  testing_files.append(temp)
  filename_list.append(filename_temp)

print(testing_files)
print(filename_list)
print(len(testing_files))

loaded_images = []

for i in range(len(testing_files)):
    temp = []
    # fig, axes = plt.subplots(1, 5)

    # Display each image in a separate subplot
    for j in range(len(testing_files[i])):
        print(filename_list[i][j])
        img = cv2.imread(testing_files[i][j])
        temp.append(img)
        # Display the image on the subplot
        # axes[j].imshow(img)

        # Remove the axis labels for each subplot
        # axes[j].axis('off')

    loaded_images.append(temp)
    # Adjust the spacing between subplots
    # plt.tight_layout()

    # Show the figure
    # plt.show()

[['drive/MyDrive/AI/test_set_with_labels/0/YO_0_resized.png', 'drive/MyDrive/AI/test_set_with_labels/0/HT_0_resized.png', 'drive/MyDrive/AI/test_set_with_labels/0/CY_0_resized.png', 'drive/MyDrive/AI/test_set_with_labels/0/ZY_0_resized.png', 'drive/MyDrive/AI/test_set_with_labels/0/YJ_0_resized.png'], ['drive/MyDrive/AI/test_set_with_labels/1/HT_1_resized.png', 'drive/MyDrive/AI/test_set_with_labels/1/YO_1_resized.png', 'drive/MyDrive/AI/test_set_with_labels/1/CY_1_resized.png', 'drive/MyDrive/AI/test_set_with_labels/1/YJ_1_resized.png', 'drive/MyDrive/AI/test_set_with_labels/1/ZY_1_resized.png'], ['drive/MyDrive/AI/test_set_with_labels/2/HT_2_resized.png', 'drive/MyDrive/AI/test_set_with_labels/2/YJ_2_resized.png', 'drive/MyDrive/AI/test_set_with_labels/2/YO_2_resized.png', 'drive/MyDrive/AI/test_set_with_labels/2/ZY_2_resized.png', 'drive/MyDrive/AI/test_set_with_labels/2/CY_2_resized.png'], ['drive/MyDrive/AI/test_set_with_labels/3/YO_3_resized.png', 'drive/MyDrive/AI/test_set_with_

## Predict the digit of the input image

In [None]:
# the predicted digits are stored in a list
pred_res = []

for i in range(len(loaded_images)):
  print(f'iteration: {i}')
  temp = []
  for j in range(len(loaded_images[i])):
    y_pred = digit_recognition_model.predict(np.expand_dims(loaded_images[i][j]/255, 0))
    y_pred_classes = tf.argmax(y_pred, axis=1)
    print(y_pred)
    print(y_pred_classes)
    temp.append(int(y_pred_classes))
  pred_res.append(temp)

print(pred_res)

accuracy_count = 0

for i in range(10):
  for j in range(5):
    if i == pred_res[i][j]:
      accuracy_count+=1

print(f'Accuracy: {accuracy_count/50*100}%')

iteration: 0
[[9.9984372e-01 3.1197359e-16 2.0487664e-07 2.2997231e-11 6.5456374e-09
  5.6824882e-14 4.2152650e-09 3.7832980e-13 1.5597443e-04 5.1866766e-12]]
tf.Tensor([0], shape=(1,), dtype=int64)
[[9.3777525e-01 1.9116081e-13 5.2203193e-02 1.6748200e-07 1.9771763e-07
  3.8849582e-13 2.1740540e-09 1.4356144e-05 1.0000879e-02 5.9456315e-06]]
tf.Tensor([0], shape=(1,), dtype=int64)
[[9.0621769e-01 1.1455526e-07 7.1559843e-05 1.5279790e-06 7.2780843e-03
  5.3087929e-06 1.4516750e-02 6.4087919e-07 1.4542723e-02 5.7365596e-02]]
tf.Tensor([0], shape=(1,), dtype=int64)
[[1.00000000e+00 2.53374173e-26 9.17224196e-13 1.71456936e-22
  1.08229304e-16 3.57889721e-23 1.54432879e-15 1.07090718e-19
  7.16397192e-19 1.55889486e-23]]
tf.Tensor([0], shape=(1,), dtype=int64)
[[1.0000000e+00 3.3252090e-26 6.9500109e-12 6.2814955e-23 1.9015753e-12
  4.0590783e-21 1.3981298e-13 6.4179289e-17 7.9157744e-18 2.5435233e-18]]
tf.Tensor([0], shape=(1,), dtype=int64)
iteration: 1
[[9.0934336e-08 9.9996078e-01 5.

In [None]:
def most_frequent_element(lst):
    return max(lst, key=lst.count)

for res in pred_res:
  result = most_frequent_element(res)
  print(result)

0
1
2
3
4
5
6
7
8
9


## Predict the handwriting of the input images

In [None]:
!pip install shap

In [25]:
import glob
from PIL import Image
import shap
from tensorflow.python.ops.numpy_ops import np_config
np_config.enable_numpy_behavior()

In [None]:
def load_and_preprocess_image(image_path):
    img = Image.open(image_path)
    img = img.resize((28, 28))  # Resize image

    img_rgb = img.convert('RGB')  # Convert to RGB for model
    img_rgb = np.array(img_rgb)  # Convert image to array
    img_rgb = img_rgb / 255.0  # Normalize
    img_rgb = np.expand_dims(img_rgb, 0)  # Add batch dimension

    img_bin = img.convert('1')  # Convert to binary for visualization
    img_bin = np.array(img_bin, dtype=np.int32)  # Convert image to integer array

    return img_rgb, img_bin


samples_per_class = 1  # We only need one representative sample per class
representative_samples = []
representative_images = []

# Load one image per class from each class folder
for i in range(10):
    image_paths = glob.glob(f'{testing_dir}/{i}/*_resized.png')[:samples_per_class]
    for path in image_paths:
        img_rgb, img_bin = load_and_preprocess_image(path)
        representative_samples.append(img_rgb)
        representative_images.append(np.expand_dims(img_bin, -1))  # Add an extra dimension for grayscale

# Convert to Tensor for compatibility with SHAP
to_explain = tf.convert_to_tensor(np.concatenate(representative_samples))

# Convert representative_images to a numpy array and invert the colors
representative_images = 1 - np.array(representative_images)

# Convert representative_images to a numpy array
representative_images = np.array(representative_images)

# Create an explainer object
explainer = shap.DeepExplainer(new_model, to_explain)

# Calculate SHAP values
shap_values = explainer.shap_values(to_explain)

shap.image_plot(shap_values, -representative_images)

In [None]:
score = 0
member_error_list = []
for i in range(len(pred_res)):
  for j in range(len(pred_res[i])):
    curr_res = pred_res[i][j] # this will take the predicted result from digit recognition model
    # load the current image into the corresponding model based on the previous predicted result
    # for example, if the digit detected from previous model is 4
    # then the current image will be fitted into the "handwriting_4_recognition_model"
    y_pred = handwriting_model_list[curr_res].predict(np.expand_dims(loaded_images[i][j]/255, 0))
    # take the result with the highest probability
    y_pred_classes = int(tf.argmax(y_pred, axis=1))
    # print the filename, the predicted digit and the predicted memeber who wrote that image
    print(y_pred)
    print(f'member {y_pred_classes}')
    print("-------------------------------------------------------------------------------------")
    print(f'filename: {filename_list[i][j]} --- predicted digit: {curr_res} --- predicted written by: {member_list[y_pred_classes]}')
    print("-------------------------------------------------------------------------------------")
    parts = filename_list[i][j].split('_')
    member = parts[0]
    digit = int(parts[1])
    # compare the predicted digit and member with the filename (actual digit and member name)
    # count the correct predictions
    if member == member_list[y_pred_classes] and digit == curr_res:
      print('---> right prediction\n')
      score+=1
    else:
      print('---> wrong prediction\n')
      member_error_list.append(member)

# calculate the accuracy of the testing set in prercentage
print(f'Accuracy: {score/50*100}%')

print(member_error_list)

[[0.0041054  0.14009868 0.8421198  0.01238233 0.00129377]]
member 2
-------------------------------------------------------------------------------------
filename: YO_0_resized.png --- predicted digit: 0 --- predicted written by: YO
-------------------------------------------------------------------------------------
---> right prediction

[[2.0729951e-04 9.9757845e-03 6.5185567e-03 3.9160859e-02 9.4413751e-01]]
member 4
-------------------------------------------------------------------------------------
filename: HT_0_resized.png --- predicted digit: 0 --- predicted written by: ZY
-------------------------------------------------------------------------------------
---> wrong prediction

[[9.9999630e-01 2.0733594e-07 3.6404387e-07 2.7896178e-06 3.9186077e-07]]
member 0
-------------------------------------------------------------------------------------
filename: CY_0_resized.png --- predicted digit: 0 --- predicted written by: CY
-----------------------------------------------------

In [None]:
# calculate the total number of wrong prediction by each member
from collections import Counter

counts = Counter(member_error_list)

for member, count in counts.items():
    print(f'{member} error(s): {count}')

HT error(s): 2
YJ error(s): 5
CY error(s): 1


The result above shows Hon Ting(HT), Yan Jian(YJ) and Ching Yen(CY) have at least one wrongly predicted handwriting out of 10 test images provided.
The other two members who are Yun Onn(YO) and Zheng Yu(ZY) have all the handwritting images predicted correctly.
It means that except for YJ, the other 4 members have a relatively consistent handwriting pattern.

However, YJ has more 5 wrong predictions out of 10 test images provided which are digit 2, 3, 4, 7 and 8.
It means that YJ has a relatively inconsistent handwriting pattern.