
---

#### *Introduction*

In this notebook, we will extend our previous work by training a convolutional neural network (CNN) to classify coins from three mints:  
`al-Mansuriyah`, `Misr`, and `al-Mahdiyah`.  
We aim to analyze the model's performance and conduct various experiments to understand the importance of different coin regions in classification.

---



In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
import os
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.models import Model
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model

In [2]:
data_dir = 'Organized_images'
classes = [ 'Misr','al-Mansuriyah', 'al-Mahdiyah']

In [3]:
# Function to load and preprocess images
def load_and_preprocess_images(data_dir, classes):
    images = []
    labels = []
    for class_label in classes:
        class_dir = os.path.join(data_dir, class_label)
        for img_name in os.listdir(class_dir):
            img_path = os.path.join(class_dir, img_name)
            img = load_img(img_path, target_size=(224, 224))
            img_array = img_to_array(img)
            img_array = preprocess_input(img_array) # VGG16 preprocessing includes normalization
            images.append(img_array)
            labels.append(class_label)
    return np.array(images), np.array(labels)

In [4]:
# Load and preprocess the images
images, labels = load_and_preprocess_images(data_dir, classes)

# Encode labels as integers
label_to_index = {label: idx for idx, label in enumerate(classes)}
index_to_label = {idx: label for label, idx in label_to_index.items()}
labels = np.array([label_to_index[label] for label in labels])

In [5]:
# Split the dataset into training, validation, and testing sets
X_train_val, X_test, y_train_val, y_test = train_test_split(images, labels, test_size=0.2, random_state=42, stratify=labels, shuffle=True)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.2, random_state=42, stratify=y_train_val, shuffle=True)

# Convert labels to categorical
y_train = to_categorical(y_train, num_classes=len(classes))
y_val = to_categorical(y_val, num_classes=len(classes))
y_test = to_categorical(y_test, num_classes=len(classes))

print(f"Training set size: {len(X_train)}")
print(f"Validation set size: {len(X_val)}")
print(f"Test set size: {len(X_test)}")

Training set size: 306
Validation set size: 77
Test set size: 96


In [12]:
# Load pre-trained VGG16 model + higher level layers
def create_model(num_classes):
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # Add custom top layers for transfer learning
    x = base_model.output
    x = Flatten()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    predictions = Dense(num_classes, activation='softmax')(x)

    # Combine base model and new top layers
    model = Model(inputs=base_model.input, outputs=predictions)

    # Freeze the layers of the base model (not trainable)
    for layer in base_model.layers:
        layer.trainable = False

    return model

In [7]:
# Create and compile the model
num_classes = len(classes)
print(f"Number of classes: {num_classes}")

model = create_model(num_classes)
model.summary()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

Number of classes: 3


In [8]:
# Train the model
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=32)

# Save the trained model
model.save('three_class_model.h5')

Epoch 1/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 4s/step - accuracy: 0.4242 - loss: 75.4959 - val_accuracy: 0.7922 - val_loss: 26.2685
Epoch 2/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 4s/step - accuracy: 0.6792 - loss: 45.7804 - val_accuracy: 0.8052 - val_loss: 8.6454
Epoch 3/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 4s/step - accuracy: 0.7260 - loss: 25.0287 - val_accuracy: 0.8312 - val_loss: 11.5192
Epoch 4/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 4s/step - accuracy: 0.8072 - loss: 15.0377 - val_accuracy: 0.8442 - val_loss: 10.1117
Epoch 5/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 4s/step - accuracy: 0.8067 - loss: 16.1123 - val_accuracy: 0.7792 - val_loss: 20.3994
Epoch 6/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 4s/step - accuracy: 0.8445 - loss: 10.5164 - val_accuracy: 0.8701 - val_loss: 9.8132
Epoch 7/10
[1m10/10[0m [32m



In [9]:
# Evaluate model performance on the test set
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test accuracy: {accuracy:.4f}")

Test accuracy: 0.8438


In [10]:
# Load the trained model
model = load_model('three_class_model.h5')



In [11]:
# Predict the class of each image
predictions = model.predict(X_test)
predicted_labels = predictions.argmax(axis=1)
print(predicted_labels)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3s/step
[2 1 1 0 1 0 1 1 1 2 1 0 0 1 0 1 0 1 0 2 0 0 0 0 1 1 0 1 0 0 1 0 1 1 0 1 1
 0 1 0 0 1 1 0 0 0 1 1 1 0 0 0 1 0 0 0 0 2 0 0 1 1 0 0 0 0 0 1 0 1 1 2 1 0
 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 0 0 2 0 0 1]


In [12]:
y_test_al_mahdiyah_index = np.where(y_test.argmax(axis=1) == 2)[0]

In [13]:
# find the index of 'al-mahdiyah' class in the test set
y_test_al_mahdiyah_index

array([ 0, 13, 19, 47, 86, 89, 92], dtype=int64)

In [14]:
# find which class the model predicted for the 'al-mahdiyah' class
print(np.argmax(predictions[y_test_al_mahdiyah_index], axis=1))

[2 1 2 1 1 0 2]


The model predicted the `al-mahdiyah` class correctly only 3 times out of 7, this could be due to the similarity between the `al-mahdiyah` and `al-mansuriyah` classes

--- 

Next thing we will try to do is to train the model on the `al-mahdiyah` and `al-mansuriyah` classes only, and see if the model can distinguish between them.
After that, we will also try to train the model on the `al-mahdiyah` and `Misr` classes only, and see if the model can distinguish between them.

####  `al-mahdiyah` and `al-mansuriyah`

We need to remember that the al-Mahdiyah class is smaller than al-Mansuriyah and Misr classes.

In [6]:
class_names = ['al-Mansuriyah', 'al-Mahdiyah']

# Load and preprocess the images
images, labels = load_and_preprocess_images(data_dir, class_names)

# Encode labels as integers
label_to_index = {label: idx for idx, label in enumerate(class_names)}
index_to_label = {idx: label for label, idx in label_to_index.items()}
labels = np.array([label_to_index[label] for label in labels])

In [10]:
print(label_to_index)

{'al-Mansuriyah': 0, 'al-Mahdiyah': 1}


In [15]:
# Split the dataset into training, validation, and testing sets
X_train_val, X_test, y_train_val, y_test = train_test_split(images, labels, test_size=0.2, random_state=42, stratify=labels, shuffle=True)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.2, random_state=42, stratify=y_train_val, shuffle=True)

# Convert labels to categorical
y_train = to_categorical(y_train, num_classes=len(class_names))
y_val = to_categorical(y_val, num_classes=len(class_names))
y_test = to_categorical(y_test, num_classes=len(class_names))

print(f"Training set size: {len(X_train)}")
print(f"Validation set size: {len(X_val)}")
print(f"Test set size: {len(X_test)}")

Training set size: 156
Validation set size: 39
Test set size: 49


In [16]:
# Create and compile the model
num_classes = len(class_names)
print(f"Number of classes: {num_classes}")

model = create_model(num_classes)
model.summary()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

Number of classes: 2


In [22]:
# Train the model
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=48)

Epoch 1/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 6s/step - accuracy: 0.9652 - loss: 2.5167 - val_accuracy: 0.8462 - val_loss: 18.3926
Epoch 2/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 5s/step - accuracy: 0.9612 - loss: 1.7756 - val_accuracy: 0.8205 - val_loss: 10.9797
Epoch 3/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 5s/step - accuracy: 0.9410 - loss: 1.8117 - val_accuracy: 0.8462 - val_loss: 13.2568
Epoch 4/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 5s/step - accuracy: 0.9450 - loss: 1.6271 - val_accuracy: 0.8462 - val_loss: 31.9350
Epoch 5/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 5s/step - accuracy: 0.9589 - loss: 2.6712 - val_accuracy: 0.8462 - val_loss: 32.8824
Epoch 6/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 5s/step - accuracy: 0.9573 - loss: 1.4641 - val_accuracy: 0.8205 - val_loss: 16.3842
Epoch 7/10
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━

In [23]:
# Evaluate model performance on the test set
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test accuracy: {accuracy:.4f}")

Test accuracy: 0.8776


In [29]:
# Predict the class of each image
predictions = model.predict(X_test)
predicted_labels = predictions.argmax(axis=1)
print(predicted_labels)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step
[0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0]


In [30]:
y_test_al_mahdiyah_index = np.where(y_test.argmax(axis=1) == 1)[0]

In [31]:
y_test_al_mahdiyah_index

array([ 6, 31, 32, 33, 36, 39, 40], dtype=int64)

In [32]:
# find which class the model predicted for the 'al-mahdiyah' class
print(np.argmax(predictions[y_test_al_mahdiyah_index], axis=1))

[1 0 0 0 0 0 0]


As we can see the model is struggling to predict the 'al-mahdiyah' class, maybe beacuse of the small number of images in the dataset or because of the similarity between the two classes.

---

####  `al-mahdiyah` and `Misr`

In [34]:
class_names = ['Misr', 'al-Mahdiyah']

# Load and preprocess the images
images, labels = load_and_preprocess_images(data_dir, class_names)

# Encode labels as integers
label_to_index = {label: idx for idx, label in enumerate(class_names)}
index_to_label = {idx: label for label, idx in label_to_index.items()}
labels = np.array([label_to_index[label] for label in labels])

In [35]:
print(label_to_index)

{'Misr': 0, 'al-Mahdiyah': 1}


In [36]:
# Split the dataset into training, validation, and testing sets
X_train_val, X_test, y_train_val, y_test = train_test_split(images, labels, test_size=0.2, random_state=42, stratify=labels, shuffle=True)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.2, random_state=42, stratify=y_train_val, shuffle=True)

# Convert labels to categorical
y_train = to_categorical(y_train, num_classes=len(class_names))
y_val = to_categorical(y_val, num_classes=len(class_names))
y_test = to_categorical(y_test, num_classes=len(class_names))

print(f"Training set size: {len(X_train)}")
print(f"Validation set size: {len(X_val)}")
print(f"Test set size: {len(X_test)}")

Training set size: 173
Validation set size: 44
Test set size: 55


In [37]:
# Create and compile the model
num_classes = len(class_names)
print(f"Number of classes: {num_classes}")

model = create_model(num_classes)
model.summary()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

Number of classes: 2


In [38]:
# Train the model
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=32)

Epoch 1/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 7s/step - accuracy: 0.6249 - loss: 63.0112 - val_accuracy: 0.8636 - val_loss: 39.1741
Epoch 2/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 4s/step - accuracy: 0.8578 - loss: 47.6586 - val_accuracy: 0.9091 - val_loss: 2.7393
Epoch 3/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 4s/step - accuracy: 0.6706 - loss: 32.4272 - val_accuracy: 0.8636 - val_loss: 24.3866
Epoch 4/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 4s/step - accuracy: 0.8730 - loss: 13.8604 - val_accuracy: 0.9318 - val_loss: 7.3838
Epoch 5/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 4s/step - accuracy: 0.8783 - loss: 12.0778 - val_accuracy: 0.8864 - val_loss: 14.0455
Epoch 6/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 4s/step - accuracy: 0.8869 - loss: 5.5528 - val_accuracy: 0.9318 - val_loss: 14.9290
Epoch 7/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━

In [39]:
# Evaluate model performance on the test set
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test accuracy: {accuracy:.4f}")

Test accuracy: 0.9636


In [40]:
# Predict the class of each image
predictions = model.predict(X_test)
predicted_labels = predictions.argmax(axis=1)
print(predicted_labels)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3s/step
[0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0]


In [41]:
y_test_al_mahdiyah_index = np.where(y_test.argmax(axis=1) == 1)[0]
y_test_al_mahdiyah_index

array([ 4,  8, 14, 16, 28, 43, 49], dtype=int64)

In [42]:
# find which class the model predicted for the 'al-mahdiyah' class
print(np.argmax(predictions[y_test_al_mahdiyah_index], axis=1))

[0 1 1 1 1 1 1]


Through our experiments, it is evident that the second model can accurately distinguish between the classes `Misr` and `al-Mahdiyah`.   
However, the first model struggles to differentiate between `al-Mansuriyah` and `al-Mahdiyah`.   
This suggests that while the model has learned to identify distinct features in `Misr` and `al-Mahdiyah` coins,   
it finds it challenging to separate `al-Mansuriyah` from `al-Mahdiyah`, potentially due to similarities in their visual characteristics.

---