# CNN Model for UNO Card Detection

For the creation of the CNN model from scratch, we will be using the following modules:
1. Tensorflow: The main framework for the creation of the model
2. Matplotlib: For Analysis of the model

*~Done By Mikaia, Akilesh and Vaibhav~*

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input,
    Conv2D,
    MaxPooling2D,
    GlobalAveragePooling2D,
    Flatten,
    Dense,
    Dropout,
    Rescaling,
    RandomRotation,
    RandomZoom,
    RandomBrightness,
    RandomContrast,
)
from tensorflow.keras.optimizers import Adam
from matplotlib import pyplot as plt

In [None]:
# Define the file directory of the dataset
data_dir = 'Uno Dataset 2'

In [None]:
# Define the parameter of the input 
batch_size = 1
img_height = 224
img_width = 224
epochs=10

In [21]:
# Make use of the tf.keras.utils.image_dataset_from_directory to load the dataset folder and obtain the class names
full_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir,
  shuffle=True,
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

Found 10442 files belonging to 54 classes.


In [22]:
# Calculate dataset size and split
dataset_size = full_ds.cardinality().numpy()
train_size = int(0.7 * dataset_size) 
val_size = int(0.15 * dataset_size)  
test_size = dataset_size - train_size - val_size

In [23]:
# Split into train (70%), validation (15%), and test (15%) sets
train_ds = full_ds.take(train_size)
val_ds = full_ds.skip(train_size).take(val_size)
test_ds = full_ds.skip(train_size + val_size)

In [24]:
# Print the class names extracted by keras
class_names = full_ds.class_names
class_names

['blue_0',
 'blue_1',
 'blue_2',
 'blue_3',
 'blue_4',
 'blue_5',
 'blue_6',
 'blue_7',
 'blue_8',
 'blue_9',
 'blue_draw_2',
 'blue_reverse',
 'blue_skip',
 'green_0',
 'green_1',
 'green_2',
 'green_3',
 'green_4',
 'green_5',
 'green_6',
 'green_7',
 'green_8',
 'green_9',
 'green_draw_2',
 'green_reverse',
 'green_skip',
 'red_0',
 'red_1',
 'red_2',
 'red_3',
 'red_4',
 'red_5',
 'red_6',
 'red_7',
 'red_8',
 'red_9',
 'red_draw_2',
 'red_reverse',
 'red_skip',
 'wild_change_colour',
 'wild_draw_four',
 'yellow_0',
 'yellow_1',
 'yellow_2',
 'yellow_3',
 'yellow_4',
 'yellow_5',
 'yellow_6',
 'yellow_7',
 'yellow_8',
 'yellow_9',
 'yellow_draw_2',
 'yellow_reverse',
 'yellow_skip']

In [25]:
# Create a normalization layer to standardize the input from 0 to 255 to 0 to 1
# Apply the normalization layer to all the dataset (train, validation, and test)
normalization_layer = Rescaling(1./255)
normalized_ds = train_ds.map (lambda x,y : (normalization_layer(x), y))
normalized_val_ds = val_ds.map(lambda x,y : (normalization_layer(x), y))
normalized_test_ds = test_ds.map(lambda x,y : (normalization_layer(x), y))

In [26]:
# Create and Define a simple data augmentation pipeline
# !NOTE: The parameter are to be adjusted slowly to avoid overfitting
data_augmentation = tf.keras.Sequential([
    RandomRotation(0.05),
    RandomBrightness(0.05),
    RandomZoom(0.0,0.1),
    RandomContrast(0.05)
])

In [27]:
# Apply the data augmentation to the normalised training dataset
augmented_train_dataset = normalized_ds.map(
    lambda x, y: (data_augmentation(x, training=True), y)
)

In [28]:
# Combine both the normalized and augmented dataset
combined_train_dataset = normalized_ds.concatenate(augmented_train_dataset)

## Model Details
---

**The Model Contains 4 Categories of Layes**:
1. Input Layer
2. Features Extraction Layer
3. Classification Layer (denoted as X)
4. Output Layer

### Input Layer:
- The Input Layer takes image size of 224x224 pixels, RGB coloured.

### Features Extraction Layer:
- It consist of 8, 2D Convolutional Layer *with ReLU* (Rectified Linear Unit) Activation Function
- The *First 4 Convolutional Layers* are with a padding of "valid" which remove the padding.
- The Next Convolutional Layers are with a padding of "same" so that the corner are included when performing the Pooling
- Each Convolutional Layer, is followed by *MaxPooling Layer*, exept the last one which is followed by an *Average Pooling Layer*.
- The Pooling Layers have *a sliding window of 2x2* and *a default strides of 1x1*.
- The Convolutional Layers Filter's Configuration is as follow: *32,32,64,64,128,256,512,512.*

### Classification Layer:
- This Layer start with average pooling layer, to reduce the spatial dimension and converts features map into a single vector.
- It contains 2 Dense layer with 1024 units followed by a dropout layer with a 0.2 rate of regularization.

### Output Layer:
- This layer consist of a singular Dense layer with *54 units* and *SIGMOID* activation function.


---

**Compilation**

- The model was compiled with the following parameter:
    1. Adam with a initial learning rate of 0.0001 as Optimizer
    2. Sparse Catergorical Cross Entropy as loss function, which performs best for multi class classification.
    3. A singular metric which is accuracy

  

In [None]:
# Build The Model
input_shape = (224, 224, 3)
inputs = Input(shape=input_shape)

features_extraction = Conv2D(32, 3, padding="valid", activation="relu")(inputs)
features_extraction = MaxPooling2D((2, 2))(features_extraction)
features_extraction = Conv2D(32, 3, padding="valid", activation="relu")(features_extraction)
features_extraction = MaxPooling2D((2, 2))(features_extraction)
features_extraction = Conv2D(64, 3, padding="valid", activation="relu")(features_extraction)
features_extraction = MaxPooling2D((2, 2))(features_extraction)
features_extraction = Conv2D(64, 3, padding="valid", activation="relu")(features_extraction)
features_extraction = MaxPooling2D((2, 2))(features_extraction)
features_extraction = Conv2D(128, 3, padding="same", activation="relu")(features_extraction)
features_extraction = MaxPooling2D((2, 2))(features_extraction)
features_extraction = Conv2D(256, 3, padding="same", activation="relu")(features_extraction)
features_extraction = MaxPooling2D((2, 2))(features_extraction)
features_extraction = Conv2D(512, 3, padding="same", activation="relu")(features_extraction)
features_extraction = MaxPooling2D((2, 2))(features_extraction)
features_extraction = Conv2D(512, 3, padding="same", activation="relu")(features_extraction)



x = GlobalAveragePooling2D()(features_extraction)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.2)(x)


outputs = Dense(54, activation='sigmoid', name='value_output')(x)


model = Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer=Adam(0.0001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"]
)

# Display the model summary
model.summary()

In [34]:
# Obtain the buffer size of the combined train dataset
buffer_size = combined_train_dataset.cardinality().numpy()

# Shuffle the combined train dataset
shuffled_train_dataset = combined_train_dataset.shuffle(buffer_size=buffer_size, reshuffle_each_iteration=True)

# Apply batch to the train, validation, and test dataset
batched_train_dataset = shuffled_train_dataset.batch(5)
batched_val_dataset = normalized_val_ds.batch(5)

In [None]:
# Train the model
history = model.fit(
    shuffled_train_dataset,
    validation_data=normalized_val_ds,
    epochs=epochs
)

In [None]:
model2.save("model_93.keras")

In [40]:
model.save('model1.h5')



In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from datetime import datetime

# Implementation of Early Stopping to prevent overfitting
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,              
    verbose=1,               
    restore_best_weights=True 
)
# Implementation of Model Checkpoint to save the best model
model_checkpoint = ModelCheckpoint(
    filepath=f'best_model_{datetime.now().strftime("%d_%m_%Y_%H_%M_%S")}.keras',    
    monitor='val_loss',          
    save_best_only=True,         
    verbose=1
)

*Train An Already Existing Model*:

The code below will load and allow the already saved to be improved further with newer dataset.



In [None]:
# Import load_model function from keras.models
# Load the model
from tensorflow.keras.models import load_model

trained_model = load_model("model_93.keras")

In [32]:
# Compile the model with the same parameters as before
trained_model.compile(
    optimizer=Adam(0.0001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)


In [36]:
# Train the model and save the history for further analysis
# The Epoch should be varied based on the accuracy of the model being loaded
trained_history = trained_model.fit(
    shuffled_train_dataset,
    validation_data=normalized_val_ds,
    shuffle=True,
    epochs=5, 
    callbacks=[early_stopping, model_checkpoint],
    batch_size=5
)

Epoch 1/5


2024-11-07 01:51:40.599515: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:41: Filling up shuffle buffer (this may take a while): 2135 of 14618
2024-11-07 01:51:50.600565: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:41: Filling up shuffle buffer (this may take a while): 4290 of 14618
2024-11-07 01:52:10.595955: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:41: Filling up shuffle buffer (this may take a while): 8580 of 14618
2024-11-07 01:52:20.596942: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:41: Filling up shuffle buffer (this may take a while): 10674 of 14618


[1m    3/14618[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m7:33[0m 31ms/step - accuracy: 0.3889 - loss: 2.4225        

2024-11-07 01:52:39.658541: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m14618/14618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.6742 - loss: 1.3178

  output, from_logits = _get_logits(



Epoch 1: val_loss improved from inf to 0.35068, saving model to best_model_07_11_2024_01_43_32.keras
[1m14618/14618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m485s[0m 28ms/step - accuracy: 0.6742 - loss: 1.3178 - val_accuracy: 0.9074 - val_loss: 0.3507
Epoch 2/5


2024-11-07 01:59:46.030818: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:41: Filling up shuffle buffer (this may take a while): 1715 of 14618
2024-11-07 02:00:05.994235: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:41: Filling up shuffle buffer (this may take a while): 5269 of 14618
2024-11-07 02:00:16.001933: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:41: Filling up shuffle buffer (this may take a while): 6951 of 14618
2024-11-07 02:00:36.035262: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:41: Filling up shuffle buffer (this may take a while): 9961 of 14618
2024-11-07 02:00:55.998290: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:41: Filling up shuffle buffer (this may take a while): 12905 of 14618
2024-11-07 02:01:06.009059: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:41: Filling up shuffle buffer (this 

[1m    1/14618[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m371:27:37[0m 91s/step - accuracy: 1.0000 - loss: 0.0380

2024-11-07 02:01:07.331179: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m14617/14618[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 30ms/step - accuracy: 0.6881 - loss: 1.3027
Epoch 2: val_loss did not improve from 0.35068
[1m14618/14618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m590s[0m 34ms/step - accuracy: 0.6881 - loss: 1.3027 - val_accuracy: 0.9272 - val_loss: 0.4445
Epoch 3/5


2024-11-07 02:09:36.288136: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:41: Filling up shuffle buffer (this may take a while): 1506 of 14618
2024-11-07 02:09:46.302157: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:41: Filling up shuffle buffer (this may take a while): 3013 of 14618
2024-11-07 02:09:56.350374: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:41: Filling up shuffle buffer (this may take a while): 4564 of 14618
2024-11-07 02:10:16.257932: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:41: Filling up shuffle buffer (this may take a while): 7550 of 14618
2024-11-07 02:10:26.256860: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:41: Filling up shuffle buffer (this may take a while): 8946 of 14618
2024-11-07 02:10:36.260061: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:41: Filling up shuffle buffer (this m

[1m14618/14618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.6976 - loss: 1.2261
Epoch 3: val_loss did not improve from 0.35068
[1m14618/14618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m665s[0m 38ms/step - accuracy: 0.6976 - loss: 1.2261 - val_accuracy: 0.9253 - val_loss: 0.4927
Epoch 3: early stopping
Restoring model weights from the end of the best epoch: 1.


In [37]:
# Test the model with test dataset and outputing the accuracy
_, test_accuracy = trained_model.evaluate(normalized_test_ds)
test_accuracy

[1m1567/1567[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 12ms/step - accuracy: 0.9068 - loss: 0.4225


0.9004467129707336

In [20]:
# Save the model with the following name format: model_{test_accuracy}.keras
trained_model.save(f"model_{test_accuracy*100:.1f}.keras")

## Analysis of the model

In [None]:
# Classification Report
from sklearn.metrics import classification_report
import numpy as np

# Obtain the predictions of the test dataset
predictions = trained_model.predict(normalized_test_ds)

# Obtain the predicted class of the test dataset
predicted_classes = np.argmax(predictions, axis=1)

true_classes = np.concatenate([y for x, y in normalized_test_ds], axis=0)


report = classification_report(true_classes, predicted_classes, target_names=class_names)

print(report)

In [None]:
# Confusion Matrix Plot
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import seaborn as sns

# Compute confusion matrix
cm = confusion_matrix(true_classes, predicted_classes)

# Plot confusion matrix
plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=False, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Class')
plt.ylabel('True Class')
plt.show()

In [None]:
# Assess the per-class accuracy 
from sklearn.metrics import accuracy_score

# Compute per-class accuracy
per_class_accuracy = []
classes = np.unique(true_classes)

for cls in classes:
    idx = np.where(true_classes == cls)
    acc = accuracy_score(true_classes[idx], predicted_classes[idx])
    per_class_accuracy.append(acc)

# Plot per-class accuracy
plt.figure(figsize=(14, 6))
plt.bar(classes, per_class_accuracy)
plt.xlabel('Class')
plt.ylabel('Accuracy')
plt.title('Per-Class Accuracy')
plt.xticks(classes)
plt.grid(True)
plt.show()