## **Libraries and Dependencies**

In [4]:
# Imports
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers, optimizers, applications
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.layers import RandomZoom, RandomRotation, RandomFlip, Rescaling, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from PIL import Image
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, LearningRateScheduler
from tensorflow.keras.applications import VGG16
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import numpy as np
import os
import random
import pandas as pd
import cv2
import logging
import warnings
import json


# Quiet the notebook output: raise the 'tensorflow' Python logger threshold to
# ERROR and ignore every warning of category UserWarning.
tf_logger = logging.getLogger('tensorflow')
tf_logger.setLevel(logging.ERROR)
warnings.filterwarnings(action="ignore", category=UserWarning)


## **Tensorflow Version**

In [2]:
# Show which TensorFlow release this notebook is running against
print(tf.version.VERSION)

2.10.0


## **GPU Checker**

In [3]:
# Report how many GPU devices TensorFlow can see (or that there are none)
gpus = tf.config.list_physical_devices('GPU')
print(f"GPUs detected: {len(gpus)}" if gpus else "No GPU detected.")

GPUs detected: 1


In [6]:
# Suppress TensorFlow logging except for fatal errors.
# NOTE(review): TensorFlow is already imported in the imports cell; this variable
# is normally set before `import tensorflow` — confirm it still has the intended
# effect when set here.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

## **Global Variables**

In [5]:
# Set seed
SEED = 338424
# Actually seed the global generators (Python `random`, NumPy and TensorFlow) so
# that weight initialisation, dropout and augmentation are repeatable between
# runs; previously SEED was only forwarded to the dataset loader/shuffles.
tf.keras.utils.set_random_seed(SEED)

# Global variables
IMG_SIZE = (64, 64)  # Passed as `image_size` to the loader and used as the model input height/width
BATCH_SIZE = 32
num_classes = 18 # Number of folders in dataset
AUTOTUNE = tf.data.AUTOTUNE  # Let tf.data pick the prefetch buffer size

## **Dataset: Loading, Splitting, Shuffling, Caching**

In [7]:
# Load Dataset
dataset_dir = 'dataset/hagridset'
full_ds = tf.keras.utils.image_dataset_from_directory(
    dataset_dir,
    shuffle=True,
    seed=SEED,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='categorical'
)

# Split into training, validation, and test sets
train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1

# Total length of the dataset (in batches, because `full_ds` is already batched)
total_size = len(full_ds)

# Compute the number of batches in each split; the test split takes the remainder
train_size = int(total_size * train_ratio)
val_size = int(total_size * val_ratio)
test_size = total_size - (train_size + val_size)

# NOTE(review): `full_ds` is created with shuffle=True. If it reshuffles on every
# iteration, batches near the take/skip boundaries may end up in more than one
# split — confirm, or split at file level before building the datasets.

# Training split: cache first, then shuffle, so that each epoch sees a new batch
# order. (Shuffling before cache() stores a single fixed order in the cache.)
train_ds = (
    full_ds.take(train_size)
    .cache()
    .shuffle(train_size, seed=SEED)
    .prefetch(buffer_size=AUTOTUNE)
)

# Validation / test splits: evaluation does not depend on batch order, so these
# are only cached and prefetched.
val_ds = (
    full_ds.skip(train_size)
    .take(val_size)
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)
test_ds = (
    full_ds.skip(train_size + val_size)
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

# Count samples in each subset
def count_samples(dataset):
    """Return the number of individual elements in a batched dataset.

    Args:
        dataset: Object exposing ``unbatch()`` that yields one element per sample.

    Returns:
        int: How many elements ``dataset.unbatch()`` produced.
    """
    total = 0
    for _ in dataset.unbatch():
        total += 1
    return total

# Report the sample count of every split
for split_label, split_ds in (
    ('Training', train_ds),
    ('Validation', val_ds),
    ('Test', test_ds),
):
    print(f'Using {count_samples(split_ds)} samples in the {split_label} set')

Found 125912 files belonging to 18 classes.
Using 88128 samples in the Training set
Using 25184 samples in the Validation set
Using 12600 samples in the Test set


In [7]:
# Get class names
# The names are read from the loaded dataset object; the bare expression on the
# last line makes the notebook display the list.
class_names = full_ds.class_names
class_names

['call',
 'dislike',
 'fist',
 'four',
 'like',
 'mute',
 'ok',
 'one',
 'palm',
 'peace',
 'peace_inverted',
 'rock',
 'stop',
 'stop_inverted',
 'three',
 'three2',
 'two_up',
 'two_up_inverted']

#### **Dataset: Shallow - Save**

In [8]:
# Persist each RGB split under its own sub-directory of `path_to_save`
path_to_save = 'savedDatasetShallowRGB'

for split_name, split_ds in (('train', train_ds), ('val', val_ds), ('test', test_ds)):
    tf.data.experimental.save(split_ds, f'{path_to_save}/{split_name}')

## **Regularization Factors**

This code snippet defines the values for L1 and L2 regularization, which are both set to 0.01. It then creates an "Elastic Net Regularizer" that combines these L1 and L2 values to help prevent the model from overfitting by penalizing overly complex or large weight values in the model's learning process.

In [9]:
# Penalty coefficients for the L1 and L2 weight terms
l1_factor = 0.01  # Example value
l2_factor = 0.01  # Example value

# Combined L1 + L2 ("elastic net") kernel regularizer shared by the models below
elastic_net_regularizer = regularizers.L1L2(l1=l1_factor, l2=l2_factor)

## **Callbacks: Learning Rate Scheduler and Early Stopping**

In [10]:
# Define a learning rate schedule
def lr_time_based_decay(epoch, lr, decay=0.01):
    """Return the learning rate to use for ``epoch``.

    The rate passed in is divided by ``1 + decay * epoch``. Note that the
    division is applied to whatever ``lr`` the caller supplies: in the training
    logs of this notebook the rate goes 0.00100 -> 0.00099 -> 0.00097 ..., i.e.
    the scheduler feeds back the previous epoch's rate, so the reduction
    compounds from epoch to epoch rather than being computed from the initial
    rate.

    Args:
        epoch: Zero-based epoch index.
        lr: Learning rate to decay.
        decay: Decay factor per epoch index; defaults to the original 0.01.

    Returns:
        float: ``lr / (1 + decay * epoch)``.
    """
    return lr / (1 + decay * epoch)

# Define callbacks

# Stop training once 'val_loss' has gone 3 epochs without improving
# (patience=3) and roll the model back to the weights of its best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Recompute the learning rate every epoch with lr_time_based_decay; verbose=1
# prints the rate that is being set.
lr_scheduler = LearningRateScheduler(lr_time_based_decay, verbose=1)

callbacks = [early_stopping, lr_scheduler]

## **Data Augmentation Layers**

In [11]:
# Data Augmentation
# Random horizontal flips followed by small random rotations (factor 0.05),
# packaged as one Sequential block so it can be added to a model as a layer.
data_augmentation_layers = tf.keras.Sequential()
data_augmentation_layers.add(RandomFlip("horizontal"))
data_augmentation_layers.add(RandomRotation(0.05))

---

# **Convolutional Neural Networks (CNN): Shallow Models**

---

## **CNN Model: Shallow**

In [12]:
# Define the CNN Model from Scratch
def build_scratch_cnn_shallow():
    """Build and compile the baseline three-block CNN for RGB inputs.

    Layout: scale pixels by 1/255 -> three Conv2D/BatchNorm/MaxPool/Dropout(0.1)
    blocks with 16, 32 and 64 filters -> Flatten -> softmax over `num_classes`.

    Returns:
        A compiled `Sequential` model (Adam with learning rate 0.001,
        categorical cross-entropy loss, accuracy metric).
    """
    net = models.Sequential()
    net.add(tf.keras.Input(shape=(*IMG_SIZE, 3)))
    net.add(layers.Rescaling(1.0 / 255))  # Normalize pixel values

    # Each block: 3x3 'same' convolution, batch norm, 2x2 pooling, light dropout
    for n_filters in (16, 32, 64):
        net.add(layers.Conv2D(n_filters, 3, padding='same', activation='relu'))
        net.add(layers.BatchNormalization())
        net.add(layers.MaxPooling2D())
        net.add(layers.Dropout(0.1))

    net.add(layers.Flatten())
    net.add(layers.Dense(num_classes, activation='softmax'))

    # 0.001 is the starting learning rate; the LearningRateScheduler callback
    # passed to fit() adjusts it from epoch to epoch.
    adam = tf.keras.optimizers.Adam(learning_rate=0.001)
    net.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    return net

# Build the baseline shallow model and print its layer-by-layer summary
scratch_model_shallow = build_scratch_cnn_shallow()
scratch_model_shallow.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 64, 64, 3)         0         
                                                                 
 conv2d (Conv2D)             (None, 64, 64, 16)        448       
                                                                 
 batch_normalization (BatchN  (None, 64, 64, 16)       64        
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 32, 32, 16)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 32, 32, 16)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 32)       

### **CNN Model: Shallow - Training**

In [13]:
# Fit the baseline model for up to 10 epochs, validating after each epoch;
# the shared callbacks handle early stopping and the learning-rate schedule.
history_shallow = scratch_model_shallow.fit(
    x=train_ds,
    epochs=10,
    validation_data=val_ds,
    callbacks=callbacks,
)


Epoch 1: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 1/10

Epoch 2: LearningRateScheduler setting learning rate to 0.0009900990569281696.
Epoch 2/10

Epoch 3: LearningRateScheduler setting learning rate to 0.0009706853341092082.
Epoch 3/10

Epoch 4: LearningRateScheduler setting learning rate to 0.0009424129424128428.
Epoch 4/10

Epoch 5: LearningRateScheduler setting learning rate to 0.0009061662869778676.
Epoch 5/10

Epoch 6: LearningRateScheduler setting learning rate to 0.0008630154964824517.
Epoch 6/10

Epoch 7: LearningRateScheduler setting learning rate to 0.0008141655444149982.
Epoch 7/10

Epoch 8: LearningRateScheduler setting learning rate to 0.000760902402591761.
Epoch 8/10

Epoch 9: LearningRateScheduler setting learning rate to 0.0007045392757626595.
Epoch 9/10

Epoch 10: LearningRateScheduler setting learning rate to 0.0006463663297953135.
Epoch 10/10


#### **CNN Model: Shallow - Save**

In [14]:
# Save training history: .json
# (This file holds the per-epoch metrics recorded by fit(), not the model
# architecture.) Every logged value is cast to a built-in float before dumping —
# presumably because some entries are NumPy scalars that `json` cannot serialise.
# The converted lists are written back onto the History object, as before.
for key, values in history_shallow.history.items():
    history_shallow.history[key] = [float(v) for v in values]

# Write the JSON file (create the target folder on a fresh checkout)
os.makedirs('json', exist_ok=True)
with open('json/cnn_model_shallow.json', 'w') as f:
    json.dump(history_shallow.history, f)


# Save Model: .h5
# Saves the Model Weights and Configurations
os.makedirs('h5', exist_ok=True)
scratch_model_shallow.save('h5/scratch_model_shallow.h5')

## **CNN Model: Shallow Calibrated**

In [15]:
# Define the CNN Model from Scratch
def build_scratch_cnn_shallow_calibrated():
    """Build and compile the wider four-block CNN for RGB inputs.

    Layout: scale pixels by 1/255 -> four Conv2D/BatchNorm/MaxPool/Dropout(0.1)
    blocks with 32, 64, 128 and 256 filters -> Flatten -> Dense(256, relu)
    -> Dropout(0.3) -> softmax over `num_classes`.

    Returns:
        A compiled `Sequential` model (Adam with learning rate 0.001,
        categorical cross-entropy loss, accuracy metric).
    """
    model = models.Sequential()
    model.add(tf.keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3)))
    model.add(layers.Rescaling(1.0 / 255))  # Normalize pixel values

    # Each block: 3x3 'same' convolution, batch norm, 2x2 pooling, light dropout
    for filters in (32, 64, 128, 256):
        model.add(layers.Conv2D(filters, 3, padding='same', activation='relu'))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D())
        model.add(layers.Dropout(0.1))

    # Flatten BEFORE the hidden Dense layer. Previously Dense(256) was added
    # ahead of Flatten, so it was applied along the channel axis of the 4-D
    # feature map instead of acting as a fully connected layer over all
    # spatial features.
    model.add(layers.Flatten())
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dropout(0.3))

    model.add(layers.Dense(num_classes, activation='softmax'))

    # 0.001 is the starting learning rate; the LearningRateScheduler callback
    # passed to fit() adjusts it from epoch to epoch.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# Build the calibrated shallow model and print its layer-by-layer summary
scratch_model_shallow_calibrated = build_scratch_cnn_shallow_calibrated()
scratch_model_shallow_calibrated.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_1 (Rescaling)     (None, 64, 64, 3)         0         
                                                                 
 conv2d_3 (Conv2D)           (None, 64, 64, 32)        896       
                                                                 
 batch_normalization_3 (Batc  (None, 64, 64, 32)       128       
 hNormalization)                                                 
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 32, 32, 32)       0         
 2D)                                                             
                                                                 
 dropout_3 (Dropout)         (None, 32, 32, 32)        0         
                                                                 
 conv2d_4 (Conv2D)           (None, 32, 32, 64)       

### **CNN Model: Shallow Calibrated - Training**

In [16]:
# Fit the calibrated model for up to 10 epochs, validating after each epoch;
# the shared callbacks handle early stopping and the learning-rate schedule.
history_shallow_calibrated = scratch_model_shallow_calibrated.fit(
    x=train_ds,
    epochs=10,
    validation_data=val_ds,
    callbacks=callbacks,
)


Epoch 1: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 1/10

Epoch 2: LearningRateScheduler setting learning rate to 0.0009900990569281696.
Epoch 2/10

Epoch 3: LearningRateScheduler setting learning rate to 0.0009706853341092082.
Epoch 3/10

Epoch 4: LearningRateScheduler setting learning rate to 0.0009424129424128428.
Epoch 4/10

Epoch 5: LearningRateScheduler setting learning rate to 0.0009061662869778676.
Epoch 5/10

Epoch 6: LearningRateScheduler setting learning rate to 0.0008630154964824517.
Epoch 6/10

Epoch 7: LearningRateScheduler setting learning rate to 0.0008141655444149982.
Epoch 7/10

Epoch 8: LearningRateScheduler setting learning rate to 0.000760902402591761.
Epoch 8/10

Epoch 9: LearningRateScheduler setting learning rate to 0.0007045392757626595.
Epoch 9/10

Epoch 10: LearningRateScheduler setting learning rate to 0.0006463663297953135.
Epoch 10/10


#### **CNN Model: Shallow Calibrated - Save**

In [17]:
# Save training history: .json
# (This file holds the per-epoch metrics recorded by fit(), not the model
# architecture.) Every logged value is cast to a built-in float before dumping —
# presumably because some entries are NumPy scalars that `json` cannot serialise.
# The converted lists are written back onto the History object, as before.
for key, values in history_shallow_calibrated.history.items():
    history_shallow_calibrated.history[key] = [float(v) for v in values]

# Write the JSON file (create the target folder on a fresh checkout)
os.makedirs('json', exist_ok=True)
with open('json/cnn_model_shallow_calibrated.json', 'w') as f:
    json.dump(history_shallow_calibrated.history, f)


# Save Model: .h5
# Saves the Model Weights and Configurations
os.makedirs('h5', exist_ok=True)
scratch_model_shallow_calibrated.save('h5/scratch_model_shallow_calibrated.h5')

## **CNN Model: Shallow Calibrated + Data Augmentation**

In [18]:
# Define the CNN Model from Scratch
def build_scratch_cnn_shallow_calibrated_DA():
    """Build and compile the four-block RGB CNN with augmentation and a regularized head.

    Layout: `data_augmentation_layers` -> scale pixels by 1/255 -> four
    Conv2D/BatchNorm/MaxPool/Dropout(0.1) blocks with 32, 64, 128 and 256
    filters -> Flatten -> Dense(256, relu, elastic-net kernel penalty)
    -> Dropout(0.3) -> softmax over `num_classes`.

    Returns:
        A compiled `Sequential` model (Adam with learning rate 0.001,
        categorical cross-entropy loss, accuracy metric).
    """
    model = models.Sequential()
    model.add(tf.keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3)))
    model.add(data_augmentation_layers)
    model.add(layers.Rescaling(1.0 / 255))  # Normalize pixel values

    # Convolutional blocks (no kernel regularizer is attached to these layers):
    # 3x3 'same' convolution, batch norm, 2x2 pooling, light dropout.
    for filters in (32, 64, 128, 256):
        model.add(layers.Conv2D(filters, 3, padding='same', activation='relu'))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D())
        model.add(layers.Dropout(0.1))

    # Flatten BEFORE the hidden Dense layer. Previously Dense(256) was added
    # ahead of Flatten, so it was applied along the channel axis of the 4-D
    # feature map instead of acting as a fully connected layer over all
    # spatial features.
    model.add(layers.Flatten())

    # Only this layer is regularized: its kernel carries the shared L1 + L2
    # penalty, whose factors are set where `elastic_net_regularizer` is defined.
    model.add(layers.Dense(256, activation='relu', kernel_regularizer=elastic_net_regularizer))
    model.add(layers.Dropout(0.3))

    model.add(layers.Dense(num_classes, activation='softmax'))

    # 0.001 is the starting learning rate; the LearningRateScheduler callback
    # passed to fit() adjusts it from epoch to epoch.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# Build the augmented, regularized model and print its layer-by-layer summary
scratch_model_shallow_calibrated_DA = build_scratch_cnn_shallow_calibrated_DA()
scratch_model_shallow_calibrated_DA.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (None, 64, 64, 3)         0         
                                                                 
 rescaling_2 (Rescaling)     (None, 64, 64, 3)         0         
                                                                 
 conv2d_7 (Conv2D)           (None, 64, 64, 32)        896       
                                                                 
 batch_normalization_7 (Batc  (None, 64, 64, 32)       128       
 hNormalization)                                                 
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 32, 32, 32)       0         
 2D)                                                             
                                                                 
 dropout_8 (Dropout)         (None, 32, 32, 32)       

### **CNN Model: Shallow Calibrated Data Augmentation - Training**

In [19]:
# Fit the augmented model for up to 10 epochs, validating after each epoch;
# the shared callbacks handle early stopping and the learning-rate schedule.
history_shallow_calibrated_DA = scratch_model_shallow_calibrated_DA.fit(
    x=train_ds,
    epochs=10,
    validation_data=val_ds,
    callbacks=callbacks,
)


Epoch 1: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 1/10

Epoch 2: LearningRateScheduler setting learning rate to 0.0009900990569281696.
Epoch 2/10

Epoch 3: LearningRateScheduler setting learning rate to 0.0009706853341092082.
Epoch 3/10

Epoch 4: LearningRateScheduler setting learning rate to 0.0009424129424128428.
Epoch 4/10

Epoch 5: LearningRateScheduler setting learning rate to 0.0009061662869778676.
Epoch 5/10

Epoch 6: LearningRateScheduler setting learning rate to 0.0008630154964824517.
Epoch 6/10

Epoch 7: LearningRateScheduler setting learning rate to 0.0008141655444149982.
Epoch 7/10

Epoch 8: LearningRateScheduler setting learning rate to 0.000760902402591761.
Epoch 8/10

Epoch 9: LearningRateScheduler setting learning rate to 0.0007045392757626595.
Epoch 9/10

Epoch 10: LearningRateScheduler setting learning rate to 0.0006463663297953135.
Epoch 10/10


#### **CNN Model: Shallow Calibrated Data Augmentation - Save**

In [20]:
# Save training history: .json
# (This file holds the per-epoch metrics recorded by fit(), not the model
# architecture.) Every logged value is cast to a built-in float before dumping —
# presumably because some entries are NumPy scalars that `json` cannot serialise.
# The converted lists are written back onto the History object, as before.
for key, values in history_shallow_calibrated_DA.history.items():
    history_shallow_calibrated_DA.history[key] = [float(v) for v in values]

# Write the JSON file (create the target folder on a fresh checkout)
os.makedirs('json', exist_ok=True)
with open('json/cnn_model_shallow_calibrated_DA.json', 'w') as f:
    json.dump(history_shallow_calibrated_DA.history, f)


# Save Model: .h5
# Saves the Model Weights and Configurations
os.makedirs('h5', exist_ok=True)
scratch_model_shallow_calibrated_DA.save('h5/scratch_model_shallow_calibrated_DA.h5')

## **CNN Model: Shallow Grayscale**

### **Dataset: Loading, Splitting, Shuffling, Caching**

In [21]:
# Load Grayscale Dataset
dataset_dir = 'dataset/hagridset'
full_ds_grayscale = tf.keras.utils.image_dataset_from_directory(
    dataset_dir,
    shuffle=True,
    seed=SEED,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='categorical',
    color_mode='grayscale'  # Load images as grayscale
)

# Split into training, validation, and test sets
train_ratio_grayscale = 0.7
val_ratio_grayscale = 0.2
test_ratio_grayscale = 0.1

# Total length of the dataset (in batches, because the dataset is already batched)
total_size_grayscale = len(full_ds_grayscale)

# Compute the number of batches in each split; the test split takes the remainder
train_size_grayscale = int(total_size_grayscale * train_ratio_grayscale)
val_size_grayscale = int(total_size_grayscale * val_ratio_grayscale)
test_size_grayscale = total_size_grayscale - (train_size_grayscale + val_size_grayscale)

# NOTE(review): the source dataset is created with shuffle=True. If it reshuffles
# on every iteration, batches near the take/skip boundaries may end up in more
# than one split — confirm, or split at file level before building the datasets.

# Training split: cache first, then shuffle, so that each epoch sees a new batch
# order. (Shuffling before cache() stores a single fixed order in the cache.)
train_ds_grayscale = (
    full_ds_grayscale.take(train_size_grayscale)
    .cache()
    .shuffle(train_size_grayscale, seed=SEED)
    .prefetch(buffer_size=AUTOTUNE)
)

# Validation / test splits: evaluation does not depend on batch order, so these
# are only cached and prefetched.
val_ds_grayscale = (
    full_ds_grayscale.skip(train_size_grayscale)
    .take(val_size_grayscale)
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)
test_ds_grayscale = (
    full_ds_grayscale.skip(train_size_grayscale + val_size_grayscale)
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

# Count samples in each subset
def count_samples(dataset):
    """Return the number of individual elements in a batched dataset.

    Args:
        dataset: Object exposing ``unbatch()`` that yields one element per sample.

    Returns:
        int: How many elements ``dataset.unbatch()`` produced.
    """
    total = 0
    for _ in dataset.unbatch():
        total += 1
    return total

# Output the number of samples for each grayscale split.
# (These lines previously counted the RGB splits train_ds / val_ds / test_ds.)
print(f'Using {count_samples(train_ds_grayscale)} samples in the Training set Grayscale')
print(f'Using {count_samples(val_ds_grayscale)} samples in the Validation set Grayscale')
print(f'Using {count_samples(test_ds_grayscale)} samples in the Test set Grayscale')

Found 125912 files belonging to 18 classes.
Using 88128 samples in the Training set Grayscale
Using 25184 samples in the Validation set Grayscale
Using 12600 samples in the Test set Grayscale


In [22]:
# Persist each grayscale split under its own sub-directory of `path_to_save`.
# (The RGB splits train_ds / val_ds / test_ds were previously written here.)
path_to_save = 'savedGrayDataset'

tf.data.experimental.save(train_ds_grayscale, path_to_save + '/train')
tf.data.experimental.save(val_ds_grayscale, path_to_save + '/val')
tf.data.experimental.save(test_ds_grayscale, path_to_save + '/test')

In [23]:
# Define the CNN Model from Scratch
def build_scratch_cnn_shallow_grayscale():
    """Build and compile the four-block CNN for single-channel (grayscale) inputs.

    Layout: scale pixels by 1/255 -> four Conv2D/BatchNorm/MaxPool/Dropout(0.1)
    blocks with 32, 64, 128 and 256 filters -> Flatten -> Dense(256, relu,
    elastic-net kernel penalty) -> Dropout(0.3) -> softmax over `num_classes`.
    No augmentation block is used in this model.

    Returns:
        A compiled `Sequential` model (Adam with learning rate 0.001,
        categorical cross-entropy loss, accuracy metric).
    """
    model = models.Sequential()
    model.add(tf.keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 1)))
    model.add(layers.Rescaling(1.0 / 255))  # Normalize pixel values

    # Convolutional blocks (no kernel regularizer is attached to these layers):
    # 3x3 'same' convolution, batch norm, 2x2 pooling, light dropout.
    for filters in (32, 64, 128, 256):
        model.add(layers.Conv2D(filters, 3, padding='same', activation='relu'))
        model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D())
        model.add(layers.Dropout(0.1))

    # Flatten BEFORE the hidden Dense layer. Previously Dense(256) was added
    # ahead of Flatten, so it was applied along the channel axis of the 4-D
    # feature map instead of acting as a fully connected layer over all
    # spatial features.
    model.add(layers.Flatten())

    # Only this layer is regularized: its kernel carries the shared L1 + L2
    # penalty, whose factors are set where `elastic_net_regularizer` is defined.
    model.add(layers.Dense(256, activation='relu', kernel_regularizer=elastic_net_regularizer))
    model.add(layers.Dropout(0.3))

    model.add(layers.Dense(num_classes, activation='softmax'))

    # 0.001 is the starting learning rate; the LearningRateScheduler callback
    # passed to fit() adjusts it from epoch to epoch.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# Build the grayscale model and print its layer-by-layer summary
scratch_model_shallow_grayscale = build_scratch_cnn_shallow_grayscale()
scratch_model_shallow_grayscale.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_3 (Rescaling)     (None, 64, 64, 1)         0         
                                                                 
 conv2d_11 (Conv2D)          (None, 64, 64, 32)        320       
                                                                 
 batch_normalization_11 (Bat  (None, 64, 64, 32)       128       
 chNormalization)                                                
                                                                 
 max_pooling2d_11 (MaxPoolin  (None, 32, 32, 32)       0         
 g2D)                                                            
                                                                 
 dropout_13 (Dropout)        (None, 32, 32, 32)        0         
                                                                 
 conv2d_12 (Conv2D)          (None, 32, 32, 64)       

### **CNN Model: Grayscale Training**

In [24]:
# Fit the grayscale model on the grayscale splits for up to 10 epochs; the
# shared callbacks handle early stopping and the learning-rate schedule.
history_shallow_grayscale = scratch_model_shallow_grayscale.fit(
    x=train_ds_grayscale,
    epochs=10,
    validation_data=val_ds_grayscale,
    callbacks=callbacks,
)


Epoch 1: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 1/10

Epoch 2: LearningRateScheduler setting learning rate to 0.0009900990569281696.
Epoch 2/10

Epoch 3: LearningRateScheduler setting learning rate to 0.0009706853341092082.
Epoch 3/10

Epoch 4: LearningRateScheduler setting learning rate to 0.0009424129424128428.
Epoch 4/10

Epoch 5: LearningRateScheduler setting learning rate to 0.0009061662869778676.
Epoch 5/10

Epoch 6: LearningRateScheduler setting learning rate to 0.0008630154964824517.
Epoch 6/10

Epoch 7: LearningRateScheduler setting learning rate to 0.0008141655444149982.
Epoch 7/10

Epoch 8: LearningRateScheduler setting learning rate to 0.000760902402591761.
Epoch 8/10

Epoch 9: LearningRateScheduler setting learning rate to 0.0007045392757626595.
Epoch 9/10

Epoch 10: LearningRateScheduler setting learning rate to 0.0006463663297953135.
Epoch 10/10


#### **CNN Model: Grayscale Save**

In [25]:
# Save training history: .json
# (This file holds the per-epoch metrics recorded by fit(), not the model
# architecture.) Every logged value is cast to a built-in float before dumping —
# presumably because some entries are NumPy scalars that `json` cannot serialise.
# The converted lists are written back onto the History object, as before.
for key, values in history_shallow_grayscale.history.items():
    history_shallow_grayscale.history[key] = [float(v) for v in values]

# Write the JSON file (create the target folder on a fresh checkout)
os.makedirs('json', exist_ok=True)
with open('json/cnn_model_shallow_grayscale.json', 'w') as f:
    json.dump(history_shallow_grayscale.history, f)


# Save Model: .h5
# Saves the Model Weights and Configurations
os.makedirs('h5', exist_ok=True)
scratch_model_shallow_grayscale.save('h5/scratch_model_shallow_grayscale.h5')