### ResNet: Tensorflow Train

Due to difficulty overcoming overfitting with the previous approaches, and probably the data preprocessing being a potential source of error, 

the whole data importing and preprocessing was revised by first constructing a model with PyTorch and then translating it into tensorflow/keras

### Initialize Notebook & packages

In [1]:
import helper as hp 
hp.initialize_notebook() # initialize with GPU enabled  
# hp.initialize_notebook(False) # to disable GPU 

2024-12-17 20:52:35.179803: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-17 20:52:35.194355: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-17 20:52:35.198878: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-17 20:52:35.210010: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


GPU enabled. Checking for available GPUs...
1 Physical GPUs, 1 Logical GPUs

Verifying TensorFlow and PyTorch CUDA setup...
TensorFlow version: 2.17.0
Built with CUDA: True
Num GPUs Available: 1

Keras version: 3.6.0

End checks and initialization.


I0000 00:00:1734468757.147531  735806 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1734468757.190649  735806 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1734468757.194269  735806 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1734468757.199414  735806 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

In [2]:
# Import standard libraries
import os
import sys
import gc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time
import keras
# Import DL libraries
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, Dropout # General Layers
from keras.layers import RandomRotation, RandomFlip
from keras import layers, models, Model, Input
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, TensorBoard
from keras.initializers import glorot_uniform
from keras.utils import plot_model
# Suppress tensorflow warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Import ML libraries
from sklearn.metrics import f1_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold, StratifiedShuffleSplit
from sklearn.utils.class_weight import compute_class_weight

# Import image libraries
from skimage import transform
import tifffile as tiff
from tifffile import imread

# Appends current working dir
current_path = os.getcwd()
sys.path.append(current_path)

# Import custom preprocessing class
from imc_preprocessing import IMCPreprocessor







## IMPORT DATA

In [3]:
images_dir = '/home/jupyter-luvogt/Final_Project_LR/IMC_images' 
metadata_dir = '/home/jupyter-luvogt/Final_Project_LR/metadata.csv'
# --- Load CSV and filter dataset ---
csv_file = metadata_dir
image_folder = images_dir

# Load the CSV
df = pd.read_csv(csv_file)

# Filter rows with NA in PDL1_score and convert to binary
df = df.dropna(subset=["PDL1_score"])
df["PDL1_score"] = df["PDL1_score"].astype(int)

# --- Train-Test-Validation Split ---
train_df, val_and_test_df = train_test_split(df, test_size=0.4, random_state=42, stratify=df["PDL1_score"])
test_df, val_df = train_test_split(val_and_test_df, test_size=0.5, random_state=42, stratify=val_and_test_df["PDL1_score"])

# --- Load Images and Compute Mean and Standard Deviation ---

# Function to load a single image
def load_image(image_path):
    image = imread(image_path)  # Load all 46 channels
    return image.astype(np.float32)

# Initialize variables to accumulate sum and sum of squares
nr_images = 0
sum_images = np.zeros((46, 224, 224))
sum_squared_images = np.zeros((46, 224, 224))

# Accumulate the sum and sum of squares for the training dataset
for _, row in train_df.iterrows():
    image_path = os.path.join(image_folder, f"{row['sample_id']}.tiff")
    image = load_image(image_path)
    nr_images += 1
    sum_images += image
    sum_squared_images += image ** 2

# Compute mean and standard deviation
mean = sum_images / nr_images
std = np.sqrt(sum_squared_images / nr_images - mean ** 2)

# --- Dataset Loading Function ---

def load_dataset(dataframe, image_folder, normalize = True): # augment = False
    images = []
    labels = []
    
    for _, row in dataframe.iterrows():
        image_path = os.path.join(image_folder, f"{row['sample_id']}.tiff")
        image = load_image(image_path)

        if normalize:
            image = (image - mean) / std # Z-Score

        # Convert to channels-last format (224, 224, 46)
        image = np.transpose(image, (1, 2, 0))

        # # Apply data augmentation if specified
        # if augment:
        #     # Random horizontal flip
        #     if np.random.rand() > 0.5:
        #         image = np.flip(image, axis=2)
        #     # Random vertical flip
        #     if np.random.rand() > 0.5:
        #         image = np.flip(image, axis=1)

        images.append(image)
        labels.append(row["PDL1_score"])

    return np.array(images), np.array(labels)

### Creating datasets

In [4]:
# Training dataset without augmentation
X_train, y_train = load_dataset(train_df, image_folder, normalize = True)
# # Training dataset with data augmentation
# X_train_aug, y_train_aug = load_dataset(train_df, image_folder, augment=True)
# Validation dataset
X_val, y_val = load_dataset(val_df, image_folder, normalize = True)
# Test dataset
X_test, y_test = load_dataset(test_df, image_folder, normalize = True)

# One Hot Encoding
y_train= to_categorical(y_train)
y_val= to_categorical(y_val)
y_test = to_categorical(y_test)

print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

X_train shape: (590, 224, 224, 46), y_train shape: (590, 2)
X_val shape: (197, 224, 224, 46), y_val shape: (197, 2)
X_test shape: (197, 224, 224, 46), y_test shape: (197, 2)


Verifying Normalization of data

In [5]:
print(np.mean(X_train)) # Mean = 0
print(np.std(X_train)) # Standard deviation = 1

5.562353140428533e-17
0.9999999999582869


#### Define building blocks of ResNet network architecture

In [5]:
from keras.regularizers import l2
from keras import layers
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D
from keras.models import Model, load_model
from keras.initializers import glorot_uniform
from keras.utils import plot_model
# from keras.utils.vis_utils import model_to_dot
import keras.backend as K
import tensorflow as tf
def identity_block(x, filter):
    # copy tensor to variable called x_skip
    x_skip = x
    # Layer 1
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same')(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    x = tf.keras.layers.Activation('relu')(x)
    # Layer 2
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same')(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    # Add Residue
    x = tf.keras.layers.Add()([x, x_skip])     
    x = tf.keras.layers.Activation('relu')(x)
    return x

def convolutional_block(x, filter):
    # copy tensor to variable called x_skip
    x_skip = x
    # Layer 1
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same', strides = (2,2))(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    x = tf.keras.layers.Activation('relu')(x)
    # Layer 2
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same')(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    # Processing Residue with conv(1,1)
    x_skip = tf.keras.layers.Conv2D(filter, (1,1), strides = (2,2))(x_skip)
    # Add Residue
    x = tf.keras.layers.Add()([x, x_skip])     
    x = tf.keras.layers.Activation('relu')(x)
    return x

def ResNet(shape = (32, 32, 3), classes = 10, block_layers = [3, 4, 6, 3]):
    # Step 1 (Setup Input Layer)
    x_input = tf.keras.layers.Input(shape)
    x = tf.keras.layers.ZeroPadding2D((3, 3))(x_input)
    # Step 2 (Initial Conv layer along with maxPool)
    x = tf.keras.layers.Conv2D(64, kernel_size=7, strides=2, padding='same')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation('relu')(x)
    x = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)
    # Define size of sub-blocks and initial filter size
    block_layers = block_layers
    filter_size = 64
    # Step 3 Add the Resnet Blocks
    for i in range(4):
        if i == 0:
            # For sub-block 1 Residual/Convolutional block not needed
            for j in range(block_layers[i]):
                x = identity_block(x, filter_size)
        else:
            # One Residual/Convolutional Block followed by Identity blocks
            # The filter size will go on increasing by a factor of 2
            filter_size = filter_size*2
            x = convolutional_block(x, filter_size)
            for j in range(block_layers[i] - 1):
                x = identity_block(x, filter_size)
    # Step 4 End Dense Network
    x = tf.keras.layers.AveragePooling2D((2,2), padding = 'same')(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(512, activation = 'relu')(x)
    x = tf.keras.layers.Dense(classes, activation = 'softmax')(x)
    model = tf.keras.models.Model(inputs = x_input, outputs = x, name = "ResNet34")
    return model

### Train ResNet18 Model

In [6]:
ROWS = 224
COLS = 224
CHANNELS = 46
CLASSES = 2
block_layers = [2,2,2,2]
# Build Network Graph 
model_ResNet18 = ResNet(shape = (ROWS, COLS, CHANNELS), classes = CLASSES, block_layers = block_layers)

# Compile Model 
l_rate = 1.e-4
opt = keras.optimizers.Adam(learning_rate=l_rate)

model_ResNet18.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# # Apply TensorBoard
# # define the logs folder 
# log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet50_3Channels" + time.strftime("%Y%m%d-%H%M%S"))
# # Define TensorBoard Callback
# tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train Model 
batch = 64
epochs = 10
start_time = time.time()

history_ResNet18 = model_ResNet18.fit(X_train, y_train,
                                      epochs = epochs, batch_size = batch, 
                                      validation_data = (X_val, y_val))

end_time = time.time()
elapsed_time = end_time - start_time
print(f"\nElapsed time: {elapsed_time} seconds")

2024-12-16 21:56:04.174899: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5447106560 exceeds 10% of free system memory.
2024-12-16 21:56:06.763387: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5447106560 exceeds 10% of free system memory.


Epoch 1/10


I0000 00:00:1734386177.548504  707518 service.cc:146] XLA service 0x7f3a84047f90 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1734386177.548548  707518 service.cc:154]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2024-12-16 21:56:17.779099: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-12-16 21:56:18.721282: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 90300
2024-12-16 21:56:21.114400: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 7.12GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
I0000 00:00:1734386195.896974  707518 device_compiler.h:188] Compiled cluster using XLA!  This line i

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 2s/step - accuracy: 0.5936 - loss: 1.6235 - val_accuracy: 0.5939 - val_loss: 1.4351
Epoch 2/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 385ms/step - accuracy: 0.8380 - loss: 0.5075 - val_accuracy: 0.6396 - val_loss: 1.2642
Epoch 3/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 394ms/step - accuracy: 0.9316 - loss: 0.2125 - val_accuracy: 0.5076 - val_loss: 1.3793
Epoch 4/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 390ms/step - accuracy: 0.9400 - loss: 0.1483 - val_accuracy: 0.5228 - val_loss: 1.2674
Epoch 5/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 396ms/step - accuracy: 0.9900 - loss: 0.0642 - val_accuracy: 0.6751 - val_loss: 0.9535
Epoch 6/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 399ms/step - accuracy: 0.9989 - loss: 0.0313 - val_accuracy: 0.6599 - val_loss: 0.9824
Epoch 7/10
[1m10/10[0m [32m━━━━━━━━━━━

In [8]:
# model_ResNet18.evaluate(X_test, y_test)

Finally, we have a Validation Accuracy above the baseline accuracy, giving us a proof of concept and giving us the ability to experiment further with: 

For now, we have 68.58% test accuracy

    - Different ResNet Architecture: ResNet18, ResNet50, and pre-trained ResNets
    - Data Augmentation
    - Dimensionality Reduction: Autoencoder
    - Feature Engineering: Channel Selection --> e.g. only biological relevant channels / feed bottleneck layer of autoencoder into ResNet
    - Regularizations: Regularizer and Dropouts
    - Callbacks
    - Monitor hyperparameters with TensorBoard

## ResNet18: with Tensor Board + 46 Channels + without Regularization + no callbacks

In [11]:
# from keras.callbacks import TensorBoard 
# ROWS = 224
# COLS = 224
# CHANNELS = 46
# CLASSES = 2
# block_layers = [2,2,2,2] # ResNet18
# # Build Network Graph 
# model_ResNet18 = ResNet(shape = (ROWS, COLS, CHANNELS), classes = CLASSES, block_layers = block_layers)

# # Compile Model 
# l_rate = 1.e-4
# opt = keras.optimizers.Adam(learning_rate=l_rate)

# model_ResNet18.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# # Apply TensorBoard
# # define the logs folder 
# log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet18_46_no_regularization_trained" + time.strftime("%Y%m%d-%H%M%S"))
# # Define TensorBoard Callback
# tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# # Train Model 
# batch = 64
# epochs = 10
# start_time = time.time()

# history_ResNet18 = model_ResNet18.fit(X_train, y_train,
#                                       epochs = epochs, batch_size = batch, 
#                                       validation_data = (X_val, y_val), callbacks = tb_callback)

# end_time = time.time()
# elapsed_time = end_time - start_time
# print(f"\nElapsed time: {elapsed_time} seconds")

2024-12-15 21:02:24.733828: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5447106560 exceeds 10% of free system memory.
2024-12-15 21:02:30.556470: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5447106560 exceeds 10% of free system memory.


Epoch 1/10


2024-12-15 21:02:44.753749: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 7.12GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 2s/step - accuracy: 0.5743 - loss: 1.5904 - val_accuracy: 0.6396 - val_loss: 1.6029
Epoch 2/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 574ms/step - accuracy: 0.7853 - loss: 0.5136 - val_accuracy: 0.6447 - val_loss: 0.9970
Epoch 3/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 548ms/step - accuracy: 0.8780 - loss: 0.2858 - val_accuracy: 0.4010 - val_loss: 1.5503
Epoch 4/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 555ms/step - accuracy: 0.9531 - loss: 0.1533 - val_accuracy: 0.6802 - val_loss: 0.7513
Epoch 5/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 561ms/step - accuracy: 0.9848 - loss: 0.0689 - val_accuracy: 0.6701 - val_loss: 0.8414
Epoch 6/10
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 562ms/step - accuracy: 0.9822 - loss: 0.0776 - val_accuracy: 0.6396 - val_loss: 1.0069
Epoch 7/10
[1m10/10[0m [32m━━━━━━━━━━━

In [12]:
# #### Test accuracy
# print("Test Accuracy is: {:.2f}%".format(model_ResNet18.evaluate(X_test, y_test)[1]))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 62ms/step - accuracy: 0.6562 - loss: 1.0567
Test Accuracy is: 0.65%


#### SAVING MODEL

In [26]:
# import pickle
# # Save the model
# model_ResNet18.save('./models_ResNet/model_ResNet18_46_no_regularization_trained.keras')
# # Save the history of your experiments 
# with open('./models_ResNet/history_red_LR.pkl', 'wb') as f:
#     pickle.dump(history_ResNet18.history, f)

## ResNet18: with Tensor Board + 46 Channels + without Regularization + ReduceLR + EarlyStopping

In [9]:
# Callbacks

log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet18_46_no_regularization_trained" + time.strftime("%Y%m%d-%H%M%S"))
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

callbacks_list = [
    keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=8,   # Optimal patience value for validation accuracy
    ),
    keras.callbacks.ModelCheckpoint(
        filepath="./models_ResNet/model_ResNet18_46_no_regularization_trained.keras", # save validation loss into file
        monitor="val_loss",  # monitor validation accuracy
        save_best_only=True,
    ),
    ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,  
    patience=3,
    verbose=1,
    mode="auto",
    min_delta=0.001,
    min_lr=1.e-6,
    ), 
    TensorBoard(log_dir=log_dir, histogram_freq=1)
]

In [None]:
ROWS = 224
COLS = 224
CHANNELS = 46
CLASSES = 2
block_layers = [2,2,2,2] # ResNet18
# Build Network Graph 
model_ResNet18 = ResNet(shape = (ROWS, COLS, CHANNELS), classes = CLASSES, block_layers = block_layers)

# Compile Model 
l_rate = 1.e-4
opt = keras.optimizers.Adam(learning_rate=l_rate)

model_ResNet18.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Apply TensorBoard
# define the logs folder 
log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet18_46_no_regularization_trained" + time.strftime("%Y%m%d-%H%M%S"))
# Define TensorBoard Callback
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train Model 
batch = 64
epochs = 10
start_time = time.time()

history_ResNet18 = model_ResNet18.fit(X_train, y_train,
                                      epochs = epochs, batch_size = batch, 
                                      validation_data = (X_val, y_val), callbacks = callbacks_list)

end_time = time.time()
elapsed_time = end_time - start_time
print(f"\nElapsed time: {elapsed_time} seconds")

In [16]:
# #### Test accuracy
# print("Test Accuracy is: {:.2f}%".format(model_ResNet18.evaluate(X_test, y_test)[1]))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 62ms/step - accuracy: 0.7067 - loss: 0.9014
Test Accuracy is: 0.71%


#### COMMENTS

Test accuracy improved a little bit

#### SAVING MODEL

In [17]:
# import pickle
# # Save the model
# model_ResNet18.save('./models_ResNet/model_ResNet18_46_no_regularization_callbacks_trained.keras')
# # Save the history of your experiments 
# with open('./models_ResNet/history_ResNet18_46_no_regularization_callbacks_trained.pkl', 'wb') as f:
#     pickle.dump(history_ResNet18.history, f)

## ResNet18: with Tensor Board + 46 Channels + with Dropout + ReduceLR + EarlyStopping

### Redefine Network with Dropout

In [10]:
def identity_block(x, filter, dropout_rate = 0.5):
    # copy tensor to variable called x_skip
    x_skip = x
    # Layer 1
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same')(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Activation('relu')(x)
    # Layer 2
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same')(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    # Add Residue
    x = tf.keras.layers.Add()([x, x_skip])     
    x = tf.keras.layers.Activation('relu')(x)
    return x

def convolutional_block(x, filter, dropout_rate = 0.5):
    # copy tensor to variable called x_skip
    x_skip = x
    # Layer 1
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same', strides = (2,2))(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Activation('relu')(x)
    # Layer 2
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same')(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    # Processing Residue with conv(1,1)
    x_skip = tf.keras.layers.Conv2D(filter, (1,1), strides = (2,2))(x_skip)
    # Add Residue
    x = tf.keras.layers.Add()([x, x_skip])     
    x = tf.keras.layers.Activation('relu')(x)
    return x

def ResNet(shape = (32, 32, 3), classes = 10, block_layers = [3, 4, 6, 3], dropout_rate = 0.5):
    # Step 1 (Setup Input Layer)
    x_input = tf.keras.layers.Input(shape)
    x = tf.keras.layers.ZeroPadding2D((3, 3))(x_input)
    # Step 2 (Initial Conv layer along with maxPool)
    x = tf.keras.layers.Conv2D(64, kernel_size=7, strides=2, padding='same')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Activation('relu')(x)
    x = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)
    # Define size of sub-blocks and initial filter size
    block_layers = block_layers
    filter_size = 64
    # Step 3 Add the Resnet Blocks
    for i in range(4):
        if i == 0:
            # For sub-block 1 Residual/Convolutional block not needed
            for j in range(block_layers[i]):
                x = identity_block(x, filter_size)
        else:
            # One Residual/Convolutional Block followed by Identity blocks
            # The filter size will go on increasing by a factor of 2
            filter_size = filter_size*2
            x = convolutional_block(x, filter_size)
            for j in range(block_layers[i] - 1):
                x = identity_block(x, filter_size)
    # Step 4 End Dense Network
    x = tf.keras.layers.AveragePooling2D((2,2), padding = 'same')(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(512, activation = 'relu')(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Dense(classes, activation = 'softmax')(x)
    model = tf.keras.models.Model(inputs = x_input, outputs = x, name = "ResNet34")
    return model

### Callbacks

In [11]:
# Callbacks

log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet18_46_dropout_callbacks_trained" + time.strftime("%Y%m%d-%H%M%S"))
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

callbacks_list = [
    keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=8,   # Optimal patience value for validation accuracy
    ),
    keras.callbacks.ModelCheckpoint(
        filepath="./models_ResNet/model_ResNet18_46_dropout_callbacks_trained.keras", # save validation loss into file
        monitor="val_loss",  # monitor validation accuracy
        save_best_only=True,
    ),
    ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,  
    patience=3,
    verbose=1,
    mode="auto",
    min_delta=0.001,
    min_lr=1.e-6,
    ), 
    TensorBoard(log_dir=log_dir, histogram_freq=1)
]

### Training model

In [12]:
ROWS = 224
COLS = 224
CHANNELS = 46
CLASSES = 2
block_layers = [2,2,2,2] # ResNet18
dropout_rate = 0.5
# Build Network Graph 
model_ResNet18 = ResNet(shape = (ROWS, COLS, CHANNELS), classes = CLASSES, block_layers = block_layers, dropout_rate = dropout_rate)

# Compile Model 
l_rate = 1.e-4
opt = keras.optimizers.Adam(learning_rate=l_rate)

model_ResNet18.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Apply TensorBoard
# define the logs folder 
log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet18_46_dropout_callbacks_trained" + time.strftime("%Y%m%d-%H%M%S"))
# Define TensorBoard Callback
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train Model 
batch = 64
epochs = 30
start_time = time.time()

history_ResNet18 = model_ResNet18.fit(X_train, y_train,
                                      epochs = epochs, batch_size = batch, 
                                      validation_data = (X_val, y_val), callbacks = callbacks_list)

end_time = time.time()
elapsed_time = end_time - start_time
print(f"\nElapsed time: {elapsed_time} seconds")

2024-12-16 22:38:08.175871: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5447106560 exceeds 10% of free system memory.
2024-12-16 22:38:10.772640: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5447106560 exceeds 10% of free system memory.


Epoch 1/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 3s/step - accuracy: 0.5879 - loss: 1.2561 - val_accuracy: 0.3959 - val_loss: 0.9601 - learning_rate: 1.0000e-04
Epoch 2/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 726ms/step - accuracy: 0.6510 - loss: 0.7719 - val_accuracy: 0.6142 - val_loss: 0.7999 - learning_rate: 1.0000e-04
Epoch 3/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 764ms/step - accuracy: 0.6864 - loss: 0.6412 - val_accuracy: 0.6091 - val_loss: 0.7165 - learning_rate: 1.0000e-04
Epoch 4/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 732ms/step - accuracy: 0.7789 - loss: 0.4920 - val_accuracy: 0.7411 - val_loss: 0.5554 - learning_rate: 1.0000e-04
Epoch 5/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 743ms/step - accuracy: 0.8212 - loss: 0.4330 - val_accuracy: 0.7766 - val_loss: 0.5228 - learning_rate: 1.0000e-04
Epoch 6/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[

### Testing accuracy

In [None]:
# To load the model from file uncomment and run the following 
loaded_model = keras.saving.load_model('./models_ResNet/model_ResNet18_46_dropout_callbacks_trained.keras')
print("Test Accuracy is: {:.2f}%".format(loaded_model.evaluate(X_test, y_test)[1]))

2024-12-17 11:13:12.474325: I external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:1578] failed to allocate 13.00GiB (13958643712 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2024-12-17 11:13:12.474477: I external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:1578] failed to allocate 11.70GiB (12562779136 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2024-12-17 11:13:12.474584: I external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:1578] failed to allocate 10.53GiB (11306501120 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2024-12-17 11:13:12.474692: I external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:1578] failed to allocate 9.48GiB (10175850496 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2024-12-17 11:13:12.474794: I external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:1578] failed to allocate 8.53GiB (9158264832 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2024-12-17 11:13:12.474

### Comments

Test accuracy imporoved from 0.71% (without dropout) to 0.82%! 

Conclusion: Dropout helped a lot

    - Try Dropout_rate of 0.2

### Dropout = 0.2

### Callbacks

In [8]:
log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet18_46_dropout_0.2_callbacks_trained" + time.strftime("%Y%m%d-%H%M%S"))
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

callbacks_list = [
    keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=8,   # Optimal patience value for validation accuracy
    ),
    keras.callbacks.ModelCheckpoint(
        filepath="./models_ResNet/model_ResNet18_46_dropout_0.2_callbacks_trained.keras", # save validation loss into file
        monitor="val_loss",  # monitor validation accuracy
        save_best_only=True,
    ),
    ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,  
    patience=3,
    verbose=1,
    mode="auto",
    min_delta=0.001,
    min_lr=1.e-6,
    ), 
    TensorBoard(log_dir=log_dir, histogram_freq=1)
]

### Train Model: Drop out = 0.2

In [9]:
ROWS = 224
COLS = 224
CHANNELS = 46
CLASSES = 2
block_layers = [2,2,2,2] # ResNet18
dropout_rate = 0.2
# Build Network Graph 
model_ResNet18 = ResNet(shape = (ROWS, COLS, CHANNELS), classes = CLASSES, block_layers = block_layers, dropout_rate = dropout_rate)

# Compile Model 
l_rate = 1.e-4
opt = keras.optimizers.Adam(learning_rate=l_rate)

model_ResNet18.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Apply TensorBoard
# define the logs folder 
log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet18_46_dropout_0.2_callbacks_trained" + time.strftime("%Y%m%d-%H%M%S"))
# Define TensorBoard Callback
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train Model 
batch = 64
epochs = 30
start_time = time.time()

history_ResNet18 = model_ResNet18.fit(X_train, y_train,
                                      epochs = epochs, batch_size = batch, 
                                      validation_data = (X_val, y_val), callbacks = callbacks_list)

end_time = time.time()
elapsed_time = end_time - start_time
print(f"\nElapsed time: {elapsed_time} seconds")

2024-12-16 16:52:06.962581: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5447106560 exceeds 10% of free system memory.
2024-12-16 16:52:09.562428: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5447106560 exceeds 10% of free system memory.


Epoch 1/30


I0000 00:00:1734367941.323721  689163 service.cc:146] XLA service 0x7fa1b00424f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1734367941.323763  689163 service.cc:154]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2024-12-16 16:52:21.599593: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-12-16 16:52:22.615901: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 90300
2024-12-16 16:52:26.275057: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 7.12GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
I0000 00:00:1734367966.576675  689163 device_compiler.h:188] Compiled cluster using XLA!  This line i

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 3s/step - accuracy: 0.6443 - loss: 1.2264 - val_accuracy: 0.3909 - val_loss: 1.2085 - learning_rate: 1.0000e-04
Epoch 2/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 714ms/step - accuracy: 0.5982 - loss: 0.8102 - val_accuracy: 0.4010 - val_loss: 1.1494 - learning_rate: 1.0000e-04
Epoch 3/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 751ms/step - accuracy: 0.6748 - loss: 0.6892 - val_accuracy: 0.5990 - val_loss: 0.8231 - learning_rate: 1.0000e-04
Epoch 4/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 611ms/step - accuracy: 0.6561 - loss: 0.6718 - val_accuracy: 0.4162 - val_loss: 0.9373 - learning_rate: 1.0000e-04
Epoch 5/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 753ms/step - accuracy: 0.7232 - loss: 0.5525 - val_accuracy: 0.6091 - val_loss: 0.7570 - learning_rate: 1.0000e-04
Epoch 6/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

### Testing accuracy

In [21]:
loaded_model = keras.saving.load_model('./models_ResNet/model_ResNet18_46_dropout_0.2_callbacks_trained.keras') # Loading best model
print("Test Accuracy is: {:.2f}%".format(loaded_model.evaluate(X_test, y_test)[1]))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 162ms/step - accuracy: 0.7960 - loss: 0.6576
Test Accuracy is: 0.80%


#### Comments

With Dropout = 0.2, the test accuracy decreased from 81% (drop rate = 0.5) to 80% (drop rate = 0.2)

    - stick with dropout_rate = 0.5

## ResNet18: with Tensor Board + 46 Channels + with Dropout + L2 Regularization + ReduceLR + EarlyStopping

### Redefine Network with Dropout + Regularization 

In [12]:
from keras.regularizers import l2
def identity_block(x, filter, dropout_rate = 0.5, l2_lambda=0.01):
    # copy tensor to variable called x_skip
    x_skip = x
    # Layer 1
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same', kernel_regularizer=l2(l2_lambda))(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Activation('relu')(x)
    # Layer 2
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same', kernel_regularizer=l2(l2_lambda))(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    # Add Residue
    x = tf.keras.layers.Add()([x, x_skip])     
    x = tf.keras.layers.Activation('relu')(x)
    return x

def convolutional_block(x, filter, dropout_rate=0.5, l2_lambda=0.01):
    # Copy tensor to variable called x_skip
    x_skip = x

    # Layer 1
    x = Conv2D(filter, (3, 3), padding='same', strides=(2, 2), kernel_regularizer=l2(l2_lambda))(x)
    x = BatchNormalization(axis=3)(x)
    x = Dropout(dropout_rate)(x)
    x = Activation('relu')(x)

    # Layer 2
    x = Conv2D(filter, (3, 3), padding='same', kernel_regularizer=l2(l2_lambda))(x)
    x = BatchNormalization(axis=3)(x)

    # Processing Residue with Conv(1,1)
    x_skip = Conv2D(filter, (1, 1), strides=(2, 2), kernel_regularizer=l2(l2_lambda))(x_skip)

    # Add Residue
    x = Add()([x, x_skip])
    x = Activation('relu')(x)

    return x

def ResNet(shape = (32, 32, 3), classes = 10, block_layers = [3, 4, 6, 3], dropout_rate = 0.5, l2_lambda=0.01):
    # Step 1 (Setup Input Layer)
    x_input = tf.keras.layers.Input(shape)
    x = tf.keras.layers.ZeroPadding2D((3, 3))(x_input)
    # Step 2 (Initial Conv layer along with maxPool)
    x = tf.keras.layers.Conv2D(64, kernel_size=7, strides=2, padding='same', kernel_regularizer=l2(l2_lambda))(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Activation('relu')(x)
    x = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)
    # Define size of sub-blocks and initial filter size
    block_layers = block_layers
    filter_size = 64
    # Step 3 Add the Resnet Blocks
    for i in range(4):
        if i == 0:
            # For sub-block 1 Residual/Convolutional block not needed
            for j in range(block_layers[i]):
                x = identity_block(x, filter_size)
        else:
            # One Residual/Convolutional Block followed by Identity blocks
            # The filter size will go on increasing by a factor of 2
            filter_size = filter_size*2
            x = convolutional_block(x, filter_size)
            for j in range(block_layers[i] - 1):
                x = identity_block(x, filter_size)
    # Step 4 End Dense Network
    x = tf.keras.layers.AveragePooling2D((2,2), padding = 'same')(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(512, activation = 'relu', kernel_regularizer=l2(l2_lambda))(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Dense(classes, activation = 'softmax')(x)
    model = tf.keras.models.Model(inputs = x_input, outputs = x, name = "ResNet34")
    return model



### Callbacks

Monitor val_accuracy because of regularization (val_loss keeps decreasing with epochs)

In [31]:
# Callbacks

log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet18_46_dropout_L2_regularizer_callbacks_trained" + time.strftime("%Y%m%d-%H%M%S"))
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

callbacks_list = [
    keras.callbacks.EarlyStopping(
        monitor="val_accuracy",
        patience=8,   # Optimal patience value for validation accuracy
    ),
    keras.callbacks.ModelCheckpoint(
        filepath="./models_ResNet/model_ResNet18_46_dropout_L2_regularizer_callbacks_trained.keras", # save validation loss into file
        monitor="val_accuracy",  # monitor validation accuracy
        save_best_only=True,
    ),
    ReduceLROnPlateau(
    monitor="val_accuracy",
    factor=0.5,  
    patience=3,
    verbose=1,
    mode="auto",
    min_delta=0.001,
    min_lr=1.e-6,
    ), 
    TensorBoard(log_dir=log_dir, histogram_freq=1)
]


### Train Model

In [32]:
ROWS = 224
COLS = 224
CHANNELS = 46
CLASSES = 2
block_layers = [2,2,2,2] # ResNet18
dropout_rate = 0.5 # Change HERE
l2_lambda = 0.01 # Change HERE
# Build Network Graph 
model_ResNet18 = ResNet(shape = (ROWS, COLS, CHANNELS), classes = CLASSES, block_layers = block_layers, dropout_rate = dropout_rate, l2_lambda = l2_lambda)

# Compile Model 
l_rate = 1.e-4
opt = keras.optimizers.Adam(learning_rate=l_rate)

model_ResNet18.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Apply TensorBoard
# define the logs folder 
log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet18_46_dropout_L2_regularizer_callbacks_trained" + time.strftime("%Y%m%d-%H%M%S"))
# Define TensorBoard Callback
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train Model 
batch = 64
epochs = 60 # Change here
start_time = time.time()

history_ResNet18 = model_ResNet18.fit(X_train, y_train,
                                      epochs = epochs, batch_size = batch, 
                                      validation_data = (X_val, y_val), callbacks = callbacks_list)

end_time = time.time()
elapsed_time = end_time - start_time
print(f"\nElapsed time: {elapsed_time} seconds")

Epoch 1/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 3s/step - accuracy: 0.5607 - loss: 52.9221 - val_accuracy: 0.3807 - val_loss: 51.7925 - learning_rate: 1.0000e-04
Epoch 2/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 777ms/step - accuracy: 0.6667 - loss: 51.3067 - val_accuracy: 0.4467 - val_loss: 50.6384 - learning_rate: 1.0000e-04
Epoch 3/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 730ms/step - accuracy: 0.6728 - loss: 50.1471 - val_accuracy: 0.4721 - val_loss: 49.5554 - learning_rate: 1.0000e-04
Epoch 4/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 758ms/step - accuracy: 0.6620 - loss: 49.0660 - val_accuracy: 0.6193 - val_loss: 48.3070 - learning_rate: 1.0000e-04
Epoch 5/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 759ms/step - accuracy: 0.7107 - loss: 47.8851 - val_accuracy: 0.6802 - val_loss: 47.1205 - learning_rate: 1.0000e-04
Epoch 6/60
[1m10/10[0m [32m━━━━━━━━━━━━

### Testing Accuracy

In [33]:
loaded_model = keras.saving.load_model('./models_ResNet/model_ResNet18_46_dropout_L2_regularizer_callbacks_trained.keras') # Loading best model
print("Test Accuracy is: {:.2f}%".format(loaded_model.evaluate(X_test, y_test)[1]))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 257ms/step - accuracy: 0.7815 - loss: 30.7805
Test Accuracy is: 0.79%


### Comments

Also with L2 Regularizer, Test Accuracy is 79% (l2_lambda = 0.01), but val accuracy was now monitored with the callbacks instead the loss

IMPORTANT: Because of the regularizer, the val keeps decreasing while the val accuracy not --> monitor the val_accuracy instead for the call backs! 

    - Try different different l2_lambda
    - Result: None
    - Stick with Dropout but no Regularizer
    (Try only Regularizer and not Dropout?)

To experiment with: 

    - Data Augmentation
    - Different ResNet Complexities

Try different ResNet Architectures: 

    - ResNet10: less complex 
    - ResNet50: more complex (need to use pre-trained model for ResNet50)
    --> use the same hyperparameters

## ResNet34: with Tensor Board + 46 Channels + with Dropout + ReduceLR + EarlyStopping

### Redefine Network Architecture 

In [6]:
def identity_block(x, filter, dropout_rate = 0.5):
    # copy tensor to variable called x_skip
    x_skip = x
    # Layer 1
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same')(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Activation('relu')(x)
    # Layer 2
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same')(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    # Add Residue
    x = tf.keras.layers.Add()([x, x_skip])     
    x = tf.keras.layers.Activation('relu')(x)
    return x

def convolutional_block(x, filter, dropout_rate = 0.5):
    # copy tensor to variable called x_skip
    x_skip = x
    # Layer 1
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same', strides = (2,2))(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Activation('relu')(x)
    # Layer 2
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same')(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    # Processing Residue with conv(1,1)
    x_skip = tf.keras.layers.Conv2D(filter, (1,1), strides = (2,2))(x_skip)
    # Add Residue
    x = tf.keras.layers.Add()([x, x_skip])     
    x = tf.keras.layers.Activation('relu')(x)
    return x

def ResNet(shape = (32, 32, 3), classes = 10, block_layers = [3, 4, 6, 3], dropout_rate = 0.5):
    # Step 1 (Setup Input Layer)
    x_input = tf.keras.layers.Input(shape)
    x = tf.keras.layers.ZeroPadding2D((3, 3))(x_input)
    # Step 2 (Initial Conv layer along with maxPool)
    x = tf.keras.layers.Conv2D(64, kernel_size=7, strides=2, padding='same')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Activation('relu')(x)
    x = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)
    # Define size of sub-blocks and initial filter size
    block_layers = block_layers
    filter_size = 64
    # Step 3 Add the Resnet Blocks
    for i in range(4):
        if i == 0:
            # For sub-block 1 Residual/Convolutional block not needed
            for j in range(block_layers[i]):
                x = identity_block(x, filter_size)
        else:
            # One Residual/Convolutional Block followed by Identity blocks
            # The filter size will go on increasing by a factor of 2
            filter_size = filter_size*2
            x = convolutional_block(x, filter_size)
            for j in range(block_layers[i] - 1):
                x = identity_block(x, filter_size)
    # Step 4 End Dense Network
    x = tf.keras.layers.AveragePooling2D((2,2), padding = 'same')(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(512, activation = 'relu')(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Dense(classes, activation = 'softmax')(x)
    model = tf.keras.models.Model(inputs = x_input, outputs = x, name = "ResNet34")
    return model

### Callbacks

In [6]:
log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet34_46_dropout_callbacks_trained" + time.strftime("%Y%m%d-%H%M%S"))
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

callbacks_list = [
    keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=8,   # Optimal patience value for validation accuracy
    ),
    keras.callbacks.ModelCheckpoint(
        filepath="./models_ResNet/model_ResNet34_46_dropout_callbacks_trained.keras", # save validation loss into file
        monitor="val_loss",  # monitor validation accuracy
        save_best_only=True,
    ),
    ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,  
    patience=3,
    verbose=1,
    mode="auto",
    min_delta=0.001,
    min_lr=1.e-6,
    ), 
    TensorBoard(log_dir=log_dir, histogram_freq=1)
]

### Train ResNet34 model

In [7]:
ROWS = 224
COLS = 224
CHANNELS = 46
CLASSES = 2
block_layers = [3,4,6,3] # ResNet34
dropout_rate = 0.5 # Change HERE
# Build Network Graph 
model_ResNet34 = ResNet(shape = (ROWS, COLS, CHANNELS), classes = CLASSES, block_layers = block_layers, dropout_rate = dropout_rate)

# Compile Model 
l_rate = 1.e-4
opt = keras.optimizers.Adam(learning_rate=l_rate)

model_ResNet34.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Apply TensorBoard
# define the logs folder 
log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet34_46_dropout_callbacks_trained" + time.strftime("%Y%m%d-%H%M%S"))
# Define TensorBoard Callback
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train Model 
batch = 64
epochs = 60 # Change here
start_time = time.time()

history_ResNet18 = model_ResNet34.fit(X_train, y_train,
                                      epochs = epochs, batch_size = batch, 
                                      validation_data = (X_val, y_val), callbacks = callbacks_list)

end_time = time.time()
elapsed_time = end_time - start_time
print(f"\nElapsed time: {elapsed_time} seconds")

2024-12-16 18:44:42.649948: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5447106560 exceeds 10% of free system memory.
2024-12-16 18:44:45.203408: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5447106560 exceeds 10% of free system memory.


Epoch 1/60


I0000 00:00:1734374703.590072  702277 service.cc:146] XLA service 0x7fa0d80039c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1734374703.590110  702277 service.cc:154]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2024-12-16 18:45:04.119608: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-12-16 18:45:06.159870: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 90300
2024-12-16 18:45:12.934375: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 7.12GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
I0000 00:00:1734374742.193118  702277 device_compiler.h:188] Compiled cluster using XLA!  This line i

KeyboardInterrupt: 

### Training Accuracy

In [40]:
loaded_model = keras.saving.load_model('./models_ResNet/model_ResNet34_46_dropout_callbacks_trained.keras') # Loading best model
print("Test Accuracy is: {:.2f}%".format(loaded_model.evaluate(X_test, y_test)[1]))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 211ms/step - accuracy: 0.7973 - loss: 0.5676
Test Accuracy is: 0.81%


### Comments 

With ResNet34, we get almost the same test accuracy (81%) as with ResNet18

Let's try ResNet10!

## ResNet10: with Tensor Board + 46 Channels + with Dropout + ReduceLR + EarlyStopping

### Callbacks

In [7]:
log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet10_46_dropout_callbacks_trained" + time.strftime("%Y%m%d-%H%M%S"))
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

callbacks_list = [
    keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=8,   # Optimal patience value for validation accuracy
    ),
    keras.callbacks.ModelCheckpoint(
        filepath="./models_ResNet/model_ResNet10_46_dropout_callbacks_trained.keras", # save validation loss into file
        monitor="val_loss",  # monitor validation accuracy
        save_best_only=True,
    ),
    ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,  
    patience=3,
    verbose=1,
    mode="auto",
    min_delta=0.001,
    min_lr=1.e-6,
    ), 
    TensorBoard(log_dir=log_dir, histogram_freq=1)
]

### Train ResNet10 model

In [8]:
ROWS = 224
COLS = 224
CHANNELS = 46
CLASSES = 2
block_layers = [1,1,1,1] # ResNet10
dropout_rate = 0.5 # Change HERE
# Build Network Graph 
model_ResNet10 = ResNet(shape = (ROWS, COLS, CHANNELS), classes = CLASSES, block_layers = block_layers, dropout_rate = dropout_rate)

# Compile Model 
l_rate = 1.e-4
opt = keras.optimizers.Adam(learning_rate=l_rate)

model_ResNet10.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Apply TensorBoard
# define the logs folder 
log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet10_46_dropout_callbacks_trained" + time.strftime("%Y%m%d-%H%M%S"))
# Define TensorBoard Callback
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train Model 
batch = 64
epochs = 60 # Change here
start_time = time.time()

history_ResNet18 = model_ResNet10.fit(X_train, y_train,
                                      epochs = epochs, batch_size = batch, 
                                      validation_data = (X_val, y_val), callbacks = callbacks_list)

end_time = time.time()
elapsed_time = end_time - start_time
print(f"\nElapsed time: {elapsed_time} seconds")

2024-12-16 19:59:04.694482: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5447106560 exceeds 10% of free system memory.
2024-12-16 19:59:07.257942: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5447106560 exceeds 10% of free system memory.


Epoch 1/60


I0000 00:00:1734379155.637665  703976 service.cc:146] XLA service 0x7fd8a00284e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1734379155.637712  703976 service.cc:154]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2024-12-16 19:59:15.886001: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-12-16 19:59:16.491009: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 90300
2024-12-16 19:59:19.403535: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 7.12GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
I0000 00:00:1734379175.725696  703976 device_compiler.h:188] Compiled cluster using XLA!  This line i

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 2s/step - accuracy: 0.6158 - loss: 0.9499 - val_accuracy: 0.6193 - val_loss: 0.7238 - learning_rate: 1.0000e-04
Epoch 2/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 554ms/step - accuracy: 0.6206 - loss: 0.8545 - val_accuracy: 0.6751 - val_loss: 0.6868 - learning_rate: 1.0000e-04
Epoch 3/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 549ms/step - accuracy: 0.6894 - loss: 0.6206 - val_accuracy: 0.6701 - val_loss: 0.6339 - learning_rate: 1.0000e-04
Epoch 4/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 557ms/step - accuracy: 0.7970 - loss: 0.4751 - val_accuracy: 0.7259 - val_loss: 0.5927 - learning_rate: 1.0000e-04
Epoch 5/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 552ms/step - accuracy: 0.8484 - loss: 0.4014 - val_accuracy: 0.7665 - val_loss: 0.5428 - learning_rate: 1.0000e-04
Epoch 6/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

### Training Accuracy ResNet10

In [9]:
loaded_model = keras.saving.load_model('./models_ResNet/model_ResNet10_46_dropout_callbacks_trained.keras') # Loading best model
print("Test Accuracy is: {:.2f}%".format(loaded_model.evaluate(X_test, y_test)[1]))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 124ms/step - accuracy: 0.7730 - loss: 0.5733
Test Accuracy is: 0.79%


### Comments

ResNet10 provides 79% accuracy on the test set, while ResNet18 with dropout provides 81% (best model) --> keep ResNet18

## ResNet18: with Tensor Board + 46 Channels + with Dropout + ReduceLR + EarlyStopping + Data Augmentation

### Redefine Network with Dropout and Data Augmentation

In [7]:
def identity_block(x, filter, dropout_rate = 0.5):
    # copy tensor to variable called x_skip
    x_skip = x
    # Layer 1
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same')(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Activation('relu')(x)
    # Layer 2
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same')(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    # Add Residue
    x = tf.keras.layers.Add()([x, x_skip])     
    x = tf.keras.layers.Activation('relu')(x)
    return x

def convolutional_block(x, filter, dropout_rate = 0.5):
    # copy tensor to variable called x_skip
    x_skip = x
    # Layer 1
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same', strides = (2,2))(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Activation('relu')(x)
    # Layer 2
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same')(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    # Processing Residue with conv(1,1)
    x_skip = tf.keras.layers.Conv2D(filter, (1,1), strides = (2,2))(x_skip)
    # Add Residue
    x = tf.keras.layers.Add()([x, x_skip])     
    x = tf.keras.layers.Activation('relu')(x)
    return x

def ResNet(shape = (32, 32, 3), classes = 10, block_layers = [3, 4, 6, 3], dropout_rate = 0.5):
    # Step 1 (Setup Input Layer)
    x_input = tf.keras.layers.Input(shape)
    # Data Augmentation
    x = RandomFlip(mode = "horizontal_and_vertical", seed = None)(x_input)
    x = RandomRotation(factor = 0.2, interpolation = "nearest")(x)
    # Proceed with Model 
    x = tf.keras.layers.ZeroPadding2D((3, 3))(x)
    # Step 2 (Initial Conv layer along with maxPool)
    x = tf.keras.layers.Conv2D(64, kernel_size=7, strides=2, padding='same')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Activation('relu')(x)
    x = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)
    # Define size of sub-blocks and initial filter size
    block_layers = block_layers
    filter_size = 64
    # Step 3 Add the Resnet Blocks
    for i in range(4):
        if i == 0:
            # For sub-block 1 Residual/Convolutional block not needed
            for j in range(block_layers[i]):
                x = identity_block(x, filter_size)
        else:
            # One Residual/Convolutional Block followed by Identity blocks
            # The filter size will go on increasing by a factor of 2
            filter_size = filter_size*2
            x = convolutional_block(x, filter_size)
            for j in range(block_layers[i] - 1):
                x = identity_block(x, filter_size)
    # Step 4 End Dense Network
    x = tf.keras.layers.AveragePooling2D((2,2), padding = 'same')(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(512, activation = 'relu')(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Dense(classes, activation = 'softmax')(x)
    model = tf.keras.models.Model(inputs = x_input, outputs = x, name = "ResNet34")
    return model

### Callback

In [8]:
log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet18_46_dropout_callbacks_augmentation_trained" + time.strftime("%Y%m%d-%H%M%S"))
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

callbacks_list = [
    keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=8,   # Optimal patience value for validation accuracy
    ),
    keras.callbacks.ModelCheckpoint(
        filepath="./models_ResNet/model_ResNet18_46_dropout_callbacks_augmentation_trained.keras", # save validation loss into file
        monitor="val_loss",  # monitor validation accuracy
        save_best_only=True,
    ),
    ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,  
    patience=3,
    verbose=1,
    mode="auto",
    min_delta=0.001,
    min_lr=1.e-6,
    ), 
    TensorBoard(log_dir=log_dir, histogram_freq=1)
]

### Training ResNet18 model with augmentation

In [9]:
ROWS = 224
COLS = 224
CHANNELS = 46
CLASSES = 2
block_layers = [2,2,2,2] # ResNet18
dropout_rate = 0.5
# Build Network Graph 
model_ResNet18 = ResNet(shape = (ROWS, COLS, CHANNELS), classes = CLASSES, block_layers = block_layers, dropout_rate = dropout_rate)

# Compile Model 
l_rate = 1.e-4
opt = keras.optimizers.Adam(learning_rate=l_rate)

model_ResNet18.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Apply TensorBoard
# define the logs folder 
log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet18_46_dropout_callbacks_augmentation_trained" + time.strftime("%Y%m%d-%H%M%S"))
# Define TensorBoard Callback
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train Model 
batch = 64
epochs = 30
start_time = time.time()

history_ResNet18 = model_ResNet18.fit(X_train, y_train,
                                      epochs = epochs, batch_size = batch, 
                                      validation_data = (X_val, y_val), callbacks = callbacks_list)

end_time = time.time()
elapsed_time = end_time - start_time
print(f"\nElapsed time: {elapsed_time} seconds")

2024-12-16 21:58:35.584670: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5447106560 exceeds 10% of free system memory.
2024-12-16 21:58:38.205931: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5447106560 exceeds 10% of free system memory.


Epoch 1/30


2024-12-16 21:58:48.899855: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:966] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/ResNet34_1/dropout_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
W0000 00:00:1734386331.217668  707518 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386331.330188  707518 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386331.418583  707518 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386331.618094  707518 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386331.679775  707518 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386331.747881  707518 gpu_timer.cc:114] Skipping the delay kernel, measurement acc

[1m 9/10[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 559ms/step - accuracy: 0.5530 - loss: 1.5265

W0000 00:00:1734386340.834708  707512 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386340.848433  707512 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386340.858831  707512 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386340.904119  707512 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386340.917456  707512 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386340.930634  707512 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386340.944664  707512 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386340.989088  707512 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386341.024591  707512 gp

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 645ms/step - accuracy: 0.5511 - loss: 1.5232

W0000 00:00:1734386346.020382  707512 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386346.024902  707512 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386346.030038  707512 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386346.038371  707512 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386346.053880  707512 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386346.069498  707512 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386346.073740  707512 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386346.082184  707512 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386346.084559  707512 gp

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 1s/step - accuracy: 0.5495 - loss: 1.5204 - val_accuracy: 0.4010 - val_loss: 0.9745 - learning_rate: 1.0000e-04
Epoch 2/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 793ms/step - accuracy: 0.6330 - loss: 0.8855 - val_accuracy: 0.3858 - val_loss: 1.0615 - learning_rate: 1.0000e-04
Epoch 3/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 950ms/step - accuracy: 0.6722 - loss: 0.6857 - val_accuracy: 0.6244 - val_loss: 0.7681 - learning_rate: 1.0000e-04
Epoch 4/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 808ms/step - accuracy: 0.6920 - loss: 0.6313 - val_accuracy: 0.5939 - val_loss: 0.7929 - learning_rate: 1.0000e-04
Epoch 5/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 966ms/step - accuracy: 0.7005 - loss: 0.6094 - val_accuracy: 0.6497 - val_loss: 0.7335 - learning_rate: 1.0000e-04
Epoch 6/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

### Testing Accuracy

In [10]:
loaded_model = keras.saving.load_model('./models_ResNet/model_ResNet18_46_dropout_callbacks_augmentation_trained.keras') # Loading best model
print("Test Accuracy is: {:.2f}%".format(loaded_model.evaluate(X_test, y_test)[1]))

W0000 00:00:1734386590.772684  707510 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386590.825863  707510 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386590.870674  707510 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386590.999930  707510 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386591.038402  707510 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386591.076850  707510 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386591.099678  707510 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386591.188648  707510 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386591.263135  707510 gp

[1m1/7[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 2s/step - accuracy: 0.7812 - loss: 0.6515

W0000 00:00:1734386591.466036  707510 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386591.468763  707510 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386591.473688  707510 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386591.474636  707510 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386591.475575  707510 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386591.476443  707510 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386591.477433  707510 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386591.478351  707510 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1734386591.479276  707510 gp

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 98ms/step - accuracy: 0.8137 - loss: 0.5284 
Test Accuracy is: 0.81%


### Comments

After Data Augmentation (RandomFlip and RandomRotation, the Testing accuracy is 81%, almost the same as before. For simplicity, we will leave out Data Augmentation

## ResNet18: with Tensor Board + 46 Channels + with Dropout + ReduceLR + EarlyStopping + In Layer Rescaling 

For the sake of clarity, we want to normalize the data with "x = layers.Rescaling(1./255)(x)" in layer instead of pre-processing the data and standardizing with mean = 0 & std = 1.

In [4]:
# Create data set: without normalization
X_train, y_train = load_dataset(train_df, image_folder, normalize = False)
X_val, y_val = load_dataset(val_df, image_folder, normalize = False)
X_test, y_test = load_dataset(test_df, image_folder, normalize = False)

# # One-hot Encoding
# y_train= to_categorical(y_train)
# y_val= to_categorical(y_val)
# y_test = to_categorical(y_test)

### Redefine network architecture with in layer rescaling

In [5]:
def identity_block(x, filter, dropout_rate = 0.5):
    # copy tensor to variable called x_skip
    x_skip = x
    # Layer 1
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same')(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Activation('relu')(x)
    # Layer 2
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same')(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    # Add Residue
    x = tf.keras.layers.Add()([x, x_skip])     
    x = tf.keras.layers.Activation('relu')(x)
    return x

def convolutional_block(x, filter, dropout_rate = 0.5):
    # copy tensor to variable called x_skip
    x_skip = x
    # Layer 1
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same', strides = (2,2))(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Activation('relu')(x)
    # Layer 2
    x = tf.keras.layers.Conv2D(filter, (3,3), padding = 'same')(x)
    x = tf.keras.layers.BatchNormalization(axis=3)(x)
    # Processing Residue with conv(1,1)
    x_skip = tf.keras.layers.Conv2D(filter, (1,1), strides = (2,2))(x_skip)
    # Add Residue
    x = tf.keras.layers.Add()([x, x_skip])     
    x = tf.keras.layers.Activation('relu')(x)
    return x

def ResNet(shape = (32, 32, 3), classes = 10, block_layers = [3, 4, 6, 3], dropout_rate = 0.5):
    # Step 1 (Setup Input Layer)
    x_input = tf.keras.layers.Input(shape)
    # Rescaling Layer (in-layer)
    x = layers.Rescaling(1./255)(x_input)
    x = tf.keras.layers.ZeroPadding2D((3, 3))(x)
    # Step 2 (Initial Conv layer along with maxPool)
    x = tf.keras.layers.Conv2D(64, kernel_size=7, strides=2, padding='same')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Activation('relu')(x)
    x = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')(x)
    # Define size of sub-blocks and initial filter size
    block_layers = block_layers
    filter_size = 64
    # Step 3 Add the Resnet Blocks
    for i in range(4):
        if i == 0:
            # For sub-block 1 Residual/Convolutional block not needed
            for j in range(block_layers[i]):
                x = identity_block(x, filter_size)
        else:
            # One Residual/Convolutional Block followed by Identity blocks
            # The filter size will go on increasing by a factor of 2
            filter_size = filter_size*2
            x = convolutional_block(x, filter_size)
            for j in range(block_layers[i] - 1):
                x = identity_block(x, filter_size)
    # Step 4 End Dense Network
    x = tf.keras.layers.AveragePooling2D((2,2), padding = 'same')(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(512, activation = 'relu')(x)
    x = Dropout(dropout_rate)(x)
    x = tf.keras.layers.Dense(1, activation = 'sigmoid')(x)
    model = tf.keras.models.Model(inputs = x_input, outputs = x, name = "ResNet34")
    return model

### Callbacks

In [6]:
log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet18_46_dropout_callbacks_layer_rescaling_trained" + time.strftime("%Y%m%d-%H%M%S"))
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

callbacks_list = [
    keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=8,   # Optimal patience value for validation accuracy
    ),
    keras.callbacks.ModelCheckpoint(
        filepath="./models_ResNet/model_ResNet18_46_dropout_callbacks_layer_rescaling_trained.keras", # save validation loss into file
        monitor="val_loss",  # monitor validation accuracy
        save_best_only=True,
    ),
    ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,  
    patience=3,
    verbose=1,
    mode="auto",
    min_delta=0.001,
    min_lr=1.e-6,
    ), 
    TensorBoard(log_dir=log_dir, histogram_freq=1)
]

### Training Model

In [7]:
ROWS = 224
COLS = 224
CHANNELS = 46
CLASSES = 2
block_layers = [2,2,2,2] # ResNet18
dropout_rate = 0.5
# Build Network Graph 
model_ResNet18 = ResNet(shape = (ROWS, COLS, CHANNELS), classes = CLASSES, block_layers = block_layers, dropout_rate = dropout_rate)

# Compile Model
l_rate = 1.e-4
opt = keras.optimizers.Adam(learning_rate=l_rate)
model_ResNet18.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet18_46_dropout_callbacks_layer_rescaling_trained" + time.strftime("%Y%m%d-%H%M%S"))
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train Model 
batch = 64
epochs = 30
start_time = time.time()

history_ResNet18 = model_ResNet18.fit(X_train, y_train,
                                      epochs = epochs, batch_size = batch, 
                                      validation_data = (X_val, y_val), callbacks = callbacks_list)

end_time = time.time()
elapsed_time = end_time - start_time
print(f"\nElapsed time: {elapsed_time} seconds")

2024-12-17 20:53:05.601589: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5447106560 exceeds 10% of free system memory.
2024-12-17 20:53:08.207689: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5447106560 exceeds 10% of free system memory.


Epoch 1/30


I0000 00:00:1734468800.011273  735905 service.cc:146] XLA service 0x7f51cc04a950 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1734468800.011327  735905 service.cc:154]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2024-12-17 20:53:20.326613: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-12-17 20:53:21.392947: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 90300
2024-12-17 20:53:24.945075: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 7.12GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
I0000 00:00:1734468824.923963  735905 device_compiler.h:188] Compiled cluster using XLA!  This line i

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 3s/step - accuracy: 0.5580 - loss: 1.8411 - val_accuracy: 0.6396 - val_loss: 0.6870 - learning_rate: 1.0000e-04
Epoch 2/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 747ms/step - accuracy: 0.5664 - loss: 1.0095 - val_accuracy: 0.6396 - val_loss: 0.6814 - learning_rate: 1.0000e-04
Epoch 3/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 734ms/step - accuracy: 0.5392 - loss: 0.8558 - val_accuracy: 0.6396 - val_loss: 0.6767 - learning_rate: 1.0000e-04
Epoch 4/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 745ms/step - accuracy: 0.6375 - loss: 0.8148 - val_accuracy: 0.6396 - val_loss: 0.6667 - learning_rate: 1.0000e-04
Epoch 5/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 759ms/step - accuracy: 0.6065 - loss: 0.7273 - val_accuracy: 0.6396 - val_loss: 0.6622 - learning_rate: 1.0000e-04
Epoch 6/30
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

### Testing Accuracy

In [8]:
# To load the model from file uncomment and run the following 
loaded_model = keras.saving.load_model('./models_ResNet/model_ResNet18_46_dropout_callbacks_layer_rescaling_trained.keras')
print("Test Accuracy is: {:.2f}%".format(loaded_model.evaluate(X_test, y_test)[1]))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 148ms/step - accuracy: 0.6209 - loss: 0.6635
Test Accuracy is: 0.63%


### F1 Score

In [11]:
from sklearn.metrics import f1_score

y_pred_test = loaded_model.predict(X_test)
y_pred_test = (y_pred_test < 0.5).astype(int)
f1 = f1_score(y_test, y_pred_test)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 70ms/step


In [12]:
print(f1)

0.5353159851301115


### Comments 

Very interesting: Without normalizing the images to mean = 0 and std = 1, but rescaling with layers.Rescaling (with range [0,1]), the mode fails to extract meaningfull information and only predicts one class

#### Future improvements to make: 

    - Vary the neurons of the last Dense layer before classification: 512 --> (128, 256, 1024, 2048 etc)
    - Cross-Validation
    - Use Balanced Dataset maybe?
    - Use pre-trained ResNet and train only some unfrozen layers
    - Channel selection


