### ResNet50 Trained

### Initialize Notebook & packages

In [1]:
import helper as hp 
hp.initialize_notebook() # initialize with GPU enabled  
# hp.initialize_notebook(False) # to disable GPU 

2024-12-14 12:38:24.060541: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-14 12:38:24.075118: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-14 12:38:24.079609: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-14 12:38:24.090860: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


GPU enabled. Checking for available GPUs...


I0000 00:00:1734179905.993344  636814 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1734179906.036026  636814 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1734179906.039621  636814 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1734179906.045308  636814 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

1 Physical GPUs, 1 Logical GPUs

Verifying TensorFlow and PyTorch CUDA setup...
TensorFlow version: 2.17.0
Built with CUDA: True
Num GPUs Available: 1

Keras version: 3.6.0

End checks and initialization.


I0000 00:00:1734179906.048751  636814 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1734179906.052000  636814 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1734179906.240596  636814 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1734179906.242359  636814 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

In [11]:
# Import standard libraries
import os
import sys
import gc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time
import keras
# Import DL libraries
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras import layers, models, Model, Input
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.callbacks import TensorBoard 

# Suppress tensorflow warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Import ML libraries
from sklearn.metrics import f1_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.utils.class_weight import compute_class_weight

# Import image libraries
from skimage import transform
import tifffile as tiff

# Appends current working dir
current_path = os.getcwd()
sys.path.append(current_path)

# Import custom preprocessing class
from imc_preprocessing import IMCPreprocessor

# Import Stratified Split
from sklearn.model_selection import StratifiedShuffleSplit

#### Methods

In [12]:
# Preprocessing (if needed)
def preprocessing(image, transpose=True, normalize=True) -> np.ndarray:
    if transpose:
        return np.transpose(image, (1, 2, 0))
    if normalize:
        return IMCPreprocessor.normalize_multichannel_image(image)

# Load images
def load_image(image_path) -> np.ndarray:
    image = tiff.imread(image_path)
    if image is None:
        raise ValueError(f"Failed to load image: {image_path}")
    return image


# Define a function to create a list of images from files within a folder 
def image_list(image_dir):
    # List all files in the directory
    image_files = [f for f in os.listdir(image_dir) if os.path.isfile(os.path.join(image_dir, f))]  
    # Initialize a list to store the images
    images = []
    # Loop through each file and read the image
    for image_file in image_files:
        image_path = os.path.join(image_dir, image_file)
        image = load_image(image_path)
        images.append(image)        
    return images 

# Converting to one hot
def convert_to_one_hot(y, classes):
    return np.eye(classes)[y]

### Data 

#### Preprocessing and Other

In [13]:
images_dir = '/home/jupyter-luvogt/Final_Project_LR/IMC_images' 
metadata_dir = '/home/jupyter-luvogt/Final_Project_LR/metadata.csv' 
panel_dir = '/home/jupyter-luvogt/Final_Project_LR/panel.csv' 
os.listdir(images_dir)[:5] # Get first five images

# Load images
images = image_list(images_dir)
images = np.array(images)

# load labels
metadata = pd.read_csv(metadata_dir)
PDL1_score = metadata["PDL1_score"]

# Shape PDL1
PDL1_score = PDL1_score.tolist()
PDL1_score = np.array(PDL1_score)

# Transpose and Normalize images
images_preproc = [preprocessing(i, transpose = True, normalize = False) for i in images]
images_preproc = [preprocessing(i, transpose = False, normalize = True) for i in images_preproc]
images_preproc = np.array(images_preproc)

# Extract channel information
panel_df = pd.read_csv(panel_dir)
channel_names = dict(zip(panel_df['clean_target'].to_list(), panel_df['channel'].to_list()))

# Filter out Xe131, Xe134 and Ba138 = Noise channels (OPTIONAL) 
channel_names_new = [x for x in list(channel_names.values()) if x not in ["Xe131", "Xe134", "Ba138"]]
images_preproc_drop = [IMCPreprocessor.drop_channels(i, channel_names_new, list(channel_names.values()))[0] for i in images_preproc]
images_preproc_drop = np.array(images_preproc_drop)

### ResNet50 Model: Trained directly here

#### ResNet50 Model: 3 Channels & Resizing with Anti Aliasing to 64x64

Approach: Select biological relevant channels that correspond or are associated with PDL1 

In [14]:
# Choose 3 biological relevant channels
channel_names_new = ["Gd160", "Eu153", "Gd155"]
images_preproc_drop_3 = [IMCPreprocessor.drop_channels(i, channel_names_new, list(channel_names.values()))[0] for i in images_preproc]
images_preproc_drop_3 = np.array(images_preproc_drop_3)

channels_preproc_drop_3 = channel_names_new

Create unbalanced (but with stratified) training, validation and test set

In [15]:
random_seed = 56
X = images_preproc_drop_3
y = PDL1_score
train_size = 0.6
val_size = 0.2
test_size = 0.2


# Create a StratifiedShuffleSplit for train/test split
sss_train_test = StratifiedShuffleSplit(n_splits=1, test_size=(val_size + test_size), random_state=random_seed)

# First split: Train and remaining (validation + test)
for train_index, remaining_index in sss_train_test.split(X, y):
    X_train, X_remaining = X[train_index], X[remaining_index]
    y_train, y_remaining = y[train_index], y[remaining_index]

# Create a StratifiedShuffleSplit for validation/test split on remaining data
sss_val_test = StratifiedShuffleSplit(n_splits=1, test_size=test_size / (val_size + test_size), random_state=random_seed)

# Second split: Validation and Test
for val_index, test_index in sss_val_test.split(X_remaining, y_remaining):
    X_val, X_test = X_remaining[val_index], X_remaining[test_index]
    y_val, y_test = y_remaining[val_index], y_remaining[test_index]


Downsampling

In [7]:
# Downsampling
from skimage import io, transform
X_train_64 = [transform.resize(i, (64, 64), anti_aliasing=True) for i in X_train]
X_train_64 = np.array(X_train_64)
X_test_64 = [transform.resize(i, (64, 64), anti_aliasing=True) for i in X_test]
X_test_64 = np.array(X_test_64)
X_val_64 = [transform.resize(i, (64, 64), anti_aliasing=True) for i in X_val]
X_val_64 = np.array(X_val_64)

Define CONSTANTS and do One Hot Encoding

In [16]:
from keras import layers
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D
from keras.models import Model, load_model
from keras.initializers import glorot_uniform
from keras.utils import plot_model
# from keras.utils.vis_utils import model_to_dot
import keras.backend as K
import tensorflow as tf
ROWS = 64
COLS = 64
CHANNELS = 3
CLASSES = 2

y_train_one_hot = convert_to_one_hot(y_train, CLASSES)
y_test_one_hot = convert_to_one_hot(y_test, CLASSES)
y_val_one_hot = convert_to_one_hot(y_val, CLASSES)

Define Blocks and ResNet Model

In [34]:
from keras.regularizers import l2
def identity_block(X, f, filters, stage, block):
    # defining name basis
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    # Retrieve Filters
    F1, F2, F3 = filters
    # Save the input value. We'll need this later to add back to the main path. 
    X_shortcut = X
    # First component of main path
    X = Conv2D(filters = F1, kernel_size = (1, 1), strides = (1,1), padding = 'valid', name = conv_name_base + '2a', kernel_initializer = glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + '2a')(X)
    X = Activation('relu')(X)
    # Second component of main path
    X = Conv2D(filters = F2, kernel_size = (f, f), strides = (1,1), padding = 'same', name = conv_name_base + '2b', kernel_initializer = glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + '2b')(X)
    X = Activation('relu')(X)
    # Third component of main path
    X = Conv2D(filters = F3, kernel_size = (1, 1), strides = (1,1), padding = 'valid', name = conv_name_base + '2c', kernel_initializer = glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + '2c')(X)
    # Final step: Add shortcut value to main path, and pass it through a RELU activation
    X = Add()([X, X_shortcut])
    X = Activation('relu')(X)
    return X
def convolutional_block(X, f, filters, stage, block, s = 2):
    # defining name basis
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    # Retrieve Filters
    F1, F2, F3 = filters
    # Save the input value
    X_shortcut = X
    ##### MAIN PATH #####
    # First component of main path 
    X = Conv2D(F1, (1, 1), strides = (s,s), name = conv_name_base + '2a', kernel_initializer = glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + '2a')(X)
    X = Activation('relu')(X)
    # Second component of main path
    X = Conv2D(filters=F2, kernel_size=(f, f), strides=(1, 1), padding='same', name=conv_name_base + '2b', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2b')(X)
    X = Activation('relu')(X)
    # Third component of main path
    X = Conv2D(filters=F3, kernel_size=(1, 1), strides=(1, 1), padding='valid', name=conv_name_base + '2c', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2c')(X)
    ##### SHORTCUT PATH ####
    X_shortcut = Conv2D(F3, (1, 1), strides = (s,s), name = conv_name_base + '1', kernel_initializer = glorot_uniform(seed=0))(X_shortcut)
    X_shortcut = BatchNormalization(axis = 3, name = bn_name_base + '1')(X_shortcut)
    # Final step: Add shortcut value to main path, and pass it through a RELU activation
    X = Add()([X, X_shortcut])
    X = Activation('relu')(X)
    return X
    
def ResNet50(input_shape = (64, 64, 3), classes = 2):   
    # Define the input as a tensor with shape input_shape
    X_input = Input(input_shape)
    # Zero-Padding
    X = ZeroPadding2D((3, 3))(X_input)
    # Stage 1
    X = Conv2D(64, (7, 7), strides = (2, 2), name = 'conv1', kernel_initializer = glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis = 3, name = 'bn_conv1')(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=(2, 2))(X)
    # Stage 2
    X = convolutional_block(X, f = 3, filters = [64, 64, 256], stage = 2, block='a', s = 1)
    X = identity_block(X, 3, [64, 64, 256], stage=2, block='b')
    X = identity_block(X, 3, [64, 64, 256], stage=2, block='c')
    # Stage 3
    X = convolutional_block(X, f = 3, filters = [128, 128, 512], stage = 3, block='a', s = 2)
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='b')
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='c')
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='d')
    # Stage 4
    X = convolutional_block(X, f = 3, filters = [256, 256, 1024], stage = 4, block='a', s = 2)
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='b')
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='c')
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='d')
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='e')
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='f')
    # Stage 5
    X = convolutional_block(X, f = 3, filters = [512, 512, 2048], stage = 5, block='a', s = 2)
    X = identity_block(X, 3, [512, 512, 2048], stage=5, block='b')
    X = identity_block(X, 3, [512, 512, 2048], stage=5, block='c')
    # AVGPOOL.
    X = AveragePooling2D((2, 2), name='avg_pool')(X)
    # Output layer
    X = Flatten()(X)
    X = Dense(classes, activation='softmax', name='fc' + str(classes), kernel_initializer = glorot_uniform(seed=0))(X)
    # Create model
    model = Model(inputs = X_input, outputs = X, name='ResNet50')
    return model

Build Model

In [None]:
# Build Network Graph 
model_ResNet50 = ResNet50(input_shape = (ROWS, COLS, CHANNELS), classes = CLASSES)

# Compile Model 
l_rate = 1.e-4
opt = keras.optimizers.Adam(learning_rate=l_rate)

model_ResNet50.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Apply TensorBoard
# define the logs folder 
log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet50_3Channels" + time.strftime("%Y%m%d-%H%M%S"))
# Define TensorBoard Callback
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train Model 
batch = 64
epochs = 50
start_time = time.time()

history_ResNet50 = model_ResNet50.fit(X_train_64, y_train_one_hot, 
                                      epochs = epochs, batch_size = batch, 
                                      validation_data = (X_val_64, y_val_one_hot),
                                      callbacks=tb_callback)

end_time = time.time()
elapsed_time = end_time - start_time
print(f"\nElapsed time: {elapsed_time} seconds")

#### COMMENTS

Observing our validation and testing accuracy, our model is performing very badly on the test set while overfitting to the training set. 

2 possible errors contributing to that: 
    1. Downsampling of images from 224x224 to 64x64 --> loss of information 
    2. Uninformative channels used 

Approaches to solve issues:

    1. Problem: 
        - Just use 224x224 pixel images
        - Use Dimensionality reduction techniques to reduce the size of the images (e.g. autoencoders)
        
    2. Problem: 
        - Access the most significant channels by first apply
            a) Shallow CNNs (per channel or with all channels) and then analyzing
                - SHAP
            b) Autoencoders

### ResNet50: 3 Channels but without Resizing (224x224) 

In [10]:
# Build Network Graph 
# Take original Size
ROWS = 224
COLS = 224
CHANNELS = 3
model_ResNet50 = ResNet50(input_shape = (ROWS, COLS, CHANNELS), classes = CLASSES)

# Compile Model 
l_rate = 1.e-4
opt = keras.optimizers.Adam(learning_rate=l_rate)

model_ResNet50.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Apply TensorBoard
# define the logs folder 
log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet50_3Channels" + time.strftime("%Y%m%d-%H%M%S"))
# Define TensorBoard Callback
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train Model 
batch = 32
epochs = 20
start_time = time.time()

history_ResNet50 = model_ResNet50.fit(X_train, y_train_one_hot, 
                                      epochs = epochs, batch_size = batch, 
                                      validation_data = (X_val, y_val_one_hot),
                                      callbacks=tb_callback)

end_time = time.time()
elapsed_time = end_time - start_time
print(f"\nElapsed time: {elapsed_time} seconds")

Epoch 1/20


I0000 00:00:1734124653.153374  628323 service.cc:146] XLA service 0x7fca78006230 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1734124653.153406  628323 service.cc:154]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2024-12-13 21:17:33.898189: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-12-13 21:17:36.534641: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 90300
2024-12-13 21:17:41.569654: W external/local_tsl/tsl/framework/bfc_allocator.cc:291] Allocator (GPU_0_bfc) ran out of memory trying to allocate 18.25GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
I0000 00:00:1734124677.960271  628323 device_compiler.h:188] Compiled cluster using XLA!  This line 

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 2s/step - accuracy: 0.5818 - loss: 1.0362 - val_accuracy: 0.6396 - val_loss: 0.6869
Epoch 2/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 466ms/step - accuracy: 0.6684 - loss: 0.7175 - val_accuracy: 0.6396 - val_loss: 0.6830
Epoch 3/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 451ms/step - accuracy: 0.8483 - loss: 0.4571 - val_accuracy: 0.6396 - val_loss: 0.6590
Epoch 4/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 455ms/step - accuracy: 0.9640 - loss: 0.2288 - val_accuracy: 0.6396 - val_loss: 0.6554
Epoch 5/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.9920 - loss: 0.1257 - val_accuracy: 0.6396 - val_loss: 0.6851
Epoch 6/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 465ms/step - accuracy: 0.9891 - loss: 0.1053 - val_accuracy: 0.6396 - val_loss: 0.7335
Epoch 7/20
[1m19/19[0m [32m━━━━━━━━━━

#### COMMENTS

Observing our validation and testing accuracy, our model is still performing very badly on the validation set while overfitting to the training set, even with 224x224 pixels a
INTERESTING: validation accuracy constantly 0.6396 indicates that model only predicts PDL1 == 0 as this is the distribution in our unbalanced validation set



In [13]:
model_ResNet50.evaluate(X_test, y_test_one_hot)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 65ms/step - accuracy: 0.6493 - loss: 1.2322


[1.3231925964355469, 0.624365508556366]

### ResNet50: 10 Channels

In [6]:
# Choose 3 biological relevant channels
channel_names_new = ["Gd160", "Eu153", "Gd155", "Er166", "Er170", "Nd146", "Yb173", "Nd145", "Tb159", "Yb171"]
images_preproc_drop_10 = [IMCPreprocessor.drop_channels(i, channel_names_new, list(channel_names.values()))[0] for i in images_preproc]
images_preproc_drop_10 = np.array(images_preproc_drop_10)
random_seed = 56
X = images_preproc_drop_10
y = PDL1_score
train_size = 0.6
val_size = 0.2
test_size = 0.2
# Create a StratifiedShuffleSplit for train/test split
sss_train_test = StratifiedShuffleSplit(n_splits=1, test_size=(val_size + test_size), random_state=random_seed)
# First split: Train and remaining (validation + test)
for train_index, remaining_index in sss_train_test.split(X, y):
    X_train, X_remaining = X[train_index], X[remaining_index]
    y_train, y_remaining = y[train_index], y[remaining_index]
# Create a StratifiedShuffleSplit for validation/test split on remaining data
sss_val_test = StratifiedShuffleSplit(n_splits=1, test_size=test_size / (val_size + test_size), random_state=random_seed)
# Second split: Validation and Test
for val_index, test_index in sss_val_test.split(X_remaining, y_remaining):
    X_val, X_test = X_remaining[val_index], X_remaining[test_index]
    y_val, y_test = y_remaining[val_index], y_remaining[test_index]

In [38]:
ROWS = 224
COLS = 224
# Change Channels to 10
CHANNELS = 10
model_ResNet50 = ResNet50(input_shape = (ROWS, COLS, CHANNELS), classes = CLASSES)

# Compile Model 
l_rate = 1.e-4
opt = keras.optimizers.Adam(learning_rate=l_rate)

model_ResNet50.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Apply TensorBoard
# define the logs folder 
log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet50_3Channels" + time.strftime("%Y%m%d-%H%M%S"))
# Define TensorBoard Callback
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train Model 
batch = 32
epochs = 20
start_time = time.time()

history_ResNet50 = model_ResNet50.fit(X_train, y_train_one_hot, 
                                      epochs = epochs, batch_size = batch, 
                                      validation_data = (X_val, y_val_one_hot),
                                      callbacks=tb_callback)

end_time = time.time()
elapsed_time = end_time - start_time
print(f"\nElapsed time: {elapsed_time} seconds")

Epoch 1/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 2s/step - accuracy: 0.5478 - loss: 1.1325 - val_accuracy: 0.3604 - val_loss: 0.6978
Epoch 2/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 473ms/step - accuracy: 0.7582 - loss: 0.6312 - val_accuracy: 0.6396 - val_loss: 0.6770
Epoch 3/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 481ms/step - accuracy: 0.8441 - loss: 0.4376 - val_accuracy: 0.6396 - val_loss: 0.6677
Epoch 4/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 466ms/step - accuracy: 0.9331 - loss: 0.2697 - val_accuracy: 0.6396 - val_loss: 0.6628
Epoch 5/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 472ms/step - accuracy: 0.9889 - loss: 0.1432 - val_accuracy: 0.6396 - val_loss: 0.6589
Epoch 6/20
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 484ms/step - accuracy: 0.9902 - loss: 0.1118 - val_accuracy: 0.6396 - val_loss: 0.6570
Epoch 7/20
[1m19/19[0m [32m

#### Comments

Increasing Channels did not help the overfitting problem: validation accuracy still stagnates

### ResNet50: 3 Channels + Regularization

Redefine ResNet50 with L2 Regularizers for each layer

In [24]:
from keras.regularizers import l2
def identity_block(X, f, filters, stage, block, l2_lambda = 0.01):
    # defining name basis
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    # Retrieve Filters
    F1, F2, F3 = filters
    # Save the input value. We'll need this later to add back to the main path. 
    X_shortcut = X
    # First component of main path
    X = Conv2D(filters = F1, kernel_size = (1, 1), strides = (1,1), padding = 'valid', name = conv_name_base + '2a', kernel_initializer = glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + '2a')(X)
    X = Activation('relu')(X)
    # Second component of main path
    X = Conv2D(filters = F2, kernel_size = (f, f), strides = (1,1), padding = 'same', name = conv_name_base + '2b', kernel_initializer = glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + '2b')(X)
    X = Activation('relu')(X)
    # Third component of main path
    X = Conv2D(filters = F3, kernel_size = (1, 1), strides = (1,1), padding = 'valid', name = conv_name_base + '2c', kernel_initializer = glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + '2c')(X)
    # Final step: Add shortcut value to main path, and pass it through a RELU activation
    X = Add()([X, X_shortcut])
    X = Activation('relu')(X)
    return X
def convolutional_block(X, f, filters, stage, block, l2_lambda = 0.01, s = 2):
    # defining name basis
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    # Retrieve Filters
    F1, F2, F3 = filters
    # Save the input value
    X_shortcut = X
    ##### MAIN PATH #####
    # First component of main path 
    X = Conv2D(F1, (1, 1), strides = (s,s), name = conv_name_base + '2a', kernel_initializer = glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + '2a')(X)
    X = Activation('relu')(X)
    # Second component of main path
    X = Conv2D(filters=F2, kernel_size=(f, f), strides=(1, 1), padding='same', name=conv_name_base + '2b', kernel_initializer=glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2b')(X)
    X = Activation('relu')(X)
    # Third component of main path
    X = Conv2D(filters=F3, kernel_size=(1, 1), strides=(1, 1), padding='valid', name=conv_name_base + '2c', kernel_initializer=glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2c')(X)
    ##### SHORTCUT PATH ####
    X_shortcut = Conv2D(F3, (1, 1), strides = (s,s), name = conv_name_base + '1', kernel_initializer = glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X_shortcut)
    X_shortcut = BatchNormalization(axis = 3, name = bn_name_base + '1')(X_shortcut)
    # Final step: Add shortcut value to main path, and pass it through a RELU activation
    X = Add()([X, X_shortcut])
    X = Activation('relu')(X)
    return X
    
def ResNet50(input_shape = (64, 64, 3), l2_lambda = 0.01, classes = 2):   
    # Define the input as a tensor with shape input_shape
    X_input = Input(input_shape)
    # Zero-Padding
    X = ZeroPadding2D((3, 3))(X_input)
    # Stage 1
    X = Conv2D(64, (7, 7), strides = (2, 2), name = 'conv1', kernel_initializer = glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X)
    X = BatchNormalization(axis = 3, name = 'bn_conv1')(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=(2, 2))(X)
    # Stage 2
    X = convolutional_block(X, f = 3, filters = [64, 64, 256], stage = 2, block='a', s = 1, l2_lambda=l2_lambda)
    X = identity_block(X, 3, [64, 64, 256], stage=2, block='b', l2_lambda=l2_lambda)
    X = identity_block(X, 3, [64, 64, 256], stage=2, block='c', l2_lambda=l2_lambda)
    # Stage 3
    X = convolutional_block(X, f = 3, filters = [128, 128, 512], stage = 3, block='a', s = 2, l2_lambda=l2_lambda)
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='b', l2_lambda=l2_lambda)
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='c', l2_lambda=l2_lambda)
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='d', l2_lambda=l2_lambda)
    # Stage 4
    X = convolutional_block(X, f = 3, filters = [256, 256, 1024], stage = 4, block='a', s = 2, l2_lambda=l2_lambda)
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='b', l2_lambda=l2_lambda)
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='c', l2_lambda=l2_lambda)
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='d', l2_lambda=l2_lambda)
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='e', l2_lambda=l2_lambda)
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='f', l2_lambda=l2_lambda)
    # Stage 5
    X = convolutional_block(X, f = 3, filters = [512, 512, 2048], stage = 5, block='a', s = 2, l2_lambda=l2_lambda)
    X = identity_block(X, 3, [512, 512, 2048], stage=5, block='b', l2_lambda=l2_lambda)
    X = identity_block(X, 3, [512, 512, 2048], stage=5, block='c', l2_lambda=l2_lambda)
    # AVGPOOL.
    X = AveragePooling2D((2, 2), name='avg_pool')(X)
    # Output layer
    X = Flatten()(X)
    X = Dense(classes, activation='softmax', name='fc' + str(classes), kernel_initializer = glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X)
    # Create model
    model = Model(inputs = X_input, outputs = X, name='ResNet50')
    return model

In [30]:
# Build Network Graph 
# Take original Size
ROWS = 224
COLS = 224
CHANNELS = 3
l2_lambda = 0.1
model_ResNet50 = ResNet50(input_shape = (ROWS, COLS, CHANNELS), l2_lambda=l2_lambda, classes = CLASSES)

# Compile Model 
l_rate = 1.e-4
opt = keras.optimizers.Adam(learning_rate=l_rate)

model_ResNet50.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# # Apply TensorBoard
# # define the logs folder 
# log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet50_3Channels" + time.strftime("%Y%m%d-%H%M%S"))
# # Define TensorBoard Callback
# tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train Model 
batch = 32
epochs = 5
start_time = time.time()

history_ResNet50 = model_ResNet50.fit(X_train, y_train_one_hot, 
                                      epochs = epochs, batch_size = batch, 
                                      validation_data = (X_val, y_val_one_hot))
                                      

end_time = time.time()
elapsed_time = end_time - start_time
print(f"\nElapsed time: {elapsed_time} seconds")

Epoch 1/5
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 2s/step - accuracy: 0.5571 - loss: 1774.6960 - val_accuracy: 0.6396 - val_loss: 1665.2546
Epoch 2/5
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 269ms/step - accuracy: 0.7156 - loss: 1630.5818 - val_accuracy: 0.6396 - val_loss: 1527.1132
Epoch 3/5
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 271ms/step - accuracy: 0.8277 - loss: 1494.3605 - val_accuracy: 0.6396 - val_loss: 1398.1536
Epoch 4/5
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 274ms/step - accuracy: 0.8814 - loss: 1367.8191 - val_accuracy: 0.6396 - val_loss: 1279.4423
Epoch 5/5
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 276ms/step - accuracy: 0.9112 - loss: 1251.5955 - val_accuracy: 0.6396 - val_loss: 1171.0596

Elapsed time: 100.53303241729736 seconds


#### COMMENTS

Problem is still present even with regularization

Maybe increase l2_lambda?: doesn't help either (additional step modified in code above)

### ResNet50: 3 Channels + Regularization + Dropout

In [17]:
from keras.regularizers import l2
def identity_block(X, f, filters, stage, block, l2_lambda = 0.01, dropout = 0.2):
    # defining name basis
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    # Retrieve Filters
    F1, F2, F3 = filters
    # Save the input value. We'll need this later to add back to the main path. 
    X_shortcut = X
    # First component of main path
    X = Conv2D(filters = F1, kernel_size = (1, 1), strides = (1,1), padding = 'valid', name = conv_name_base + '2a', kernel_initializer = glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + '2a')(X)
    X = Dropout(dropout)(X)
    X = Activation('relu')(X)
    # Second component of main path
    X = Conv2D(filters = F2, kernel_size = (f, f), strides = (1,1), padding = 'same', name = conv_name_base + '2b', kernel_initializer = glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + '2b')(X)
    X = Dropout(dropout)(X)
    X = Activation('relu')(X)
    # Third component of main path
    X = Conv2D(filters = F3, kernel_size = (1, 1), strides = (1,1), padding = 'valid', name = conv_name_base + '2c', kernel_initializer = glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + '2c')(X)
    # Final step: Add shortcut value to main path, and pass it through a RELU activation
    X = Add()([X, X_shortcut])
    X = Activation('relu')(X)
    return X
def convolutional_block(X, f, filters, stage, block, l2_lambda = 0.01, s = 2, dropout = 0.2):
    # defining name basis
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    # Retrieve Filters
    F1, F2, F3 = filters
    # Save the input value
    X_shortcut = X
    ##### MAIN PATH #####
    # First component of main path 
    X = Conv2D(F1, (1, 1), strides = (s,s), name = conv_name_base + '2a', kernel_initializer = glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X)
    X = BatchNormalization(axis = 3, name = bn_name_base + '2a')(X)
    X = Dropout(dropout)(X)
    X = Activation('relu')(X)
    # Second component of main path
    X = Conv2D(filters=F2, kernel_size=(f, f), strides=(1, 1), padding='same', name=conv_name_base + '2b', kernel_initializer=glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2b')(X)
    X = Dropout(dropout)(X)
    X = Activation('relu')(X)
    # Third component of main path
    X = Conv2D(filters=F3, kernel_size=(1, 1), strides=(1, 1), padding='valid', name=conv_name_base + '2c', kernel_initializer=glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2c')(X)
    ##### SHORTCUT PATH ####
    X_shortcut = Conv2D(F3, (1, 1), strides = (s,s), name = conv_name_base + '1', kernel_initializer = glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X_shortcut)
    X_shortcut = BatchNormalization(axis = 3, name = bn_name_base + '1')(X_shortcut)
    # Final step: Add shortcut value to main path, and pass it through a RELU activation
    X = Add()([X, X_shortcut])
    X = Activation('relu')(X)
    return X
    
def ResNet50(input_shape = (64, 64, 3), l2_lambda = 0.01, classes = 2, dropout = 0.2):   
    # Define the input as a tensor with shape input_shape
    X_input = Input(input_shape)
    # Zero-Padding
    X = ZeroPadding2D((3, 3))(X_input)
    # Stage 1
    X = Conv2D(64, (7, 7), strides = (2, 2), name = 'conv1', kernel_initializer = glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X)
    X = BatchNormalization(axis = 3, name = 'bn_conv1')(X)
    X = Dropout(dropout)(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=(2, 2))(X)
    # Stage 2
    X = convolutional_block(X, f = 3, filters = [64, 64, 256], stage = 2, block='a', s = 1, l2_lambda=l2_lambda, dropout = dropout)
    X = identity_block(X, 3, [64, 64, 256], stage=2, block='b', l2_lambda=l2_lambda, dropout = dropout)
    X = identity_block(X, 3, [64, 64, 256], stage=2, block='c', l2_lambda=l2_lambda, dropout = dropout)
    # Stage 3
    X = convolutional_block(X, f = 3, filters = [128, 128, 512], stage = 3, block='a', s = 2, l2_lambda=l2_lambda, dropout = dropout)
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='b', l2_lambda=l2_lambda, dropout = dropout)
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='c', l2_lambda=l2_lambda, dropout = dropout)
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='d', l2_lambda=l2_lambda, dropout = dropout)
    # Stage 4
    X = convolutional_block(X, f = 3, filters = [256, 256, 1024], stage = 4, block='a', s = 2, l2_lambda=l2_lambda, dropout = dropout)
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='b', l2_lambda=l2_lambda, dropout = dropout)
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='c', l2_lambda=l2_lambda, dropout = dropout)
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='d', l2_lambda=l2_lambda, dropout = dropout)
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='e', l2_lambda=l2_lambda, dropout = dropout)
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='f', l2_lambda=l2_lambda, dropout = dropout)
    # Stage 5
    X = convolutional_block(X, f = 3, filters = [512, 512, 2048], stage = 5, block='a', s = 2, l2_lambda=l2_lambda, dropout = dropout)
    X = identity_block(X, 3, [512, 512, 2048], stage=5, block='b', l2_lambda=l2_lambda, dropout = dropout)
    X = identity_block(X, 3, [512, 512, 2048], stage=5, block='c', l2_lambda=l2_lambda, dropout = dropout)
    # AVGPOOL.
    X = AveragePooling2D((2, 2), name='avg_pool')(X)
    # Output layer
    X = Flatten()(X)
    X = Dense(classes, activation='softmax', name='fc' + str(classes), kernel_initializer = glorot_uniform(seed=0), kernel_regularizer = l2(l2_lambda))(X)
    # Create model
    model = Model(inputs = X_input, outputs = X, name='ResNet50')
    return model

In [18]:
# Choose 3 biological relevant channels
channel_names_new = ["Gd160", "Eu153", "Gd155"]
images_preproc_drop_3 = [IMCPreprocessor.drop_channels(i, channel_names_new, list(channel_names.values()))[0] for i in images_preproc]
images_preproc_drop_3 = np.array(images_preproc_drop_3)

random_seed = 56
X = images_preproc_drop_3
y = PDL1_score
train_size = 0.6
val_size = 0.2
test_size = 0.2


# Create a StratifiedShuffleSplit for train/test split
sss_train_test = StratifiedShuffleSplit(n_splits=1, test_size=(val_size + test_size), random_state=random_seed)

# First split: Train and remaining (validation + test)
for train_index, remaining_index in sss_train_test.split(X, y):
    X_train, X_remaining = X[train_index], X[remaining_index]
    y_train, y_remaining = y[train_index], y[remaining_index]

# Create a StratifiedShuffleSplit for validation/test split on remaining data
sss_val_test = StratifiedShuffleSplit(n_splits=1, test_size=test_size / (val_size + test_size), random_state=random_seed)

# Second split: Validation and Test
for val_index, test_index in sss_val_test.split(X_remaining, y_remaining):
    X_val, X_test = X_remaining[val_index], X_remaining[test_index]
    y_val, y_test = y_remaining[val_index], y_remaining[test_index]



In [21]:
# Build Network Graph 
# Take original Size
ROWS = 224
COLS = 224
CHANNELS = 3
CLASSES = 2
l2_lambda = 0.1
dropout = 0.2
model_ResNet50 = ResNet50(input_shape = (ROWS, COLS, CHANNELS), l2_lambda=l2_lambda, classes = CLASSES, dropout = dropout)

# Compile Model 
l_rate = 1.e-4
opt = keras.optimizers.Adam(learning_rate=l_rate)

model_ResNet50.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# # Apply TensorBoard
# # define the logs folder 
# log_dir = os.path.join("logs_ResNet50", "fit", "model_ResNet50_3Channels" + time.strftime("%Y%m%d-%H%M%S"))
# # Define TensorBoard Callback
# tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train Model 
batch = 32
epochs = 10
start_time = time.time()

history_ResNet50 = model_ResNet50.fit(X_train, y_train_one_hot, 
                                      epochs = epochs, batch_size = batch, 
                                      validation_data = (X_val, y_val_one_hot))
                                      

end_time = time.time()
elapsed_time = end_time - start_time
print(f"\nElapsed time: {elapsed_time} seconds")

Epoch 1/10
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 3s/step - accuracy: 0.5618 - loss: 1771.9072 - val_accuracy: 0.6396 - val_loss: 1655.6842
Epoch 2/10
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 278ms/step - accuracy: 0.5822 - loss: 1620.0887 - val_accuracy: 0.6396 - val_loss: 1514.1362
Epoch 3/10
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 280ms/step - accuracy: 0.6292 - loss: 1481.4170 - val_accuracy: 0.3604 - val_loss: 1384.4082
Epoch 4/10
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 281ms/step - accuracy: 0.5329 - loss: 1354.5082 - val_accuracy: 0.6497 - val_loss: 1265.9998
Epoch 5/10
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 285ms/step - accuracy: 0.6252 - loss: 1238.7194 - val_accuracy: 0.6396 - val_loss: 1158.1440
Epoch 6/10
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 288ms/step - accuracy: 0.5929 - loss: 1133.5695 - val_accuracy: 0.6396 - val_loss: 10

#### COMMENTS

Problem of stagnating validation accuracy still persists even with drop out. Validation accuracy constantly 0.6396 or 0.3604 (with some exceptions...) --> only predicting one or the other class

Also: with Dropout models fails to overfit on the training set, while with regularization it overvits 

Approaches from here: 

    - ResNet18: Apply less complex model
    - Dimensionality Reduction: Autoencoders