In [1]:
import os
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger

#warnings.filterwarnings("ignore")
#tf version should be 2.9.0
print(tf.__version__)

2.9.0


In [2]:
rand_seed = 42

In [3]:
dir_path = r"C:\Users\acer\Desktop\Data_2D\data_x"
results_dir = os.path.join(dir_path, r'logs\EfficientNet-B5')
models_dir = os.path.join(dir_path, r'models\EfficientNet-B5')

if not os.path.isdir(results_dir):
    os.makedirs(results_dir)
    
if not os.path.isdir(models_dir):
    os.makedirs(models_dir)

In [4]:
# define the location of your dataset

TRAIN_PATH = r"C:\Users\acer\Desktop\Data_2D\data_x\train"
TRAIN_LABEL_PATH = r"C:\Users\acer\Desktop\Data_2D\data_x\train_classification.csv"

VAL_PATH = r"C:\Users\acer\Desktop\Data_2D\data_x\validation"
VAL_LABEL_PATH = r"C:\Users\acer\Desktop\Data_2D\data_x\validation_classification.csv"

IMG_DIM = (224,224)
INPUT_SHAPE = (224,224,3)

BATCH_SIZE = 32
EPOCH = 20
x_axis_inc = 1 # for plotting the training acc and loss

In [5]:
import pandas as pd

def expand_dataframe(df):
    # Create a list to hold the new rows
    new_rows = []
    
    # Iterate through each row in the original dataframe
    for _, row in df.iterrows():
        # Duplicate the row 234 times (for indices 0-233)
        for i in range(224):
            new_row = row.copy()
            new_row['ID'] = f"{row['ID']}_x_{i:03d}"  # Wrap the original ID and add suffix
            new_rows.append(new_row)
    
    # Create a new dataframe from the list of new rows
    new_df = pd.DataFrame(new_rows)
    
    return new_df

train_original_df = pd.read_csv(TRAIN_LABEL_PATH)

train_original_df = train_original_df.sample(frac=1, random_state=rand_seed).reset_index(drop=True)

train_c_erosion_label = train_original_df[['ID','c_erosion']]
# Create the new DataFrame
train_df = expand_dataframe(train_c_erosion_label)

# Shuffle the training data
#train_df = train_df.sample(frac=1, random_state=rand_seed).reset_index(drop=True)

train_df.head()

Unnamed: 0,ID,c_erosion
0,66-4559 L_x_000,1
0,66-4559 L_x_001,1
0,66-4559 L_x_002,1
0,66-4559 L_x_003,1
0,66-4559 L_x_004,1


In [6]:
data_labels = train_df
target_labels = data_labels['c_erosion']
data_labels['image_path'] =  data_labels.apply(lambda row: (os.path.join(TRAIN_PATH, str(row['ID'])) + '.jpg'), axis=1)
data_labels.head()

Unnamed: 0,ID,c_erosion,image_path
0,66-4559 L_x_000,1,C:\Users\acer\Desktop\Data_2D\data_x\train\66-...
0,66-4559 L_x_001,1,C:\Users\acer\Desktop\Data_2D\data_x\train\66-...
0,66-4559 L_x_002,1,C:\Users\acer\Desktop\Data_2D\data_x\train\66-...
0,66-4559 L_x_003,1,C:\Users\acer\Desktop\Data_2D\data_x\train\66-...
0,66-4559 L_x_004,1,C:\Users\acer\Desktop\Data_2D\data_x\train\66-...


In [7]:
print(data_labels.shape)

(56896, 3)


In [8]:
import pandas as pd
import os

def check_valid_files(df, column_name='image_path'):
    # Create a new column for validity
    df['is_valid_file'] = df[column_name].apply(os.path.isfile)
    
    # Print summary
    total = len(df)
    valid = df['is_valid_file'].sum()
    invalid = total - valid
    
    print(f"Total files: {total}")
    print(f"Valid files: {valid}")
    print(f"Invalid files: {invalid}")
    
    # If there are invalid files, you can get them like this:
    if invalid > 0:
        print("\nInvalid files:")
        invalid_files = df[~df['is_valid_file']][column_name]
        for file in invalid_files:
            print(file)
    
    return df

check_valid_file_df = check_valid_files(data_labels)

# You can access the results in the DataFrame
check_valid_file_df.head(10)

Total files: 56896
Valid files: 56896
Invalid files: 0


Unnamed: 0,ID,c_erosion,image_path,is_valid_file
0,66-4559 L_x_000,1,C:\Users\acer\Desktop\Data_2D\data_x\train\66-...,True
0,66-4559 L_x_001,1,C:\Users\acer\Desktop\Data_2D\data_x\train\66-...,True
0,66-4559 L_x_002,1,C:\Users\acer\Desktop\Data_2D\data_x\train\66-...,True
0,66-4559 L_x_003,1,C:\Users\acer\Desktop\Data_2D\data_x\train\66-...,True
0,66-4559 L_x_004,1,C:\Users\acer\Desktop\Data_2D\data_x\train\66-...,True
0,66-4559 L_x_005,1,C:\Users\acer\Desktop\Data_2D\data_x\train\66-...,True
0,66-4559 L_x_006,1,C:\Users\acer\Desktop\Data_2D\data_x\train\66-...,True
0,66-4559 L_x_007,1,C:\Users\acer\Desktop\Data_2D\data_x\train\66-...,True
0,66-4559 L_x_008,1,C:\Users\acer\Desktop\Data_2D\data_x\train\66-...,True
0,66-4559 L_x_009,1,C:\Users\acer\Desktop\Data_2D\data_x\train\66-...,True


In [9]:
val_original_df = pd.read_csv(VAL_LABEL_PATH)
val_c_erosion_label = val_original_df[['ID','c_erosion']]
# Create the new DataFrame
val_df = expand_dataframe(val_c_erosion_label)

# Shuffle the validation data
val_df = val_df.sample(frac=1, random_state=rand_seed).reset_index(drop=True)

val_df.head()

Unnamed: 0,ID,c_erosion
0,63-17877 R_x_020,0
1,61-5586 R_x_196,0
2,63-17877 R_x_090,0
3,60-34373 L 2019_x_113,0
4,53-24700 R_x_092,0


In [10]:
val_labels = val_df[['ID','c_erosion']]
target_val_labels = val_labels['c_erosion']
val_labels['image_path'] =  val_labels.apply(lambda row: (os.path.join(VAL_PATH, str(row['ID'])) + '.jpg'), axis=1)
val_labels.head()

Unnamed: 0,ID,c_erosion,image_path
0,63-17877 R_x_020,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...
1,61-5586 R_x_196,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...
2,63-17877 R_x_090,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...
3,60-34373 L 2019_x_113,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...
4,53-24700 R_x_092,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...


In [11]:
check_valid_file_df = check_valid_files(val_labels)
check_valid_file_df.head()

Total files: 12096
Valid files: 12096
Invalid files: 0


Unnamed: 0,ID,c_erosion,image_path,is_valid_file
0,63-17877 R_x_020,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...,True
1,61-5586 R_x_196,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...,True
2,63-17877 R_x_090,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...,True
3,60-34373 L 2019_x_113,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...,True
4,53-24700 R_x_092,0,C:\Users\acer\Desktop\Data_2D\data_x\validatio...,True


In [12]:
train_data_size = 2240
validation_data_size = 200

In [13]:
train_data = np.array([img_to_array(load_img(img, target_size=IMG_DIM))
                       for img in data_labels['image_path'][0:train_data_size].values.tolist()]).astype('float32')

In [14]:
val_data = np.array([img_to_array(load_img(img, target_size=IMG_DIM))
                       for img in val_labels['image_path'][0:validation_data_size].values.tolist()]).astype('float32')

In [15]:
print('Training Dataset Size:', train_data.shape)
print('Validation Dataset Size:', val_data.shape)

Training Dataset Size: (2240, 224, 224, 3)
Validation Dataset Size: (200, 224, 224, 3)


## Prepare for Transfer Learning

In [16]:
prep_in = tf.keras.applications.efficientnet.preprocess_input

In [17]:
x_train_new = train_data.astype('int')
x_val_new = val_data.astype('int')

In [18]:
x_train_in = prep_in(x_train_new)
x_val_in = prep_in(x_val_new)

In [19]:
train_labels_enc = target_labels[0:train_data_size].to_numpy()
val_labels_enc = target_val_labels[0:validation_data_size].to_numpy()

In [20]:
# Get the VGG16 model so we can do transfer learning
base_model = tf.keras.applications.EfficientNetB5(input_shape=INPUT_SHAPE, include_top=False, weights='imagenet')

In [21]:
print('Number of layers in the base model: ', len(base_model.layers))

Number of layers in the base model:  577


In [22]:
pd.set_option('max_colwidth', None)
layers = [(layer, layer.name, layer.trainable) for layer in base_model.layers]
pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])

Unnamed: 0,Layer Type,Layer Name,Layer Trainable
0,<keras.engine.input_layer.InputLayer object at 0x0000020D6E4BA760>,input_1,True
1,<keras.layers.preprocessing.image_preprocessing.Rescaling object at 0x0000020D6E4EEC40>,rescaling,True
2,<keras.layers.preprocessing.normalization.Normalization object at 0x0000020D6E4EE5E0>,normalization,True
3,<keras.layers.core.tf_op_layer.TFOpLambda object at 0x0000020D6B8B3370>,tf.math.truediv,True
4,<keras.layers.reshaping.zero_padding2d.ZeroPadding2D object at 0x0000020D6B8B36A0>,stem_conv_pad,True
...,...,...,...
572,<keras.layers.regularization.dropout.Dropout object at 0x0000020D6FA2FB50>,block7c_drop,True
573,<keras.layers.merging.add.Add object at 0x0000020D6FA2FAF0>,block7c_add,True
574,<keras.layers.convolutional.conv2d.Conv2D object at 0x0000020D7261E880>,top_conv,True
575,<keras.layers.normalization.batch_normalization.BatchNormalization object at 0x0000020D6FA38190>,top_bn,True


## Slapping AvgPool > 1024_Dense > 512_Dense > 1_output on top of the EfficientNet

In [23]:
base_model.trainable = False

pd.set_option('max_colwidth', None)
layers = [(layer, layer.name, layer.trainable) for layer in base_model.layers[10:]]
pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])

Unnamed: 0,Layer Type,Layer Name,Layer Trainable
0,<keras.layers.core.activation.Activation object at 0x0000020D725EF370>,block1a_activation,False
1,<keras.layers.pooling.global_average_pooling2d.GlobalAveragePooling2D object at 0x0000020D725EB940>,block1a_se_squeeze,False
2,<keras.layers.reshaping.reshape.Reshape object at 0x0000020D6EFC5430>,block1a_se_reshape,False
3,<keras.layers.convolutional.conv2d.Conv2D object at 0x0000020D6EFC7580>,block1a_se_reduce,False
4,<keras.layers.convolutional.conv2d.Conv2D object at 0x0000020D6EFC72E0>,block1a_se_expand,False
...,...,...,...
562,<keras.layers.regularization.dropout.Dropout object at 0x0000020D6FA2FB50>,block7c_drop,False
563,<keras.layers.merging.add.Add object at 0x0000020D6FA2FAF0>,block7c_add,False
564,<keras.layers.convolutional.conv2d.Conv2D object at 0x0000020D7261E880>,top_conv,False
565,<keras.layers.normalization.batch_normalization.BatchNormalization object at 0x0000020D6FA38190>,top_bn,False


In [24]:
fig_train = 'EfficientNetB5_A_FE_train_224'
log_file = os.path.join(results_dir, 'EfficientNetB5_A_FE_train_224.csv')
model_path = os.path.join(models_dir, 'EfficientNetB5_A_FE_224.h5')

In [25]:
# Create a new model on top

inputs = tf.keras.Input(shape=INPUT_SHAPE)
# We make sure that the base_model is running in inference mode here,
# by passing 'training=False'. This is important for fine-tuning
x = base_model(inputs, training=False)

x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dense(512, activation='relu')(x)

# A Dense classifier with a single unit (binary classification)
outputs = Dense(1, activation='sigmoid')(x)
model = Model(inputs, outputs)

In [26]:
pd.set_option('max_colwidth', None)
layers = [(layer, layer.name, layer.trainable) for layer in model.layers]
pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])

Unnamed: 0,Layer Type,Layer Name,Layer Trainable
0,<keras.engine.input_layer.InputLayer object at 0x0000020D6FADA5E0>,input_2,True
1,<keras.engine.functional.Functional object at 0x0000020D6FA2FEB0>,efficientnetb5,False
2,<keras.layers.pooling.global_average_pooling2d.GlobalAveragePooling2D object at 0x0000020D721D47F0>,global_average_pooling2d,True
3,<keras.layers.core.dense.Dense object at 0x0000020D721D7A60>,dense,True
4,<keras.layers.core.dense.Dense object at 0x0000020D6FA2F820>,dense_1,True
5,<keras.layers.core.dense.Dense object at 0x0000020D70566400>,dense_2,True


In [27]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 efficientnetb5 (Functional)  (None, 7, 7, 2048)       28513527  
                                                                 
 global_average_pooling2d (G  (None, 2048)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dense (Dense)               (None, 1024)              2098176   
                                                                 
 dense_1 (Dense)             (None, 512)               524800    
                                                                 
 dense_2 (Dense)             (None, 1)                 513       
                                                             

In [28]:
callbacks = [
    CSVLogger(log_file),
    ModelCheckpoint(
        filepath=os.path.join(models_dir,"Eff-epoch-{epoch:02d}.h5"),
        save_weights_only=False,
        save_best_only=False,
        save_freq='epoch',
        verbose=1
    )
]

In [29]:
model.compile(loss=keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer= keras.optimizers.RMSprop(lr=1e-3),
              metrics = [keras.metrics.BinaryAccuracy()])

  super(RMSprop, self).__init__(name, **kwargs)


In [30]:
history = model.fit(x=x_train_in, 
                    y=train_labels_enc,
                    validation_data=(x_val_in, val_labels_enc),
                    epochs=EPOCH, 
                    verbose=1, 
                    callbacks=callbacks)

Epoch 1/20


  return dispatch_target(*args, **kwargs)


Epoch 1: saving model to C:\Users\acer\Desktop\Data_2D\data_x\models\EfficientNet-B5\Eff-epoch-01.h5
Epoch 2/20
Epoch 2: saving model to C:\Users\acer\Desktop\Data_2D\data_x\models\EfficientNet-B5\Eff-epoch-02.h5
Epoch 3/20
Epoch 3: saving model to C:\Users\acer\Desktop\Data_2D\data_x\models\EfficientNet-B5\Eff-epoch-03.h5
Epoch 4/20
Epoch 4: saving model to C:\Users\acer\Desktop\Data_2D\data_x\models\EfficientNet-B5\Eff-epoch-04.h5
Epoch 5/20
Epoch 5: saving model to C:\Users\acer\Desktop\Data_2D\data_x\models\EfficientNet-B5\Eff-epoch-05.h5
Epoch 6/20
Epoch 6: saving model to C:\Users\acer\Desktop\Data_2D\data_x\models\EfficientNet-B5\Eff-epoch-06.h5
Epoch 7/20
Epoch 7: saving model to C:\Users\acer\Desktop\Data_2D\data_x\models\EfficientNet-B5\Eff-epoch-07.h5
Epoch 8/20
Epoch 8: saving model to C:\Users\acer\Desktop\Data_2D\data_x\models\EfficientNet-B5\Eff-epoch-08.h5
Epoch 9/20
Epoch 9: saving model to C:\Users\acer\Desktop\Data_2D\data_x\models\EfficientNet-B5\Eff-epoch-09.h5
Epo

In [31]:
history_dict = history.history
history_dict.keys()

dict_keys(['loss', 'binary_accuracy', 'val_loss', 'val_binary_accuracy'])

In [32]:
epoch_list = list(range(1,EPOCH+1))
y_train_acc = history.history['binary_accuracy']
y_train_loss = history.history['loss']

In [33]:
print(y_train_acc)

[0.7535714507102966, 0.8450892567634583, 0.8598214387893677, 0.8687499761581421, 0.8741071224212646, 0.8825892806053162, 0.8861607313156128, 0.8888393044471741, 0.8897321224212646, 0.8866071701049805, 0.890625, 0.8946428298950195, 0.893750011920929, 0.893750011920929, 0.8955357074737549, 0.8924106955528259, 0.8892857432365417, 0.8941964507102966, 0.8977678418159485, 0.8959821462631226]


In [34]:
print(y_train_loss)

[0.5043524503707886, 0.2951582372188568, 0.2523457705974579, 0.24357308447360992, 0.24480023980140686, 0.2335074543952942, 0.20348107814788818, 0.19805607199668884, 0.20740440487861633, 0.20793087780475616, 0.19537465274333954, 0.1883123368024826, 0.2076093554496765, 0.18783624470233917, 0.18668022751808167, 0.19843189418315887, 0.19069421291351318, 0.18722878396511078, 0.18066027760505676, 0.19670090079307556]
