In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization, Conv2D, Flatten, Dense, MaxPooling2D, RandomFlip, RandomRotation, RandomZoom, RandomTranslation, Dropout, Rescaling
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adagrad
from tensorflow.keras.utils import to_categorical
from tensorflow.data import AUTOTUNE
import os
from pathlib import Path

In [2]:
RANDOM_SEED=4
BASE_PATH = os.path.join(Path(os.getcwd()).resolve().parents[1], "Data")
# BASE_PATH = os.path.join(os.getcwd(), "Data")
assert os.path.isdir(BASE_PATH), f'Data Directory is required: {BASE_PATH}'
BATCH_SIZE = 1024
EPOCHS = 2
TRAIN_PATH = os.path.sep.join([BASE_PATH, "training"])
VAL_PATH = os.path.sep.join([BASE_PATH, "validation"])
TEST_PATH = os.path.sep.join([BASE_PATH, "testing"])

tf.random.set_seed(RANDOM_SEED)

In [3]:
train_data = image_dataset_from_directory(TRAIN_PATH, image_size=(50,50), label_mode='binary',
                                  seed=RANDOM_SEED, shuffle=True, batch_size=BATCH_SIZE)
val_data = image_dataset_from_directory(VAL_PATH, image_size=(50,50), label_mode='binary',
                                  seed=RANDOM_SEED, shuffle=True, batch_size=BATCH_SIZE)
test_data = image_dataset_from_directory(TEST_PATH, image_size=(50,50), label_mode='binary',
                                  seed=RANDOM_SEED, shuffle=True, batch_size=BATCH_SIZE)

Found 176794 files belonging to 2 classes.
Found 49334 files belonging to 2 classes.
Found 51396 files belonging to 2 classes.


In [4]:
data_augmentation = Sequential([
        RandomFlip("horizontal_and_vertical", input_shape=(50, 50, 3)),
        RandomRotation(0.2),
        RandomZoom(0.3),
        RandomTranslation(height_factor=0.2, width_factor=0.2),
        Rescaling(1./255),
])
model = Sequential([
  Conv2D(32, 3, activation='relu', input_shape=(50, 50, 3)),
  MaxPooling2D(),
  Conv2D(32, 3, activation='relu'),
  MaxPooling2D(),
  Conv2D(32, 3, activation='relu'),
  MaxPooling2D(),
  Dropout(0.25),
  Flatten(),
  Dense(64, activation='relu'),
  Dropout(0.25),
  Dense(1, activation='sigmoid')
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 48, 48, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 24, 24, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 22, 22, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 11, 11, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 9, 9, 32)          9248      
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 4, 4, 32)         

In [5]:
whole_system = Sequential([
    data_augmentation,
    model
])
whole_system.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (None, 50, 50, 3)         0         
                                                                 
 sequential_1 (Sequential)   (None, 1)                 52289     
                                                                 
Total params: 52289 (204.25 KB)
Trainable params: 52289 (204.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [6]:
checkpoint_filepath = 'cnn_models/checkpoint.model.keras'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_binary_accuracy',
    mode='max',
    save_best_only=True)

limited_computation_callback = tf.keras.callbacks.EarlyStopping(monitor='loss',
                                              patience=5)

In [8]:
whole_system.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=[tf.keras.metrics.BinaryAccuracy(), 
                       tf.keras.metrics.F1Score(), 
                       tf.keras.metrics.Recall(), 
                       tf.keras.metrics.Precision()])
  
whole_system.fit(train_data, epochs=EPOCHS, validation_data=val_data, 
                 batch_size=BATCH_SIZE, callbacks=[model_checkpoint_callback, 
                                                   limited_computation_callback],
                 class_weight={
                     0: 1., 
                     1: 4.  # Forces model to treat every cancer same as 4 non cancer 
                 })

Epoch 1/2
Epoch 2/2


<keras.src.callbacks.History at 0x319232cd0>

In [9]:
whole_system.evaluate(test_data)



[0.47605669498443604,
 0.7870456576347351,
 array([0.3806598], dtype=float32),
 0.7641751766204834,
 0.532779335975647]