<a href="https://colab.research.google.com/github/Juba-Amr/breast-cancer-CNN/blob/main/notebooks/colab_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [44]:
#imports
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image_dataset_from_directory

In [43]:
#paths
# Every split lives in its own sub-folder of one dataset root on the Colab VM.
dataset_root = '/content/dataset'
train_path = f'{dataset_root}/train'
val_path = f'{dataset_root}/validation'
test_path = f'{dataset_root}/test'

In [54]:
#for reproducibility
def set_seed(seed=42):
    """Seed every random number generator this notebook can draw from.

    Args:
        seed: Integer seed shared by Python's `random`, NumPy and TensorFlow.
    """
    # Local import: the notebook's import cell does not bring in `random`.
    import random

    # PYTHONHASHSEED is only read when an interpreter starts, so this assignment
    # affects child processes, not the hashing of the already-running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'

    # NOTE(review): Keras may derive default seeds for random layers and
    # initializers from Python's `random` module, so it has to be seeded as well
    # (these are the same three generators tf.keras.utils.set_random_seed covers).
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
set_seed()

# Both splits are read with exactly the same settings, so they are declared once.
# batch_size=None makes the loader yield single (image, label) samples; batching
# is applied later in the input pipeline.
loader_options = dict(
    labels='inferred',        # class labels come from the sub-directory names
    label_mode='binary',
    color_mode='rgb',
    batch_size=None,
    image_size=(50,50),
    shuffle=True,
    interpolation='nearest',
)

X_train = image_dataset_from_directory(train_path, **loader_options)

X_val = image_dataset_from_directory(val_path, **loader_options)

# tf.data.AUTOTUNE is the non-deprecated spelling of tf.data.experimental.AUTOTUNE.
AUTOTUNE = tf.data.AUTOTUNE
def preprocess(image, label):
    """Scale pixel values from [0, 255] to [0, 1] as float32.

    tf.image.convert_image_dtype only rescales integer inputs; a float32 image
    holding 0-255 values would pass through unscaled. The explicit cast + divide
    gives the same [0, 1] result whether the loader yields uint8 or float32.

    Args:
        image: Image tensor with values in [0, 255] (assumed from the loader;
            dtype may be uint8 or float32 depending on the interpolation used).
        label: Label tensor, returned unchanged.

    Returns:
        Tuple of (float32 image in [0, 1], label).
    """
    image = tf.cast(image, tf.float32) / 255.0
    return image, label

# Geometric augmentation only: random flips on both axes and random rotations.
# RandomRotation's factor is a fraction of a full turn, so (-0.3, 0.3) = +/-108 degrees.
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip(mode='horizontal_and_vertical'),
    layers.RandomRotation(factor=(-0.3, 0.3)),
]) #we don't alter the colors because on a former try it used to give very optimistic results on training set and poor ones in test

def augmentation(image, label):
    """Apply the random geometric augmentation to one training sample.

    Args:
        image: Image tensor to augment.
        label: Label tensor, returned unchanged.

    Returns:
        Tuple of (augmented image, label).
    """
    # This runs inside Dataset.map, outside of model.fit, so the training flag is
    # passed explicitly instead of relying on the layers' implicit default;
    # random layers are a no-op when called with training=False.
    return (data_augmentation(image, training=True), label)


Found 193299 files belonging to 2 classes.
Found 38496 files belonging to 2 classes.


In [55]:
# Number of individual samples before batching.
print(X_train.cardinality().numpy())
print(X_val.cardinality().numpy())

# NOTE: this cell rebinds X_train / X_val; re-run the loading cell before
# executing it a second time, otherwise the pipeline is stacked on itself.

# Training pipeline. Order matters:
#   cache    -> stores the decoded raw images once, BEFORE any random step.
#               Caching after augmentation/shuffle would freeze the first epoch's
#               augmented images and batch composition and replay them forever.
#   shuffle  -> new sample order every epoch.
#   map x2   -> normalisation, then fresh random augmentation every epoch.
#   batch / prefetch -> overlap input preparation with training.
X_train = (
    X_train
    .cache()
    .shuffle(buffer_size=1000)
    .map(preprocess, num_parallel_calls=AUTOTUNE, deterministic=True)
    .map(augmentation , num_parallel_calls=AUTOTUNE, deterministic=True)
    .batch(16)
    .prefetch(buffer_size=AUTOTUNE)
)
# Validation pipeline: fully deterministic, so caching the final batches is safe.
X_val = (
    X_val
    .map(preprocess, num_parallel_calls=AUTOTUNE, deterministic=True)
    .batch(16)
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

# Number of batches of 16 after the pipelines are built.
print(X_train.cardinality().numpy())
print(X_val.cardinality().numpy())


193299
38496
12082
2406


In [56]:
# Convolutional feature extractor: three Conv -> BatchNorm -> ReLU -> MaxPool stages
# followed by global average pooling. Shapes below assume a 50x50x3 input.
# use_bias=False because the BatchNormalization that follows has its own offset.
# NOTE(review): only the first convolution uses padding='same'; the other two use
# the default 'valid' padding, so the maps shrink 25 -> 23 -> 11 -> 9 -> 4.
# If the 25 -> 12 -> 6 progression was intended, add padding='same' to them.
feature_extraction = [
        #input layer will be size 50x50x3
        layers.Conv2D(filters=32, kernel_size=(3,3), strides=1, padding='same', use_bias=False), #50x50x32 for base filters
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPool2D(pool_size=(2,2), strides=None, padding='valid'), #25x25x32

        layers.Conv2D(filters=64, kernel_size=(3,3), use_bias=False), #23x23x64 ('valid' padding)
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPool2D(pool_size=(2,2), strides=None, padding='valid'), #11x11x64

        layers.Conv2D(filters=128, kernel_size=(3,3), use_bias=False), #9x9x128 ('valid' padding)
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPool2D(pool_size=(2,2), strides=None, padding='valid'), #4x4x128

        layers.GlobalAveragePooling2D() #128 features per image
    ]

# Classification head: three regularised Dense -> BatchNorm -> ReLU -> Dropout stages
# and a single sigmoid unit for the binary output.
# No input_shape is given to the first Dense layer: it is not the first layer of the
# model, so Keras ignores the argument and only emits a warning. Its input size
# (128, from the global average pooling) is inferred when the model is built.
model_body = [
        layers.Dense(units=64, kernel_regularizer='l2'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(0.5),

        layers.Dense(units=64, kernel_regularizer='l2'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(0.5),

        layers.Dense(units=64, kernel_regularizer='l2'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(0.5),

        layers.Dense(units=1, activation='sigmoid')
    ]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [57]:
# Stack the convolutional feature extractor and the dense head into one model.
model = tf.keras.Sequential(feature_extraction + model_body)

# Focal loss on probabilities (the last layer already applies a sigmoid).
focal_loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=2.0, from_logits=False)

# Accuracy plus recall / precision / AUC, each reported under an explicit name.
tracked_metrics = [
    'binary_accuracy',
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.AUC(name='auc'),
]

model.compile(optimizer='adam', loss=focal_loss, metrics=tracked_metrics)


# Stop when val_loss has not improved by at least 0.001 for 5 epochs (checked only
# from epoch 10 onwards) and roll the model back to its best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor= "val_loss",
    min_delta= 0.001,
    patience= 5,
    mode='min',
    restore_best_weights=True,
    start_from_epoch=10
)

# Create the checkpoint folder from Python rather than with an IPython `!mkdir`,
# so the cell also works when the notebook is executed as a plain script.
checkpoint_dir = "../model/model4/checkpoints"
os.makedirs(checkpoint_dir, exist_ok=True)

# Save the full model after every epoch; the file name carries the epoch number
# and the validation loss.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_dir + "/{epoch:02d}-{val_loss:.2f}.keras",
    monitor='val_loss',
    verbose=1,
    save_best_only=False,
    save_weights_only=False,
    save_freq='epoch'
)

# Halve the learning rate after 3 epochs without val_loss improvement, down to 1e-6.
reduce_lr =tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss", factor=0.5, patience=3, min_lr=1e-6, verbose=1
)

In [None]:
# Samples labelled 1 weigh 4.2x as much as samples labelled 0 in the loss.
# NOTE(review): 4.2 is presumably derived from the class imbalance - confirm.
class_weights = {0:1.0, 1:4.2}

# Training options gathered in one place; validation runs on X_val after every epoch.
fit_options = dict(
    epochs=60,
    verbose=1,
    callbacks=[early_stopping, checkpoint, reduce_lr],
    class_weight=class_weights,
    validation_data=X_val,
    validation_split=0,
    validation_steps=None,
    validation_freq=1,
)

history = model.fit(X_train, **fit_options)

# Persist the final model (the best weights when early stopping restored them).
model.save("../model/model4/model.keras")