In [1]:
import os
import sys
sys.path.append("..")
import pathlib
import tensorflow as tf
import numpy as np

import utils as utils
import CST as CST
from metrics import recall_m, precision_m, f1_m, auc_m
from load_data import load_aj_idc

#### Load data and create generators

In [2]:
"""generator parameters"""
data_path = "../data/aj/IDC_regular_ps50_idx5"
# data_path = "../data/cifar-10/train"
batch_size = 128
tile_size = 50
# "categorical" or "binary"; the IDC dataset has 2 classes, hence "binary".
class_mode = "binary"
"""--------------------"""

# Fetch the dataset on first use, i.e. when the image folder is missing.
dataset_missing = not os.path.exists(data_path)
if dataset_missing:
    os.makedirs("../data/aj", exist_ok=True)
    load_aj_idc("../data/aj")


# A single generator backs both subsets: it reserves 20% of the images for
# validation and runs utils.normalize_image on the images it yields (images
# must be normalized and centered at 0 for the distortions to work).
gen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=utils.normalize_image,
    validation_split=0.2,
)

# Settings shared by the training and the validation iterator.
flow_kwargs = dict(
    directory=data_path,
    target_size=(tile_size, tile_size),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode=class_mode,
)

# Training iterator: keeps the default shuffling behaviour.
t_flow = gen.flow_from_directory(subset="training", **flow_kwargs)

# Validation iterator: shuffling is switched off so the sample order is fixed.
v_flow = gen.flow_from_directory(subset="validation", shuffle=False, **flow_kwargs)

# Class weights are derived from the training labels only.
class_weight = utils.get_class_weights(t_flow.classes)

Dataset: Invasive Ductal Carcinoma Identification.
Author: Andrew Janowczyk
source: http://andrewjanowczyk.com/deep-learning/
Downloading dataset (might take a few minutes)...
...Download complete
Organizing images into class folders...
...Organization complete
Found 222020 images belonging to 2 classes.
Found 55504 images belonging to 2 classes.


In [3]:
# Class-name -> integer-label mapping reported by the training iterator.
classes = t_flow.class_indices
n_classes = len(classes)
print(f"N° of classes: {n_classes}:")
print(classes)

# From here on, n_classes is the number of units in the model's output layer.
# The decision is tied to class_mode (the same switch that selects the output
# activation) rather than to the raw class count: a "binary" flow yields a
# single 0/1 label per sample and needs one sigmoid unit, whereas a
# "categorical" flow yields one-hot labels and needs one softmax unit per
# class -- including when there are exactly two classes.
n_classes = 1 if class_mode == "binary" else n_classes


N° of classes: 2:
{'0': 0, '1': 1}


#### Create the model and the CST instance

In [4]:
"""cst parameters"""
alpha = 1
# Per-distortion settings handed to ContrastiveStabilityTraining.
dist_params = dict(
    contrast=dict(lower=0.4, upper=1.4),
    color=dict(factor=[20, 20, 20]),
    blur=dict(kernel_size=1, sigma=3.0),  # kernel size is 'kernel_size * 2 + 1'
    brightness=dict(max_delta=0.3),
)
# model_path = "../models/CST4_alpha1_DC4.h5"

"""--------------"""
# Output activation follows the label encoding selected by class_mode.
if class_mode == "binary":
    activation = "sigmoid"
else:
    activation = "softmax"


# model = tf.keras.models.load_model(model_path)

# ImageNet-initialised ResNet50 without its classification top, sized for the tiles.
backbone = tf.keras.applications.ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=(tile_size, tile_size, 3),
)

# Small dense head placed on top of the flattened backbone features.
head = [
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(n_classes, activation=activation),
]

model = tf.keras.Sequential([backbone, *head])

cst = CST.ContrastiveStabilityTraining(
    alpha=alpha,
    dist_params=dist_params,
    tile_size=tile_size,
    model=model,
)
cst.cst_model.summary()




_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 50, 50, 3)         0         
_________________________________________________________________
sequential (Sequential)      (None, 1)                 24112129  
Total params: 24,112,129
Trainable params: 24,059,009
Non-trainable params: 53,120
_________________________________________________________________


#### Compile 

In [5]:
# opt = tf.keras.optimizers.SGD(lr=1e-4, momentum=0.9, decay=1e-6)
# NOTE(review): newer Keras releases call this argument `learning_rate`;
# confirm against the installed TensorFlow version before renaming.
opt = tf.keras.optimizers.Adam(lr=1e-4, amsgrad=True)

# Select the loss and the matching cross-entropy metric for the label encoding
# chosen via class_mode. The branches are mutually exclusive and exhaustive:
# an unsupported class_mode fails loudly here instead of leaving `metrics` and
# `loss` undefined -- or, in a live kernel, silently reusing the values left
# over from a previous run of this cell.
if class_mode == "binary":
    metrics = [tf.keras.metrics.binary_crossentropy, recall_m, precision_m, f1_m, auc_m]
    loss = tf.keras.losses.binary_crossentropy
elif class_mode == "categorical":
    metrics = ["categorical_crossentropy", recall_m, precision_m, f1_m, auc_m]
    loss = tf.keras.losses.categorical_crossentropy
else:
    raise ValueError(
        f"Unsupported class_mode {class_mode!r}: expected 'binary' or 'categorical'."
    )

cst.compile_cst(optimizer=opt, metrics=metrics, loss=loss)

#### Train

In [6]:
"""train parameters"""
epochs = 1
save_all_epochs = True
save_metrics = True
model_save_path = "../models"
model_name = "cst_nb_model_idc"
"""----------------"""

# Persistence and schedule options, forwarded unchanged to cst.train_cst.
train_options = dict(
    save_all_epochs=save_all_epochs,
    model_save_path=model_save_path,
    model_name=model_name,
    save_metrics=save_metrics,
    epochs=epochs,
)

# Train on the training iterator, validate on the held-out iterator, and
# pass along the class weights computed from the training labels.
cst.train_cst(
    x=t_flow,
    validation_data=v_flow,
    class_weight=class_weight,
    **train_options,
)

Epoch 1/1
class weights saved to path: 
cst_nb_model_idc1.h5
