In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, GlobalAveragePooling2D
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers.experimental import preprocessing
from cnnArchitectures.Xception import get_xception_model
from sklearn.model_selection import StratifiedKFold
import os
import datetime

%matplotlib inline
%reload_ext tensorboard

In [2]:
# Data paths
# E.g. D:\DATASETS\ALL_2\training\fold_0\all
dataset_path = os.path.abspath('D://DATASETS/ALL_2')
train_data_path = dataset_path + '/training'
train_data_paths = [
    dataset_path + '/training/fold_0/',
    dataset_path + '/training/fold_1/',
    dataset_path + '/training/fold_2/'
]
# Join the components explicitly: plain concatenation dropped the separator
# after `dataset_path` and produced "...ALL_2test/..." instead of "...ALL_2/test/...".
# NOTE(review): test_model() reads the test images from a location without the
# 'test' component - confirm which layout is the real one.
test_data_path = os.path.abspath(
    os.path.join(
        dataset_path,
        'test',
        'C-NMC_test_prelim_phase_data',
        'C-NMC_test_prelim_phase_data',
    )
)
image_format = ".bmp"

# Normalised absolute versions of the per-fold training directories.
data_paths = list(map(os.path.abspath, train_data_paths))

In [3]:
def get_pathes_labels(path=None):
    """Collect image file paths and integer class labels from a directory tree.

    The tree is walked recursively. Every file found in a directory whose path
    contains a component named exactly ``all`` gets label 0; otherwise, if the
    path contains a component named exactly ``hem``, its files get label 1.
    Files in any other directory are ignored.

    Args:
        path: Root directory to scan. When ``None`` the module-level
            ``train_data_path`` (all training data) is used.

    Returns:
        A two-element list ``[image_pathes, labels]`` of NumPy arrays with one
        entry per file found.
    """
    root = train_data_path if path is None else path
    image_pathes = []
    labels = []
    for dir_path, _subdirs, file_names in os.walk(root):
        # Match whole path components rather than substrings, so that folders
        # such as "small" or "allan" are not mistaken for the "all" class.
        components = os.path.abspath(dir_path).split(os.sep)
        if "all" in components:
            label = 0
        elif "hem" in components:
            label = 1
        else:
            continue
        for img_name in file_names:
            # os.path.join keeps the paths valid on every OS (was a literal "\\").
            image_pathes.append(os.path.join(dir_path, img_name))
            labels.append(label)
    return [np.array(image_pathes), np.array(labels)]

In [4]:
def augment(image):
    """Apply a random brightness shift to ``image``.

    Args:
        image: Image tensor to augment.

    Returns:
        The result of ``tf.image.random_brightness`` with a maximum delta of 0.2.
    """
    # The keyword accepted by tf.image.random_brightness is `max_delta`;
    # the previous `max_gamma_delta=` keyword raised a TypeError.
    max_delta = 0.2
    return tf.image.random_brightness(image, max_delta=max_delta)

In [5]:
def preprocess(image, size=(256, 256)):
    """Resize an image and standardize it.

    Args:
        image: Image tensor to process.
        size: Target ``(height, width)`` passed to ``tf.image.resize``.
            Defaults to ``(256, 256)``, the size that used to be hard-coded.

    Returns:
        The resized image after ``tf.image.per_image_standardization``.
    """
    resized = tf.image.resize(image, size)
    return tf.image.per_image_standardization(resized)

In [6]:
def load_image(path):
    """Read the file at ``path`` and decode it as a BMP image with 3 channels."""
    raw_bytes = tf.io.read_file(path)
    return tf.io.decode_bmp(raw_bytes, channels=3)

In [7]:
def get_ds(filenames, labels, batch_size, pref_buf_size):
    """Build a batched, prefetched ``tf.data`` pipeline of (image, label) pairs.

    Args:
        filenames: Image file paths; each one is loaded with ``load_image`` and
            then passed through ``preprocess``.
        labels: Labels, in the same order as ``filenames``.
        batch_size: Number of pairs per batch.
        pref_buf_size: Buffer size passed to ``Dataset.prefetch``.

    Returns:
        The zipped (images, labels) dataset after batching and prefetching.
    """
    autotune = tf.data.experimental.AUTOTUNE

    labels_ds = tf.data.Dataset.from_tensor_slices(labels)
    paths_ds = tf.data.Dataset.from_tensor_slices(filenames)

    # Decode first, then resize/standardize, letting tf.data choose the parallelism.
    decoded_ds = paths_ds.map(load_image, autotune)
    images_ds = decoded_ds.map(preprocess, autotune)

    paired_ds = tf.data.Dataset.zip((images_ds, labels_ds))
    return paired_ds.batch(batch_size).prefetch(pref_buf_size)

In [8]:
# Return pair of (X_train, y_train), (X_test, y_test)
def get_stratified_datasets(X, Y, n_splits=4, random_state=None):
    """Yield shuffled stratified K-fold train/test splits of ``X`` and ``Y``.

    Args:
        X: NumPy array of samples (indexed with integer index arrays).
        Y: NumPy array of labels aligned with ``X``.
        n_splits: Number of folds; defaults to 4 as before.
        random_state: Optional integer seed. When given, both the fold
            assignment and the order of each training split are reproducible.
            When ``None`` the global NumPy random state is used, as before.

    Yields:
        ``[[X_train, y_train], [X_test, y_test]]`` for every fold, with the
        training part in random order.
    """
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    # Use a private generator only when a seed was requested, so unseeded calls
    # keep drawing from the global NumPy state exactly as they used to.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    for train_index, test_index in skf.split(X, Y):
        # Permuting the index array is equivalent to permuting X and Y in lockstep.
        train_index = train_index[rng.permutation(len(train_index))]
        yield [[X[train_index], Y[train_index]], [X[test_index], Y[test_index]]]

In [9]:
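# Create list of picture paths and labels (earlier inline draft, kept commented out;
# the same logic now lives in get_pathes_labels() and get_stratified_datasets())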
#dir_iter = os.walk(train_data_path)
#image_pathes = []
#labels = []
#for dir in dir_iter:
#    if "all" in dir[0]:
#        for img_name in dir[2]:
#            image_pathes.append(dir[0] + "\\" + img_name)
#            labels.append(0)
#    elif "hem" in dir[0]:
#        for img_name in dir[2]:
#            image_pathes.append(dir[0] + "\\" + img_name)
#            labels.append(1)
#X, Y = np.array(image_pathes), np.array(labels)
#p = np.random.permutation(len(X))
#X, Y = X[p], Y[p]
#skf = StratifiedKFold(n_splits=4, shuffle=True, random_state=123)
#skf.get_n_splits(X, Y)
#for train_index, test_index in skf.split(X, Y):
#    X_train, X_test = X[train_index], X[test_index]
#    y_train, y_test = Y[train_index], Y[test_index]
#    print(y_train[:100])

In [10]:
# Dataset / input-pipeline settings used by the cells below.
BATCH_SIZE = 16                  # samples per batch handed to get_ds
IMAGE_SIZE = (256, 256)          # (height, width); the model input adds 3 channels
SEED = 322
PREFETCH_BUFFER_SIZE = 300       # prefetch buffer handed to get_ds
SHUFFLE_BUFFER_SIZE = 1000
CACHE_DIR = "caches/ds_cache"

# Keyword arguments for a directory-based dataset loader.
# NOTE(review): not referenced by any other cell shown here - confirm it is still needed.
ds_params = {
    "labels": "inferred",
    "label_mode": "categorical",
    "class_names": ["all", "hem"],
    "color_mode": "rgb",
    "batch_size": BATCH_SIZE,
    "image_size": IMAGE_SIZE,
    "seed": SEED,
}

In [52]:
def test_model(model, callbacks=None):
    """Evaluate ``model`` on the labelled test images under ``dataset_path``.

    File names are read from the image directory, their labels are looked up in
    the accompanying CSV (columns ``new_names`` and ``labels``) and the
    resulting dataset is passed to ``model.evaluate``.

    Args:
        model: Object exposing a Keras-style ``evaluate(dataset, callbacks=...)``.
        callbacks: Optional list of callbacks forwarded to ``model.evaluate``.

    Returns:
        Whatever ``model.evaluate`` returns (previously the result was discarded).

    Raises:
        FileNotFoundError: If the test image directory does not exist.
        KeyError: If an image file has no row in the labels CSV.
    """
    test_dir = os.path.join(dataset_path, "C-NMC_test_prelim_phase_data")
    test_data_csv = pd.read_csv(
        os.path.join(test_dir, "C-NMC_test_prelim_phase_data_labels.csv")
    )
    test_data_dir = os.path.join(test_dir, "C-NMC_test_prelim_phase_data")

    # Only the top level of the directory is needed, so take the first entry
    # of the walk instead of materialising the whole tree.
    top_level = next(os.walk(test_data_dir), None)
    if top_level is None:
        raise FileNotFoundError("Test image directory not found: " + test_data_dir)
    file_names = top_level[2]

    # One name -> label mapping instead of scanning the DataFrame once per file;
    # drop_duplicates keeps the first row per name, like the old `[0]` lookup.
    label_by_name = (
        test_data_csv.drop_duplicates("new_names")
        .set_index("new_names")["labels"]
        .to_dict()
    )

    filenames = [os.path.join(test_data_dir, name) for name in file_names]
    # The CSV label is inverted to match get_pathes_labels (all -> 0, hem -> 1);
    # presumably the CSV encodes ALL as 1 - verify against the dataset description.
    labels = [1 - label_by_name[name] for name in file_names]

    test_ds = get_ds(filenames, labels, BATCH_SIZE, PREFETCH_BUFFER_SIZE)
    # `callbacks=None` is evaluate's own default, so one call covers both cases.
    return model.evaluate(test_ds, callbacks=callbacks)

In [53]:
# FIXME(review): `model` is first assigned further down (`model = get_cnn_model_1(...)`),
# so on a fresh kernel (Restart & Run All) this call raises NameError. The cell's
# execution count (53) shows it was run out of order; the same evaluation is
# repeated in the last cell of the notebook.
test_model(model)



In [12]:
def get_cnn_model_1(input_shape):
    """Build the sequential CNN binary classifier.

    Layout: three Conv2D(3x3) -> SELU -> MaxPooling(3x3) stages with 32, 64 and
    128 filters, a fourth Conv2D stage with 256 filters followed by dropout, then
    a dense head of 256/128/64 units (each with SELU and dropout), a 32-unit
    dense layer and a single sigmoid output unit.

    Args:
        input_shape: Shape passed as ``input_shape`` to the convolution layers.

    Returns:
        The uncompiled ``tf.keras.Sequential`` model.
    """
    activation = "selu"
    initializer = 'lecun_uniform'

    # Shared keyword arguments of all convolution / hidden dense layers.
    conv_kwargs = dict(
        input_shape=input_shape,
        data_format="channels_last",
        kernel_initializer=initializer,
        bias_initializer=initializer,
        kernel_regularizer=None,
    )
    dense_kwargs = dict(
        kernel_initializer=initializer,
        bias_initializer=initializer,
    )

    model = tf.keras.Sequential()

    # Convolution stages that end in max pooling.
    for filters in (32, 64, 128):
        model.add(Conv2D(filters, (3, 3), **conv_kwargs))
        model.add(Activation(activation))
        model.add(MaxPooling2D(pool_size=(3, 3)))

    # Last convolution stage: dropout instead of pooling.
    model.add(Conv2D(256, (3, 3), **conv_kwargs))
    model.add(Activation(activation))
    model.add(Dropout(0.8))

    # Fully connected head.
    model.add(Flatten())
    for units, drop_rate in ((256, 0.8), (128, 0.8), (64, 0.6)):
        model.add(Dense(units, **dense_kwargs))
        model.add(Activation(activation))
        model.add(Dropout(drop_rate))
    model.add(Dense(32, **dense_kwargs))

    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    return model

In [13]:
# Build the CNN for 3-channel inputs of IMAGE_SIZE and print its layer summary.
model = get_cnn_model_1((*IMAGE_SIZE, 3))
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 254, 254, 32)      896       
_________________________________________________________________
activation (Activation)      (None, 254, 254, 32)      0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 84, 84, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 82, 82, 64)        18496     
_________________________________________________________________
activation_1 (Activation)    (None, 82, 82, 64)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 27, 27, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 25, 25, 128)       7

In [14]:
# Metrics reported during fit/evaluate; "precision" is the name the checkpoint
# callback's `monitor` setting refers to.
metrics = ["accuracy", tf.keras.metrics.Precision(name="precision")]
adam_opt = tf.keras.optimizers.Adam(learning_rate=0.0001)

# Binary cross-entropy matches the single sigmoid output of get_cnn_model_1.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=adam_opt,
    metrics=metrics,
)

In [15]:
#model.load_weights("weights/w.h5")

In [16]:
# Number of full passes over the complete set of stratified folds (outer training loop).
ERAS = 10
# Epochs passed to model.fit for each individual fold.
EPOCHS = 2
# Only referenced by the commented-out TensorBoard callback.
update_freq = 250

In [20]:
# Keep only the best weights seen so far. The validation metric is monitored
# because model.fit is always given validation data in this notebook; ranking
# checkpoints by the training-set precision would reward overfitting.
# NOTE: ModelCheckpoint only saves during model.fit - it has no effect when
# passed to model.evaluate.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath="checkpoints/",
    save_weights_only=True,
    monitor='val_precision',
    mode='max',
    save_best_only=True)
#log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
#tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, update_freq = update_freq,
#                                                     profile_batch = '500,520')

In [21]:
# Train for ERAS passes; each pass fits the model once per stratified fold and,
# on every second pass after the first, evaluates it on the test set.
pathes, labels = get_pathes_labels()

for era in range(ERAS):
    print("Era ", era)
    # Iterate the fold generator directly. The previous manual next()/StopIteration
    # loop also wrapped model.fit in the `try`, so a StopIteration escaping from
    # training would have silently ended the era.
    for train_data, valid_data in get_stratified_datasets(pathes, labels):
        train_ds = get_ds(*train_data, BATCH_SIZE, PREFETCH_BUFFER_SIZE)
        valid_ds = get_ds(*valid_data, BATCH_SIZE, PREFETCH_BUFFER_SIZE)
        # No `batch_size` here: the datasets are already batched by get_ds, and
        # Keras documents that batch_size must not be given for dataset inputs.
        # The checkpoint callback belongs to fit - ModelCheckpoint saves at the
        # end of training epochs and does nothing during evaluate.
        model.fit(
            train_ds,
            validation_data=valid_ds,
            epochs=EPOCHS,
            callbacks=[model_checkpoint_callback])
    if era != 0 and era % 2 == 0:
        print("Model test")
        test_model(model)

Era  0
Epoch 1/2

NotFoundError: 2 root error(s) found.
  (0) Not found:  Resource localhost/_AnonymousVar26/class tensorflow::SummaryWriterInterface does not exist.
	 [[{{node cond_1/then/_28/batch_loss}}]]
  (1) Not found:  Resource localhost/_AnonymousVar26/class tensorflow::SummaryWriterInterface does not exist.
	 [[{{node cond_1/then/_28/batch_loss}}]]
	 [[cond_2/pivot_t/_42/_91]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_2048]

Function call stack:
train_function -> train_function


In [None]:
# Final evaluation of the model on the test images (see test_model), run after the training cell.
test_model(model)