In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, GlobalAveragePooling2D
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers.experimental import preprocessing
from cnnArchitectures.Xception import get_xception_model
import os
import datetime

%matplotlib inline
%reload_ext tensorboard

In [4]:
# Dataset root and the per-fold training directories.
# Example of a class folder: D:\DATASETS\ALL_2\training\fold_0\all
dataset_path = 'D://DATASETS/ALL_2'
image_format = ".bmp"

# One absolute path per training fold (fold_0 .. fold_2).
data_paths = [
    os.path.abspath('{}/training/fold_{}/'.format(dataset_path, fold_index))
    for fold_index in range(3)
]

NameError: name 'os' is not defined

The ALL_2 dataset uses the following file naming conventions:
<img src="ipynb_resources/ALL_CELLS_PHOTO_NAME_CONVENTIONS.jpg">
<img src="ipynb_resources/NORMAL_CELLS_PHOTO_NAME_CONVENTIONS.jpg">

In [None]:
# Input-pipeline settings
BATCH_SIZE = 16
IMAGE_SIZE = (256, 256)
SEED = 322
PREFETCH_BUFFER_SIZE = 100
SHUFFLE_BUFFER_SIZE = 1000
CACHE_DIR = "caches/ds_cache"

# Keyword arguments shared by every image_dataset_from_directory call below.
ds_params = {
    "labels": "inferred",
    "label_mode": "binary",
    "class_names": ["all", "hem"],
    "color_mode": "rgb",
    "batch_size": BATCH_SIZE,
    "image_size": IMAGE_SIZE,
    "seed": SEED,
}

In [None]:
# Load the image datasets straight from the class-labelled folder tree.
# Training and validation are an 80/20 split of the first fold; both calls
# receive the same seed through ds_params so the two subsets stay disjoint.
training_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_paths[0], subset="training", shuffle=True,
    validation_split=0.2, **ds_params
)
validation_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_paths[0], subset="validation",
    validation_split=0.2, **ds_params
)
# shuffle=False keeps a fixed sample order, so predictions and labels gathered
# in separate passes over test_ds refer to the same images.
# NOTE(review): test_ds reads the same folder as training/validation, so scores
# measured on it are not held-out estimates - consider pointing it at another fold.
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_paths[0], shuffle=False, **ds_params
)


def _count_files(directory):
    """Return the number of regular files located directly inside ``directory``."""
    return sum(
        1 for entry in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, entry))
    )


# Class imbalance: count the files of each class folder explicitly instead of
# substring-matching the whole walked path, which miscounts as soon as any
# parent directory name happens to contain "all" or "hem".
number_of_all_examples = _count_files(os.path.join(data_paths[0], "all"))
number_of_hem_examples = _count_files(os.path.join(data_paths[0], "hem"))
if number_of_hem_examples == 0:
    raise ValueError(
        "No 'hem' examples found under " + data_paths[0] + "; cannot compute class weights"
    )
# Class 0 ("all") keeps weight 1; class 1 ("hem") is scaled by the all/hem ratio.
class_weights = {0: 1, 1: number_of_all_examples / number_of_hem_examples}
print(class_weights)

In [None]:
def preprocess_image(image):
    """Multiply ``image`` by 1/255 using a Keras ``Rescaling`` layer.

    Args:
        image: image tensor (or batch of images) coming from the dataset pipeline.

    Returns:
        The rescaled tensor.
    """
    # Further adjustments that were tried and are currently disabled:
    #   tf.image.resize(result, IMAGE_SIZE)
    #   tf.image.adjust_brightness(result, 0.1)
    #   tf.image.adjust_contrast(result, 1.3)
    #   tf.image.adjust_gamma(result, 1.5, 1)
    rescale = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)
    return rescale(image)

In [None]:
# Input pipeline: rescale pixels, re-batch to a fixed batch size, cache, prefetch.
# NOTE (original author): num_parallel_calls did not seem to have a visible effect.
# NOTE: this cell re-assigns the datasets in place, so running it twice applies
# the rescaling twice - re-run the loading cell first.


def _rescale_images(dataset):
    """Apply preprocess_image to the image half of every (images, labels) element."""
    return dataset.map(
        lambda x, y: (preprocess_image(x), y),
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )


training_ds = _rescale_images(training_ds)
validation_ds = _rescale_images(validation_ds)
test_ds = _rescale_images(test_ds)

# Re-batch so that every batch holds exactly BATCH_SIZE samples
# (the trailing partial batch is dropped).
training_ds, validation_ds = training_ds.unbatch(), validation_ds.unbatch()

training_ds = training_ds.batch(BATCH_SIZE, drop_remainder=True)
validation_ds = validation_ds.batch(BATCH_SIZE, drop_remainder=True)

# Each dataset gets its own cache file. Pointing both at the same path makes
# the two pipelines fight over one cache (lock-file errors, or validation
# reading back the cached training batches).
os.makedirs(os.path.dirname(CACHE_DIR) or ".", exist_ok=True)
training_ds = training_ds.cache(CACHE_DIR + "_train").prefetch(PREFETCH_BUFFER_SIZE)
validation_ds = validation_ds.cache(CACHE_DIR + "_val").prefetch(PREFETCH_BUFFER_SIZE)

In [None]:
# Sanity check: print the first (images, labels) batch of the training pipeline.
first_batch_only = training_ds.take(1)
for batch in first_batch_only:
    print(batch)

In [None]:
def CNN_model0(input_shape):
    """Build a small sequential CNN that ends in a single sigmoid unit.

    Layout: three Conv2D(3x3, "valid") -> ReLU -> MaxPooling2D(2x2) stages with
    32, 32 and 64 filters, followed by Flatten, Dense(64) -> ReLU, Dense(64),
    Dropout(0.5) and Dense(1) -> sigmoid.

    Args:
        input_shape: forwarded as ``input_shape`` to the first Conv2D layer.

    Returns:
        The (uncompiled) ``tf.keras.Sequential`` model.
    """
    # One seeded Glorot-normal initializer instance is shared by all conv kernels.
    conv_init = tf.keras.initializers.GlorotNormal(seed=32)
    net = tf.keras.Sequential()

    for stage, n_filters in enumerate((32, 32, 64)):
        # Only the very first layer declares the input shape.
        first_layer_args = {"input_shape": input_shape} if stage == 0 else {}
        net.add(Conv2D(n_filters, (3, 3), strides=(1, 1), padding="valid",
                       kernel_initializer=conv_init, **first_layer_args))
        net.add(Activation("relu"))
        net.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    net.add(Flatten())

    # Classifier head.
    # NOTE(review): the second Dense(64) has no activation - confirm this is intended.
    head_layers = (
        Dense(64),
        Activation("relu"),
        Dense(64),
        Dropout(0.5),
        Dense(1),
        Activation("sigmoid"),
    )
    for layer in head_layers:
        net.add(layer)

    return net

In [None]:
def CNN_model(input_shape):
    """Build a larger sequential CNN (five conv layers, two wide dense layers).

    Layout: Conv2D(96, 11x11, stride 4) -> ReLU -> pool, Conv2D(256, 5x5) -> ReLU
    -> pool, Conv2D(384) -> ReLU, Conv2D(384) -> ReLU, Conv2D(256) -> ReLU -> pool,
    Flatten, Dense(2048) -> sigmoid, Dense(2048), Dense(1) -> sigmoid.
    Every pooling layer is MaxPooling2D(3x3, stride 2).

    Args:
        input_shape: forwarded as ``input_shape`` to the first Conv2D layer.

    Returns:
        The (uncompiled) ``tf.keras.Sequential`` model.
    """
    net = tf.keras.Sequential()

    def relu():
        return Activation('relu')

    def pool():
        return MaxPooling2D(pool_size=(3, 3), strides=(2, 2))

    feature_extractor = [
        Conv2D(96, (11, 11), strides=(4, 4), padding="valid", input_shape=input_shape),
        relu(),
        pool(),
        Conv2D(256, (5, 5), padding="same"),
        relu(),
        pool(),
        Conv2D(384, (3, 3), padding="same"),
        relu(),
        Conv2D(384, (3, 3), padding="same"),
        relu(),
        Conv2D(256, (3, 3), padding="same"),
        relu(),
        pool(),
    ]
    # Classifier head; a Dropout(0.5) before the output layer was tried and is disabled.
    classifier = [
        Flatten(),
        Dense(2048),
        Activation('sigmoid'),
        Dense(2048),
        Dense(1),
        Activation('sigmoid'),
    ]
    for layer in feature_extractor + classifier:
        net.add(layer)
    return net

In [None]:
# Instantiate the small CNN for (height, width, 3) RGB inputs.
model = CNN_model0((*IMAGE_SIZE, 3))

In [None]:
# Candidate optimizers; the compile cell picks one of them.
# All of them use the `learning_rate` keyword: `lr` is a deprecated alias that
# newer Keras releases warn about or no longer honour.
# NOTE(review): `decay` on SGD is a legacy argument as well; newer Keras releases
# expect a learning-rate schedule instead - revisit when upgrading TensorFlow.
sgd = tf.keras.optimizers.SGD(learning_rate=0.005, decay=1e-6, momentum=0.9, nesterov=True)
rms = tf.keras.optimizers.RMSprop(
      learning_rate=0.001, rho=0.9, momentum=0.7, centered=True)
adam = tf.keras.optimizers.Adam(learning_rate=0.005, amsgrad=True)
adadelta = tf.keras.optimizers.Adadelta(learning_rate=0.001, rho=0.95)
adagrad = tf.keras.optimizers.Adagrad(learning_rate=0.001,initial_accumulator_value=0.1)
adamax = tf.keras.optimizers.Adamax(learning_rate=0.001)
nadam = tf.keras.optimizers.Nadam(learning_rate=0.05)
ftrl = tf.keras.optimizers.Ftrl(learning_rate=0.001,
                                learning_rate_power=-0.5,
                                initial_accumulator_value=0.1)

In [None]:
# Metrics tracked during training and validation of the sigmoid classifier.
# Explicit names keep the history keys ("accuracy", "val_precision", ...) stable
# even when this cell is re-run and Keras would otherwise append "_1", "_2", ...
METRICS = [
    # BinaryAccuracy thresholds the predicted probability (0.5 by default).
    # Plain Accuracy counts exact equality between labels and raw predictions,
    # which is almost never true for sigmoid outputs.
    tf.keras.metrics.BinaryAccuracy(name="accuracy"),
    tf.keras.metrics.Precision(name="precision"),
    tf.keras.metrics.Recall(name="recall"),
    tf.keras.metrics.AUC(name="auc"),
]

In [None]:
# Compile with the Adam optimizer and binary cross-entropy (single sigmoid output),
# then print the layer-by-layer summary.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=adam,
    metrics=METRICS,
)
model.summary()

In [None]:
#for layer in model.layers[:85]:
   #layer.trainable = False
#for layer in model.layers[85:]:
  # layer.trainable = True

In [None]:
# TensorBoard logging: every execution of this cell writes to its own
# timestamped directory under logs/fit/.
log_dir = "logs/fit/{:%Y%m%d-%H%M%S}".format(datetime.datetime.now())
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
    profile_batch='500,520',
)

# Keep only the weights of the epoch with the highest validation precision.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath="weights/",
    monitor='val_precision',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
)

# To disable class re-weighting, set: class_weights = {0: 1, 1: 1}
# Train the model
EPOCHS = 10
hist = model.fit(
    training_ds,
    validation_data=validation_ds,
    epochs=EPOCHS,
    class_weight=class_weights,
    callbacks=[tensorboard_callback, model_checkpoint_callback],
)

In [None]:
# Training vs. validation curves. hist.history holds one value per epoch, so the
# x-axis is labelled in epochs, and a legend tells the two curves apart.
for metric_key, y_label, y_limits in (
    ("loss", "Loss (training and validation)", [0, 2]),
    ("accuracy", "Accuracy (training and validation)", [0, 1]),
):
    plt.figure()
    plt.ylabel(y_label)
    plt.xlabel("Epoch")
    plt.ylim(y_limits)
    plt.plot(hist.history[metric_key], label="training")
    plt.plot(hist.history["val_" + metric_key], label="validation")
    plt.legend()

In [None]:
# Confusion counts on test_ds.
# Label 0 is counted as "all" and any other label as "hem"; a score above 0.5
# is read as a "hem" prediction.
# Scores and labels are taken from the SAME batch in a single pass: predicting in
# one pass and reading labels in another mis-pairs them whenever the dataset is
# shuffled, and a hand-computed batch offset is easy to get wrong.
all_false, all_true, hem_false, hem_true = 0, 0, 0, 0
score_batches = []
print("Predicted")
for batch_images, batch_labels in test_ds:
    batch_scores = np.asarray(model.predict_on_batch(batch_images))
    score_batches.append(batch_scores)

    predicted_hem = batch_scores.reshape(-1) > 0.5
    actual_hem = np.asarray(batch_labels).reshape(-1) != 0

    all_true += int(np.sum(~actual_hem & ~predicted_hem))
    all_false += int(np.sum(~actual_hem & predicted_hem))
    hem_true += int(np.sum(actual_hem & predicted_hem))
    hem_false += int(np.sum(actual_hem & ~predicted_hem))

# All scores in dataset order, kept for the inspection cells that follow.
prediction_scores = np.concatenate(score_batches) if score_batches else np.empty((0, 1))
#plt.figure()
#plt.imshow(batch[0][ind])
#plt.xlabel(get_class_string_from_index(true_index) + "\n" + 
          #get_class_string_from_index(predicted_index))

    

In [None]:
# Highest predicted score over the whole test set.
print(np.asarray(prediction_scores).max())

In [None]:
# Per-class and overall hit rates derived from the confusion counts.
report_rows = (
    ("True ALL: ", "False ALL: ", "True/(Total_all): {:.4f}", all_true, all_false),
    ("True hem: ", "False hem: ", "True/(Total_hem): {:.4f}", hem_true, hem_false),
    ("Total true: ", "Total false: ", "True/Total: {:.4f}",
     all_true + hem_true, all_false + hem_false),
)
for row_index, (hit_title, miss_title, ratio_template, hits, misses) in enumerate(report_rows):
    if row_index:
        # Separator between consecutive sections.
        print("#"*60)
    print(hit_title, hits)
    print(miss_title, misses)
    print(ratio_template.format(hits / (misses + hits)))

In [2]:
import pandas as pd

In [5]:
# Display the configured dataset root (a bare expression is rendered as the cell's Out value).
dataset_path

'D://DATASETS/ALL_2'

In [8]:
# Read the label CSV of the preliminary-phase test data into a DataFrame.
labels = pd.read_csv(
    "{}/C-NMC_test_prelim_phase_data/C-NMC_test_prelim_phase_data_labels.csv".format(dataset_path)
)

In [15]:
# Label encoding used by this CSV: 1 = all, 0 = hem.
# NOTE(review): this looks inverted relative to ds_params, where
# class_names=["all", "hem"] presumably maps all -> 0 and hem -> 1 - confirm
# before comparing these labels with model outputs.
print(labels.head(n=5))

             Patient_ID new_names  labels
0   UID_57_29_1_all.bmp     1.bmp       1
1   UID_57_22_2_all.bmp     2.bmp       1
2   UID_57_31_3_all.bmp     3.bmp       1
3  UID_H49_35_1_hem.bmp     4.bmp       0
4   UID_58_6_13_all.bmp     5.bmp       1
