In [1]:
from keras.models import *
from keras.layers import *
from keras.callbacks import *
from keras.optimizers import *
from keras.applications import *
from keras.preprocessing.image import *

from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

from glob import glob
from tqdm import tqdm
import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from sklearn.metrics import *

import multiprocessing
from multiprocessing.dummy import Pool
import pprint

Using TensorFlow backend.


In [2]:
import horovod.keras as hvd
import tensorflow as tf
from keras import backend as K

# `tf` is imported explicitly here: the original cell only worked if one of the
# star imports in the first cell happened to leak a `tf` name into the namespace.

# Horovod: initialise the library before any other hvd.* call.
hvd.init()

# Pin this process to a single GPU, selected by its local rank on the host, and
# let TensorFlow grow GPU memory on demand instead of reserving it all up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = str(hvd.local_rank())
K.set_session(tf.Session(config=config))

In [None]:
# --- Run configuration ---------------------------------------------------
epochs = 10
model_name = 'irv2'
# Downscale factor applied to the 2560x1920 base size (presumably the native
# image resolution -- confirm). It has its own name because `f` is also the
# name of the image-loading helper defined later in this cell, which would
# silently overwrite a scalar called `f`.
downscale = 2
width = 2560 // downscale
height = 1920 // downscale
batch_size = 1
random_seed = 20180719

# --- File listing and train/validation split -----------------------------
# glob() returns paths in arbitrary (filesystem-dependent) order, so the list
# is sorted first; otherwise the seeded shuffle below would not produce the
# same split from run to run or from machine to machine.
fnames = sorted(glob('data/*/*.jpg'))

# Seeded shuffle: decides which files land in the first 80% (training) and
# which in the remaining 20% (validation).
np.random.seed(random_seed)
np.random.shuffle(fnames)

# Re-seed from fresh entropy and reshuffle only the training part, so the
# order of training files is no longer tied to `random_seed` while the
# train/validation membership stays fixed. Presumably this lets each Horovod
# worker see the training data in a different order -- confirm.
np.random.seed()
n = len(fnames)
n_train = int(n*0.8)
fnames_train = fnames[:n_train]
np.random.shuffle(fnames_train)
fnames[:n_train] = fnames_train

print(fnames[:5])

# Per-class file counts (not used further in this cell).
class0 = len(glob('data/normal/*.jpg'))
class1 = len(glob('data/abnormal/*.jpg'))

# --- Pre-allocated storage for every image and its binary label -----------
X = np.zeros((n, height, width, 3), dtype=np.uint8)
y = np.zeros((n, 1), dtype=np.uint8)

def f(index):
    """Load the image at position `index` of `fnames` and resize it.

    Args:
        index: Position of the file in the module-level `fnames` list.

    Returns:
        A tuple `(index, image)`: the index that was passed in and the image
        resized with `cv2.resize` to `(width, height)`. Returning the index
        lets a caller match results back to their slot even if they arrive
        out of order.

    Raises:
        IOError: If `cv2.imread` cannot read the file. It signals failure by
            returning None instead of raising, which would otherwise surface
            as an opaque error inside `cv2.resize`.
    """
    path = fnames[index]
    img = cv2.imread(path)
    if img is None:
        raise IOError('cv2.imread could not read image: %r' % (path,))
    # NOTE(review): cv2.imread returns channels in BGR order by default;
    # confirm this matches what the downstream preprocessing expects.
    return index, cv2.resize(img, (width, height))

# Images are decoded sequentially. A thread-pool variant (Pool +
# imap_unordered over `f`) used to live here but is disabled.
for i, fname in enumerate(tqdm(fnames)):
    X[i] = f(i)[1]
    # Label is 1 when the path contains 'abnormal', otherwise 0.
    y[i] = 1 if 'abnormal' in fname else 0

In [None]:
# Positional split: the first `n_train` samples are used for training and the
# rest for validation. (A random sklearn `train_test_split(X, y, test_size=0.2)`
# alternative is disabled.)
X_train, X_valid = X[:n_train], X[n_train:]
y_train, y_valid = y[:n_train], y[n_train:]

In [None]:
class RocAucMetricCallback(Callback):
    """Print validation accuracy and ROC AUC at the end of every epoch.

    The AUC is also stored in ``logs['roc_auc_val']`` so that callbacks placed
    later in the callback list can read it.

    Args:
        x_val: Validation inputs. When None (default), the module-level
            ``X_valid`` is looked up at the end of each epoch.
        y_val: Validation labels; indexed as ``[:, 0]`` for the accuracy
            computation. When None (default), the module-level ``y_valid``
            is looked up at the end of each epoch.
        batch_size: Batch size passed to ``predict`` (default 4).
    """

    def __init__(self, x_val=None, y_val=None, batch_size=4):
        super(RocAucMetricCallback, self).__init__()
        # Stored under private names: Keras' base Callback already owns a
        # `validation_data` attribute.
        self._x_val = x_val
        self._y_val = y_val
        self._batch_size = batch_size

    def on_epoch_end(self, epoch, logs=None):
        """Score the validation set with the model being trained and report it."""
        # Avoid a shared mutable `{}` default; create a fresh dict per call.
        if logs is None:
            logs = {}

        inputs = X_valid if self._x_val is None else self._x_val
        targets = y_valid if self._y_val is None else self._y_val

        # `self.model` is the model this callback is attached to, so the
        # callback no longer depends on a global named `model`.
        y_pred = self.model.predict(inputs, batch_size=self._batch_size)
        auc = roc_auc_score(targets, y_pred)
        acc = accuracy_score(targets[:, 0], y_pred[:, 0] > 0.5)

        # Record the real score (previously this key was always -inf).
        logs['roc_auc_val'] = float(auc)
        print(f'ACC: {acc:.4f} AUC: {auc:.4f}')

In [None]:
# Raw images go in; the InceptionResNetV2 preprocessing runs inside the graph
# as a Lambda layer placed in front of the ImageNet-initialised backbone.
input_tensor = Input(shape=(height, width, 3))
base_model = InceptionResNetV2(
    include_top=False,
    weights='imagenet',
    input_tensor=Lambda(inception_resnet_v2.preprocess_input)(input_tensor),
)

# Classification head: global average pooling -> dropout -> one sigmoid unit.
x = base_model.output
for head_layer in (GlobalAvgPool2D(), Dropout(0.5), Dense(1, activation='sigmoid')):
    x = head_layer(x)

model = Model(input_tensor, x)

In [None]:
# SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))

In [None]:
import os

# Horovod: wrap the base optimizer for distributed training.
opt = Adam(1e-3)
opt = hvd.DistributedOptimizer(opt)

callbacks = [
    # Horovod: broadcast initial variable states from rank 0 to all other processes.
    # This is necessary to ensure consistent initialization of all workers when
    # training is started with random weights or restored from a checkpoint.
    hvd.callbacks.BroadcastGlobalVariablesCallback(0),

    # Horovod: average metrics among workers at the end of every epoch.
    #
    # Note: This callback must be in the list before the ReduceLROnPlateau,
    # TensorBoard or other metrics-based callbacks.
    hvd.callbacks.MetricAverageCallback(),

    # Horovod: using `lr = 1.0 * hvd.size()` from the very beginning leads to worse final
    # accuracy. Scale the learning rate `lr = 1.0` ---> `lr = 1.0 * hvd.size()` during
    # the first five epochs. See https://arxiv.org/abs/1706.02677 for details.
    #
    # NOTE(review): the wording above assumes the optimizer was created with a
    # learning rate already multiplied by hvd.size(); here it is plain 1e-3.
    # Confirm whether the learning rate should be scaled by the worker count.
    hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=5, verbose=1),
]

# Only rank 0 writes checkpoints and prints the validation report, so workers
# do not write the same files or duplicate the output.
if hvd.rank() == 0:
    # Create the checkpoint directory up front; otherwise a missing `models/`
    # directory would only be discovered when the first checkpoint is saved.
    os.makedirs('models', exist_ok=True)
    callbacks.append(ModelCheckpoint('models/%s-{epoch}.h5' % model_name))
    callbacks.append(RocAucMetricCallback())

In [None]:
# Binary classification: sigmoid output trained with binary cross-entropy.
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

# Progress output is enabled on rank 0 only (verbose=1 there, 0 elsewhere).
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=callbacks,
    verbose=int(hvd.rank() == 0),
)

In [None]:
# Score the validation set with the trained model.
y_pred = model.predict(X_valid, verbose=1, batch_size=1)

# `pprint` is the module (imported via `import pprint`), so it has to be called
# as `pprint.pprint`. Both scores are printed as one (AUC, accuracy) tuple,
# because a second positional argument to pprint.pprint is the output stream.
pprint.pprint((
    roc_auc_score(y_valid, y_pred),
    accuracy_score(y_valid[:, 0], y_pred[:, 0] > 0.5),
))

In [None]:
# Confusion matrix at a 0.5 decision threshold. `pprint` is a module, so the
# function is called as `pprint.pprint`; labels and predictions are passed as
# 1-D vectors, consistent with the accuracy computation.
pprint.pprint(confusion_matrix(y_valid[:, 0] > 0.5, y_pred[:, 0] > 0.5))

In [None]:
# Show the raw predicted probabilities (`pprint` is a module; call pprint.pprint).
pprint.pprint(y_pred)