In [1]:
import os
# Where checkpoints and TensorBoard logs are written.
MODEL_DIR = '../models/siamese'
LOG_DIR = '../logs/04chest_2/'
# Spatial size of the single-channel images fed to the network.
SIZE = (224, 224)
# makedirs(exist_ok=True) also creates a missing '../models' parent and is
# safe to re-run; the previous exists()+mkdir() pair failed when the parent
# directory was absent.
os.makedirs(MODEL_DIR, exist_ok=True)
# Dataset layout: <caminho_bases>/{train,test}
caminho_bases = os.path.join('..', 'bases', 'chest_xray')
caminho_train = os.path.join(caminho_bases, 'train')
caminho_test = os.path.join(caminho_bases, 'test')

# Callbacks

In [2]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, \
    ReduceLROnPlateau, TensorBoard

# TensorBoard logging: histograms every epoch, no graph and no image dumps.
tensorboard_logs = TensorBoard(
    log_dir=LOG_DIR,
    histogram_freq=1,
    write_graph=False,
    write_images=False,
    update_freq='epoch'
)

# Keep a checkpoint only when the training loss improves. The file name
# carries the epoch number and the loss value of that epoch.
mcp_save = ModelCheckpoint(
    os.path.join(MODEL_DIR,
                 '04modelweights-loss.{epoch:02d}-{loss:.2f}.hdf5'),
    monitor='loss',
    mode='min',
    save_best_only=True
)

# Stop after 10 epochs without a lower training loss.
early_stop = EarlyStopping(
    monitor='loss',
    mode='min',
    patience=10,
    verbose=0
)

# Halve the learning rate after 2 epochs without a lower training loss.
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    mode='min',
    factor=0.5,
    patience=2,
    verbose=1
)

# Model

In [3]:
import tensorflow as tf
from tensorflow.keras.layers import Activation, Conv2D, \
    BatchNormalization, concatenate, Dense, Dropout, Flatten, Input, MaxPooling2D
from tensorflow.keras.optimizers import Adam, RMSprop

SIZE = (224, 224)


# Convolutional encoder applied to a single grayscale image of shape
# (*SIZE, 1); its last layer emits a 128-unit feature vector.
nuclear_model = tf.keras.models.Sequential([
    # Stage 1: 16 filters at full resolution.
    Conv2D(16, (3, 3), padding='same', activation='relu',
           input_shape=(*SIZE, 1)),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2: 64 filters.
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),

    # Stage 3: 64 filters.
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 4: 128 filters.
    Conv2D(128, (3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 5: 256 filters; no padding argument from here on (layer default).
    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Head: one more convolution, then flatten into the feature vector.
    Conv2D(256, (3, 3), activation='relu'),
    Flatten(),
    Dense(128, activation='relu'),
])

nuclear_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 224, 224, 16)      160       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 112, 112, 16)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 112, 112, 64)      9280      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 56, 56, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 56, 56, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 56, 56, 64)        36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 28, 28, 64)        0

In [4]:
# One grayscale input per image of the pair.
a_in = Input(shape=(*SIZE, 1))
b_in = Input(shape=(*SIZE, 1))

# The same nuclear_model instance encodes both inputs, so the two branches
# share their weights.
a_feat = nuclear_model(a_in)
b_feat = nuclear_model(b_in)

# Comparison head: concatenate both feature vectors, then
# Dense(32) -> BatchNormalization -> ReLU -> Dense(1, sigmoid).
combined_features = concatenate([a_feat, b_feat], name='merge_features')
combined_features = Dense(32, activation='linear')(combined_features)
combined_features = BatchNormalization()(combined_features)
combined_features = Activation('relu')(combined_features)
combined_features = Dense(1, activation='sigmoid')(combined_features)
similarity_model = tf.keras.Model(inputs=[a_in, b_in],
                                  outputs=[combined_features],
                                  name='Similarity_Model')
similarity_model.summary()

# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases reject.
similarity_model.compile(optimizer=RMSprop(learning_rate=0.001),
                         loss='binary_crossentropy',
                         metrics=['mae'])

Model: "Similarity_Model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 1) 0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 224, 224, 1) 0                                            
__________________________________________________________________________________________________
sequential (Sequential)         (None, 128)          1529888     input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
merge_features (Concatenate)    (None, 256)          0           sequential[1][0]  

# Training

In [5]:
# Per-class image folders inside the train and test splits.
caminho_normal_train, caminho_pneumonia_train = (
    os.path.join(caminho_train, classe) for classe in ('NORMAL', 'PNEUMONIA'))
caminho_normal_test, caminho_pneumonia_test = (
    os.path.join(caminho_test, classe) for classe in ('NORMAL', 'PNEUMONIA'))


In [6]:
import random
import numpy as np
from PIL import Image

def generate_random_batch(nvazios_list, vazios_list, batch_size=64):
    """Draw ``batch_size`` labelled pairs from two lists of items.

    Every member of a pair comes from a randomly chosen list (class 0 is
    ``nvazios_list``, class 1 is ``vazios_list``) and is removed from the end
    of that list with ``pop()``, so both input lists shrink as a side effect.

    Returns a list of ``(item1, item2, alpha)`` tuples where ``alpha`` is
    1 when the two items come from different classes, 0.001 when they come
    from the same class, and 0 when they come from the same class and also
    compare equal.

    Raises ``IndexError`` when a chosen list is already empty.
    """
    # Index 0 / 1 is exactly the value returned by random.randint(0, 1).
    sources = (nvazios_list, vazios_list)
    pairs = []
    for _ in range(batch_size):
        # Keep the order draw -> pop -> draw -> pop.
        first_class = random.randint(0, 1)
        first_item = sources[first_class].pop()
        second_class = random.randint(0, 1)
        second_item = sources[second_class].pop()

        if first_class != second_class:
            alpha = 1
        elif first_item == second_item:
            alpha = 0
        else:
            alpha = 0.001
        pairs.append((first_item, second_item, alpha))
    return pairs

_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def _list_image_files(caminho):
    """Return the names in ``caminho`` whose extension is .jpg, .jpeg or .png.

    The real extension is compared case-insensitively, so ``X.JPEG`` matches
    and a name that merely ends in the letters ``jpeg`` does not.
    """
    return [filename for filename in os.listdir(caminho)
            if os.path.splitext(filename)[1].lower() in _IMAGE_EXTENSIONS]


def _refill_queue(caminho, filenames, minimum):
    """Return shuffled full paths built from ``filenames``, at least ``minimum`` long.

    ``filenames`` is shuffled in place. When it holds fewer than ``minimum``
    names it is reshuffled and appended again until the queue is long enough.
    """
    queue = []
    while len(queue) < minimum:
        random.shuffle(filenames)
        queue.extend(os.path.join(caminho, arq) for arq in filenames)
    return queue


def _load_gray(path):
    """Load ``path`` as a grayscale array resized to SIZE and scaled to [0, 1]."""
    with Image.open(path) as img:
        # PIL's resize takes (width, height) while the batch arrays index
        # (rows, cols), hence the swapped order. LANCZOS is the filter that
        # the removed Image.ANTIALIAS alias pointed to.
        resized = img.convert('L').resize((SIZE[1], SIZE[0]), Image.LANCZOS)
    return np.array(resized) / 255.


def image_generator(caminho1, caminho2, batch_size=64):
    """Endlessly yield ``([X1, X2], y)`` batches of image pairs.

    Args:
        caminho1: directory with the images of class 0.
        caminho2: directory with the images of class 1.
        batch_size: number of pairs per batch.

    Yields:
        ``[X1, X2], y`` where ``X1`` and ``X2`` have shape
        ``(batch_size, *SIZE, 1)`` with pixel values in [0, 1] and ``y`` has
        shape ``(batch_size, 1)`` holding the labels produced by
        ``generate_random_batch``.

    Raises:
        ValueError: on the first ``next()`` call, if either directory
            contains no image file.
    """
    list_files1 = _list_image_files(caminho1)
    list_files2 = _list_image_files(caminho2)
    for caminho, filenames in ((caminho1, list_files1), (caminho2, list_files2)):
        if not filenames:
            raise ValueError('No image files found in {!r}'.format(caminho))

    # One batch pops 2 * batch_size items and all of them may come from the
    # same list, so each list must hold at least that many before a draw.
    minimum = batch_size * 2
    list1 = []
    list2 = []
    while True:
        # Cycle the lists: when one runs low, reload it from the beginning.
        if len(list1) < minimum:
            list1 = _refill_queue(caminho1, list_files1, minimum)
        if len(list2) < minimum:
            list2 = _refill_queue(caminho2, list_files2, minimum)
        # Build one batch out of the two lists.
        triples = generate_random_batch(list1, list2, batch_size)
        X1 = np.zeros((batch_size, *SIZE, 1))
        X2 = np.zeros((batch_size, *SIZE, 1))
        y = np.zeros((batch_size, 1))
        for ind, (path1, path2, label) in enumerate(triples):
            X1[ind, :, :, 0] = _load_gray(path1)
            X2[ind, :, :, 0] = _load_gray(path2)
            y[ind, 0] = label

        yield [X1, X2], y



In [7]:
train_generator = image_generator(caminho_normal_train, caminho_pneumonia_train)
validation_generator = image_generator(caminho_normal_test, caminho_pneumonia_test)

# Checkpoint to resume from. The file name follows the pattern used by the
# ModelCheckpoint callback in this notebook ('...{epoch:02d}-{loss:.2f}.hdf5'),
# so the second number is the training loss despite the variable name.
epoch = 2
val_loss = 0.69

checkpoint_path = os.path.join(
    MODEL_DIR,
    '04modelweights-loss.{:02d}-{:.2f}.hdf5'.format(epoch, val_loss)
)
# Only resume when the checkpoint exists, so the notebook still runs top to
# bottom on a fresh checkout; say so explicitly when nothing is loaded.
if os.path.exists(checkpoint_path):
    similarity_model.load_weights(checkpoint_path)
else:
    print('Checkpoint {} not found; keeping the current weights.'.format(
        checkpoint_path))


In [8]:
# Up to 100 epochs of 20 training batches each, with 5 validation batches
# per epoch. The callbacks handle TensorBoard logging, checkpointing, early
# stopping and learning-rate reduction.
similarity_model.fit_generator(
    generator=train_generator,
    steps_per_epoch=20,
    epochs=100,
    verbose=1,
    validation_data=validation_generator,
    validation_steps=5,
    callbacks=[tensorboard_logs, mcp_save, early_stop, reduce_lr]
)

Epoch 1/100


W0718 10:49:46.921875 140045613893376 deprecation.py:323] From /home/ivan/pybr/projeto/venv/lib/python3.5/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 00035: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 00042: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 00046: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 47/100
Epoch 48/100
Epoch 00048: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100

KeyboardInterrupt: 

In [None]:
# Compare the real extension, case-insensitively: the old `filename[-4:]`
# test also accepted names that merely end in the letters 'jpeg' and missed
# upper-case extensions. Sorting makes the listing order reproducible.
image_extensions = ('.jpg', '.jpeg', '.png')
list_files1 = sorted(
    filename for filename in os.listdir(caminho_normal_train)
    if os.path.splitext(filename)[1].lower() in image_extensions)
list_files2 = sorted(
    filename for filename in os.listdir(caminho_pneumonia_train)
    if os.path.splitext(filename)[1].lower() in image_extensions)

In [None]:
print(len(list_files1))
# Show a short sample instead of dumping every file name into the output.
print(list_files1[:10])

In [None]:
print(len(list_files2))
# Show a short sample instead of dumping every file name into the output.
print(list_files2[:10])