In [1]:
import numpy as np 
import pandas as pd
import random
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from skimage.transform import resize
from keras.layers import *
from keras.models import *
from keras import layers

from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.applications.xception import Xception

from keras.utils.data_utils import get_file
from keras import backend as K
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.optimizers import Adam

Using TensorFlow backend.


In [2]:
magnification_list = ['40X', '100X', '200X', '400X']
benign_list = ['adenosis', 'fibroadenoma', 'phyllodes_tumor', 'tubular_adenoma']
malignant_list = ['ductal_carcinoma', 'lobular_carcinoma', 'mucinous_carcinoma', 'papillary_carcinoma']
cancer_list = benign_list + malignant_list

In [3]:
def dense_to_one_hot(labels_dense, num_classes):
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot

In [4]:
def data_split(magnification = '40X', validation_percent = 0.15, testing_percent = 0.15, encoding='Yes'):
    validation_percent = validation_percent
    testing_percent = testing_percent
    training_images = []
    training_labels = []
    validation_images = []
    validation_labels = []
    testing_images = []
    testing_labels = []
    for root, dirnames, filenames in os.walk(r"D:/Machine Learning/Datasets/BreakHist_Dataset/" + magnification):
        if filenames == []:
            continue
        else:
            str_length = len("D:/Machine Learning/Datasets/BreakHist_Dataset/40X/")
            #print(root)
            if root[str_length:str_length+6] == 'Benign':
                string_end = 58
            elif root[str_length:str_length+9] == 'Malignant':
                string_end = 61
            elif root[str_length+1:str_length+7] == 'Benign':
                string_end = 59
            else:
                string_end = 62
            name = root[string_end:]
            #print(name)
            #print(cancer_list.index(name))
            total_images = 0
            for names in filenames:
                total_images += 1
            print(name, magnification, total_images)
            validation_size = np.int(total_images*validation_percent)
            testing_size = np.int(total_images*testing_percent)
            training_size = total_images - (validation_size + testing_size)
            print(training_size, validation_size, testing_size, total_images)
            num = 0
            for names in filenames:
                num += 1
                filepath = os.path.join(root, names)
                #print(filepath)
                image = mpimg.imread(filepath)
                image_resize = resize(image,(115,175), mode = 'constant')
                if num in range(training_size):
                    training_images.append(image_resize[:,:,:])
                    training_labels.append(cancer_list.index(name))
                elif num in range(training_size,training_size+validation_size):
                    validation_images.append(image_resize[:,:,:])
                    validation_labels.append(cancer_list.index(name))
                elif num in range(training_size+validation_size,total_images):
                    testing_images.append(image_resize[:,:,:])
                    testing_labels.append(cancer_list.index(name))
    
    training_images = np.asarray(training_images)
    validation_images = np.asarray(validation_images)
    testing_images = np.asarray(testing_images)

    training_labels = np.asarray(training_labels)
    validation_labels = np.asarray(validation_labels)
    testing_labels = np.asarray(testing_labels)
    
    if encoding == 'Yes':
    
        labels_count = np.unique(training_labels).shape[0]
    
        training_labels = dense_to_one_hot(training_labels, labels_count)
        training_labels = training_labels.astype(np.float32)
        validation_labels = dense_to_one_hot(validation_labels, labels_count)
        validation_labels = validation_labels.astype(np.float32)
        testing_labels = dense_to_one_hot(testing_labels, labels_count)
        testing_labels = testing_labels.astype(np.float32)
    
    print(training_images.shape[0],validation_images.shape[0],testing_images.shape[0])
    
    return training_images, training_labels, validation_images, validation_labels, testing_images, testing_labels

In [25]:
image_width, image_height = 210, 180 #420, 360
num_classes = 8
dropout = 0.35

def xception_model(load_weights = True):
    base_model = Xception(include_top=False, weights='imagenet', input_tensor=None, input_shape=(image_width, image_height,3), pooling='max')
    x = base_model.output
    x = Dense(1024, activation='relu')(x)
    x = Dense(256, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(16, activation='relu')(x)
    x = Dense(classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=x)
    model.name = 'xception'
    
    return model

def vgg16_model(load_weights = True):
    base_model = VGG16(include_top=False, weights='imagenet', input_tensor=None, input_shape=(image_width, image_height,3), pooling='max')
    x = base_model.output
    x = Dense(1024, activation='relu')(x)
    x = Dense(256, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(16, activation='relu')(x)
    x = Dense(8, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=x)
    model.name = 'vgg16'
    
    return model

def vgg19_model(load_weights = True):
    if load_weights:
        base_model = VGG19(include_top=False, weights='imagenet', input_tensor=None, input_shape=(image_width, image_height,3), pooling='max')
    else:
        base_model = VGG19(include_top=False, weights=None, input_tensor=None, input_shape=(image_width, image_height,3), pooling='max')
    x = base_model.output
    x = Dense(1024, activation='relu')(x)
    x = Dense(256, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(16, activation='relu')(x)
    x = Dense(8, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=x)
    model.name = 'vgg19'
    return model

def resnet50_model(load_weights = True):
    base_model = ResNet50(include_top=False, weights='imagenet', input_tensor=None, input_shape=(image_width, image_height,3), pooling='avg')
    x = base_model.output
    x = Dense(1024, activation='relu')(x)
    x = Dense(256, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(16, activation='relu')(x)
    x = Dense(8, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=x)
    model.name = 'resnet'
    return model

def inception_model(load_weights = True):
    base_model = InceptionV3(include_top=False, weights='imagenet', input_tensor=None, input_shape=(image_width, image_height,3), pooling='avg')
    x = base_model.output
    x = Dense(1024, activation='relu')(x)
    x = Dense(256, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(16, activation='relu')(x)
    x = Dense(8, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=x)
    model.name = 'inception'
    return model

def inception_resnet_model(load_weights = True):
    base_model = InceptionResNetV2(include_top=False, weights='imagenet', input_tensor=None, input_shape=(image_width, image_height,3), pooling='avg')
    x = base_model.output
    x = Dense(1024, activation='relu')(x)
    x = Dense(256, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(16, activation='relu')(x)
    x = Dense(classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=x)
    model.name = 'inception_resnet'
    return model


In [26]:
models = [vgg16_model, vgg19_model, xception_model, resnet50_model, inception_model, inception_resnet_model]

In [34]:
def compile_n_fit(validation_percent, testing_percent, load_wt, image_width=175, image_height=115,dropout = 0.3, model_name = 'vgg16_model', magnification = '40X'):
    training_images, training_labels, validation_images, validation_labels, testing_images, testing_labels = data_split(magnification = magnification, validation_percent = validation_percent, testing_percent = testing_percent)
    for i in range(len(models)):
        if models[i].__name__ == model_name:
            model = models[i]
    
    model = model(load_weights = load_wt)
    try:
        model.load_weights(model_name + '_weight_1.h5')
        print('Weights loaded!')
    except:
        print('No weights defined!')
#         pass
    
    model.compile(loss="categorical_crossentropy", optimizer=Adam(lr=0.0001), metrics=['accuracy'])
    early_stopping = EarlyStopping(patience=10, verbose=2)
    model_checkpoint = ModelCheckpoint(model_name + "_combine" +".model", save_best_only=True, verbose=2)
    reduce_lr = ReduceLROnPlateau(factor=0.1, patience=5, verbose=2) #min_lr=0.00001,

    epochs = 100
    batch_size = 32

    history = model.fit(training_images, training_labels,
                        validation_data=[validation_images, validation_labels], 
                        epochs=epochs,
                        verbose = 0,
                        batch_size=batch_size,
                        callbacks=[early_stopping, model_checkpoint, reduce_lr])

    test_loss, test_acc = model.evaluate(testing_images, testing_labels)
    
    model.save_weights(model_name + '_weight_1.h5')
    
    print("\nThe test accuracy for " + model_name + " with magnification "+ magnification +" is ", test_acc, "\n")


In [35]:
model_num = 1
name = models[model_num].__name__
iteration = 0
for types in magnification_list:
    if iteration == 0:
        load_wt = "Yes"
    else:
        load_wt = "No"
    compile_n_fit(validation_percent=0.15, testing_percent=0.15,
                    image_width=175, image_height=115, dropout = 0.3,
                    load_wt=load_wt, model_name = name, magnification = types)
    iteration += 1

adenosis 40X 114
80 17 17 114
fibroadenoma 40X 253
179 37 37 253
phyllodes_tumor 40X 109
77 16 16 109
tubular_adenoma 40X 149
105 22 22 149
ductal_carcinoma 40X 864
606 129 129 864
lobular_carcinoma 40X 156
110 23 23 156
mucinous_carcinoma 40X 205
145 30 30 205
papillary_carcinoma 40X 145
103 21 21 145
1397 295 295
No weights defined!

Epoch 00001: val_loss improved from inf to 1.89395, saving model to vgg19_model_combine.model

Epoch 00002: val_loss improved from 1.89395 to 1.74137, saving model to vgg19_model_combine.model

Epoch 00003: val_loss improved from 1.74137 to 1.71194, saving model to vgg19_model_combine.model

Epoch 00004: val_loss did not improve from 1.71194

Epoch 00005: val_loss did not improve from 1.71194

Epoch 00006: val_loss did not improve from 1.71194

Epoch 00007: val_loss did not improve from 1.71194

Epoch 00008: val_loss did not improve from 1.71194

Epoch 00008: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.

Epoch 00009: val_loss did no