In [6]:
from PIL import Image
from glob import glob
import numpy as np
from os.path import basename
import matplotlib.pylab as plt
import fnmatch
import cv2
import itertools
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, make_scorer, accuracy_score
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import History
from imblearn.under_sampling import RandomUnderSampler
from keras.utils.np_utils import to_categorical

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [27]:
#Path of the files:
path = "/home/MariaNuila/*/*/*.png"


In [28]:
pictures = glob(path)

#Separate images based on classification
nonIDC =  fnmatch.filter(pictures, '*class0.png')
IDC = fnmatch.filter(pictures, '*class1.png')

In [29]:
width, height = 50, 50
num_classes = 2
input_shape = (50,50,3)
channels = 3

In [30]:
#Create training and testing data set from pictures
x = []
y = [] #labels
#Attempt to get better depiction of dataset by grabbing from beginning, middle, and end
for pic in pictures[0:30000]:
    image = cv2.imread(pic)
    x.append(cv2.resize(image, (width, height), interpolation=cv2.INTER_CUBIC))
    if pic in nonIDC:
        y.append(0)
    if pic in IDC:
        y.append(1)
        
for pic in pictures[70000:100000]:
    image = cv2.imread(pic)
    x.append(cv2.resize(image, (width, height), interpolation=cv2.INTER_CUBIC))
    if pic in nonIDC:
        y.append(0)
    if pic in IDC:
        y.append(1)
        
for pic in pictures[140000:170000]:
    image = cv2.imread(pic)
    x.append(cv2.resize(image, (width, height), interpolation=cv2.INTER_CUBIC))
    if pic in nonIDC:
        y.append(0)
    if pic in IDC:
        y.append(1)
        
for pic in pictures[240000:270000]:
    image = cv2.imread(pic)
    x.append(cv2.resize(image, (width, height), interpolation=cv2.INTER_CUBIC))
    if pic in nonIDC:
        y.append(0)
    if pic in IDC:
        y.append(1)
        
#Place in dataframe:
df = pd.DataFrame()
df["images"] = x
df["labels"] = y

In [31]:
#Vectorize and normalize data before training split
x =np.array(x)
print(x.shape)

(120000, 50, 50, 3)


In [32]:
x = x / 255.0
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.2)

#Convert our training and testing data into np arrays:
xtrain_npArray = np.array(X_train)
xtest_npArray = np.array(X_test)
ytrain_npArray = np.array(Y_train)
ytest_npArray = np.array(Y_test)
# convert class vectors to binary class matrices
ytrain_npArray = keras.utils.to_categorical(ytrain_npArray, num_classes)
ytest_npArray = keras.utils.to_categorical(ytest_npArray, num_classes)

In [33]:
#Specify Data training information: 
batch_size = 128
epochs = 15

In [34]:
print(xtrain_npArray.shape)
print(ytest_npArray.shape)

(96000, 50, 50, 3)
(24000, 2)


# Preprocess images further to fine-tune transfer learning

In [35]:
# Deal with imbalanced class sizes below
# Make Data 1D for compatability upsampling methods
X_trainShape = X_train.shape[1]*X_train.shape[2]*X_train.shape[3]
X_testShape = X_test.shape[1]*X_test.shape[2]*X_test.shape[3]
X_trainFlat = X_train.reshape(X_train.shape[0], X_trainShape)
X_testFlat = X_test.reshape(X_test.shape[0], X_testShape)

In [36]:
print("X_train Shape: ",X_train.shape)
print("X_test Shape: ",X_test.shape)
print("X_trainFlat Shape: ",X_trainFlat.shape)
print("X_testFlat Shape: ",X_testFlat.shape)

('X_train Shape: ', (96000, 50, 50, 3))
('X_test Shape: ', (24000, 50, 50, 3))
('X_trainFlat Shape: ', (96000, 7500))
('X_testFlat Shape: ', (24000, 7500))


In [37]:
#Use random under sampling to deal with imbalance of dataset
rus = RandomUnderSampler(ratio='auto')
X_trainRus, Y_trainRus = rus.fit_sample(X_trainFlat, Y_train)
X_testRus, Y_testRus = rus.fit_sample(X_testFlat, Y_test)

Y_trainRusCat = to_categorical(Y_trainRus, num_classes = 2)
Y_testRusCat = to_categorical(Y_testRus, num_classes = 2)

In [38]:
#Check that our under sampling balances the dataset
print("X_trainRos Shape: ",X_trainRus.shape)
print("Y_trainRosHot Shape: ",Y_trainRusCat.shape)

('X_trainRos Shape: ', (52096, 7500))
('Y_trainRosHot Shape: ', (52096, 2))


In [39]:
#Resize all our images to the correct input size:
for i in range(len(X_trainRus)):
    height, width, channels = 50,50,3
    X_trainRusReshaped = X_trainRus.reshape(len(X_trainRus),height,width,channels)
    
for i in range(len(X_testRus)):
    height, width, channels = 50,50,3
    X_testRusReshaped = X_testRus.reshape(len(X_testRus),height,width,channels)

# Transfer Learning with VGG16 Keras - Fine Tuned

In [40]:
vgg16_model = keras.applications.vgg16.VGG16(weights='imagenet', include_top=False, input_shape=input_shape)


In [41]:
vgg16_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 50, 50, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 50, 50, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 50, 50, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 25, 25, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 25, 25, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 25, 25, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 12, 12, 128)       0         
__________

In [42]:
# Make our fine-tuned model sequential for easier processing
fine_tuned = Sequential()
#Add vgg layers to our model
for layers in vgg16_model.layers:
    fine_tuned.add(layers)

In [43]:
#Adapt the previous transfer learning and allow the last 3 layers to be trainable, in hope for better results
for layers in fine_tuned.layers[:-3]:
    layers.trainable = False

In [44]:
#Now add our fully connected layer: (adding another dropout for overfitting)
fine_tuned.add(Flatten())
fine_tuned.add(Dense(64, activation='relu'))
fine_tuned.add(Dropout(0.3))
fine_tuned.add(Dense(64, activation='relu'))
fine_tuned.add(Dropout(0.5))
fine_tuned.add(Dense(num_classes, activation='softmax'))

In [45]:
fine_tuned.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 50, 50, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 50, 50, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 50, 50, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 25, 25, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 25, 25, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 25, 25, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 12, 12, 128)       0         
__________

In [48]:
#use Adam optimizer based on stochastic optimization (Adam):
fine_tuned.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(lr=0.00146),
                  metrics=['accuracy'])

In [54]:
history = fine_tuned.fit(xtrain_npArray, ytrain_npArray,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(xtest_npArray, ytest_npArray))

Train on 96000 samples, validate on 24000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [55]:
score = fine_tuned.evaluate(X_testRusReshaped, Y_testRusCat, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

('Test loss:', 0.8075543790521421)
('Test accuracy:', 0.5)


In [56]:
def plot_loss_accuracy(history):
    fig = plt.figure(figsize=(12, 6))
    ax = fig.add_subplot(1, 2, 1)
    ax.plot(history.history["loss"],'r-x', label="Train Loss")
    ax.plot(history.history["val_loss"],'b-x', label="Validation Loss")
    ax.legend()
    ax.set_title('cross_entropy loss')
    ax.grid(True)


    ax = fig.add_subplot(1, 2, 2)
    ax.plot(history.history["acc"],'r-x', label="Train Accuracy")
    ax.plot(history.history["val_acc"],'b-x', label="Validation Accuracy")
    ax.legend()
    ax.set_title('accuracy')
    ax.grid(True)

In [None]:
plot_loss_accuracy(history)

# Predict our "fine-tuned" vgg16 keras model

In [59]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    plt.figure(figsize = (5,5))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
y_pred = fine_tuned.predict(X_testRusReshaped)
map_characters = {0: 'IDC(-)', 1: 'IDC(+)'}


In [None]:
print('\n', classification_report(np.where(Y_testRusCat> 0)[1], np.argmax(y_pred, axis=1),target_names=list(map_characters.values())), sep='')

In [None]:
Y_pred_classes = np.argmax(y_pred,axis=1) 
Y_true = np.argmax(Y_testRusCat,axis=1)

In [None]:
dict_characters = {0: 'IDC(-)', 1: 'IDC(+)'}
confusion_mtx = confusion_matrix(Y_true, Y_pred_classes) 
plot_confusion_matrix(confusion_mtx, classes = list(dict_characters.values())) 
plt.show()