In [0]:
import os
import pickle
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt
import seaborn as sns
import keras
from PIL import Image as pil_image
sns.set_style('white')
%matplotlib inline

from keras.applications.mobilenet import MobileNet
#, relu6, DepthwiseConv2D
from keras.preprocessing import image
from keras.layers import AveragePooling2D, Conv2D, UpSampling2D, DepthwiseConv2D
from keras.models import load_model, Model
#from utils.keras_utils import preprocess_input_tf, center_crop
from keras.utils.generic_utils import CustomObjectScope
import itertools
from collections import Counter
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score, confusion_matrix, cohen_kappa_score

In [0]:
def plot_confusion_matrix(cm, classes, figname=None, normalize=False, title=None, cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it as an annotated heat map.
    (Adapted from the example in the scikit-learn docs.)

    Parameters
    ----------
    cm : 2-D array-like
        Confusion matrix. Rows are treated as the true classes and columns as
        the predicted classes (the layout `sklearn.metrics.confusion_matrix`
        returns); this matters for `normalize`, which works row by row.
    classes : sequence of str
        Tick labels, one per class, in the same order as the rows/columns.
    figname : str or None
        If None the figure is shown; otherwise it is saved to this path and closed.
    normalize : bool
        If True every row is divided by its sum. Rows that sum to zero
        (a class without samples) are reported as all zeros rather than NaN.
    title : str or None
        Optional figure title.
    cmap : matplotlib colormap
        Colormap used for the heat map.
    """
    cm = np.asarray(cm)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row has samples; empty rows keep the zeros from
        # `out`, which avoids NaN cells and a NaN text-colour threshold.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)
    print(cm)

    plt.figure(figsize=(7, 7))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45, fontsize=18)
    plt.yticks(tick_marks, classes, fontsize=18)

    # Cells darker than half of the maximum get white text so they stay readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        value = cm[i, j]
        label = '%.2f' % value if normalize else str(value)
        plt.text(j, i, label, horizontalalignment="center",
                 color="white" if value > thresh else "black",
                 fontsize=16)

    plt.ylabel('True label', fontsize=18)
    plt.xlabel('Predicted label', fontsize=18)
    if title is not None:
        plt.title(title)
    # Run the layout pass last so it accounts for the image, labels and title.
    plt.tight_layout()

    if figname is None:
        plt.show()
    else:
        plt.savefig(figname)
        plt.close()

In [0]:
def plot_stats(y_true, y_pred, n_class=2):
    """
    Show how the predicted scores are distributed for each true class.

    Draws an n_class x n_class grid of histograms: the panel in row i and
    column j is the histogram (10 bins, raw counts) of column j of `y_pred`
    restricted to the samples whose true label is i.

    Parameters
    ----------
    y_true : 1-D array-like
        True class index of every sample.
    y_pred : 2-D array-like
        Indexed as y_pred[sample, class]; presumably the class probabilities
        returned by the model, but any per-class score works.
    n_class : int
        Number of classes, i.e. rows/columns of the grid.
    """
    # Boolean-mask indexing below needs real arrays, not lists.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    col = sns.color_palette()

    # squeeze=False keeps `axarr` two-dimensional even when n_class == 1.
    fig, axarr = plt.subplots(n_class, n_class, sharex=True, figsize=(10, 10), squeeze=False)
    for i_true in range(n_class):
        v = y_pred[y_true == i_true]
        # Cycle through the palette so more classes than colours do not fail.
        row_color = col[i_true % len(col)]
        for j_pred in range(n_class):
            ax = axarr[i_true, j_pred]
            # `density` is the current name of the former `normed` keyword,
            # which newer Matplotlib releases no longer accept.
            ax.hist(v[:, j_pred], bins=10, density=False, color=row_color)
            ax.set_title('class=%d, prediction=%d' % (i_true, j_pred))
    # Hide the x tick labels of the top row.
    plt.setp([a.get_xticklabels() for a in axarr[0, :]], visible=False)
    plt.tight_layout()
    plt.show()

In [0]:
def get_filenames_and_classes(csv_path):
    """
    Read a tab-separated score table and return its row names and grades.

    The first column of the file is used as the index; the next two columns
    are read as the primary and secondary grade of each row.

    Parameters
    ----------
    csv_path : str
        Path to the tab-separated file.

    Returns
    -------
    filenames : list
        The index entries, in file order.
    classes : numpy.ndarray of int, shape (n_rows, 2)
        Column 0 is the primary grade, column 1 the secondary grade.
        A file with a header but no rows yields an empty (0, 2) array
        instead of raising.
    """
    df = pd.read_csv(csv_path, sep='\t', index_col=0)
    filenames = list(df.index)
    # int() is applied per value, so anything that cannot be read as an
    # integer grade still fails loudly.
    grade_rows = [
        [int(primary_grade), int(sec_grade)]
        for primary_grade, sec_grade in zip(df.iloc[:, 0], df.iloc[:, 1])
    ]
    # The explicit reshape keeps the result two-dimensional for zero rows too.
    classes = np.array(grade_rows, dtype=int).reshape(len(grade_rows), 2)
    return filenames, classes

In [0]:
# Image geometry and batch settings.
# init_dim and bs are not used by the cells of this notebook — presumably
# the raw patch size and the batch size of the training code; confirm.
init_dim = 250
target_dim = 224
target_size = (target_dim, target_dim)
input_shape = (target_size[0], target_size[1], 3)
bs = 16

# Class names, in label order; they become the confusion-matrix tick labels.
# NOTE(review): both entries read 'Gleason 3', which looks like a copy/paste
# slip — confirm the intended name of each class.
class_labels = ['Gleason 3','Gleason 3']
n_class = len(class_labels)

# Dataset root. Set the GLEASON_DATA_DIR environment variable to point at your
# own copy; without it the original author's location is used.
prefix = os.environ.get('GLEASON_DATA_DIR',
                        '/home/fabianleon/Documentos/gleason_CNN-master/dataset')
patch_dir = os.path.join(prefix, 'train_validation_patches_750')

In [0]:
# Validation array: read its row names and (primary, secondary) grades.
tma = 'ZT76'
scores_file = '%s_gleason_scores.csv' % tma
csv_path = os.path.join(prefix, 'tma_info', scores_file)
val_filenames, val_classes = get_filenames_and_classes(csv_path)
n_val = len(val_filenames)
print('Total TMAs in validation set: %d' % n_val)

In [0]:
# Load the trained model.
# Change this to the location of the weights file on your machine.
model_weights = '/home/fabianleon/Documentos/gleason_CNN-master/model_weights/class1y2incep/model_01.h5'
# Names registered with Keras while the saved model is being loaded.
custom_objects = {
    'relu6': keras.layers.ReLU(6.),
    'DepthwiseConv2D': keras.layers.DepthwiseConv2D,
}
with CustomObjectScope(custom_objects):
    model = load_model(model_weights)

In [0]:
# Test-cohort patch folder.
# Use 'patho_1' or 'patho_2' to pick the pathologist whose patches are evaluated.
test_patch_dir = os.path.join(prefix, 'test_patches_750', 'patho_1')
# Keep only the directory entries whose name starts with the ZT80 array id.
test_filenames = list(filter(lambda entry: entry.startswith('ZT80'),
                             os.listdir(test_patch_dir)))
len(test_filenames)

In [0]:
# Patch-level annotations of the ZT80 test cohort (tab-separated, patch name as index).
patches_csv_path = os.path.join(prefix, 'tma_info', 'ZT80_patch_grades.csv')
df_patch = pd.read_csv(patches_csv_path, sep='\t', index_col=0)
true_grades = df_patch.values

# Column of df_patch holding the annotations: 0 for pathologist 1, 1 for pathologist 2.
pathologist_col = 0
# Only patches annotated with one of these grades are evaluated.
kept_grades = (0, 1)

# Collect the image path and annotated grade of every selected patch.
test_patch_labels = []
test_patch_names = []
for patch_name, grade in zip(df_patch.index, df_patch.iloc[:, pathologist_col]):
    if grade not in kept_grades:
        continue
    # Patch names look like '<spot>_patch_<...>'; images are stored per spot
    # with the grade encoded in the file name.
    spot_name = patch_name.split('_patch_')[0]
    full_name = os.path.join(test_patch_dir, spot_name, patch_name + '_class_%d.jpg' % grade)
    test_patch_labels.append(grade)
    test_patch_names.append(full_name)

# Predict one patch at a time; row i holds the model output for patch i.
y_pred_prob = np.zeros((len(test_patch_names), n_class))
for i, patch_path in enumerate(test_patch_names):
    # RGB is the loader's default colour mode, so no colour argument is needed.
    img = image.load_img(patch_path, target_size=target_size)
    X = image.img_to_array(img)
    # NOTE(review): the pixel array goes to the model exactly as loaded, with
    # no rescaling or preprocess_input step — confirm this matches how the
    # model was trained.
    y_pred_prob[i] = model.predict(X[np.newaxis, :, :, :], batch_size=1)[0]
y_pred = np.argmax(y_pred_prob, axis=1)
y_true = np.array(test_patch_labels)

In [0]:
# Map the annotated grades onto the model's class indices: grade 1 -> 0, grade 2 -> 1.
# The result is rebuilt from test_patch_labels on every run, so executing this
# cell more than once cannot remap labels that were already remapped.
# NOTE(review): the patch-selection loop keeps only grades 0 and 1, so with this
# mapping every kept label ends up as 0 and the grade-2 rule never fires —
# confirm which pair of grades is meant to be evaluated.
raw_labels = np.asarray(test_patch_labels)
y_true = raw_labels.copy()
y_true[raw_labels == 1] = 0
y_true[raw_labels == 2] = 1

In [0]:
# Confusion matrix for the test set: CNN predictions vs. the pathologist's annotations.
# Passing the label list pins the matrix to n_class x n_class, so its rows and
# columns always line up with class_labels even if a class never occurs.
cm = confusion_matrix(y_true, y_pred, labels=list(range(n_class)))
plot_confusion_matrix(cm, class_labels, figname=None, normalize=True, cmap=plt.cm.BuGn)
# Agreement between annotations and predictions, quadratically weighted.
kappa_p1 = cohen_kappa_score(y_true, y_pred, weights='quadratic')
print('\033[1m' + '\t\t\tk = ' + ('%.2f' % kappa_p1))