# Model Evaluation
TODO: refactor the evaluation steps below into reusable functions.

In [1]:
# Load the autotime extension; it is the source of the "time: ..." line printed after each cell.
%load_ext autotime

In [2]:
import pandas as pd
import numpy as np
import pickle

import keras
from keras.utils import np_utils
from keras.utils import to_categorical
from keras.models import load_model

from pathlib import Path
from matplotlib import pyplot as plt
%matplotlib inline

import utils

Using TensorFlow backend.


time: 6.22 s


In [3]:
import tensorflow as tf
from keras import backend as K
# Create a TensorFlow session with GPU-memory growth enabled (memory is
# claimed as needed rather than reserved up front) and register it as the
# session Keras uses.
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.Session(config=config)
K.set_session(sess)

time: 1.69 s


In [4]:
# Load the pickled 'valid' sample through the project helper; it is used as the
# test set for every metric below.
x_test, y_test = utils.read_mura_pickle(sample='valid')

INFO:utils.utils:loading data/MURA-v1.1/x_valid.pkl
INFO:utils.utils:loading data/MURA-v1.1/y_valid.pkl


time: 1.01 s


In [5]:
# Reshape the images to (n_images, size, size, 1), pass them through
# utils.normalize_pixels, and one-hot encode the labels.
# assumes images are square (shape[1] == shape[2]) — TODO confirm
# NOTE(review): x_test and y_test are overwritten with their processed
# versions, so re-running this cell would push already-processed arrays
# through normalize_pixels and to_categorical a second time.
size = x_test.shape[1]
x_test = x_test.reshape(x_test.shape[0], size, size, 1)
x_test = utils.normalize_pixels(x_test)
y_test = to_categorical(y_test)

time: 362 ms


In [6]:
# Load the saved Keras model inside a '/GPU:1' device scope.
# NOTE(review): the device index and the model path are hard-coded; this
# presumably requires a machine with at least two GPUs — confirm.
with tf.device('/GPU:1'):
    model = load_model('./trained_models/1_3_densenet201_mura_250.h5')

time: 44.5 s


In [7]:
# Run the model over every test image; each output row holds per-class scores
# (reduced to a class index with argmax in the next cell).
y_test_hat = model.predict(x_test)

time: 27.8 s


In [8]:
# Reduce each one-hot / per-class score row to the index of its largest entry.
true_label = y_test.argmax(axis=1)
pred_label = y_test_hat.argmax(axis=1)

time: 2.91 ms


In [9]:
from sklearn.metrics import confusion_matrix

class MURAMetrics():
    """Binary classification metrics derived from a 2x2 confusion matrix.

    The matrix always has shape (2, 2) with rows indexed by the true class
    and columns by the predicted class, in the fixed order 0 then 1. Class 1
    is treated as the positive class for precision and recall.

    Parameters
    ----------
    true_label : 1-D array-like
        Ground-truth labels; every value must equal 0 or 1 (ints, floats
        such as 0.0/1.0, or booleans).
    pred_label : 1-D array-like
        Predicted labels, same length and encoding as ``true_label``.

    Attributes
    ----------
    y, yhat : the inputs exactly as passed in.
    N : int
        Number of samples.
    cm : numpy.ndarray of shape (2, 2)
        ``cm[i, j]`` counts samples whose true class is ``i`` and whose
        predicted class is ``j``.

    Raises
    ------
    AssertionError
        If the two inputs differ in length.
    ValueError
        If an input is not 1-dimensional or holds a value other than 0 or 1.
    """

    def __init__(self, true_label, pred_label):
        y_arr = self._as_1d(true_label, 'true_label')
        yhat_arr = self._as_1d(pred_label, 'pred_label')
        assert y_arr.shape[0] == yhat_arr.shape[0], (
            'true_label and pred_label must have the same length!')
        self.y = true_label
        self.yhat = pred_label
        self.N = y_arr.shape[0]
        # Encode each (true, pred) pair as 2 * true + pred, i.e. a value in
        # 0..3, and count the occurrences. Fixing the layout this way keeps
        # the matrix 2x2 even when only one class occurs in the data, so the
        # cm[1, 1] style lookups below can never go out of bounds.
        pair_codes = (2 * self._as_binary(y_arr, 'true_label')
                      + self._as_binary(yhat_arr, 'pred_label'))
        self.cm = np.bincount(pair_codes, minlength=4).reshape(2, 2)

    @staticmethod
    def _as_1d(labels, name):
        """Convert ``labels`` to a NumPy array and require it to be 1-D."""
        arr = np.asarray(labels)
        if arr.ndim != 1:
            raise ValueError(
                '{} must be 1-dimensional, got shape {}'.format(name, arr.shape))
        return arr

    @staticmethod
    def _as_binary(arr, name):
        """Return ``arr`` as integer indices after checking it is 0/1 only."""
        if not np.isin(arr, (0, 1)).all():
            raise ValueError(
                '{} must contain only the labels 0 and 1'.format(name))
        return arr.astype(np.intp)

    @staticmethod
    def _ratio(numerator, denominator):
        """Divide, yielding NaN (without a warning) when the denominator is 0."""
        if denominator == 0:
            return float('nan')
        return numerator / denominator

    def accuracy(self):
        """Return the fraction of samples on the matrix diagonal (NaN if N == 0)."""
        return self._ratio(self.cm[0, 0] + self.cm[1, 1], self.N)

    def kappa(self):
        """Return Cohen's kappa, ``(p_o - p_e) / (1 - p_e)``.

        ``p_o`` is the observed agreement (the accuracy) and ``p_e`` the
        agreement expected by chance from the row and column totals. The
        result is NaN when ``p_e`` equals 1 (all samples fall in one class
        for both inputs) or when there are no samples.
        """
        p_observed = self.accuracy()
        true_totals = self.cm.sum(axis=1)
        pred_totals = self.cm.sum(axis=0)
        p_expected = self._ratio((true_totals * pred_totals).sum(), self.N ** 2)
        return self._ratio(p_observed - p_expected, 1 - p_expected)

    def precision_and_recall(self):
        """Return ``{'precision': ..., 'recall': ...}`` for class 1.

        Precision is NaN when nothing was predicted as class 1; recall is
        NaN when no sample truly belongs to class 1.
        """
        true_positives = self.cm[1, 1]
        return {
            'precision': self._ratio(true_positives, self.cm[:, 1].sum()),
            'recall': self._ratio(true_positives, self.cm[1, :].sum())
        }

time: 11.5 ms


In [24]:
# Image-level metrics: one true/predicted label pair per test image.
metrics = MURAMetrics(true_label, pred_label)

time: 13.1 ms


In [25]:
# Image-level accuracy.
metrics.accuracy()

0.7269314982796372

time: 5.12 ms


In [26]:
# Image-level Cohen's kappa.
metrics.kappa()

0.4419623942875436

time: 4.89 ms


In [27]:
# Image-level precision and recall, with class 1 as the positive class.
metrics.precision_and_recall()

{'precision': 0.9090909090909091, 'recall': 0.477124183006536}

time: 4.98 ms


## Compute metrics by patient and study

In [15]:
# Load the pickled table that maps each validation image to its body part,
# patient and study.
# NOTE(review): pickle.load can execute arbitrary code; only open this file
# if it comes from a trusted source.
with Path('./data/MURA-v1.1/valid_groups.pkl').open('rb') as pkl_file:
    valid_groups = pickle.load(pkl_file)

time: 5.36 ms


In [19]:
# Attach the image-level predictions as a new column (mutates valid_groups).
# NOTE(review): assumes the rows of valid_groups are in the same order as the
# images in x_test — verify against how the pickles were written.
valid_groups['pred_label'] = pred_label

time: 2.6 ms


In [20]:
# Inspect the available columns, including the pred_label column just added.
valid_groups.columns

Index([             0,    'body_part',   'patient_id',     'study_id',
       'target_label',     'image_id',   'pred_label'],
      dtype='object')

time: 4.1 ms


In [37]:
# Study-level labels, variant 1: average the image-level labels within each
# (body_part, patient_id, study_id) group and round to the nearest integer.
# NOTE(review): a group whose mean is exactly 0.5 rounds to 0.0 (round-half-to-
# even), so ties count as class 0 — confirm that is intended.
valid_grouped = (
    valid_groups
    .groupby(['body_part', 'patient_id', 'study_id'])[['target_label', 'pred_label']]
    .mean()
    .round(0)
)

time: 10.6 ms


In [38]:
# Study-level metrics built from the grouped (mean, then rounded) labels above.
grouped_metrics = MURAMetrics(valid_grouped['target_label'], valid_grouped['pred_label'])

time: 6.31 ms


In [39]:
# Study-level accuracy (mean-and-round aggregation).
grouped_metrics.accuracy()

0.7289407839866555

time: 2.86 ms


In [40]:
# Study-level Cohen's kappa (mean-and-round aggregation).
grouped_metrics.kappa()

0.42371199427372247

time: 4.28 ms


In [41]:
# Study-level precision and recall (mean-and-round aggregation).
grouped_metrics.precision_and_recall()

{'precision': 0.9209486166007905, 'recall': 0.43308550185873607}

time: 4.01 ms


In [22]:
# Study-level labels, variant 2: take the maximum image-level label within each
# (body_part, patient_id, study_id) group, so one class-1 image marks the whole
# study as class 1.
# NOTE(review): this rebinds valid_grouped, replacing the mean-based frame.
valid_grouped = (
    valid_groups
    .groupby(['body_part', 'patient_id', 'study_id'])[['target_label', 'pred_label']]
    .max()
)

time: 9.55 ms


In [28]:
# Study-level metrics built from the grouped (max) labels above.
grouped_metrics = MURAMetrics(valid_grouped['target_label'], valid_grouped['pred_label'])

time: 6.1 ms


In [29]:
# Study-level accuracy (max aggregation).
grouped_metrics.accuracy()

0.7723102585487907

time: 4.21 ms


In [30]:
# Study-level Cohen's kappa (max aggregation).
grouped_metrics.kappa()

0.5255834783906823

time: 4.31 ms


In [31]:
# Study-level precision and recall (max aggregation).
grouped_metrics.precision_and_recall()

{'precision': 0.863013698630137, 'recall': 0.5855018587360595}

time: 4.11 ms
