In [70]:
import os, shutil
import pickle
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing import image
import PIL
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st
import matplotlib
import otolreader as otolr
import statsmodels.stats.proportion as ssp

In [2]:
# Default font size applied to every matplotlib figure drawn after this cell.
font = dict(size=14)
matplotlib.rc('font', **font)

## Image classification results

In [62]:
# ---- Cross-validation layout ------------------------------------------------
n_fold = 10     # number of folds iterated over by the evaluation cells
nim_fold = 15   # consecutive positions of `cvorder` evaluated in each fold
# Mark names; a mark index returned by fcn_mark_im_ind selects one entry.
mark_list = ['3,5H10', '1,6H', 'none', '6,2H', '4n,2n,2H']

# ---- Directories ------------------------------------------------------------
basedir = '../../../OtolReader-Publication/Models'
imdir = '../../../Images/OtolithImages/'
pdir = os.path.join(basedir, 'CrossVal-Binary-5classes-30Images')
sampledir = os.path.join(basedir, 'CrossVal-ClassifierSampleTable')
retrained_dir = os.path.join(basedir, 'Retrained-Binary-Nets')

# ---- File-name stems --------------------------------------------------------
# The evaluation cells append the fold index plus an extension to these
# (samps_prefix -> '.p', fmod_name_base / classmod_name_base -> '.h5').
# NOTE(review): `pdir` and `model_prefix` are not referenced in the cells shown
# here - presumably used elsewhere; confirm before removing.
model_prefix = 'binarynet_'
samps_prefix = 'ClassifierNetworkForSelection_SmoothedSamples_'
classmod_name_base = os.path.join(basedir, 'CrossVal-Classifier-5classes-30Images', 'classnet_')
fmod_name_base = os.path.join(retrained_dir, 'binarynet_')

# ---- Cross-validation ordering ----------------------------------------------
# cvorder[k] is the image number handed to fcn_mark_im_ind for position k.
crossval_order_path = os.path.join(basedir, 'crossval_order.p')
with open(crossval_order_path, 'rb') as f:
    cvorder = np.array(pickle.load(f))

In [63]:
# Score each cross-validation image with the retrained binary network of its fold.
y = []            # true binary label: 0 for mark index 2 ('none' in mark_list), else 1
save_scores = []  # network predictions for one image, averaged over axis 0
for fold_ind in range(n_fold):
    print("Currently evaluating fold number {}".format(fold_ind))

    # Per-fold sample table and per-fold model.
    samples_path = os.path.join(sampledir, samps_prefix + str(fold_ind) + '.p')
    with open(samples_path, 'rb') as f:
        train_array = pickle.load(f)
    mod = keras.models.load_model(fmod_name_base + str(fold_ind) + '.h5', compile=False)

    # The fold owns nim_fold consecutive positions of the cross-validation order.
    fold_start = fold_ind * nim_fold
    for ind in range(fold_start, fold_start + nim_fold):
        samps = train_array[ind]
        mark_ind, im_ind = otolr.OtolithAnalysis.feature_functions.fcn_mark_im_ind(cvorder[ind], 30)
        y.append(0 if mark_ind == 2 else 1)
        # A trailing axis is added before prediction (expand_dims at axis=2).
        predictions = mod.predict(np.expand_dims(samps, axis=2))
        scores = np.mean(predictions, axis=0)
        save_scores.append(scores)

Currently evaluating fold number 0
Currently evaluating fold number 1
Currently evaluating fold number 2
Currently evaluating fold number 3
Currently evaluating fold number 4
Currently evaluating fold number 5
Currently evaluating fold number 6
Currently evaluating fold number 7
Currently evaluating fold number 8
Currently evaluating fold number 9


In [67]:
# Binary decision per image: first averaged output above the 0.2 cutoff -> 0, otherwise 1.
yhat = [0 if s[0] > 0.2 else 1 for s in save_scores]
n_agree = np.sum(np.array(yhat) == np.array(y))
acc = n_agree / len(yhat)
print('Binary accuracy at expected optimal cutoff: {:0.3f}'.format(acc))

Binary accuracy at expected optimal cutoff: 0.987


In [68]:
# Final cross-validation prediction: merge the binary decision (`yhat`) with the
# multi-class scores stored per fold.
#   * yhat[ind] == 0  -> the image keeps the default class 2 ('none' in mark_list)
#   * otherwise       -> the image takes the arg-max of its class scores,
#                        averaged over axis 0
# Built from `sampledir` (set in the configuration cell) rather than `basedir`,
# because `basedir` is rebound further down for the test set; the resulting
# path is the same on a top-to-bottom run.
scbase = os.path.join(sampledir, 'ClassifierNetworkForSelection_Scores_')

# Array length follows the fold layout instead of a literal 150, so it always
# matches the index range covered by the loops below.
n_images = n_fold * nim_fold
yhatclass = np.full(n_images, -1, dtype=int)  # class-network prediction, -1 = not filled
yhatfin = np.full(n_images, 2, dtype=int)     # final prediction, defaults to class 2
yfin = np.full(n_images, -1, dtype=int)       # true mark index, -1 = not filled
cvacc = np.zeros(n_fold)                      # per-fold accuracy

for fold_ind in range(n_fold):
    with open(scbase + str(fold_ind) + '.p', 'rb') as f:
        sc = pickle.load(f)
    for ind in range(nim_fold * fold_ind, (fold_ind + 1) * nim_fold):
        mark, im_ind = otolr.OtolithAnalysis.feature_functions.fcn_mark_im_ind(cvorder[ind], 30)
        sctemp = np.average(sc[ind], axis=0)
        yhatclass[ind] = np.argmax(sctemp)
        if yhat[ind] != 0:
            yhatfin[ind] = yhatclass[ind]
        yfin[ind] = mark
        if yhatfin[ind] == yfin[ind]:
            # Each correct image contributes 1/nim_fold to its fold's accuracy.
            cvacc[fold_ind] += 1 / nim_fold
print('Overall classification accuracy: {:0.3f}'.format(np.sum(yhatfin==yfin) / len(yfin)))
print("Cross val accuracy by fold: ", cvacc)

Overall classification accuracy: 0.953
Cross val accuracy by fold:  [0.93333333 1.         1.         1.         0.93333333 0.93333333
 0.93333333 0.86666667 1.         0.93333333]


In [69]:
# Confusion matrix: row = true mark index (yfin), column = final prediction (yhatfin).
conf = np.zeros((5, 5))
# ufunc.at is unbuffered, so every repeated (row, column) pair adds its own 1.
np.add.at(conf, (yfin, yhatfin), 1)
print(conf)

[[28.  1.  0.  0.  1.]
 [ 0. 29.  1.  0.  0.]
 [ 0.  0. 30.  0.  0.]
 [ 0.  0.  1. 29.  0.]
 [ 1.  2.  0.  0. 27.]]


In [36]:
# Test the image classification function on a single image: the first entry
# of the cross-validation order, using the fold-0 networks.
load_model = keras.models.load_model
nm_model = load_model(fmod_name_base + str(0) + '.h5', compile=False)
class_model = load_model(classmod_name_base + str(0) + '.h5', compile=False)

im_ind = cvorder[0]
print("Evaluating image #{:0.0f}".format(im_ind))

# Translate the image number into (mark index, image index within that mark).
mark_ind, classed_im_ind = otolr.OtolithAnalysis.feature_functions.fcn_mark_im_ind(im_ind, 30)
expected_mark = mark_list[mark_ind]
print("Expected mark: {}".format(expected_mark))

# Images are stored as <imdir>/<mark name>/<image index>.jpg
im_path = os.path.join(imdir, expected_mark, str(classed_im_ind) + '.jpg')
mark_hat = otolr.OtolithAnalysis.im_classifier.classify_image(im_path, class_model, nm_model, 0.2)
print("Estimated mark: {}".format(mark_hat))

Evaluating image #14
Expected mark: 3,5H10
Estimated mark: 3,5H10


### Full test set

In [72]:
# Point `basedir` at the 5-class test-set folder used by the cells below.
# NOTE: this rebinds the name set in the configuration cell; any earlier cell
# that builds a path from `basedir` will resolve against this folder if it is
# re-run after this point.
basedir = '../../../OtolReader-Publication/Models/N-class-test-set/5 classes/'

In [73]:
# Score every test-set sample with the retrained binary network.
y = []            # true binary label: 0 for mark index 2, else 1
yhat = []         # filled by the thresholding cell that follows
save_scores = []  # network predictions for one sample, averaged over axis 0

fmod_name = os.path.join(basedir, 'retrained_binary_net', 'binarynet_0.h5')
mod = keras.models.load_model(fmod_name, compile=False)
with open(os.path.join(basedir, 'samples.p'), 'rb') as f:
    test_array = pickle.load(f)

fcn_mark_im_ind = otolr.OtolithAnalysis.feature_functions.fcn_mark_im_ind
for ind in range(len(test_array)):
    # Here the position in the test array itself is the image number.
    mark_ind, im_ind = fcn_mark_im_ind(ind, 20)
    y.append(0 if mark_ind == 2 else 1)
    predictions = mod.predict(np.expand_dims(test_array[ind], axis=2))
    scores = np.mean(predictions, axis=0)
    save_scores.append(scores)

In [74]:
# Apply the 0.2 cutoff to the first averaged output: above -> 0, otherwise 1.
yhat = [0 if s[0] > 0.2 else 1 for s in save_scores]
matches = np.array(yhat) == np.array(y)
acc = np.sum(matches) / len(yhat)
print('Binary accuracy at expected optimal cutoff: {:0.3f}'.format(acc))

Binary accuracy at expected optimal cutoff: 0.960


In [80]:
# Final test-set prediction: a sample the binary stage labelled 0 keeps the
# default class 2; any other sample takes the arg-max of its class scores
# averaged over axis 0.
# Alternate scores file, left disabled:
#   os.path.join(basedir, 'TestSetSamples/2019_08_26_ClassifierNetworkForSelection_Scores_0.p')
scpath = os.path.join(basedir, 'scores.p')
with open(scpath, 'rb') as f:
    sc = pickle.load(f)

n_test = 100  # number of test samples evaluated in this cell
yhatclass = np.full(n_test, -1, dtype=int)  # class-network prediction, -1 = not filled
yhatfin = np.full(n_test, 2, dtype=int)     # final prediction, defaults to class 2
yfin = np.full(n_test, -1, dtype=int)       # true mark index, -1 = not filled

for ind in range(n_test):
    mark, im_ind = otolr.OtolithAnalysis.feature_functions.fcn_mark_im_ind(ind, 20)
    sctemp = np.average(sc[ind], axis=0)
    yhatclass[ind] = np.argmax(sctemp)
    if yhat[ind] != 0:
        yhatfin[ind] = yhatclass[ind]
    yfin[ind] = mark

n_correct = np.sum(yhatfin==yfin)
acc = n_correct / len(yfin)
print('Overall classification accuracy: {:0.3f}'.format(acc))
# method='beta' selects the Clopper-Pearson interval; the distances from the
# point estimate to each bound are reported.
confint = ssp.proportion_confint(n_correct, len(yfin), method='beta')
lower_gap = acc - confint[0]
upper_gap = confint[1] - acc
print("Confidence interval: -{:0.2f}, +{:0.2f}".format(lower_gap, upper_gap))

Overall classification accuracy: 0.930
Confidence interval: -0.07, +0.04
(0.8610802715441427, 0.9713947110925613)


In [58]:
# Test-set confusion matrix: row = true mark index, column = final prediction.
conf = np.zeros((5, 5))
# Unbuffered in-place add, so duplicate (row, column) pairs accumulate.
np.add.at(conf, (yfin, yhatfin), 1)
print(conf)

[[16.  0.  2.  2.  0.]
 [ 0. 19.  1.  0.  0.]
 [ 0.  1. 19.  0.  0.]
 [ 0.  0.  0. 19.  1.]
 [ 0.  0.  0.  0. 20.]]
