In [None]:
# This cell needs to run BEFORE any others! Ctrl-Enter this cell before trying Cell->Run All.
# If this cell runs together with the one below it (and hasn't already run), the kernel freezes while trying to open the Qt dialog.

%gui qt
import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from PyQt4 import QtCore, QtGui

In [None]:
# Ask the user which evaluation-output .mat file to analyse.
eoFile = QtGui.QFileDialog.getOpenFileName(None,
    "Select evaluation output file...", './', filter="MAT-files (*.mat)")

if eoFile:
    # struct_as_record=False makes MATLAB struct fields available as attributes
    # (later cells read e.g. matlabOutput['confusionmat'].c1).
    # No `global` statement is needed: a notebook cell already runs at module scope.
    matlabOutput = sio.loadmat(eoFile, squeeze_me=True, struct_as_record=False)
elif 'matlabOutput' not in globals():
    # The dialog was cancelled and nothing was loaded by an earlier run of this
    # cell: stop here with a clear message instead of letting every later cell
    # fail with a NameError. A previously loaded file is kept untouched.
    raise RuntimeError("No evaluation output file selected; "
                       "re-run this cell and choose a .mat file.")

In [None]:
def conf_mat_props(c) :
    """Per-class TP / TN / FP / FN counts from a square confusion matrix.

    Calculations explained at
    https://github.com/hsosik/ifcb-analysis/blob/master/classification/conf_mat_props.m

    Parameters
    ----------
    c : array-like, 2-D and square
        Confusion matrix of counts.

    Returns
    -------
    TP, TN, FP, FN : tuple of 1-D arrays (one entry per class)
        TP is the diagonal of ``c``; FP is each column sum minus TP;
        FN is each row sum minus TP; TN is the grand total minus TP, FN and FP.
    """
    c = np.asarray(c)
    TP = np.diagonal(c)
    FP = c.sum(axis=0) - TP
    FN = c.sum(axis=1) - TP
    # grand total via ndarray.sum rather than Python's builtin sum over an array
    TN = c.sum() - TP - FN - FP
    return TP, TN, FP, FN

In [None]:
%matplotlib notebook
oobError = matlabOutput['oobError_output']

plt.figure(figsize=(10,6))
plt.plot(np.arange(len(oobError)), oobError);

plt.xlabel('Number of Grown Trees')
plt.ylabel('Out-of-Bag Classification Error')
plt.ylim([0, 1]);

In [None]:
# Bar chart of the per-feature `delerr` values stored in the
# OOBPermutedVarDeltaError_output struct, one bar per entry of `ind`.
importance = matlabOutput['OOBPermutedVarDeltaError_output']
indices = importance.ind
values = importance.delerr

fig, ax = plt.subplots(figsize=(12,6))
bar_positions = np.arange(len(indices))
ax.bar(bar_positions, height=values, width=.6)

ax.set_xlabel('Feature ranked index')
ax.set_ylabel('Feature importance')

# hide meaningless xaxis labels: keep only two ticks and give them no text
ax.set_xticks((0,1))
ax.set_xticklabels('');

In [None]:
# Confusion matrix (c1) and its class labels (gord1) from the 'confusionmat' struct.
confusion = matlabOutput['confusionmat']
c1 = confusion.c1
gord1 = confusion.gord1
total = np.sum(c1, axis=1)  # row sums of the confusion matrix
TP, TN, FP, FN = conf_mat_props(c1)

# Pd = TP/(TP+FN) (recall), Pr = TP/(TP+FP) (precision).
# A class with an empty row or column gives 0/0 = NaN; silence that warning
# the same way the confusionmat2 cell does.
with np.errstate(invalid='ignore') :
    Pd = TP / (TP+FN)
    Pr = TP / (TP+FP)

In [None]:
# Grouped bars per class: row total, true positives and false positives.
fig, ax = plt.subplots(figsize=(12,6))
# each class gets a slot 2 x-units wide; three 0.5-wide bars fill the first 1.5
indices = np.arange(0, len(gord1)*2, 2)
width = 0.5

# align='edge' pins each bar's left edge to its x value. The offsets and the
# tick position below assume that layout; matplotlib >= 2.0 centres bars on x
# by default, which would shift the bars relative to the ticks.
bars1 = ax.bar(indices, height=total, width=width, color='r', align='edge')
bars2 = ax.bar(indices+width, height=TP, width=width, color='g', align='edge')
bars3 = ax.bar(indices+(width*2), height=FP, width=width, color='y', align='edge')

ax.legend((bars1[0], bars2[0], bars3[0]), ('total in set', 'true pos', 'false pos'))

# tick at the centre of each three-bar group
ax.set_xticks(indices+(width*1.5))
ax.set_xticklabels(gord1, rotation=90);
ax.set_title('score threshold = 0');

plt.tight_layout()

In [None]:
# Grouped bars per class: Pd and Pr at score threshold 0.
fig, ax = plt.subplots(figsize=(12,6))
# Recompute the x positions here instead of relying on `indices` left over from
# another cell; several cells rebind that name, so re-running out of order
# would otherwise use the wrong positions.
indices = np.arange(0, len(gord1)*2, 2)
width = 0.5

# align='edge' pins each bar's left edge to its x value (matplotlib >= 2.0
# centres bars by default, which would misplace them relative to the ticks).
bars1 = ax.bar(indices, height=Pd, width=width, color='r', align='edge')
bars2 = ax.bar(indices+width, height=Pr, width=width, color='g', align='edge')

ax.legend((bars1[0], bars2[0]), ('Pd', 'Pr'))
# two bars span [x, x + 2*width], so the group centre is x + width
ax.set_xticks(indices+width)
ax.set_xticklabels(gord1, rotation=90)
ax.set_title('score threshold = 0');

plt.tight_layout()

In [None]:
# Box plot of the winning out-of-bag score per class, with the per-class
# threshold from `maxthre` overlaid as a star.
classes = matlabOutput['classes']
bY = matlabOutput['bY']
maxthre = matlabOutput['maxthre']
# highest score in each row of Sfit
maxSfit = np.max(matlabOutput['oobPredict_output'].Sfit,axis=1)

# group the winning scores by their label in bY
msf = pd.Series(maxSfit, index=bY)
by_class = msf.groupby(by=msf.index)
# NOTE(review): groupby yields groups in sorted key order; this assumes `classes`
# is in that same order and that every class occurs in bY -- confirm.
toPlot = [scores.values for _, scores in by_class]

# boxplot draws its boxes at x = 1..N, so the stars and ticks must use 1..N too
positions = np.arange(1, len(classes) + 1)

fig,ax = plt.subplots(figsize=(12,8))
ax.boxplot(toPlot, labels=classes)
stars = ax.plot(positions, maxthre, '*', color='g');

ax.set_xticks(positions)
ax.set_xticklabels(classes, rotation=90);
ax.set_title('score threshold = 0')
ax.set_ylabel('Out-of-bag winning scores')
ax.legend([stars[0]], ['optimal threshold score'], numpoints=1)

plt.tight_layout()

In [None]:
# Heat map of the confusion matrix c1.
dim = len(c1)
# float copy of the matrix for pcolor
cplot = np.array(c1, dtype=float)

fig,ax = plt.subplots(figsize=(12,10))
colorPlot = ax.pcolor(cplot)

# put each tick in the middle of its cell
ticks = np.arange(0,len(classes))
ax.set_yticks(ticks+.5)
ax.set_yticklabels(classes)
ax.set_xticks(ticks+.5)
ax.set_xticklabels(classes, rotation=90)
ax.set_title('manual vs. classifier; score threshold = 0')
if (len(classes) > 50) :
    # margin is okay with 50 ticks but broken with 51
    # setting equal axes is better than not if >50
    ax.axis('equal')

# Colorbar ticks every 50 counts, derived from the plotted values themselves.
# The global `total` is rebound by the confusionmat2 cell, so using it here
# made this cell depend on which cell ran last.
fig.colorbar(colorPlot, ticks=np.arange(0, np.max(cplot) + 1, 50))

plt.tight_layout()

In [None]:
# Confusion matrix (c3) and class labels (gord3) from the 'confusionmat2' struct,
# plus the per-class ratios plotted in the following cells.
thresholded = matlabOutput['confusionmat2']
c3, gord3 = thresholded.c3, thresholded.gord
total = np.sum(c3, axis=1)
TP, TN, FP, FN = conf_mat_props(c3)

last_col = len(c3) - 1

# suppress some warnings about division by 0 and NaN
with np.errstate(invalid='ignore') :
    # share of each row that lands in the final column
    # (presumably the "unclassified" bin, given the 'Pmissed' legend -- confirm)
    Pm3 = c3[:, last_col] / total
    Pr3 = TP / (TP+FP)
    Pd3 = TP / (TP+FN)

In [None]:
# Grouped bars per class: Pd, Pr and Pmissed at the optimal score threshold.
fig, ax = plt.subplots(figsize=(12,6))
# each class gets a slot 2 x-units wide; three 0.5-wide bars fill the first 1.5
indices = np.arange(0, len(gord3)*2, 2)
width = 0.5

# align='edge' pins each bar's left edge to its x value. The offsets and the
# tick position below assume that layout; matplotlib >= 2.0 centres bars on x
# by default, which would shift the bars relative to the ticks.
bars1 = ax.bar(indices, height=Pd3, width=width, color='r', align='edge')
bars2 = ax.bar(indices+width, height=Pr3, width=width, color='g', align='edge')
bars3 = ax.bar(indices+(width*2), height=Pm3, width=width, color='y', align='edge')

ax.legend((bars1[0], bars2[0], bars3[0]), ('Pd', 'Pr', 'Pmissed'))

# tick at the centre of each three-bar group
ax.set_xticks(indices+(width*1.5))
ax.set_xticklabels(gord3, rotation=90);
ax.set_title('optimal score threshold');

plt.tight_layout()

In [None]:
# Heat map of the confusion matrix c3, labelled with classes2.
classes2 = matlabOutput['classes2']

dim = len(c3)
# float copy of the matrix for pcolor
cplot1 = np.array(c3, dtype=float)

fig,ax = plt.subplots(figsize=(12,10))
colorPlot = ax.pcolor(cplot1)

# put each tick in the middle of its cell
ticks = np.arange(0,len(classes2))+.5
ax.set_yticks(ticks)
ax.set_yticklabels(classes2)
ax.set_xticks(ticks)
ax.set_xticklabels(classes2, rotation=90)
ax.set_title('manual vs. classifier; optimal score threshold')
if (len(classes2) > 50) :
    # margin is okay with 50 ticks but broken with 51
    # setting equal axes is better than not if >50
    ax.axis('equal')

# Colorbar ticks every 50 counts, derived from the plotted values themselves
# rather than from the global row sums in `total`, so the cell does not depend
# on which other cell last assigned `total`.
fig.colorbar(colorPlot, ticks=np.arange(0, np.max(cplot1) + 1, 50))

plt.tight_layout()