In [None]:
# Location of the *_evaluation_output.mat file to visualise: either a full path,
# or just the file name when the .mat file is in the same directory as this file.
# You MUST separate directories with / and NOT \ -- a backslash starts an escape
# sequence inside a regular Python string literal and can silently corrupt the path.
eval_output_file = 'C:/Users/Louis/Documents/GitHub/resources (not a repo)/MVCO_trees_25Jun2012_evaluation_output.mat'

%matplotlib notebook
import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt
# Load the .mat file once; every later cell looks its variables up by name in matlabOutput.
matlabOutput = sio.loadmat(eval_output_file)

In [None]:
def conf_mat_props(c):
    """Derive per-class TP, TN, FP and FN counts from a confusion matrix.

    Calculations explained at
    https://github.com/hsosik/ifcb-analysis/blob/master/classification/conf_mat_props.m

    Parameters
    ----------
    c : array-like, 2-D
        Confusion matrix. The diagonal holds the correctly classified counts;
        column sums minus the diagonal give the false positives and row sums
        minus the diagonal give the false negatives.

    Returns
    -------
    TP, TN, FP, FN : numpy.ndarray
        One value per class, in the order of the matrix diagonal.
    """
    c = np.asarray(c)
    TP = np.diagonal(c)
    FP = c.sum(axis=0) - TP
    FN = c.sum(axis=1) - TP
    # Whatever is left of the grand total once TP, FN and FP are removed.
    TN = c.sum() - TP - FN - FP
    return TP, TN, FP, FN

In [None]:
# Out-of-bag classification error stored by the MATLAB evaluation, one entry per ensemble size.
oobError = matlabOutput['oobError_output']

plt.figure(figsize=(10,6))
# Tree counts start at 1, not 0: the x axis is the number of grown trees.
# NOTE(review): assumes the first entry is the error after one grown tree -- confirm
# against the MATLAB script that wrote oobError_output.
plt.plot(np.arange(1, len(oobError) + 1), oobError);

plt.xlabel('Number of Grown Trees')
plt.ylabel('Out-of-Bag Classification Error')
plt.ylim([0, 1]);

In [None]:
# Feature-importance results saved by MATLAB: 'delerr' holds the bar heights,
# 'ind' is only used here for the number of bars to draw.
values = matlabOutput['OOBPermutedVarDeltaError_output']['delerr'][0, 0][0]
indices = matlabOutput['OOBPermutedVarDeltaError_output']['ind'][0, 0][0]

plt.figure(figsize=(12,6))
plt.bar(np.arange(len(indices)), values, .6)

plt.xlabel('Feature ranked index')
plt.ylabel('Feature importance')

# the x positions carry no meaning of their own, so blank out the tick labels
plt.xticks([0, 1], '');

In [None]:
# Confusion matrix (c1) and its accompanying class-name array (gord1) for score threshold 0
gord1 = matlabOutput['confusionmat'][0][0][1]
c1 = matlabOutput['confusionmat'][0][0][0]
TP, TN, FP, FN = conf_mat_props(c1)
# row sums of the confusion matrix, plotted later as 'total in set'
total = c1.sum(axis=1)

# Pr: true positives over (true + false positives)
Pr = TP / (TP+FP)
# Pd: true positives over (true positives + false negatives)
Pd = TP / (TP+FN)

In [None]:
# Three bars per class: row total, true positives and false positives
width = 0.5
indices = 2 * np.arange(len(gord1))  # one group every 2 x-units
fig, ax = plt.subplots(figsize=(12,6))

bars1 = ax.bar(indices, total, width, color='r')
bars2 = ax.bar(indices + width, TP, width, color='g')
bars3 = ax.bar(indices + 2*width, FP, width, color='y')

ax.legend(
    (bars1[0], bars2[0], bars3[0]),
    ('total in set', 'true pos', 'false pos'),
)

# scipy wraps every class name in nested arrays; unwrap them into a flat list of labels
formatted_gord1 = [arr1[0][0] for arr1 in gord1]

ax.set_title('score threshold = 0')
ax.set_xticks(indices + 1.5*width)
ax.set_xticklabels(formatted_gord1, rotation=90);

plt.tight_layout()

In [None]:
# Pd and Pr side by side for every class; reuses indices and formatted_gord1
# from the previous bar-chart cell
width = 0.5
fig, ax = plt.subplots(figsize=(12,6))

bars1 = ax.bar(indices, Pd, width, color='r')
bars2 = ax.bar(indices + width, Pr, width, color='g')

ax.set_title('score threshold = 0')
ax.set_xticks(indices + 1.5*width)
ax.set_xticklabels(formatted_gord1, rotation=90)
ax.legend((bars1[0], bars2[0]), ('Pd', 'Pr'));

plt.tight_layout()

In [None]:
# Box plot of the winning out-of-bag score of every sample, grouped by the label in bY,
# with the per-class threshold from 'maxthre' drawn on top of each box.
Sfit = matlabOutput['oobPredict_output']['Sfit']
classes = matlabOutput['classes']
bY = matlabOutput['bY']
maxthre = matlabOutput['maxthre'][0]

# format classes properly, they're weirdly nested after reading from .mat file
formatted_classes = [key[0] for key in classes[0]]

# winning score of each sample = largest value in its row of the score matrix
# NOTE(review): assumes Sfit[0][0] is a 2-D array with one row per sample -- confirm
maxSfit = list(np.max(Sfit[0][0], axis=1))

# unfortunately, boxplot in this library doesn't work like boxplot in MATLAB
# I can't just pass bY as a grouping variable, so collect the scores per label by hand
groupedBoxEntries = {}
for boxEntry, score in zip(bY, maxSfit):
    groupedBoxEntries.setdefault(boxEntry[0][0], []).append(score)

# one list of scores per class, in class order; a class with no samples gets an
# empty box instead of aborting the whole cell with a KeyError
toPlot = [groupedBoxEntries.get(key, []) for key in formatted_classes]

# boxes are drawn at x = 1..N, so the threshold markers and the ticks must use
# exactly the same positions (one per class) to line up with their box
positions = np.arange(1, len(formatted_classes) + 1)

fig,ax = plt.subplots(figsize=(12,8))
ax.boxplot(toPlot, positions=positions)
stars = ax.plot(positions, maxthre, '*', color='g');

ax.set_xticks(positions)
ax.set_xticklabels(formatted_classes, rotation=90);
ax.set_title('score threshold = 0')
ax.set_ylabel('Out-of-bag winning scores')
ax.legend([stars[0]], ['optimal threshold score'], numpoints=1)

plt.tight_layout()

In [None]:
# Heat map of the threshold-0 confusion matrix (row index on the y axis, column index on the x axis)
# float copy of the confusion matrix for pcolor
cplot = np.array(c1, dtype=float)

fig,ax = plt.subplots(figsize=(12,10))
colorPlot = ax.pcolor(cplot)

# put each label in the middle of its cell
ticks = np.arange(0,len(formatted_classes))
ax.set_yticks(ticks+.5)
ax.set_yticklabels(formatted_classes)
ax.set_xticks(ticks+.5)
ax.set_xticklabels(formatted_classes, rotation=90)
ax.set_title('manual vs. classifier; score threshold = 0')
if (len(formatted_classes) > 50) :
    # margin is okay with 50 ticks but broken with 51
    # setting equal axes is better than not if >50
    ax.axis('equal')

# colorbar ticks every 50 counts, derived from the matrix actually plotted rather than
# from the global `total`, which another cell reassigns for a different matrix
fig.colorbar(colorPlot, ticks=np.arange(0, np.max(cplot) + 1, 50))

plt.tight_layout()

In [None]:
# Confusion matrix (c3) and its accompanying class-name array (gord3) from 'confusionmat2'
gord3 = matlabOutput['confusionmat2'][0][0][1]
c3 = matlabOutput['confusionmat2'][0][0][0]
TP, TN, FP, FN = conf_mat_props(c3)
# row sums of the confusion matrix
total = c3.sum(axis=1)

# 0/0 yields NaN for some classes; silence the "invalid value" warning it triggers
with np.errstate(invalid='ignore'):
    # share of each row that landed in the column at index len(c3)-1
    # (presumably the "unclassified" column -- TODO confirm)
    Pm3 = c3[:, len(c3) - 1] / total
    Pr3 = TP / (TP + FP)
    Pd3 = TP / (TP + FN)

In [None]:
# Three bars per class at the optimal score threshold: Pd, Pr and Pmissed
width = 0.5
indices = 2 * np.arange(len(gord3))  # one group every 2 x-units
fig, ax = plt.subplots(figsize=(12,6))

bars1 = ax.bar(indices, Pd3, width, color='r')
bars2 = ax.bar(indices + width, Pr3, width, color='g')
bars3 = ax.bar(indices + 2*width, Pm3, width, color='y')

ax.legend(
    (bars1[0], bars2[0], bars3[0]),
    ('Pd', 'Pr', 'Pmissed'),
)

# unwrap the nested arrays scipy produces for each class name
formatted_gord3 = [arr1[0][0] for arr1 in gord3]

ax.set_title('optimal score threshold')
ax.set_xticks(indices + 1.5*width)
ax.set_xticklabels(formatted_gord3, rotation=90);

plt.tight_layout()

In [None]:
# Heat map of the optimal-threshold confusion matrix (row index on the y axis, column index on the x axis)
classes2 = matlabOutput['classes2']

# unwrap the nested arrays scipy produces for each class name
formatted_classes2 = [key[0][0] for key in classes2]

# float copy of the confusion matrix for pcolor
cplot1 = np.array(c3, dtype=float)

fig,ax = plt.subplots(figsize=(12,10))
colorPlot = ax.pcolor(cplot1)

# put each label in the middle of its cell
ticks = np.arange(0,len(formatted_classes2))
ax.set_yticks(ticks+.5)
ax.set_yticklabels(formatted_classes2)
ax.set_xticks(ticks+.5)
ax.set_xticklabels(formatted_classes2, rotation=90)
ax.set_title('manual vs. classifier; optimal score threshold')
if (len(formatted_classes2) > 50) :
    # margin is okay with 50 ticks but broken with 51
    # setting equal axes is better than not if >50
    ax.axis('equal')

# colorbar ticks every 50 counts, derived from the matrix actually plotted rather than
# from the global `total`, so the cell does not depend on which cell last assigned it
fig.colorbar(colorPlot, ticks=np.arange(0, np.max(cplot1) + 1, 50))

plt.tight_layout()