In [1]:
from PIL import Image
import numpy as np
from sklearn import metrics
import os
import math
from sklearn.utils import shuffle
from sklearn.externals import joblib

clf = joblib.load('NISTCharacterClassifier.pkl')

In [4]:
trainfolders = np.array(["by_class/" + foldername + "/hsf_0/" for foldername in os.listdir("by_class")])
traincharacters = np.array([chr(int(foldername,16)) for foldername in os.listdir("by_class")])
mintrainfoldersize = len(os.listdir(trainfolders[0]))
for j in range(1, len(trainfolders)):
    if len(os.listdir(trainfolders[j]))<mintrainfoldersize:
        mintrainfoldersize = len(os.listdir(trainfolders[j]))

In [5]:
expected = np.array([])
predicted = np.array([])
for (i,f) in enumerate(trainfolders):
    j=mintrainfoldersize%10
    imagelst = np.array([np.array(Image.open(f+file)) for file in os.listdir(f)[10*j:(10*j)+10]],dtype=object)
    targetlst = np.array([traincharacters[i]]*10)
    formattedimagelst = np.array([img.flatten().tolist() for img in imagelst],dtype=object)
    formattedimagelst, targetlst = shuffle(formattedimagelst, targetlst)
    predicted = np.concatenate((predicted,clf.predict(formattedimagelst)))
    expected = np.concatenate((expected,targetlst))
for j in range(mintrainfoldersize//10):
    for (i,f) in enumerate(trainfolders):
        imagelst = np.array([np.array(Image.open(f+file)) for file in os.listdir(f)[10*j:(10*j)+10]],dtype=object)
        targetlst = np.array([traincharacters[i]]*10)
        formattedimagelst = np.array([img.flatten().tolist() for img in imagelst],dtype=object)
        formattedimagelst, targetlst = shuffle(formattedimagelst, targetlst)
        predicted = np.concatenate((predicted,clf.predict(formattedimagelst)))
        expected = np.concatenate((expected,targetlst))

In [6]:
print(metrics.classification_report(expected,predicted))

             precision    recall  f1-score   support

          0       0.32      0.47      0.38       350
          1       0.27      0.63      0.38       350
          2       0.65      0.50      0.57       350
          3       0.62      0.59      0.60       350
          4       0.49      0.47      0.48       350
          5       0.51      0.54      0.53       350
          6       0.67      0.63      0.65       350
          7       0.59      0.54      0.56       350
          8       0.56      0.58      0.57       350
          9       0.52      0.55      0.54       350
          A       0.33      0.48      0.39       350
          B       0.39      0.36      0.38       350
          C       0.30      0.20      0.24       350
          D       0.40      0.24      0.30       350
          E       0.58      0.49      0.53       350
          F       0.37      0.13      0.19       350
          G       0.48      0.51      0.49       350
          H       0.28      0.27      0.27   

In [11]:
np.set_printoptions(threshold=np.nan)
print(metrics.confusion_matrix(expected,predicted))

[[163   0   0   0   0  12   3   0   0   0   0   0   3   6   0   0   6   0
    0   1   0   0   0   1  21   1  19   0   0   0   0   3   0   0   0   0
    0   0   9   0   0   0   1   0   0   0   0   0   1   0  90   1   0   6
    0   0   0   2   0   1   0   0]
 [  0 221   0   0   0   0   0   1   0   0   0   0   6   0   0   3   0   0
   16   0   0   2   0   0   0   1   0   0   0   0   0   1   0   2  12   0
    0   0   0   0   0   2   0   0  12   0   0  43   0   0   0   1   0  24
    0   3   0   0   0   0   0   0]
 [  8   1 175   0   0   1   3   3   4   3   5   0   0   1   0   0   4   7
    1   6   0   2   0   0   0   1  11   5   0   1   2   1   1   1   0  18
    6   0   1   9   0   0   0   0   1   1   1   0   1   0   2   0   0   0
    1   0  11   0   4   5   0  42]
 [  5   1   5 205   0   6   0   0   5   2   0  29   0   1   0   0   0   0
    0  11   0   0   0   0   6   0   0   0   5   1   0   0   0   0   0   2
    1   0   0   4   0   0   7   0   0   3   0   0   0   0   8   0   1   0
   41  