In [1]:
import numpy as np
import os
import copy
from scipy.ndimage import imread
from scipy.spatial.distance import cdist

In [2]:
# Parameters
nrun = 20
path_to_script_dir = os.path.abspath('')
path_to_all_runs = os.path.join(path_to_script_dir, 'all_runs')
fname_label = 'class_labels.txt' # this is where class labels are stored for each run

In [3]:
def LoadImgAsPoints(fn):
    #
    # Load image file and return coordinates of 'inked' pixels in the binary image 
    #
    # Output:
    # D : [n x 2] rows are coordinates
    I = imread(fn, flatten=True)
    I = np.array(I, dtype=bool)
    I = np.logical_not(I)
    (row, col) = I.nonzero()
    D = np.array([row,col])
    D = np.transpose(D)
    D = D.astype(float)
    n = D.shape[0]
    mean = np.mean(D,axis=0)
    for i in range(n):
        D[i,:] = D[i,:] - mean
    return D

def ModHausdorffDistance(itemA, itemB):
    # Modified Hausdorff Distance
    #
    # Input
    # itemA: [n x 2] coordinates of "inked" pixels
    # itemB: [n x 2] coordinates of "inked" pixels
    #
    D = cdist(itemA, itemB)
    mindist_A = D.min(axis=1)
    mindist_B = D.min(axis=0)
    mean_A = np.mean(mindist_A)
    mean_B = np.mean(mindist_B)
    return max(mean_A,mean_B)

def classification_run(folder, f_load, f_cost, ftype='cost'):
    assert((ftype=='cost') | (ftype=='score'))
    
    # get file names
    with open(os.path.join(path_to_script_dir, 'all_runs\\run01\\class_labels.txt')) as f:
        content = f.read().splitlines()
        pairs = [line.split() for line in content]
        test_files = [pair[0] for pair in pairs]
        train_files = [pair[1] for pair in pairs]
        answer_files = copy.copy(train_files)
        test_files.sort()
        train_files.sort()
        ntrain = len(train_files)
        ntest = len(test_files)
        
        # load the images (and if needed, extract features)
        train_items = [f_load(f) for f in train_files]
        test_items = [f_load(f) for f in test_files]
        
        # compute cost matrix
        costM = np.zeros((ntest,ntrain), float)
        for i in range(ntest):
            for c in range(ntrain):
                costM[i,c] = f_cost(test_items[i], train_items[c])
        if ftype == 'cost':
            YHAT = np.argmin(costM, axis=1)
        elif ftype == 'score':
            YHAT = np.argmax(costM, axis=1)
        else:
            assert false
            
        
        # compute the error rate
        correct = 0.0
        for i in range(ntest):
            if train_files[YHAT[i]] == answer_files[i]:
                correct = 1.0
        pcorrect = 100 * correct / ntest
        perror = 100 - pcorrect
        return perror


In [4]:
os.path.abspath('')

'C:\\Users\\Biswajit Roy\\Desktop\\future\\MachineLearningFlows'

In [5]:
os.path.join(path_to_script_dir, 'all_runs\\run01')

'C:\\Users\\Biswajit Roy\\Desktop\\future\\MachineLearningFlows\\all_runs\\run01'

In [6]:
with open(os.path.join(path_to_script_dir, 'all_runs\\run01\\class_labels.txt')) as f:
    print(f.read().splitlines())

['all_runs/run01/test/item01.png all_runs/run01/training/class08.png', 'all_runs/run01/test/item02.png all_runs/run01/training/class09.png', 'all_runs/run01/test/item03.png all_runs/run01/training/class02.png', 'all_runs/run01/test/item04.png all_runs/run01/training/class19.png', 'all_runs/run01/test/item05.png all_runs/run01/training/class10.png', 'all_runs/run01/test/item06.png all_runs/run01/training/class18.png', 'all_runs/run01/test/item07.png all_runs/run01/training/class13.png', 'all_runs/run01/test/item08.png all_runs/run01/training/class01.png', 'all_runs/run01/test/item09.png all_runs/run01/training/class04.png', 'all_runs/run01/test/item10.png all_runs/run01/training/class11.png', 'all_runs/run01/test/item11.png all_runs/run01/training/class17.png', 'all_runs/run01/test/item12.png all_runs/run01/training/class03.png', 'all_runs/run01/test/item13.png all_runs/run01/training/class20.png', 'all_runs/run01/test/item14.png all_runs/run01/training/class07.png', 'all_runs/run01/tes

In [7]:
if __name__ == "__main__":
    # 
    # Running this demo should lead to a result of 38.0% errors
    # A modified hausdroff distance for object matching
    #
    print("One-shot classification done with Modified Hausdorff Distance")
    perror = np.zeros(nrun)
    for r in range(1, nrun+1):
        rs = str(r)
        if len(rs) == 1:
            rs = '0' + rs
        perror[r-1] = classification_run('run'+rs, LoadImgAsPoints, ModHausdorffDistance, 'cost')
        print("run " + str(r) + " (error " + str(perror[r-1]) + "%)")
        total = np.mean(perror)
        print("Average error " + str(total) + "%")       

One-shot classification done with Modified Hausdorff Distance


`imread` is deprecated in SciPy 1.0.0.
Use ``matplotlib.pyplot.imread`` instead.
  import sys


run 1 (error 95.0%)
Average error 4.75%
run 2 (error 95.0%)
Average error 9.5%
run 3 (error 95.0%)
Average error 14.25%
run 4 (error 95.0%)
Average error 19.0%
run 5 (error 95.0%)
Average error 23.75%
run 6 (error 95.0%)
Average error 28.5%
run 7 (error 95.0%)
Average error 33.25%
run 8 (error 95.0%)
Average error 38.0%
run 9 (error 95.0%)
Average error 42.75%
run 10 (error 95.0%)
Average error 47.5%
run 11 (error 95.0%)
Average error 52.25%
run 12 (error 95.0%)
Average error 57.0%
run 13 (error 95.0%)
Average error 61.75%
run 14 (error 95.0%)
Average error 66.5%
run 15 (error 95.0%)
Average error 71.25%
run 16 (error 95.0%)
Average error 76.0%
run 17 (error 95.0%)
Average error 80.75%
run 18 (error 95.0%)
Average error 85.5%
run 19 (error 95.0%)
Average error 90.25%
run 20 (error 95.0%)
Average error 95.0%
