In [None]:
!git clone https://@github.com/AGiannoutsos/Latent_vs_Original_Space_Image_Classification.git
%cd Latent_vs_Original_Space_Image_Classification/

# Data preprocess

In [6]:
import emd.search_emd as emd
# Reading training and test sets
datasetFile = "/content/Latent_vs_Original_Space_Image_Classification/data/train-images-idx3-ubyte"
dlabelsFile = "/content/Latent_vs_Original_Space_Image_Classification/data/train-labels-idx1-ubyte"
testsetFile = "/content/Latent_vs_Original_Space_Image_Classification/data/t10k-images-idx3-ubyte"
tlabelsFile = "/content/Latent_vs_Original_Space_Image_Classification/data/t10k-labels-idx1-ubyte"


def load_mnist(dataset, digits=np.arange(10), type='data', numOfElements=-1):
    intType = np.dtype( 'int32' ).newbyteorder( '>' )
    if not os.path.isfile(dataset):
        return None
    fname = os.path.join(".", dataset)
    if (type == 'data'):
        nMetaDataBytes = 4 * intType.itemsize
        images = np.fromfile(fname, dtype = 'ubyte')
        magicBytes, size, rows, cols = np.frombuffer(images[:nMetaDataBytes].tobytes(), intType)
        if numOfElements == -1:
            numOfElements = size #int(len(ind) * size/100.)
        images = images[nMetaDataBytes:].astype(dtype = 'float32').reshape([numOfElements, rows, cols, 1])
        return images
    elif (type == 'labels'):
        nMetaDataBytes = 2 * intType.itemsize
        labels = np.fromfile(fname, dtype = 'ubyte')[nMetaDataBytes:]
        return labels
    else:
        return None


train_X = emd.load_mnist(datasetFile, type='data')[0:]
train_Y = emd.load_mnist(dlabelsFile, type='labels')[0:]
test_X  = emd.load_mnist(testsetFile, type='data')[0:]
test_Y  = emd.load_mnist(tlabelsFile, type='labels')[0:]

# reshape labels
train_Y = train_Y.reshape((-1,1))
test_Y  = test_Y.reshape((-1,1))

input_shape = train_X.shape[1:]
num_of_classes = train_Y.shape

print(train_X.shape, train_Y.shape)

(60000, 28, 28, 1) (60000, 1)


# Manhattan Distance Knn

In [9]:
t = 100
q = 5

# preprocess for knn manhattan
x_train = train_X.reshape((60000, -1))[0:t]
x_test = test_X.reshape((10000, -1))[0:q]
y_train = train_Y[0:t]
y_test  = test_Y[0:q]

knn = emd.KNN(10, emd.manhattan_distances)
knn.fit(x_train, y_train)
prediction = knn.predict(x_test)
print("Time: ",knn.prediction_time)
print("Accuracy: ", emd.get_Accuracy(prediction, y_test))

Time:  0.0017328262329101562
Accuracy:  0.56


# Earths Movers Distance Knn

## Cluster size 14x14

In [14]:
# preprocess for EMD
dim = 14
clusters = emd.get_Clusters(train_X[0:t], [dim, dim])
test_clusters = emd.get_Clusters(test_X[0:q], [dim, dim])

distances, distances_array = emd.get_Clusters_distances(dim, [28,28])
# 2 times the weights for the linprog
num_of_weights = 2*len(clusters[0])
# variables are the of distances
num_of_variables = len(distances)
print(num_of_weights, num_of_variables)
# get A for the EMD coefficients
A = emd.get_A(num_of_weights, num_of_variables)

emd_knn = emd.KNN(10, emd.earths_movers_distances, distances, A)
emd_knn.fit(clusters, y_train)
prediction = emd_knn.predict(test_clusters)
print("Time: ",knn.prediction_time)
print("Accuracy: ", emd.get_Accuracy(prediction, y_test))

8 16
Time:  0.0017328262329101562
Accuracy:  0.12000000000000002


## Cluster size 7x7

In [15]:
# preprocess for EMD
dim = 7
clusters = emd.get_Clusters(train_X[0:t], [dim, dim])
test_clusters = emd.get_Clusters(test_X[0:q], [dim, dim])

distances, distances_array = emd.get_Clusters_distances(dim, [28,28])
# 2 times the weights for the linprog
num_of_weights = 2*len(clusters[0])
# variables are the of distances
num_of_variables = len(distances)
print(num_of_weights, num_of_variables)
# get A for the EMD coefficients
A = emd.get_A(num_of_weights, num_of_variables)

emd_knn = emd.KNN(10, emd.earths_movers_distances, distances, A)
emd_knn.fit(clusters, y_train)
prediction = emd_knn.predict(test_clusters)
print("Time: ",knn.prediction_time)
print("Accuracy: ", emd.get_Accuracy(prediction, y_test))

32 256
Time:  0.0017328262329101562
Accuracy:  0.1


## Cluster size 4x4

In [16]:
# preprocess for EMD
dim = 4
clusters = emd.get_Clusters(train_X[0:t], [dim, dim])
test_clusters = emd.get_Clusters(test_X[0:q], [dim, dim])

distances, distances_array = emd.get_Clusters_distances(dim, [28,28])
# 2 times the weights for the linprog
num_of_weights = 2*len(clusters[0])
# variables are the of distances
num_of_variables = len(distances)
print(num_of_weights, num_of_variables)
# get A for the EMD coefficients
A = emd.get_A(num_of_weights, num_of_variables)

emd_knn = emd.KNN(10, emd.earths_movers_distances, distances, A)
emd_knn.fit(clusters, y_train)
prediction = emd_knn.predict(test_clusters)
print("Time: ",knn.prediction_time)
print("Accuracy: ", emd.get_Accuracy(prediction, y_test))

98 2401
Time:  0.0017328262329101562
Accuracy:  0.24


# Results

MNIST proved to be very good for manhattan distance. As the images have the same orientation and the labels do not differ much between them it is very easy for this metric to draw conclusions about the images.

On the other hand earths mover's distance is a better technique that compares image distributions and can learn and better compare the different features of each image even if they have a different orientation.

However, this metric in MNIST did not seem to work well as it took a long time to calculate and was not very accurate. Accuracy begins to improve with a larger cluster but the computation time increases geometrically and it is much more difficult to calculate.