In [59]:
from numpy.linalg import norm
import numpy as np
from scipy import sparse
from matplotlib import pyplot as plt
from scipy import optimize
from scipy.sparse import csr_matrix
from scipy.optimize import minimize
from sklearn.datasets import load_iris
from sklearn.datasets import load_svmlight_file
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.utils.validation import check_is_fitted, check_array, check_X_y, check_random_state
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import ParameterGrid,GridSearchCV,StratifiedKFold
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.decomposition import PCA

from LMNN import LargeMarginNearestNeighbor
from LMNN_SS import SemiSupervisedLargeMarginNearestNeighbor

from matplotlib.ticker import FuncFormatter
import matplotlib.pyplot as plt


from skimage import data, color
from skimage.transform import rescale, resize, downscale_local_mean

from pylmnn import lmnn

import pandas as pd

import cv2

import time

In [51]:
###Benchmark Grid Search Hyperparameters K, omega1,2, 3 #############
lmnn_itr_list=[2,5,10,50,100]
ssc_itr_list=[2, 5, 10, 50, 100]

lmnn_param_grid = {'max_iter': lmnn_itr_list}
ssc_param_grid = {'max_iter': ssc_itr_list}

In [78]:
def cross_validate_test(clf, X, y, cv=5):
    print ("###### Cross Validate Data Shape: ", X.shape)
    skf = StratifiedKFold(n_splits=cv) 
    scores_list=[]
    for train_index, test_index in skf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        clf.fit(X_train,y_train)
        scores_list.append(clf.score(X_test,y_test))
    print ("##### Average Performance: ", np.mean(scores_list))
    return scores_list

### Test 1 : Find the Best max_itr for USPS (Labeled: 200, Unlabeled: 4000, Test: 2000)
***
*  **LMNN **

In [96]:
def get_USPS_training_data():
    data = load_svmlight_file('ml_data/usps/usps')
    return data[0], data[1]

def get_USPS_testing_data():
    data = load_svmlight_file('ml_data/usps/usps.t')
    return data[0], data[1]

X_train, y_train = get_USPS_training_data()
X_test, y_test = get_USPS_testing_data()

X_train=np.asarray(X_train.todense())
y_train=np.asarray(y_train)

X_test=np.asarray(X_test.todense())
y_test=np.asarray(y_test)

X_all=np.concatenate((X_train, X_test), axis=0)
y_all=np.concatenate((y_train, y_test), axis=0)
my_model = PCA(n_components=0.95, svd_solver='full')
X_all=my_model.fit_transform(X_all)


In [97]:
X_train, X_test, y_train, y_test = train_test_split(X_all, y_all, test_size=2000, stratify=y_all, random_state=42)
X_train_labelled, X_train_rest, y_train_labelled, y_train_rest = train_test_split(X_train, y_train, train_size=200, stratify=y_train, random_state=42)

sample_inds=np.random.randint(X_train_rest.shape[0], size=4000)
X_train_unlabelled=X_train_rest[sample_inds, :]

In [99]:
##### LMNN TUNNING USPS ########  (Long execution!!)
lmnnclf = GridSearchCV(LargeMarginNearestNeighbor(), lmnn_param_grid, cv=5, n_jobs=20, verbose=3)
lmnnclf.fit(X_train_labelled, y_train_labelled)

print("LMNN Best parameters set found on development set:")
print()
print(lmnnclf.best_params_)
print()
print("Grid scores on development set:")
print()
means = lmnnclf.cv_results_['mean_test_score']
stds = lmnnclf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, lmnnclf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
          % (mean, std * 2, params))
print()

print("Detailed classification report:")
print()
print("The model is trained on the full development set.")
print("The scores are computed on the full evaluation set.")
print()
y_true, y_pred = y_test, lmnnclf.predict(X_test)
print(classification_report(y_true, y_pred))
print()

Fitting 5 folds for each of 5 candidates, totalling 25 fits
[CV] max_iter=2 ......................................................
[CV] max_iter=2 ......................................................
[CV] max_iter=2 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=2 ......................................................
[CV] max_iter=2 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=10 .....................................................
[CV] max_iter=10 .....................................................
[CV] max_iter=10 .....................................................
[CV] max_iter=10 

[Parallel(n_jobs=20)]: Done   4 out of  25 | elapsed:    7.8s remaining:   40.9s


[CV] ................. max_iter=2, score=0.810810810811, total=   8.0s
[CV] max_iter=100 ....................................................
[CV] ................. max_iter=5, score=0.888888888889, total=   9.2s
[CV] ................. max_iter=5, score=0.790697674419, total=   9.8s
[CV] ................. max_iter=5, score=0.837209302326, total=  10.4s
[CV] ................. max_iter=5, score=0.780487804878, total=  11.2s
[CV] ................. max_iter=5, score=0.783783783784, total=  14.6s
[CV] ................ max_iter=10, score=0.767441860465, total=  17.9s
[CV] ................ max_iter=10, score=0.860465116279, total=  19.1s
[CV] ................ max_iter=10, score=0.756097560976, total=  22.7s


[Parallel(n_jobs=20)]: Done  13 out of  25 | elapsed:   22.8s remaining:   21.1s


[CV] ................ max_iter=10, score=0.702702702703, total=  23.6s
[CV] ................ max_iter=10, score=0.833333333333, total=  25.2s
[CV] ................ max_iter=50, score=0.837209302326, total= 2.1min
[CV] ............... max_iter=100, score=0.837209302326, total= 2.1min
[CV] ................. max_iter=50, score=0.72972972973, total= 2.3min
[CV] ................ max_iter=100, score=0.72972972973, total= 2.3min
[CV] ............... max_iter=100, score=0.767441860465, total= 2.4min
[CV] ................ max_iter=50, score=0.767441860465, total= 2.7min
[CV] ................ max_iter=50, score=0.916666666667, total= 2.9min


[Parallel(n_jobs=20)]: Done  22 out of  25 | elapsed:  2.9min remaining:   24.1s


[CV] ............... max_iter=100, score=0.916666666667, total= 2.8min
[CV] ................ max_iter=100, score=0.80487804878, total= 3.1min
[CV] ................. max_iter=50, score=0.80487804878, total= 3.2min


[Parallel(n_jobs=20)]: Done  25 out of  25 | elapsed:  3.2min finished


LMNN Best parameters set found on development set:
()
{'max_iter': 2}
()
Grid scores on development set:
()
0.815 (+/-0.059) for {'max_iter': 2}
0.815 (+/-0.081) for {'max_iter': 5}
0.785 (+/-0.112) for {'max_iter': 10}
0.810 (+/-0.123) for {'max_iter': 50}
0.810 (+/-0.123) for {'max_iter': 100}
()
Detailed classification report:
()
The model is trained on the full development set.
The scores are computed on the full evaluation set.
()
             precision    recall  f1-score   support

        1.0       0.90      0.96      0.93       334
        2.0       0.94      0.99      0.97       273
        3.0       0.86      0.77      0.81       200
        4.0       0.73      0.90      0.81       177
        5.0       0.83      0.81      0.82       183
        6.0       0.84      0.65      0.73       154
        7.0       0.91      0.83      0.87       180
        8.0       0.88      0.91      0.89       170
        9.0       0.90      0.75      0.82       152
       10.0       0.81      0

*  **SSC **

In [100]:
##### SSC LMNN TUNNING USPS ######## (Long execution!!)
sscclf = GridSearchCV(SemiSupervisedLargeMarginNearestNeighbor(X_unlabeled=X_train_unlabelled), ssc_param_grid, cv=5, n_jobs=20, verbose=3)
sscclf.fit(X_train_labelled, y_train_labelled)

print("SSC Best parameters set found on development set:")
print()
print(sscclf.best_params_)
print()
print("Grid scores on development set:")
print()
means = sscclf.cv_results_['mean_test_score']
stds = sscclf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, sscclf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
          % (mean, std * 2, params))
print()

print("Detailed classification report:")
print()
print("The model is trained on the full development set.")
print("The scores are computed on the full evaluation set.")
print()
y_true, y_pred = y_test, sscclf.predict(X_test)
print(classification_report(y_true, y_pred))
print()

Fitting 5 folds for each of 5 candidates, totalling 25 fits
[CV] max_iter=2 ......................................................
[CV] max_iter=2 ......................................................
[CV] max_iter=2 ......................................................
[CV] max_iter=2 ......................................................
[CV] max_iter=2 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=10 .....................................................
[CV] max_iter=10 .....................................................
[CV] max_iter=10 .....................................................
[CV] max_iter=10 

[Parallel(n_jobs=20)]: Done   4 out of  25 | elapsed:   12.7s remaining:  1.1min


[CV] ................. max_iter=2, score=0.837209302326, total=  14.0s
[CV] max_iter=100 ....................................................
[CV] ................. max_iter=5, score=0.906976744186, total=  16.0s
[CV] ................. max_iter=5, score=0.756756756757, total=  17.8s
[CV] .................. max_iter=5, score=0.80487804878, total=  21.1s
[CV] ................. max_iter=5, score=0.833333333333, total=  21.6s
[CV] ................. max_iter=5, score=0.744186046512, total=  23.3s
[CV] ................ max_iter=10, score=0.860465116279, total=  29.8s
[CV] ................ max_iter=10, score=0.916666666667, total=  34.2s
[CV] ................ max_iter=10, score=0.756756756757, total=  37.6s


[Parallel(n_jobs=20)]: Done  13 out of  25 | elapsed:   37.7s remaining:   34.8s


[CV] ................ max_iter=10, score=0.744186046512, total=  40.5s
[CV] ................. max_iter=10, score=0.80487804878, total=  40.6s
[CV] ............... max_iter=100, score=0.780487804878, total= 2.8min
[CV] ................ max_iter=50, score=0.780487804878, total= 3.2min
[CV] ................ max_iter=50, score=0.837209302326, total= 3.8min
[CV] ............... max_iter=100, score=0.837209302326, total= 3.9min
[CV] ................ max_iter=50, score=0.888888888889, total= 5.0min
[CV] ............... max_iter=100, score=0.888888888889, total= 5.2min
[CV] ................ max_iter=50, score=0.744186046512, total= 6.5min


[Parallel(n_jobs=20)]: Done  22 out of  25 | elapsed:  6.5min remaining:   53.3s


[CV] ............... max_iter=100, score=0.744186046512, total= 6.4min
[CV] ................. max_iter=50, score=0.72972972973, total= 6.9min
[CV] ................ max_iter=100, score=0.72972972973, total= 6.7min


[Parallel(n_jobs=20)]: Done  25 out of  25 | elapsed:  7.0min finished


SSC Best parameters set found on development set:
()
{'max_iter': 2}
()
Grid scores on development set:
()
0.815 (+/-0.124) for {'max_iter': 2}
0.810 (+/-0.120) for {'max_iter': 5}
0.815 (+/-0.127) for {'max_iter': 10}
0.795 (+/-0.116) for {'max_iter': 50}
0.795 (+/-0.116) for {'max_iter': 100}
()
Detailed classification report:
()
The model is trained on the full development set.
The scores are computed on the full evaluation set.
()
             precision    recall  f1-score   support

        1.0       0.91      0.98      0.95       334
        2.0       0.94      0.99      0.96       273
        3.0       0.83      0.81      0.82       200
        4.0       0.78      0.90      0.83       177
        5.0       0.81      0.75      0.78       183
        6.0       0.88      0.71      0.78       154
        7.0       0.95      0.88      0.91       180
        8.0       0.89      0.93      0.91       170
        9.0       0.91      0.72      0.80       152
       10.0       0.80      0.

* **Compare with KNN**

In [101]:
print("######## Performance Test:  Labelled = ",X_train_labelled.shape, "  ###  Unlabelled = ", X_train_unlabelled.shape, "###########")
k=4

print("########  K = ",k, " ###########")

neigh = KNeighborsClassifier(n_neighbors=k)
print("USPS KNN : ",  cross_validate_test(neigh, X_test, y_test, cv=5))

lmnn = LargeMarginNearestNeighbor(max_iter=2)
print("USPS LMNN : ", cross_validate_test(lmnn, X_test, y_test, cv=5))

ssc = SemiSupervisedLargeMarginNearestNeighbor(X_unlabeled=X_train_unlabelled,max_iter=2)
print("USPS SSC : ", cross_validate_test(ssc, X_test, y_test, cv=5))


('######## Performance Test:  Labelled = ', (200, 33), '  ###  Unlabelled = ', (4000, 33), '###########')
('########  K = ', 4, ' ###########')
('###### Cross Validate Data Shape: ', (2000, 33))
('##### Average Performance: ', 0.9469860990669063)
('USPS KNN : ', [0.9454094292803971, 0.9478908188585607, 0.9575, 0.9422110552763819, 0.9419191919191919])
('###### Cross Validate Data Shape: ', (2000, 33))
('##### Average Performance: ', 0.9479835611246699)
('USPS LMNN : ', [0.9528535980148883, 0.9404466501240695, 0.9625, 0.9447236180904522, 0.9393939393939394])
('###### Cross Validate Data Shape: ', (2000, 33))
('##### Average Performance: ', 0.9449658970467043)
('USPS SSC : ', [0.9454094292803971, 0.9478908188585607, 0.9575, 0.9422110552763819, 0.9318181818181818])


***
### Test 2 : Find the Best max_itr for MNIST (Labeled: 200, Unlabeled: 4000, Test: 2000)
***
*  **LMNN **

In [107]:
mnist = fetch_mldata('MNIST original', data_home='ml_data/mnist')  #Download MMNIST
X_all, y_all = mnist.data, mnist.target
my_model = PCA(n_components=0.95, svd_solver='full')
X_all=my_model.fit_transform(X_all)

X_train, X_test, y_train, y_test = train_test_split(X_all, y_all, test_size=2000, stratify=y_all, random_state=42)
X_train_labelled, X_train_rest, y_train_labelled, y_train_rest = train_test_split(X_train, y_train, train_size=200, stratify=y_train, random_state=42)

sample_inds=np.random.randint(X_train_rest.shape[0], size=4000)
X_train_unlabelled=X_train_rest[sample_inds, :]

In [108]:
##### LMNN TUNNING MNIST ########  (Long execution!!)
lmnnclf = GridSearchCV(LargeMarginNearestNeighbor(), lmnn_param_grid, cv=5, n_jobs=20, verbose=3)
lmnnclf.fit(X_train_labelled, y_train_labelled)

print("LMNN Best parameters set found on development set:")
print()
print(lmnnclf.best_params_)
print()
print("Grid scores on development set:")
print()
means = lmnnclf.cv_results_['mean_test_score']
stds = lmnnclf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, lmnnclf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
          % (mean, std * 2, params))
print()

print("Detailed classification report:")
print()
print("The model is trained on the full development set.")
print("The scores are computed on the full evaluation set.")
print()
y_true, y_pred = y_test, lmnnclf.predict(X_test)
print(classification_report(y_true, y_pred))
print()

Fitting 5 folds for each of 5 candidates, totalling 25 fits
[CV] max_iter=2 ......................................................
[CV] max_iter=2 ......................................................
[CV] max_iter=2 ......................................................
[CV] max_iter=2 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=2 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=10 .....................................................
[CV] max_iter=10 .....................................................
[CV] max_iter=10 .....................................................
[CV] max_iter=10 

[Parallel(n_jobs=20)]: Done   4 out of  25 | elapsed:   21.6s remaining:  1.9min


[CV] ................. max_iter=2, score=0.810810810811, total=  21.8s
[CV] max_iter=100 ....................................................
[CV] ................. max_iter=5, score=0.731707317073, total=  24.1s
[CV] ................. max_iter=5, score=0.658536585366, total=  27.5s
[CV] ................. max_iter=5, score=0.769230769231, total=  30.5s
[CV] ................. max_iter=5, score=0.666666666667, total=  32.1s
[CV] ................. max_iter=5, score=0.783783783784, total=  33.0s
[CV] ................ max_iter=10, score=0.609756097561, total=  33.8s
[CV] ................. max_iter=10, score=0.72972972973, total=  39.7s
[CV] ................ max_iter=10, score=0.658536585366, total=  40.5s


[Parallel(n_jobs=20)]: Done  13 out of  25 | elapsed:   40.9s remaining:   37.7s


[CV] ................ max_iter=10, score=0.595238095238, total=  41.0s
[CV] ................ max_iter=10, score=0.615384615385, total=  41.5s
[CV] ................ max_iter=50, score=0.333333333333, total= 1.6min
[CV] ................ max_iter=50, score=0.432432432432, total= 1.6min
[CV] ................ max_iter=50, score=0.341463414634, total= 1.6min
[CV] ................ max_iter=50, score=0.365853658537, total= 1.6min
[CV] ................ max_iter=50, score=0.452380952381, total= 1.7min
[CV] ............... max_iter=100, score=0.333333333333, total= 2.2min
[CV] ................ max_iter=100, score=0.19512195122, total= 2.3min


[Parallel(n_jobs=20)]: Done  22 out of  25 | elapsed:  2.6min remaining:   21.2s


[CV] ................ max_iter=100, score=0.25641025641, total= 2.2min
[CV] ............... max_iter=100, score=0.297297297297, total= 2.3min
[CV] ............... max_iter=100, score=0.317073170732, total= 2.3min


[Parallel(n_jobs=20)]: Done  25 out of  25 | elapsed:  2.6min finished


LMNN Best parameters set found on development set:
()
{'max_iter': 2}
()
Grid scores on development set:
()
0.730 (+/-0.124) for {'max_iter': 2}
0.720 (+/-0.102) for {'max_iter': 5}
0.640 (+/-0.096) for {'max_iter': 10}
0.385 (+/-0.097) for {'max_iter': 50}
0.280 (+/-0.100) for {'max_iter': 100}
()
Detailed classification report:
()
The model is trained on the full development set.
The scores are computed on the full evaluation set.
()
             precision    recall  f1-score   support

        0.0       0.79      0.96      0.87       197
        1.0       0.83      0.92      0.87       225
        2.0       0.81      0.74      0.78       200
        3.0       0.79      0.74      0.76       204
        4.0       0.86      0.67      0.76       195
        5.0       0.79      0.68      0.73       180
        6.0       0.84      0.91      0.88       197
        7.0       0.83      0.80      0.81       208
        8.0       0.79      0.53      0.63       195
        9.0       0.58      0

In [110]:
##### SSC LMNN TUNNING MNIST ######## (Long execution!!)
sscclf = GridSearchCV(SemiSupervisedLargeMarginNearestNeighbor(X_unlabeled=X_train_unlabelled), ssc_param_grid, cv=5, n_jobs=20, verbose=3)
sscclf.fit(X_train_labelled, y_train_labelled)

print("SSC Best parameters set found on development set:")
print()
print(sscclf.best_params_)
print()
print("Grid scores on development set:")
print()
means = sscclf.cv_results_['mean_test_score']
stds = sscclf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, sscclf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
          % (mean, std * 2, params))
print()

print("Detailed classification report:")
print()
print("The model is trained on the full development set.")
print("The scores are computed on the full evaluation set.")
print()
y_true, y_pred = y_test, sscclf.predict(X_test)
print(classification_report(y_true, y_pred))
print()

Fitting 5 folds for each of 5 candidates, totalling 25 fits
[CV] max_iter=2 ......................................................
[CV] max_iter=2 ......................................................
[CV] max_iter=2 ......................................................
[CV] max_iter=2 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=2 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=5 ......................................................
[CV] max_iter=10 .....................................................
[CV] max_iter=10 .....................................................
[CV] max_iter=10 .....................................................
[CV] max_iter=10 

[Parallel(n_jobs=20)]: Done   4 out of  25 | elapsed:   22.9s remaining:  2.0min


[CV] ................. max_iter=2, score=0.864864864865, total=  25.8s
[CV] max_iter=100 ....................................................
[CV] ................. max_iter=5, score=0.642857142857, total=  28.6s
[CV] ................. max_iter=5, score=0.731707317073, total=  34.5s
[CV] ................. max_iter=5, score=0.717948717949, total=  35.5s
[CV] ................. max_iter=5, score=0.707317073171, total=  36.3s
[CV] ................. max_iter=5, score=0.918918918919, total=  40.7s
[CV] ................ max_iter=10, score=0.642857142857, total=  44.0s
[CV] ................ max_iter=10, score=0.658536585366, total=  44.8s
[CV] ................ max_iter=10, score=0.682926829268, total=  46.4s


[Parallel(n_jobs=20)]: Done  13 out of  25 | elapsed:   46.8s remaining:   43.2s


[CV] ................ max_iter=10, score=0.589743589744, total=  51.3s
[CV] ................ max_iter=10, score=0.864864864865, total=  59.5s
[CV] ................ max_iter=50, score=0.619047619048, total= 2.4min
[CV] ............... max_iter=100, score=0.536585365854, total= 2.5min
[CV] ................ max_iter=50, score=0.538461538462, total= 2.9min
[CV] ................ max_iter=50, score=0.536585365854, total= 2.9min
[CV] ................ max_iter=50, score=0.560975609756, total= 3.0min
[CV] ............... max_iter=100, score=0.538461538462, total= 2.7min
[CV] ................ max_iter=50, score=0.513513513514, total= 3.6min


[Parallel(n_jobs=20)]: Done  22 out of  25 | elapsed:  3.6min remaining:   29.2s


[CV] ............... max_iter=100, score=0.595238095238, total= 3.5min
[CV] ............... max_iter=100, score=0.560975609756, total= 4.0min
[CV] ............... max_iter=100, score=0.513513513514, total= 5.4min


[Parallel(n_jobs=20)]: Done  25 out of  25 | elapsed:  5.8min finished


SSC Best parameters set found on development set:
()
{'max_iter': 5}
()
Grid scores on development set:
()
0.720 (+/-0.142) for {'max_iter': 2}
0.740 (+/-0.181) for {'max_iter': 5}
0.685 (+/-0.182) for {'max_iter': 10}
0.555 (+/-0.072) for {'max_iter': 50}
0.550 (+/-0.055) for {'max_iter': 100}
()
Detailed classification report:
()
The model is trained on the full development set.
The scores are computed on the full evaluation set.
()
             precision    recall  f1-score   support

        0.0       0.80      0.94      0.86       197
        1.0       0.84      0.92      0.88       225
        2.0       0.74      0.76      0.75       200
        3.0       0.83      0.77      0.80       204
        4.0       0.70      0.69      0.70       195
        5.0       0.85      0.67      0.75       180
        6.0       0.82      0.88      0.85       197
        7.0       0.85      0.76      0.80       208
        8.0       0.79      0.64      0.71       195
        9.0       0.56      0.

In [111]:
print("######## Performance Test:  Labelled = ",X_train_labelled.shape, "  ###  Unlabelled = ", X_train_unlabelled.shape, "###########")
k=4

print("########  K = ",k, " ###########")

neigh = KNeighborsClassifier(n_neighbors=k)
print("MNIST KNN : ",  cross_validate_test(neigh, X_test, y_test, cv=5))

lmnn = LargeMarginNearestNeighbor(max_iter=2)
print("MNIST LMNN : ", cross_validate_test(lmnn, X_test, y_test, cv=5))

ssc = SemiSupervisedLargeMarginNearestNeighbor(X_unlabeled=X_train_unlabelled,max_iter=2)
print("MNIST SSC : ", cross_validate_test(ssc, X_test, y_test, cv=5))

('######## Performance Test:  Labelled = ', (200, 154), '  ###  Unlabelled = ', (4000, 154), '###########')
('########  K = ', 4, ' ###########')
('###### Cross Validate Data Shape: ', (2000, 154))
('##### Average Performance: ', 0.9009826515139308)
('MNIST KNN : ', [0.8880597014925373, 0.9154228855721394, 0.9075, 0.9047619047619048, 0.889168765743073])
('###### Cross Validate Data Shape: ', (2000, 154))
('##### Average Performance: ', 0.9070078654472944)
('MNIST LMNN : ', [0.8930348258706468, 0.9129353233830846, 0.915, 0.9072681704260651, 0.906801007556675])
('###### Cross Validate Data Shape: ', (2000, 154))
('##### Average Performance: ', 0.8910212559576692)
('MNIST SSC : ', [0.8731343283582089, 0.8930348258706468, 0.9025, 0.8922305764411027, 0.8942065491183879])
