# Cross Validation & Performance Evaluation

In [1]:
import torch
import numpy as np
import sklearn
import matplotlib.pyplot as plt  
import json
from pathlib import Path

In [2]:
%matplotlib inline

## Collecting data

In [3]:
#Models that are available for testing:
!ls data/

blacklist.txt		    moco32_phase75   simclr_phase25
class_occurences.json	    moco64	     simclr_phase50
gallery_paths_relative.txt  moco64_phase0    simclr_phase75
gallery_paths.txt	    moco64_phase25   strict_train_test.json
imgnet_pretrained	    moco64_phase50   swav
jigsaw			    moco64_phase75   swav_phase0
jigsaw_phase0		    random	     swav_phase25
jigsaw_phase100		    rotnet	     swav_phase50
jigsaw_phase25		    rotnet_phase0    swav_phase75
jigsaw_phase50		    rotnet_phase100  total_cross_val_log_mlp.txt
jigsaw_phase75		    rotnet_phase25   total_cross_val_log_svm.txt
moco32			    rotnet_phase50   total_gallery_mAP_scores_log.txt
moco32_phase0		    rotnet_phase75   total_perf_eval_log_mlp.txt
moco32_phase25		    simclr	     total_perf_eval_log_svm.txt
moco32_phase50		    simclr_phase0


In [4]:
#options rotnet, jigsaw, simclr, moco32, imgnet_pretrained
model_name = "jigsaw" #<----------Specifiy model here!

dir = Path("data/" + model_name)

embedding_gallery = torch.load(dir / "embedding_gallery.torch")
embedding_gallery_norm = torch.load(dir / "embedding_gallery_norm.torch")
labels = list()
with open(dir / "embedding_gallery_labels.txt", "r") as f:
    labels = f.read().splitlines()
print(f"Data for model {model_name} succesfully read.\nembedding_gallery size: {embedding_gallery.shape}\
\nembedding_gallery_norm size: {embedding_gallery_norm.shape}\nlabels length: {len(labels)}")

Data for model jigsaw succesfully read.
embedding_gallery size: torch.Size([1643, 2048])
embedding_gallery_norm size: torch.Size([1643, 2048])
labels length: 1643


In [5]:
#Convert to numpy arrays:
embedding_gallery = embedding_gallery.numpy()
embedding_gallery_norm = embedding_gallery_norm.numpy()
labels = np.array(labels)
print(f"embedding_gallery size: {embedding_gallery.shape} and type {type(embedding_gallery)}\
\nembedding_gallery_norm size: {embedding_gallery_norm.shape} and type {type(embedding_gallery_norm)}\
\nLabels with size {labels.shape} and type {type(labels)}")

embedding_gallery size: (1643, 2048) and type <class 'numpy.ndarray'>
embedding_gallery_norm size: (1643, 2048) and type <class 'numpy.ndarray'>
Labels with size (1643,) and type <class 'numpy.ndarray'>


## Modelling

In [6]:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

Let's test the performance of a support vector machine (SVM) and a multi layer perceptron (MLP) on the entire data:

In [None]:
#Create SVM estimator 
svm_overfit = SVC(kernel="poly")

In [None]:
#Create MLP estimator
mlp_overfit = MLPClassifier(
    hidden_layer_sizes=(256, 64),
    solver='lbfgs',
    max_iter=10_000
)

In [7]:
#the dat to train/fit on 
data = embedding_gallery_norm.copy()
#labels are already in a numpy array called "labels"

Training/fitting on the data and labels:

In [None]:
#train SVM
svm_overfit.fit(data, labels)

In [None]:
#train MLP
mlp_overfit.fit(data, labels)

Evaluate on the same data (this is overfitting, but should yield high results and verify that the data is correct)

In [None]:
#evaluate SVM
accuracy = svm_overfit.score(data, labels) 
print(f"Accuracy svm: {accuracy*100:.1f}%\n")

In [None]:
#evaluate MLP
accuracy = mlp_overfit.score(data, labels) 
print(f"Accuracy mlp: {accuracy*100:.1f}%\n")

## cross validation

In [8]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold, ShuffleSplit, StratifiedShuffleSplit

In [9]:
def cross_validate(classifier, fold, data, labels):
    """
    Performs a cross validation for the classifier with a train test split
    according to fold generator.
    
    Args:
        classifier: the classifier to cross validate.
        fold: the generator to use for train-test splits.
        data: the embedding gallery to train/test on.
        labels: the ground truth labels per row for the gallery.
        
    Returns:
        The mean accuracy score from the cross validation rounds and
        the standard deviation of the scores.
    """
    #cross validation generator
    cv = fold.split(data, y=labels)

    #calc cross validation
    scores = cross_val_score(
        classifier,   #the estimator/classifier
        data,         #data 
        y=labels,     #targets
        cv=cv         #generator for array indicis in data that select a certain split out of "data"
    )
    print(scores)
    accuracy = scores.mean()
    std = scores.std()
    return accuracy, std

Now let's create an svm and mlp that we don't overfit. We will test it's ability to generalize by performing cross validation.

In [None]:
#Create SVM estimator 
svm_cross = SVC(kernel="poly")

In [None]:
#Create MLP estimator
mlp_cross = MLPClassifier(
    hidden_layer_sizes=(128, 64),
    solver='lbfgs',
    max_iter=10_000
)

Create cross validation generators for train-test splits:

In [None]:
iterators = { 
    "KFold" : KFold(n_splits=5), 
    "StratifiedKFold" : StratifiedKFold(n_splits=5), 
    "ShuffleSplit" : ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
}

for k in iterators.keys():
    print(f"Generator/iterator used: {k}")
    iterator = iterators[k]
    for i, (train_index, test_index) in enumerate(iterator.split(data, y=labels)):
        print(f"Fold {i}:")
        print(f"  Train: index[:5] {train_index[:5]} with shape{train_index.shape}")
        print(f"  Test:  index[:5] {test_index[:5]} with shape{test_index.shape}")
    print()

In [None]:
#Create KFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate
kf = KFold(n_splits=5)

In [10]:
#Create StratifiedKFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate. 
#The folds have equal class distribution in this case
skf = StratifiedKFold(n_splits=5)

In [None]:
#Create ShuffleSplit generator
#samples are shuffled and then split up in a test and train set
ss = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)

Perform the cross validations on svm and mlp

### SVM cross validation

In [None]:
#with KFold
accuracy, std = cross_validate(svm_cross, kf, data, labels)
print(f"Support Vector Machine (SVM) with KFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

In [None]:
#with StratifiedKFold
accuracy, std = cross_validate(svm_cross, skf, data, labels)
print(f"Support Vector Machine (SVM) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

In [None]:
#with ShuffleSplit
accuracy, std = cross_validate(svm_cross, ss, data, labels)
print(f"Support Vector Machine (SVM) with ShuffleSplit:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

### MLP cross validation

In [None]:
#with KFold
accuracy, std = cross_validate(mlp_cross, kf, data, labels)
print(f"Multi Layer Perceptron (MLP) with KFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

In [None]:
#with StratifiedKFold
accuracy, std = cross_validate(mlp_cross, skf, data, labels)
print(f"Multi Layer Perceptron (MLP) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

In [None]:
#with ShuffleSplit
accuracy, std = cross_validate(mlp_cross, ss, data, labels)
print(f"Multi Layer Perceptron (MLP) with ShuffleSplit:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

## Performance Assessment 
In this section we will do the final test.
- Split up the data from cornershop in train (80%) and test (20%) data.
- Cross validate on the training set.
- Fit on the training set.
- Test on the testing set.

In [11]:
from sklearn.metrics import confusion_matrix

In [12]:
def eval_performance(classifier, train_set, train_labels, test_set, test_labels):
    """
    Evaluate performance for the given estimator/classifier.
    Performance is measured by:
        -Training on the train_set and train_labels
        -Measuring accuracy for classification on the test_set with the test_labels
        -Calculate a confusion matrix
    
    Args:
        classifier : the classifier to evaluate.
        train_set (np.ndarray): the data to train on.
        train_labels (np.ndarray): ground truth for training data.
        test_set (np.ndarray): the data to test on.
        test_labels (np.ndarray): ground truth for testing data.
        
    Returns:
        accuracy : accuracy score from predictions on test set.
        cm: The confusion matrix.
    """
    #train on training set
    classifier.fit(train_set, train_labels) #retrain on training set
    
    #Calculate accuracy
    accuracy = classifier.score(test_set, test_labels) #calc acc score on testing set
    
    #Calculate confusion matrix
    y_true = test_labels.copy() #Ground truth
    y_pred = classifier.predict(test_set) #Predictions made by classifier
    proba = classifier.predict_proba(test_set)
    cls = classifier.classes_
    print(f"shape proba is {proba.shape}\n examples {proba[:4]}")
    print(f"sgapr classes {cls.shape}, examples {cls[:4]}")
    print(f"shape test is {test_set.shape}\n examples {test_set[:4]}")
    print(f"amount of classes in test set {len(np.unique(test_labels))}\n")
    print(f"shape y_pred is {y_pred.shape}\n examples {y_pred[:4]}")
    #compute confusion matrix
    cm = confusion_matrix(y_true=y_true, y_pred=y_pred)
    #predicted class is the column nr, ground truth is the row nr
    #correct classifications can be found on the diagonal

    return accuracy, cm

### Train test split

In [13]:
train_set, test_set, train_labels, test_labels= train_test_split(
    data,          #data
    labels,        #targets
    test_size=0.2, #20% test set, 80% train set
    random_state=0 #for reproducable results of the random shuffling
)

In [14]:
#our training set looks like this
print(f"train set\n{train_set[:5]} \n\ntrain_labels \n{train_labels[:5]}")

train set
[[0.01895176 0.00473489 0.02060694 ... 0.00446219 0.016622   0.00592604]
 [0.00849075 0.00248358 0.04607142 ... 0.00773716 0.00650234 0.00625237]
 [0.04192992 0.00253347 0.00672528 ... 0.00556186 0.01111727 0.01544235]
 [0.01102807 0.00483616 0.02628589 ... 0.00268133 0.0317217  0.0088753 ]
 [0.04707882 0.00090549 0.02136604 ... 0.00441235 0.01279342 0.00712709]] 

train_labels 
['MayTeaFramboos1L' 'MonsterPipelinePunch500ml' 'AquariusOrangeFles'
 'MonsterUltra' 'PepsiMax']


In [15]:
#our test set looks like this
print(f"test set\n{test_set[:5]} \n\ntest_labels \n{test_labels[:5]}")

test set
[[0.03409375 0.00510009 0.01644533 ... 0.00527204 0.01750043 0.01589694]
 [0.07201073 0.00386802 0.01132445 ... 0.00936174 0.01310913 0.01728138]
 [0.01418458 0.00590722 0.02930187 ... 0.00166937 0.03630792 0.00223531]
 [0.03435394 0.00296128 0.02193487 ... 0.00518328 0.01125729 0.00957684]
 [0.02654687 0.00958286 0.02149335 ... 0.00516155 0.02269565 0.01081691]] 

test_labels 
['InnocentOrangeWithBits' 'CapriSun' 'MonsterPunchEnergy500ml'
 'NaluOriginal6x350ml' 'StellaBlik50cl']


In [16]:
#Proportion of training data
prop_train = (len(train_set)/ len(embedding_gallery_norm) ) * 100
#Proportion of testing data
prop_test = (len(test_set)/ len(embedding_gallery_norm) ) * 100
print(f"Size of the training set {len(train_set)}\nThe training set contains {prop_train}% of the data\n")
print(f"Size of the test set {len(test_set)}\nThe test set contains {prop_test}% of the data")

Size of the training set 1314
The training set contains 79.97565429093122% of the data

Size of the test set 329
The test set contains 20.02434570906878% of the data


### Cross validation on the training set

#### Support Vector Machine (SVM)

In [None]:
#Create SVM estimator 
svm_test = SVC(kernel="poly", probability=True)

In [None]:
#Create StratifiedKFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate. 
#The folds have equal class distribution in this case
skf = StratifiedKFold(n_splits=5)

In [None]:
#Perform the cross validation
accuracy, std = cross_validate(svm_test, skf, train_set, train_labels)
print(f"Support Vector Machine (SVM) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

#### Multi Layer Perceptron (MLP)

In [25]:
#Create MLP estimator
mlp_test = MLPClassifier(
    hidden_layer_sizes=(128, 64),
    solver='adam',
    max_iter=10_000,
    alpha=0.1
)

In [26]:
#Create StratifiedKFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate. 
#The folds have equal class distribution in this case
skf = StratifiedKFold(n_splits=5)

In [27]:
%%timeit -n 1 -r 1 #time cell excecution by running it only once
#Perform the cross validation
accuracy, std = cross_validate(mlp_test, skf, train_set, train_labels)
print(f"Multi Layer Perceptron (MLP) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")



[0.36121673 0.31939163 0.37642586 0.3460076  0.38167939]
Multi Layer Perceptron (MLP) with StratifiedKFold:
Accuracy: 35.7% 
std_dev: 0.022536296341990447

53min 25s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### Testing on the test set

#### Suport Vector Machine (SVM)

In [None]:
accuracy, cm = eval_performance(svm_test, train_set, train_labels, test_set, test_labels)
print(f"Support Vector Machine (SVM):\nAccuracy: {accuracy*100:.1f}% \nconfusion_matrix: \n{cm}\n")

#### Multi Layer Perceptron (MLP)

In [28]:
%%timeit -n 1 -r 1 #time cell excecution by running it only once
accuracy, cm = eval_performance(mlp_test, train_set, train_labels, test_set, test_labels)
print(f"Multi Layer Perceptron:\nAccuracy: {accuracy*100:.1f}% \nconfusion_matrix: \n{cm}\n")

shape proba is (329, 384)
 examples [[2.3103273e-12 6.4883759e-05 1.0526129e-06 ... 1.1853646e-22
  3.2502754e-23 1.1853857e-17]
 [4.4983238e-08 2.8570175e-09 1.5529840e-09 ... 1.3620998e-12
  3.7347414e-28 6.1453563e-32]
 [3.2096055e-23 3.4481753e-16 4.8504940e-06 ... 5.5487176e-06
  3.4585889e-12 2.4252001e-13]
 [5.9315391e-12 9.4572388e-06 1.5895315e-11 ... 7.2373348e-30
  5.5471451e-25 7.4631280e-19]]
sgapr classes (384,), examples ['7upFree' '7upLemon' '7upMojito' 'AADrink']
shape test is (329, 2048)
 examples [[0.03409375 0.00510009 0.01644533 ... 0.00527204 0.01750043 0.01589694]
 [0.07201073 0.00386802 0.01132445 ... 0.00936174 0.01310913 0.01728138]
 [0.01418458 0.00590722 0.02930187 ... 0.00166937 0.03630792 0.00223531]
 [0.03435394 0.00296128 0.02193487 ... 0.00518328 0.01125729 0.00957684]]
amount of classes in test set 194

shape y_pred is (329,)
 examples ['InnocentOrangeWithBits' 'CapriSun' 'CocaColaFles500ml'
 'FuzeTeaMangoChamomileFles4x400ml']
Multi Layer Perceptron:


## Strict Peformance Assessment 
In this section we will do the final test with a sticter test set.
This means that every embedding in the test set is from a different origin image than every embedding from the same class in the train set.
- Collect the split from a json file which aims to achieve a 80/20 train/test split.
- Cross validate on the strict training set.
- Fit on the strict training set.
- Test on the strict testing set.

### Train test split

In [None]:
json_file = Path("data/strict_train_test.json")
# Read the JSON file
with open(json_file, "r") as file:
    json_data = file.read()

# Convert JSON to dictionary
strict_train_test = json.loads(json_data)

# Access the dictionary
print(f"Amount of classes that can be used for strict testing: {len(strict_train_test.keys())} / {len(np.unique(labels))}")

Lets set up the strict test and train set <br>
We have a copy of embedding gallery norm in `data` and the corresponding labels are in the variable `labels`. <br>
We have the indicis of embeddings in the gallery already determined in the json file, let's apply them to get our train and test set:

In [None]:
train_set_strict = []
train_labels_strict = [] 
test_set_strict = []
test_labels_strict = []
for cls in strict_train_test.keys():
    #Collect train queries
    train_queries = strict_train_test[cls]["train"]
    for query in train_queries:
        idx = query["gallery_idx"]
        train_query = data[idx]
        train_label = labels[idx]
        train_set_strict.append(train_query)
        train_labels_strict.append(train_label)
    
    #Collect test queries
    test_queries = strict_train_test[cls]["test"]
    for query in test_queries:
        idx = query["gallery_idx"]
        test_query = data[idx]
        test_label = labels[idx]
        test_set_strict.append(test_query)
        test_labels_strict.append(test_label)
        

In [None]:
train_set_strict = np.array(train_set_strict)
test_set_strict = np.array(test_set_strict)

In [None]:
#our training set looks like this
print(f"train set\n{train_set_strict[:5]} \n\ntrain_labels \n{train_labels_strict[:5]}")

In [None]:
#our test set looks like this
print(f"test set\n{test_set_strict[:5]} \n\ntest_labels \n{test_labels_strict[:5]}")

In [None]:
#Proportion of training data
prop_train = (len(train_set_strict)/ (len(train_set_strict) + len(test_set_strict)) ) * 100
#Proportion of testing data
prop_test = (len(test_set_strict)/ (len(train_set_strict) + len(test_set_strict)) ) * 100
print(f"Size of the training set {len(train_set_strict)}\nThe training set contains {prop_train}% of the data\n")
print(f"Size of the test set {len(test_set_strict)}\nThe test set contains {prop_test}% of the data")

### Cross validation on the training set

#### Support Vector Machine (SVM)

In [None]:
#Create SVM estimator 
svm_test_strict = SVC(kernel="poly")

In [None]:
#Create StratifiedKFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate. 
#The folds have equal class distribution in this case
skf = StratifiedKFold(n_splits=5)

In [None]:
#Perform the cross validation
accuracy, std = cross_validate(svm_test_strict, skf, train_set_strict, train_labels_strict)
print(f"Support Vector Machine (SVM) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

#### Multi Layer Perceptron (MLP)

In [None]:
#Create MLP estimator
mlp_test_strict = MLPClassifier(
    hidden_layer_sizes=(256, 64),
    solver='lbfgs',
    max_iter=10_000
)

In [None]:
#Create StratifiedKFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate. 
#The folds have equal class distribution in this case
skf = StratifiedKFold(n_splits=5)

In [None]:
#Perform the cross validation
accuracy, std = cross_validate(mlp_test_strict, skf, train_set_strict, train_labels_strict)
print(f"Multi Layer Perceptron (MLP) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

### Testing on the strict test set

#### Support Vector Machine (SVM)

In [None]:
accuracy, cm = eval_performance(
    svm_test_strict, 
    train_set_strict, 
    train_labels_strict, 
    test_set_strict, 
    test_labels_strict
)
print(f"Support Vector Machine (SVM):\nAccuracy: {accuracy*100:.1f}% \nconfusion_matrix: \n{cm}\n")

#### Multi Layer Perceptron (MLP)

In [None]:
accuracy, cm = eval_performance(
    mlp_test_strict, 
    train_set_strict, 
    train_labels_strict, 
    test_set_strict, 
    test_labels_strict
)
print(f"Multi Layer Perceptron:\nAccuracy: {accuracy*100:.1f}% \nconfusion_matrix: \n{cm}\n")