# Cross Validation & Performance Evaluation

In [1]:
import torch
import numpy as np
import sklearn
import matplotlib.pyplot as plt  
import json
from pathlib import Path

In [2]:
%matplotlib inline

## Collecting data

In [3]:
#Models that are available for testing:
!ls data/

blacklist.txt		    moco32_phase75   simclr_phase25
class_occurences.json	    moco64	     simclr_phase50
gallery_paths_relative.txt  moco64_phase0    simclr_phase75
gallery_paths.txt	    moco64_phase25   strict_train_test.json
imgnet_pretrained	    moco64_phase50   swav
jigsaw			    moco64_phase75   swav_phase0
jigsaw_phase0		    random	     swav_phase25
jigsaw_phase100		    rotnet	     swav_phase50
jigsaw_phase25		    rotnet_phase0    swav_phase75
jigsaw_phase50		    rotnet_phase100  total_cross_val_log_mlp.txt
jigsaw_phase75		    rotnet_phase25   total_cross_val_log_svm.txt
moco32			    rotnet_phase50   total_gallery_mAP_scores_log.txt
moco32_phase0		    rotnet_phase75   total_perf_eval_log_mlp.txt
moco32_phase25		    simclr	     total_perf_eval_log_svm.txt
moco32_phase50		    simclr_phase0


In [4]:
# Any model directory listed under data/ can be used here,
# e.g. rotnet, jigsaw, simclr, moco32, moco64, swav, imgnet_pretrained.
model_name = "moco64" #<---------- Specify model here!

# Named model_dir (not `dir`) so the builtin dir() is not shadowed for the rest of the session.
model_dir = Path("data/" + model_name)

# NOTE: torch.load unpickles the file contents, so only load files from a trusted source.
embedding_gallery = torch.load(model_dir / "embedding_gallery.torch")
embedding_gallery_norm = torch.load(model_dir / "embedding_gallery_norm.torch")
with open(model_dir / "embedding_gallery_labels.txt", "r") as f:
    labels = f.read().splitlines()

# Every embedding row needs exactly one label; fail early instead of producing misaligned splits later.
assert len(labels) == embedding_gallery.shape[0] == embedding_gallery_norm.shape[0], \
    "number of labels does not match the number of embedding rows"

print(f"Data for model {model_name} succesfully read.\nembedding_gallery size: {embedding_gallery.shape}\
\nembedding_gallery_norm size: {embedding_gallery_norm.shape}\nlabels length: {len(labels)}")

Data for model moco64 succesfully read.
embedding_gallery size: torch.Size([1643, 2048])
embedding_gallery_norm size: torch.Size([1643, 2048])
labels length: 1643


In [5]:
#Convert to numpy arrays.
#The torch.is_tensor guards make this cell safe to re-run: after the first run the
#variables already hold numpy arrays, which have no .numpy() method.
if torch.is_tensor(embedding_gallery):
    embedding_gallery = embedding_gallery.numpy()
if torch.is_tensor(embedding_gallery_norm):
    embedding_gallery_norm = embedding_gallery_norm.numpy()
labels = np.asarray(labels)
print(f"embedding_gallery size: {embedding_gallery.shape} and type {type(embedding_gallery)}\
\nembedding_gallery_norm size: {embedding_gallery_norm.shape} and type {type(embedding_gallery_norm)}\
\nLabels with size {labels.shape} and type {type(labels)}")

embedding_gallery size: (1643, 2048) and type <class 'numpy.ndarray'>
embedding_gallery_norm size: (1643, 2048) and type <class 'numpy.ndarray'>
Labels with size (1643,) and type <class 'numpy.ndarray'>


## Modelling

In [6]:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

Let's test the performance of a support vector machine (SVM) and a multi layer perceptron (MLP) on the entire data:

In [7]:
#Create SVM estimator 
svm_overfit = SVC(kernel="poly")

In [8]:
#Create MLP estimator
mlp_overfit = MLPClassifier(
    hidden_layer_sizes=(256, 64),
    solver='lbfgs',
    max_iter=10_000,
    random_state=0  #fixed seed for the weight initialisation, so re-runs give the same model
)

In [9]:
# The data to train/fit on: a copy of the normalised embedding gallery,
# so the loaded array itself is never modified through `data`.
data = embedding_gallery_norm.copy()
# The labels are already in a numpy array called "labels".

Training/fitting on the data and labels:

In [10]:
#train SVM
svm_overfit.fit(data, labels)

SVC(kernel='poly')

In [11]:
#train MLP
mlp_overfit.fit(data, labels)

MLPClassifier(hidden_layer_sizes=(256, 64), max_iter=10000, solver='lbfgs')

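Evaluate on the same data the models were trained on. This says nothing about generalization (the models have simply memorized the data), but the scores should be close to 100%, which is a useful sanity check that the embeddings and labels line up correctly.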

In [12]:
#evaluate SVM
accuracy = svm_overfit.score(data, labels) 
print(f"Accuracy svm: {accuracy*100:.1f}%\n")

Accuracy svm: 99.9%



In [13]:
#evaluate MLP
accuracy = mlp_overfit.score(data, labels) 
print(f"Accuracy mlp: {accuracy*100:.1f}%\n")

Accuracy mlp: 100.0%



## Cross validation

In [14]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold, ShuffleSplit, StratifiedShuffleSplit

In [15]:
def cross_validate(classifier, fold, data, labels):
    """
    Cross validate a classifier using the train/test splits produced by ``fold``.

    The per-fold scores are printed before the summary statistics are returned.

    Args:
        classifier: the estimator to cross validate.
        fold: splitter object; its ``split(data, y=labels)`` method supplies the
            train/test index pairs for every round.
        data: the embedding gallery to train/test on.
        labels: the ground truth label for every row of ``data``.

    Returns:
        A tuple ``(mean, std)`` with the mean score over all rounds and the
        standard deviation of those scores.
    """
    fold_scores = cross_val_score(
        classifier,
        data,
        y=labels,
        # index pairs that select the train and test rows of "data" for each round
        cv=fold.split(data, y=labels),
    )
    print(fold_scores)
    return fold_scores.mean(), fold_scores.std()

Now let's create an SVM and an MLP that we don't overfit. We will test their ability to generalize by performing cross validation.

In [16]:
#Create SVM estimator 
svm_cross = SVC(kernel="poly")

In [17]:
#Create MLP estimator
mlp_cross = MLPClassifier(
    hidden_layer_sizes=(128, 64),
    solver='lbfgs',
    max_iter=10_000,
    random_state=0  #fixed seed for the weight initialisation, so the cross validation scores are reproducible
)

Create cross validation generators for train-test splits:

In [18]:
# Splitters to compare, keyed by the name used in the printout below
iterators = {
    "KFold": KFold(n_splits=5),
    "StratifiedKFold": StratifiedKFold(n_splits=5),
    "ShuffleSplit": ShuffleSplit(n_splits=5, test_size=0.2, random_state=0),
}

# For every splitter, show the first few train/test indices of each fold and the fold sizes
for splitter_name, splitter in iterators.items():
    print(f"Generator/iterator used: {splitter_name}")
    for fold_nr, (train_index, test_index) in enumerate(splitter.split(data, y=labels)):
        print(f"Fold {fold_nr}:")
        print(f"  Train: index[:5] {train_index[:5]} with shape{train_index.shape}")
        print(f"  Test:  index[:5] {test_index[:5]} with shape{test_index.shape}")
    print()

Generator/iterator used: KFold
Fold 0:
  Train: index[:5] [329 330 331 332 333] with shape(1314,)
  Test:  index[:5] [0 1 2 3 4] with shape(329,)
Fold 1:
  Train: index[:5] [0 1 2 3 4] with shape(1314,)
  Test:  index[:5] [329 330 331 332 333] with shape(329,)
Fold 2:
  Train: index[:5] [0 1 2 3 4] with shape(1314,)
  Test:  index[:5] [658 659 660 661 662] with shape(329,)
Fold 3:
  Train: index[:5] [0 1 2 3 4] with shape(1315,)
  Test:  index[:5] [987 988 989 990 991] with shape(328,)
Fold 4:
  Train: index[:5] [0 1 2 3 4] with shape(1315,)
  Test:  index[:5] [1315 1316 1317 1318 1319] with shape(328,)

Generator/iterator used: StratifiedKFold
Fold 0:
  Train: index[:5] [1 2 3 5 6] with shape(1314,)
  Test:  index[:5] [ 0  4  8 11 20] with shape(329,)
Fold 1:
  Train: index[:5] [0 2 3 4 6] with shape(1314,)
  Test:  index[:5] [ 1  5 12 16 22] with shape(329,)
Fold 2:
  Train: index[:5] [0 1 3 4 5] with shape(1314,)
  Test:  index[:5] [ 2  6 13 17 23] with shape(329,)
Fold 3:
  Train: 



In [19]:
#Create KFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate.
#No shuffling is requested, so every test fold is one contiguous block of row indices
#(see the fold indices printed above).
#NOTE(review): the gallery appears to be ordered by class, which would explain the
#near-zero KFold accuracies below - confirm. Passing shuffle=True, or using
#StratifiedKFold, avoids test folds made up of classes that were never trained on.
kf = KFold(n_splits=5)

In [20]:
#Create StratifiedKFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate. 
#The folds have equal class distribution in this case
skf = StratifiedKFold(n_splits=5)

In [21]:
#Create ShuffleSplit generator
#samples are shuffled and then split up in a test and train set
ss = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)

Perform the cross validations on svm and mlp

### SVM cross validation

In [22]:
#with KFold
accuracy, std = cross_validate(svm_cross, kf, data, labels)
print(f"Support Vector Machine (SVM) with KFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

[0.         0.         0.01823708 0.01829268 0.        ]
Support Vector Machine (SVM) with KFold:
Accuracy: 0.7% 
std_dev: 0.008947945740517962



In [23]:
#with StratifiedKFold
accuracy, std = cross_validate(svm_cross, skf, data, labels)
print(f"Support Vector Machine (SVM) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")



[0.69908815 0.71732523 0.68389058 0.72865854 0.67378049]
Support Vector Machine (SVM) with StratifiedKFold:
Accuracy: 70.1% 
std_dev: 0.02033606771573935



In [24]:
#with ShuffleSplit
accuracy, std = cross_validate(svm_cross, ss, data, labels)
print(f"Support Vector Machine (SVM) with ShuffleSplit:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

[0.65957447 0.6231003  0.65653495 0.64741641 0.69300912]
Support Vector Machine (SVM) with ShuffleSplit:
Accuracy: 65.6% 
std_dev: 0.02253343828918012



### MLP cross validation

In [25]:
#with KFold
accuracy, std = cross_validate(mlp_cross, kf, data, labels)
print(f"Multi Layer Perceptron (MLP) with KFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

[0.         0.         0.01823708 0.0152439  0.        ]
Multi Layer Perceptron (MLP) with KFold:
Accuracy: 0.7% 
std_dev: 0.008255573380298196



In [26]:
#with StratifiedKFold
accuracy, std = cross_validate(mlp_cross, skf, data, labels)
print(f"Multi Layer Perceptron (MLP) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")



[0.51975684 0.53495441 0.46200608 0.55182927 0.48170732]
Multi Layer Perceptron (MLP) with StratifiedKFold:
Accuracy: 51.0% 
std_dev: 0.03338108895221815



In [27]:
#with ShuffleSplit
accuracy, std = cross_validate(mlp_cross, ss, data, labels)
print(f"Multi Layer Perceptron (MLP) with ShuffleSplit:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

[0.48024316 0.47720365 0.49544073 0.43465046 0.48632219]
Multi Layer Perceptron (MLP) with ShuffleSplit:
Accuracy: 47.5% 
std_dev: 0.021005656587441947



## Performance Assessment 
In this section we will do the final test.
- Split up the data from cornershop in train (80%) and test (20%) data.
- Cross validate on the training set.
- Fit on the training set.
- Test on the testing set.

In [28]:
from sklearn.metrics import confusion_matrix

In [29]:
def eval_performance(classifier, train_set, train_labels, test_set, test_labels):
    """
    Evaluate performance for the given estimator/classifier.
    Performance is measured by:
        -Training on the train_set and train_labels
        -Predicting the test_set once
        -Deriving both the accuracy and the confusion matrix from those predictions

    Args:
        classifier : the classifier to evaluate. Only ``fit`` and ``predict`` are
            required; the classifier is fitted in place.
        train_set (np.ndarray): the data to train on.
        train_labels (np.ndarray or list): ground truth for training data.
        test_set (np.ndarray): the data to test on.
        test_labels (np.ndarray or list): ground truth for testing data.

    Returns:
        accuracy : fraction of test samples whose prediction equals the ground truth.
        cm: The confusion matrix. The predicted class is the column nr, the ground
            truth is the row nr, so correct classifications are on the diagonal.
    """
    #(re)train on the training set
    classifier.fit(train_set, train_labels)

    #Predict the test set a single time; accuracy and confusion matrix share these predictions
    y_true = np.asarray(test_labels)                  #Ground truth
    y_pred = np.asarray(classifier.predict(test_set)) #Predictions made by classifier

    #Accuracy = share of predictions that match the ground truth
    accuracy = float(np.mean(y_pred == y_true))

    #Compute confusion matrix
    cm = confusion_matrix(y_true=y_true, y_pred=y_pred)

    return accuracy, cm

### Train test split

In [30]:
# Randomly split the gallery into a training part and a held-out test part.
train_set, test_set, train_labels, test_labels= train_test_split(
    data,          # embeddings to split
    labels,        # targets, split with the same row selection as the embeddings
    test_size=0.2, # 20% test set, 80% train set
    random_state=0 # fixed seed, for reproducible results of the random shuffling
)

In [31]:
#our training set looks like this
print(f"train set\n{train_set[:5]} \n\ntrain_labels \n{train_labels[:5]}")

train set
[[0.0000000e+00 2.7361902e-04 7.7595622e-03 ... 2.1438068e-03
  2.5144813e-04 7.6001310e-03]
 [1.9973461e-04 0.0000000e+00 0.0000000e+00 ... 1.1563284e-03
  2.9495624e-05 1.7721315e-03]
 [0.0000000e+00 3.9069530e-02 6.1618048e-03 ... 2.7245482e-02
  4.5345982e-06 1.0573921e-03]
 [3.0629247e-04 4.1899788e-03 6.3482468e-04 ... 1.3226266e-02
  0.0000000e+00 1.2904644e-03]
 [0.0000000e+00 3.7859593e-02 4.6287957e-03 ... 2.2931259e-02
  0.0000000e+00 2.4286965e-02]] 

train_labels 
['MayTeaFramboos1L' 'MonsterPipelinePunch500ml' 'AquariusOrangeFles'
 'MonsterUltra' 'PepsiMax']


In [32]:
#our test set looks like this
print(f"test set\n{test_set[:5]} \n\ntest_labels \n{test_labels[:5]}")

test set
[[0.         0.00618856 0.00811327 ... 0.01014707 0.00010826 0.01076958]
 [0.         0.01138464 0.01278724 ... 0.01439012 0.00010775 0.00108106]
 [0.         0.00894091 0.00302992 ... 0.01870816 0.         0.0043574 ]
 [0.00028309 0.01736233 0.0098097  ... 0.00242132 0.00014352 0.00211152]
 [0.         0.00975204 0.01961743 ... 0.00342917 0.00076789 0.01030275]] 

test_labels 
['InnocentOrangeWithBits' 'CapriSun' 'MonsterPunchEnergy500ml'
 'NaluOriginal6x350ml' 'StellaBlik50cl']


In [33]:
#Size of the complete gallery, the reference for both percentages
n_total = len(embedding_gallery_norm)
#Percentage of the gallery that ended up in the training / testing split
prop_train = (len(train_set) / n_total) * 100
prop_test = (len(test_set) / n_total) * 100
print(f"Size of the training set {len(train_set)}\nThe training set contains {prop_train}% of the data\n")
print(f"Size of the test set {len(test_set)}\nThe test set contains {prop_test}% of the data")

Size of the training set 1314
The training set contains 79.97565429093122% of the data

Size of the test set 329
The test set contains 20.02434570906878% of the data


### Cross validation on the training set

#### Support Vector Machine (SVM)

In [34]:
#Create SVM estimator 
svm_test = SVC(kernel="poly", probability=True)

In [35]:
#Create StratifiedKFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate. 
#The folds have equal class distribution in this case
skf = StratifiedKFold(n_splits=5)

In [36]:
#Perform the cross validation
accuracy, std = cross_validate(svm_test, skf, train_set, train_labels)
print(f"Support Vector Machine (SVM) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")



[0.62357414 0.64638783 0.63117871 0.63498099 0.65648855]
Support Vector Machine (SVM) with StratifiedKFold:
Accuracy: 63.9% 
std_dev: 0.011615242780547838



#### Multi Layer Perceptron (MLP)

In [37]:
#Create MLP estimator
mlp_test = MLPClassifier(
    hidden_layer_sizes=(128, 64),
    solver='lbfgs',
    max_iter=10_000,
    alpha=0.1,
    random_state=0  #fixed seed for the weight initialisation, so the reported scores are reproducible
)

In [38]:
#Create StratifiedKFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate. 
#The folds have equal class distribution in this case
skf = StratifiedKFold(n_splits=5)

In [39]:
%%timeit -n 1 -r 1 #time cell excecution by running it only once
#Perform the cross validation
accuracy, std = cross_validate(mlp_test, skf, train_set, train_labels)
print(f"Multi Layer Perceptron (MLP) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")



[0.55893536 0.63117871 0.59695817 0.58174905 0.57251908]
Multi Layer Perceptron (MLP) with StratifiedKFold:
Accuracy: 58.8% 
std_dev: 0.024770049273297186

4min 27s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### Testing on the test set

#### Support Vector Machine (SVM)

In [40]:
accuracy, cm = eval_performance(svm_test, train_set, train_labels, test_set, test_labels)
print(f"Support Vector Machine (SVM):\nAccuracy: {accuracy*100:.1f}% \nconfusion_matrix: \n{cm}\n")

Support Vector Machine (SVM):
Accuracy: 66.0% 
confusion_matrix: 
[[0 0 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 2 0]
 [0 0 0 ... 0 0 1]]



#### Multi Layer Perceptron (MLP)

In [41]:
%%timeit -n 1 -r 1 #time cell excecution by running it only once
accuracy, cm = eval_performance(mlp_test, train_set, train_labels, test_set, test_labels)
print(f"Multi Layer Perceptron:\nAccuracy: {accuracy*100:.1f}% \nconfusion_matrix: \n{cm}\n")

Multi Layer Perceptron:
Accuracy: 61.7% 
confusion_matrix: 
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 2 0]
 [0 0 0 ... 0 0 1]]

1min 4s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


## Strict Performance Assessment 
In this section we will do the final test with a stricter test set.
This means that every embedding in the test set is from a different origin image than every embedding from the same class in the train set.
- Collect the split from a json file which aims to achieve a 80/20 train/test split.
- Cross validate on the strict training set.
- Fit on the strict training set.
- Test on the strict testing set.

### Train test split

In [42]:
# Location of the predefined strict train/test split
json_file = Path("data/strict_train_test.json")

# Parse the JSON document straight from the file handle into a dictionary (one entry per class)
with open(json_file, "r") as file:
    strict_train_test = json.load(file)

# Report how many of the gallery's classes are covered by the strict split
print(f"Amount of classes that can be used for strict testing: {len(strict_train_test.keys())} / {len(np.unique(labels))}")

Amount of classes that can be used for strict testing: 187 / 406


Let's set up the strict train and test sets. <br>
We have a copy of the normalised embedding gallery in `data`, and the corresponding labels are in the variable `labels`. <br>
The gallery indices of the embeddings that belong to each set are already determined in the JSON file, so let's apply them to get our train and test sets:

In [43]:
train_set_strict, train_labels_strict = [], []
test_set_strict, test_labels_strict = [], []

# Which pair of lists receives the entries stored under each key of a class' split
strict_buckets = {
    "train": (train_set_strict, train_labels_strict),
    "test": (test_set_strict, test_labels_strict),
}

for class_split in strict_train_test.values():
    for part, (rows, row_labels) in strict_buckets.items():
        for entry in class_split[part]:
            #"gallery_idx" is the position of the embedding in the gallery (and in labels)
            gallery_idx = entry["gallery_idx"]
            rows.append(data[gallery_idx])
            row_labels.append(labels[gallery_idx])
        

In [44]:
# Stack the collected embedding rows into numpy arrays.
# Only the embeddings are converted; the matching label lists stay plain Python lists.
train_set_strict = np.array(train_set_strict)
test_set_strict = np.array(test_set_strict)

In [45]:
#our training set looks like this
print(f"train set\n{train_set_strict[:5]} \n\ntrain_labels \n{train_labels_strict[:5]}")

train set
[[1.4262045e-03 4.2777877e-02 3.9714985e-03 ... 1.0115436e-02
  1.0793154e-04 4.8076797e-02]
 [5.0563511e-04 2.7824637e-02 5.0977902e-03 ... 1.0805956e-02
  0.0000000e+00 5.6268979e-02]
 [0.0000000e+00 4.1397799e-02 9.7977957e-03 ... 2.7379014e-02
  0.0000000e+00 1.1463530e-02]
 [0.0000000e+00 2.4206109e-02 4.5599015e-03 ... 8.7565649e-03
  2.8734694e-05 3.5576536e-03]
 [7.8416764e-05 4.2583842e-02 1.8381702e-02 ... 1.4155695e-02
  0.0000000e+00 1.0885181e-02]] 

train_labels 
['CarrefourSmoothieAardbeiBlauweBessen', 'CarrefourSmoothieAardbeiBlauweBessen', 'TropicanaSanguinello', 'TropicanaSanguinello', 'TropicanaSanguinello']


In [46]:
#our test set looks like this
print(f"test set\n{test_set_strict[:5]} \n\ntest_labels \n{test_labels_strict[:5]}")

test set
[[7.9559709e-04 1.1600911e-02 2.4392029e-02 ... 5.4686642e-03
  2.1046337e-05 2.8375454e-02]
 [3.2297758e-04 1.6206134e-02 2.0794634e-02 ... 3.4319926e-03
  0.0000000e+00 1.5911538e-02]
 [1.1258312e-03 3.2389425e-02 1.4648197e-02 ... 1.4577022e-02
  0.0000000e+00 1.0660346e-02]
 [7.5654367e-05 2.1227609e-02 2.2356678e-02 ... 8.3332304e-03
  4.4270296e-06 1.9667687e-02]
 [1.0755257e-03 6.9134068e-05 5.3168242e-03 ... 1.9123274e-03
  4.3884869e-04 1.8454135e-03]] 

test_labels 
['CarrefourSmoothieAardbeiBlauweBessen', 'CarrefourSmoothieAardbeiBlauweBessen', 'TropicanaSanguinello', 'TropicanaSanguinello', 'MonsterTheDoctor500ml']


In [47]:
#Number of embeddings that take part in the strict split, the reference for both percentages
n_total_strict = len(train_set_strict) + len(test_set_strict)
#Percentage of those embeddings in the strict training / testing set
prop_train = (len(train_set_strict) / n_total_strict) * 100
prop_test = (len(test_set_strict) / n_total_strict) * 100
print(f"Size of the training set {len(train_set_strict)}\nThe training set contains {prop_train}% of the data\n")
print(f"Size of the test set {len(test_set_strict)}\nThe test set contains {prop_test}% of the data")

Size of the training set 852
The training set contains 74.08695652173914% of the data

Size of the test set 298
The test set contains 25.91304347826087% of the data


### Cross validation on the training set

#### Support Vector Machine (SVM)

In [48]:
#Create SVM estimator 
svm_test_strict = SVC(kernel="poly", probability=True)

In [49]:
#Create StratifiedKFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate. 
#The folds have equal class distribution in this case
skf = StratifiedKFold(n_splits=5)

In [50]:
#Perform the cross validation
accuracy, std = cross_validate(svm_test_strict, skf, train_set_strict, train_labels_strict)
print(f"Support Vector Machine (SVM) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")



[0.76608187 0.77192982 0.77058824 0.75294118 0.63529412]
Support Vector Machine (SVM) with StratifiedKFold:
Accuracy: 73.9% 
std_dev: 0.0524675338390943



#### Multi Layer Perceptron (MLP)

In [51]:
#Create MLP estimator
mlp_test_strict = MLPClassifier(
    hidden_layer_sizes=(256, 64),
    solver='lbfgs',
    max_iter=10_000,
    random_state=0  #fixed seed for the weight initialisation, so the reported scores are reproducible
)

In [52]:
#Create StratifiedKFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate. 
#The folds have equal class distribution in this case
skf = StratifiedKFold(n_splits=5)

In [53]:
#Perform the cross validation
accuracy, std = cross_validate(mlp_test_strict, skf, train_set_strict, train_labels_strict)
print(f"Multi Layer Perceptron (MLP) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")



[0.5497076  0.63157895 0.58823529 0.56470588 0.44117647]
Multi Layer Perceptron (MLP) with StratifiedKFold:
Accuracy: 55.5% 
std_dev: 0.06331966006405428



### Testing on the strict test set

#### Support Vector Machine (SVM)

In [54]:
accuracy, cm = eval_performance(
    svm_test_strict, 
    train_set_strict, 
    train_labels_strict, 
    test_set_strict, 
    test_labels_strict
)
print(f"Support Vector Machine (SVM):\nAccuracy: {accuracy*100:.1f}% \nconfusion_matrix: \n{cm}\n")

Support Vector Machine (SVM):
Accuracy: 52.7% 
confusion_matrix: 
[[1 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 0 2 1]
 [0 0 0 ... 0 0 1]]



#### Multi Layer Perceptron (MLP)

In [55]:
accuracy, cm = eval_performance(
    mlp_test_strict, 
    train_set_strict, 
    train_labels_strict, 
    test_set_strict, 
    test_labels_strict
)
print(f"Multi Layer Perceptron:\nAccuracy: {accuracy*100:.1f}% \nconfusion_matrix: \n{cm}\n")

Multi Layer Perceptron:
Accuracy: 31.9% 
confusion_matrix: 
[[1 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 0 1 0]]

