# Cross Validation & Performance Evaluation

In [8]:
import torch
import numpy as np
import sklearn
import matplotlib.pyplot as plt  
import json
from pathlib import Path

In [9]:
%matplotlib inline

## Collecting data

In [112]:
#Models that are available for testing:
!ls data/

blacklist.txt		    moco32_phase0    rotnet_phase25
class_occurences.json	    moco32_phase25   rotnet_phase50
gallery_paths_relative.txt  moco32_phase50   rotnet_phase75
gallery_paths.txt	    moco32_phase75   simclr
imgnet_pretrained	    moco64	     simclr_phase0
jigsaw			    moco64_phase0    simclr_phase25
jigsaw_phase0		    moco64_phase25   simclr_phase50
jigsaw_phase100		    moco64_phase50   simclr_phase75
jigsaw_phase25		    moco64_phase75   strict_train_test.json
jigsaw_phase50		    rotnet	     total_cross_val_log_svm.txt
jigsaw_phase75		    rotnet_phase0
moco32			    rotnet_phase100


In [11]:
#options rotnet, jigsaw, simclr, moco32, imgnet_pretrained
model_name = "moco32" #<----------Specifiy model here!

dir = Path("data/" + model_name)

embedding_gallery = torch.load(dir / "embedding_gallery.torch")
embedding_gallery_norm = torch.load(dir / "embedding_gallery_norm.torch")
labels = list()
with open(dir / "embedding_gallery_labels.txt", "r") as f:
    labels = f.read().splitlines()
print(f"Data for model {model_name} succesfully read.\nembedding_gallery size: {embedding_gallery.shape}\
\nembedding_gallery_norm size: {embedding_gallery_norm.shape}\nlabels length: {len(labels)}")

Data for model moco32 succesfully read.
embedding_gallery size: torch.Size([1643, 2048])
embedding_gallery_norm size: torch.Size([1643, 2048])
labels length: 1643


In [12]:
#Convert to numpy arrays:
embedding_gallery = embedding_gallery.numpy()
embedding_gallery_norm = embedding_gallery_norm.numpy()
labels = np.array(labels)
print(f"embedding_gallery size: {embedding_gallery.shape} and type {type(embedding_gallery)}\
\nembedding_gallery_norm size: {embedding_gallery_norm.shape} and type {type(embedding_gallery_norm)}\
\nLabels with size {labels.shape} and type {type(labels)}")

embedding_gallery size: (1643, 2048) and type <class 'numpy.ndarray'>
embedding_gallery_norm size: (1643, 2048) and type <class 'numpy.ndarray'>
Labels with size (1643,) and type <class 'numpy.ndarray'>


## Modelling

In [13]:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

Let's test the performance of a support vector machine (SVM) and a multi layer perceptron (MLP) on the entire data:

In [14]:
#Create SVM estimator 
svm_overfit = SVC(kernel="poly")

In [15]:
#Create MLP estimator
mlp_overfit = MLPClassifier(
    hidden_layer_sizes=(256, 64),
    solver='lbfgs',
    max_iter=10_000
)

In [16]:
#the dat to train/fit on 
data = embedding_gallery_norm.copy()
#labels are already in a numpy array called "labels"

Training/fitting on the data and labels:

In [17]:
#train SVM
svm_overfit.fit(data, labels)

SVC(kernel='poly')

In [18]:
#train MLP
mlp_overfit.fit(data, labels)

MLPClassifier(hidden_layer_sizes=(256, 64), max_iter=10000, solver='lbfgs')

Evaluate on the same data (this is overfitting, but should yield high results and verify that the data is correct)

In [19]:
#evaluate SVM
accuracy = svm_overfit.score(data, labels) 
print(f"Accuracy svm: {accuracy*100:.1f}%\n")

Accuracy svm: 99.9%



In [20]:
#evaluate MLP
accuracy = mlp_overfit.score(data, labels) 
print(f"Accuracy mlp: {accuracy*100:.1f}%\n")

Accuracy mlp: 100.0%



## cross validation

In [21]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold, ShuffleSplit, StratifiedShuffleSplit

In [22]:
def cross_validate(classifier, fold, data, labels):
    """
    Performs a cross validation for the classifier with a train test split
    according to fold generator.
    
    Args:
        classifier: the classifier to cross validate.
        fold: the generator to use for train-test splits.
        data: the embedding gallery to train/test on.
        labels: the ground truth labels per row for the gallery.
        
    Returns:
        The mean accuracy score from the cross validation rounds and
        the standard deviation of the scores.
    """
    #cross validation generator
    cv = fold.split(data, y=labels)

    #calc cross validation
    scores = cross_val_score(
        classifier,   #the estimator/classifier
        data,         #data 
        y=labels,     #targets
        cv=cv         #generator for array indicis in data that select a certain split out of "data"
    )
    print(scores)
    accuracy = scores.mean()
    std = scores.std()
    return accuracy, std

Now let's create an svm and mlp that we don't overfit. We will test it's ability to generalize by performing cross validation.

In [23]:
#Create SVM estimator 
svm_cross = SVC(kernel="poly")

In [24]:
#Create MLP estimator
mlp_cross = MLPClassifier(
    hidden_layer_sizes=(256, 64),
    solver='lbfgs',
    max_iter=10_000
)

Create cross validation generators for train-test splits:

In [25]:
iterators = { 
    "KFold" : KFold(n_splits=5), 
    "StratifiedKFold" : StratifiedKFold(n_splits=5), 
    "ShuffleSplit" : ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
}

for k in iterators.keys():
    print(f"Generator/iterator used: {k}")
    iterator = iterators[k]
    for i, (train_index, test_index) in enumerate(iterator.split(data, y=labels)):
        print(f"Fold {i}:")
        print(f"  Train: index[:5] {train_index[:5]} with shape{train_index.shape}")
        print(f"  Test:  index[:5] {test_index[:5]} with shape{test_index.shape}")
    print()

Generator/iterator used: KFold
Fold 0:
  Train: index[:5] [329 330 331 332 333] with shape(1314,)
  Test:  index[:5] [0 1 2 3 4] with shape(329,)
Fold 1:
  Train: index[:5] [0 1 2 3 4] with shape(1314,)
  Test:  index[:5] [329 330 331 332 333] with shape(329,)
Fold 2:
  Train: index[:5] [0 1 2 3 4] with shape(1314,)
  Test:  index[:5] [658 659 660 661 662] with shape(329,)
Fold 3:
  Train: index[:5] [0 1 2 3 4] with shape(1315,)
  Test:  index[:5] [987 988 989 990 991] with shape(328,)
Fold 4:
  Train: index[:5] [0 1 2 3 4] with shape(1315,)
  Test:  index[:5] [1315 1316 1317 1318 1319] with shape(328,)

Generator/iterator used: StratifiedKFold
Fold 0:
  Train: index[:5] [1 2 3 5 6] with shape(1314,)
  Test:  index[:5] [ 0  4  8 11 20] with shape(329,)
Fold 1:
  Train: index[:5] [0 2 3 4 6] with shape(1314,)
  Test:  index[:5] [ 1  5 12 16 22] with shape(329,)
Fold 2:
  Train: index[:5] [0 1 3 4 5] with shape(1314,)
  Test:  index[:5] [ 2  6 13 17 23] with shape(329,)
Fold 3:
  Train: 



In [26]:
#Create KFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate
kf = KFold(n_splits=5)

In [27]:
#Create StratifiedKFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate. 
#The folds have equal class distribution in this case
skf = StratifiedKFold(n_splits=5)

In [28]:
#Create ShuffleSplit generator
#samples are shuffled and then split up in a test and train set
ss = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)

Perform the cross validations on svm and mlp

### SVM cross validation

In [30]:
#with KFold
accuracy, std = cross_validate(svm_cross, kf, data, labels)
print(f"Support Vector Machine (SVM) with KFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

[0.         0.         0.01823708 0.01829268 0.        ]
Support Vector Machine (SVM) with KFold:
Accuracy: 0.7% 
std_dev: 0.008947945740517962



In [31]:
#with StratifiedKFold
accuracy, std = cross_validate(svm_cross, skf, data, labels)
print(f"Support Vector Machine (SVM) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")



[0.68085106 0.68389058 0.65957447 0.69817073 0.66158537]
Support Vector Machine (SVM) with StratifiedKFold:
Accuracy: 67.7% 
std_dev: 0.014502403803775064



In [32]:
#with ShuffleSplit
accuracy, std = cross_validate(svm_cross, ss, data, labels)
print(f"Support Vector Machine (SVM) with ShuffleSplit:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

[0.63829787 0.59878419 0.66869301 0.61702128 0.66261398]
Support Vector Machine (SVM) with ShuffleSplit:
Accuracy: 63.7% 
std_dev: 0.026539671675310714



### MLP cross validation

In [33]:
#with KFold
accuracy, std = cross_validate(mlp_cross, kf, data, labels)
print(f"Multi Layer Perceptron (MLP) with KFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

[0.         0.         0.01519757 0.01829268 0.00609756]
Multi Layer Perceptron (MLP) with KFold:
Accuracy: 0.8% 
std_dev: 0.007606965321960141



In [34]:
#with StratifiedKFold
accuracy, std = cross_validate(mlp_cross, skf, data, labels)
print(f"Multi Layer Perceptron (MLP) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")



[0.50455927 0.50151976 0.49240122 0.53353659 0.5       ]
Multi Layer Perceptron (MLP) with StratifiedKFold:
Accuracy: 50.6% 
std_dev: 0.014145849044475375



In [35]:
#with ShuffleSplit
accuracy, std = cross_validate(mlp_cross, ss, data, labels)
print(f"Multi Layer Perceptron (MLP) with ShuffleSplit:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")

[0.49544073 0.43465046 0.46808511 0.44984802 0.49848024]
Multi Layer Perceptron (MLP) with ShuffleSplit:
Accuracy: 46.9% 
std_dev: 0.024961051875559654



## Performance Assessment 
In this section we will do the final test.
- Split up the data from cornershop in train (80%) and test (20%) data.
- Cross validate on the training set.
- Fit on the training set.
- Test on the testing set.

In [36]:
from sklearn.metrics import confusion_matrix

In [133]:
def eval_performance(classifier, train_set, train_labels, test_set, test_labels):
    """
    Evaluate performance for the given estimator/classifier.
    Performance is measured by:
        -Training on the train_set and train_labels
        -Measuring accuracy for classification on the test_set with the test_labels
        -Calculate a confusion matrix
    
    Args:
        classifier : the classifier to evaluate.
        train_set (np.ndarray): the data to train on.
        train_labels (np.ndarray): ground truth for training data.
        test_set (np.ndarray): the data to test on.
        test_labels (np.ndarray): ground truth for testing data.
        
    Returns:
        accuracy : accuracy score from predictions on test set.
        cm: The confusion matrix.
    """
    #train on training set
    classifier.fit(train_set, train_labels) #retrain on training set
    
    #Calculate accuracy
    accuracy = classifier.score(test_set, test_labels) #calc acc score on testing set
    
    #Calculate confusion matrix
    y_true = test_labels.copy() #Ground truth
    y_pred = classifier.predict(test_set) #Predictions made by classifier
    proba = classifier.predict_proba(test_set)
    cls = classifier.classes_
    print(f"shape proba is {proba.shape}\n examples {proba[:4]}")
    print(f"sgapr classes {cls.shape}, examples {cls[:4]}")
    print(f"shape test is {test_set.shape}\n examples {test_set[:4]}")
    print(f"amount of classes in test set {len(np.unique(test_labels))}\n")
    print(f"shape y_pred is {y_pred.shape}\n examples {y_pred[:4]}")
    #compute confusion matrix
    cm = confusion_matrix(y_true=y_true, y_pred=y_pred)
    #predicted class is the column nr, ground truth is the row nr
    #correct classifications can be found on the diagonal

    return accuracy, cm

### Train test split

In [38]:
train_set, test_set, train_labels, test_labels= train_test_split(
    data,          #data
    labels,        #targets
    test_size=0.2, #20% test set, 80% train set
    random_state=0 #for reproducable results of the random shuffling
)

In [39]:
#our training set looks like this
print(f"train set\n{train_set[:5]} \n\ntrain_labels \n{train_labels[:5]}")

train set
[[0.00321136 0.00841789 0.05054085 ... 0.03530409 0.00102438 0.00746608]
 [0.01080993 0.00357842 0.01950858 ... 0.0003655  0.06982455 0.01050694]
 [0.00248981 0.01997812 0.01807643 ... 0.00262832 0.00243357 0.03383429]
 [0.0145637  0.01752637 0.07955744 ... 0.0202774  0.02037295 0.00243272]
 [0.         0.00439621 0.06531136 ... 0.01296624 0.00114474 0.01572978]] 

train_labels 
['MayTeaFramboos1L' 'MonsterPipelinePunch500ml' 'AquariusOrangeFles'
 'MonsterUltra' 'PepsiMax']


In [40]:
#our test set looks like this
print(f"test set\n{test_set[:5]} \n\ntest_labels \n{test_labels[:5]}")

test set
[[0.0005955  0.00680962 0.05895904 ... 0.00571111 0.00180996 0.02855435]
 [0.00060103 0.00371083 0.035034   ... 0.00371111 0.00138724 0.        ]
 [0.00717704 0.04115137 0.00825775 ... 0.00291343 0.00351391 0.00569196]
 [0.00124589 0.00510044 0.0362666  ... 0.03395296 0.00128892 0.02288807]
 [0.00989329 0.00909252 0.0198362  ... 0.00070526 0.00130978 0.04417973]] 

test_labels 
['InnocentOrangeWithBits' 'CapriSun' 'MonsterPunchEnergy500ml'
 'NaluOriginal6x350ml' 'StellaBlik50cl']


In [41]:
#Proportion of training data
prop_train = (len(train_set)/ len(embedding_gallery_norm) ) * 100
#Proportion of testing data
prop_test = (len(test_set)/ len(embedding_gallery_norm) ) * 100
print(f"Size of the training set {len(train_set)}\nThe training set contains {prop_train}% of the data\n")
print(f"Size of the test set {len(test_set)}\nThe test set contains {prop_test}% of the data")

Size of the training set 1314
The training set contains 79.97565429093122% of the data

Size of the test set 329
The test set contains 20.02434570906878% of the data


### Cross validation on the training set

#### Support Vector Machine (SVM)

In [120]:
#Create SVM estimator 
svm_test = SVC(kernel="poly", probability=True)

In [121]:
#Create StratifiedKFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate. 
#The folds have equal class distribution in this case
skf = StratifiedKFold(n_splits=5)

In [122]:
#Perform the cross validation
accuracy, std = cross_validate(svm_test, skf, train_set, train_labels)
print(f"Support Vector Machine (SVM) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")



[0.58174905 0.65019011 0.61977186 0.62737643 0.63358779]
Support Vector Machine (SVM) with StratifiedKFold:
Accuracy: 62.3% 
std_dev: 0.02272240089381588



#### Multi Layer Perceptron (MLP)

In [45]:
#Create MLP estimator
mlp_test = MLPClassifier(
    hidden_layer_sizes=(256, 64),
    solver='lbfgs',
    max_iter=10_000
)

In [46]:
#Create StratifiedKFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate. 
#The folds have equal class distribution in this case
skf = StratifiedKFold(n_splits=5)

In [125]:
#Perform the cross validation
accuracy, std = cross_validate(mlp_test, skf, train_set, train_labels)
print(f"Multi Layer Perceptron (MLP) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")



KeyboardInterrupt: 

### Testing on the test set

#### Suport Vector Machine (SVM)

In [134]:
accuracy, cm = eval_performance(svm_test, train_set, train_labels, test_set, test_labels)
print(f"Support Vector Machine (SVM):\nAccuracy: {accuracy*100:.1f}% \nconfusion_matrix: \n{cm}\n")

shape proba is (329, 384)
 examples [[0.00376352 0.00046783 0.00797592 ... 0.00160062 0.00225195 0.00304922]
 [0.00303699 0.00033909 0.01090303 ... 0.00249465 0.00233734 0.00343747]
 [0.00169179 0.00045987 0.0063473  ... 0.0043278  0.00242541 0.00285054]
 [0.00261415 0.00041635 0.00864764 ... 0.00185096 0.00236703 0.00343135]]
sgapr classes (384,), examples ['7upFree' '7upLemon' '7upMojito' 'AADrink']
shape test is (329, 2048)
 examples [[0.0005955  0.00680962 0.05895904 ... 0.00571111 0.00180996 0.02855435]
 [0.00060103 0.00371083 0.035034   ... 0.00371111 0.00138724 0.        ]
 [0.00717704 0.04115137 0.00825775 ... 0.00291343 0.00351391 0.00569196]
 [0.00124589 0.00510044 0.0362666  ... 0.03395296 0.00128892 0.02288807]]
amount of classes in test set 194

shape y_pred is (329,)
 examples ['InnocentOrangeWithBits' 'CapriSun' 'MonsterPunchEnergy500ml'
 'NaluOriginal6x350ml']
Support Vector Machine (SVM):
Accuracy: 63.8% 
confusion_matrix: 
[[0 0 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 [0 0 0

#### Multi Layer Perceptron (MLP)

In [49]:
accuracy, cm = eval_performance(mlp_test, train_set, train_labels, test_set, test_labels)
print(f"Multi Layer Perceptron:\nAccuracy: {accuracy*100:.1f}% \nconfusion_matrix: \n{cm}\n")

Multi Layer Perceptron:
Accuracy: 49.8% 
confusion_matrix: 
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 0 1 0]]



## Strict Peformance Assessment 
In this section we will do the final test with a sticter test set.
This means that every embedding in the test set is from a different origin image than every embedding from the same class in the train set.
- Collect the split from a json file which aims to achieve a 80/20 train/test split.
- Cross validate on the strict training set.
- Fit on the strict training set.
- Test on the strict testing set.

### Train test split

In [77]:
json_file = Path("data/strict_train_test.json")
# Read the JSON file
with open(json_file, "r") as file:
    json_data = file.read()

# Convert JSON to dictionary
strict_train_test = json.loads(json_data)

# Access the dictionary
print(f"Amount of classes that can be used for strict testing: {len(strict_train_test.keys())} / {len(np.unique(labels))}")

Amount of classes that can be used for strict testing: 187 / 406


Lets set up the strict test and train set <br>
We have a copy of embedding gallery norm in `data` and the corresponding labels are in the variable `labels`. <br>
We have the indicis of embeddings in the gallery already determined in the json file, let's apply them to get our train and test set:

In [79]:
train_set_strict = []
train_labels_strict = [] 
test_set_strict = []
test_labels_strict = []
for cls in strict_train_test.keys():
    #Collect train queries
    train_queries = strict_train_test[cls]["train"]
    for query in train_queries:
        idx = query["gallery_idx"]
        train_query = data[idx]
        train_label = labels[idx]
        train_set_strict.append(train_query)
        train_labels_strict.append(train_label)
    
    #Collect test queries
    test_queries = strict_train_test[cls]["test"]
    for query in test_queries:
        idx = query["gallery_idx"]
        test_query = data[idx]
        test_label = labels[idx]
        test_set_strict.append(test_query)
        test_labels_strict.append(test_label)
        

In [80]:
train_set_strict = np.array(train_set_strict)
test_set_strict = np.array(test_set_strict)

In [81]:
#our training set looks like this
print(f"train set\n{train_set_strict[:5]} \n\ntrain_labels \n{train_labels_strict[:5]}")

train set
[[6.9233747e-03 2.2412590e-03 5.3472899e-02 ... 5.9745912e-03
  1.8873550e-03 2.1909457e-02]
 [2.2041237e-03 6.8761426e-04 4.1907985e-02 ... 2.2204514e-03
  2.4847253e-03 1.9992612e-02]
 [6.3734045e-03 6.6076760e-04 3.6449816e-02 ... 5.0076423e-03
  3.7306603e-03 3.5878073e-03]
 [4.5877248e-03 2.6991626e-03 4.3812297e-02 ... 7.1394523e-03
  6.0418011e-03 3.2635011e-02]
 [7.0087109e-03 8.5940241e-04 7.4370176e-02 ... 1.4216707e-04
  1.4606867e-05 1.2442449e-02]] 

train_labels 
['CarrefourSmoothieAardbeiBlauweBessen', 'CarrefourSmoothieAardbeiBlauweBessen', 'TropicanaSanguinello', 'TropicanaSanguinello', 'TropicanaSanguinello']


In [82]:
#our test set looks like this
print(f"test set\n{test_set_strict[:5]} \n\ntest_labels \n{test_labels_strict[:5]}")

test set
[[6.3076401e-03 9.9129621e-03 4.2072132e-02 ... 7.4273129e-03
  3.3059330e-03 1.3093212e-02]
 [2.1028033e-02 3.6752366e-03 5.0158482e-02 ... 1.2454252e-02
  4.8663453e-03 2.7165789e-02]
 [2.3179527e-03 5.5524646e-03 4.0926971e-02 ... 1.9700017e-02
  2.3614909e-02 1.4198274e-02]
 [1.5844429e-03 6.2769912e-05 9.0044715e-02 ... 3.0723400e-02
  3.1709992e-03 3.5455194e-03]
 [7.7588315e-04 3.6051134e-03 3.7311916e-03 ... 7.6465523e-03
  5.2548744e-02 9.1698179e-03]] 

test_labels 
['CarrefourSmoothieAardbeiBlauweBessen', 'CarrefourSmoothieAardbeiBlauweBessen', 'TropicanaSanguinello', 'TropicanaSanguinello', 'MonsterTheDoctor500ml']


In [83]:
#Proportion of training data
prop_train = (len(train_set_strict)/ (len(train_set_strict) + len(test_set_strict)) ) * 100
#Proportion of testing data
prop_test = (len(test_set_strict)/ (len(train_set_strict) + len(test_set_strict)) ) * 100
print(f"Size of the training set {len(train_set_strict)}\nThe training set contains {prop_train}% of the data\n")
print(f"Size of the test set {len(test_set_strict)}\nThe test set contains {prop_test}% of the data")

Size of the training set 852
The training set contains 74.08695652173914% of the data

Size of the test set 298
The test set contains 25.91304347826087% of the data


### Cross validation on the training set

#### Support Vector Machine (SVM)

In [85]:
#Create SVM estimator 
svm_test_strict = SVC(kernel="poly")

In [86]:
#Create StratifiedKFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate. 
#The folds have equal class distribution in this case
skf = StratifiedKFold(n_splits=5)

In [87]:
#Perform the cross validation
accuracy, std = cross_validate(svm_test_strict, skf, train_set_strict, train_labels_strict)
print(f"Support Vector Machine (SVM) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")



[0.75438596 0.73684211 0.75294118 0.75882353 0.63529412]
Support Vector Machine (SVM) with StratifiedKFold:
Accuracy: 72.8% 
std_dev: 0.04677680599436755



#### Multi Layer Perceptron (MLP)

In [92]:
#Create MLP estimator
mlp_test_strict = MLPClassifier(
    hidden_layer_sizes=(256, 64),
    solver='lbfgs',
    max_iter=10_000
)

In [93]:
#Create StratifiedKFold generator
#5 folds with 1/5 test data and 4/5 training data to cross validate. 
#The folds have equal class distribution in this case
skf = StratifiedKFold(n_splits=5)

In [94]:
#Perform the cross validation
accuracy, std = cross_validate(mlp_test_strict, skf, train_set_strict, train_labels_strict)
print(f"Multi Layer Perceptron (MLP) with StratifiedKFold:\nAccuracy: {accuracy*100:.1f}% \nstd_dev: {std}\n")



[0.5380117  0.59064327 0.61176471 0.58235294 0.42941176]
Multi Layer Perceptron (MLP) with StratifiedKFold:
Accuracy: 55.0% 
std_dev: 0.06511183472661425



### Testing on the strict test set

#### Support Vector Machine (SVM)

In [96]:
accuracy, cm = eval_performance(
    svm_test_strict, 
    train_set_strict, 
    train_labels_strict, 
    test_set_strict, 
    test_labels_strict
)
print(f"Support Vector Machine (SVM):\nAccuracy: {accuracy*100:.1f}% \nconfusion_matrix: \n{cm}\n")

Support Vector Machine (SVM):
Accuracy: 50.7% 
confusion_matrix: 
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 1 0 0]
 [0 0 0 ... 0 2 1]
 [0 0 0 ... 0 0 1]]



#### Multi Layer Perceptron (MLP)

In [97]:
accuracy, cm = eval_performance(
    mlp_test_strict, 
    train_set_strict, 
    train_labels_strict, 
    test_set_strict, 
    test_labels_strict
)
print(f"Multi Layer Perceptron:\nAccuracy: {accuracy*100:.1f}% \nconfusion_matrix: \n{cm}\n")

Multi Layer Perceptron:
Accuracy: 30.2% 
confusion_matrix: 
[[1 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 2 1]
 [0 0 0 ... 0 0 1]]

