In [1]:
# Compare several ProtoNN architectures against the k-NN algorithm

# import ProtoNN code
import protoNN as protograph
import protoNNTrainer as prototrainer
import utils as utils
# import general modules
import numpy as np
import torch
import math
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
import gc

# Location of the USPS HDF5 file; adjust this path to your local checkout.
usps_path = '../../../data_set/usps.h5'
X, Y = utils.handleUSPS(usps_path)

# Wrap the raw arrays in the project's dataset class used by every cell below.
dataset = utils.CustomDatasetProtoNN(X, Y, rescaling=True, binary=True)

  from .autonotebook import tqdm as notebook_tqdm


Dataset input rescaled
Dataset loaded, input space: (7291, 256) , output space: (7291, 1)
Dataset reshaped, input space: (7291, 256) , output space: (7291, 2)


In [2]:
# Cross-validation setup shared by the experiment cells below.
k_folds = 5  # number of folds; 5 or 10 are the usual choices

# k-NN baseline that the ProtoNN models are compared against
knn = KNeighborsClassifier(n_neighbors=13)

# Shuffled stratified splitter with a fixed random_state
stratifiedk_fold = StratifiedKFold(shuffle=True, random_state=1, n_splits=k_folds)

print("Stratified KFOLD INITIALIZED")

Stratified KFOLD INITIALIZED


In [3]:
# Fixed ProtoNN hyperparameters, reused unchanged by every experiment cell.

# Regularization strength for W, B and Z: HYPERPARAMETER
regW = regB = regZ = 0

# Sparsity of W, B and Z: HYPERPARAMETER, bounded by the memory constraint
sparcityW = sparcityB = sparcityZ = 1

# Loss type: 'l2'
lossType = 'l2'
# Initial learning rate handed to the optimizer: HYPERPARAMETER
learningRate = 0.1

# TRAINING PHASE -- all four values must be defined before training
batchsize = 100
epochs = 100
printStep = 100
valStep = 100

# Print the training variables (label/value pairs, space separated by print)
training_summary = [
    "\nbatch size:\t", batchsize,
    "\nepochs:\t\t", epochs,
    "\nprint step:\t", printStep,
    "\nevaluation step:", valStep,
]
print(*training_summary)


batch size:	 100 
epochs:		 100 
print step:	 100 
evaluation step: 100


In [4]:
# FOR SMALL SIZE
# Target model sizes in KB; one ProtoNN architecture is derived for each entry.
size = [2, 4, 8]

# For fold results
test_case = len(size)
results = np.zeros([k_folds, test_case])  # accuracy for each (fold, size) pair
structure = np.zeros([test_case, 2])
# structure [0]: numProto, [1]: prjdim

# Start print
print('--------------------------------')
# K-fold Cross Validation model evaluation
# Y supplies the class labels that StratifiedKFold stratifies the folds on.
for fold, (train_ids, test_ids) in enumerate(stratifiedk_fold.split(dataset.data, Y)):

    # Print
    print(f'FOLD {fold}')
    print('--------------------------------')

    # Sample elements randomly from a given list of ids, no replacement.
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)

    # Define data loaders for training and testing data in this fold.
    # batch_size equals the fold size, so one batch holds the whole split.
    trainloader = torch.utils.data.DataLoader(
                      dataset,
                      batch_size=train_ids.shape[0],
                      sampler=train_subsampler)
    testloader = torch.utils.data.DataLoader(
                      dataset,
                      batch_size=test_ids.shape[0],
                      sampler=test_subsampler)

    train_inputs, train_targets = next(iter(trainloader))
    test_inputs, test_targets = next(iter(testloader))

    # SET FIXED DIMENSION
    input_dimension = train_inputs.shape[1]
    numOutputLabels = dataset.numClasses

    for index_size, size_limit in enumerate(size):

        # projection_dimension: HYPERPARAMETER
        projection_dimension = math.trunc(utils.max_projection_dimensions(size_limit, numOutputLabels, input_dimension, numOutputLabels))
        # numPrototypes: HYPERPARAMETER
        numPrototypes = math.trunc(utils.max_number_prototypes(size_limit, projection_dimension, input_dimension, numOutputLabels))
        structure[index_size][0] = numPrototypes
        structure[index_size][1] = projection_dimension

        # print structure of ProtoNN
        print("num prototypes:\t", numPrototypes,
              "\nporjection dim:\t", projection_dimension)

        # initialize W as random
        # NOTE(review): no seed is set in this cell, so W (and the sampler
        # order) presumably differ between runs -- confirm whether that is intended.
        W = np.random.rand(input_dimension, projection_dimension)

        # initialize protoNN
        protoNNmodel = protograph.ProtoNN(input_dimension, projection_dimension,
                                     numPrototypes, numOutputLabels,
                                     gamma=0.05, W=W)
        protoNNmodel.initializePrototypesAndGamma(train_inputs, train_targets,
                                     input_W=W, overrideGamma=True)
        protoNNtrainer = prototrainer.ProtoNNTrainer(protoNNmodel,
                                     sparcityW, sparcityB, sparcityZ,
                                     regW, regB, regZ,
                                     learningRate,
                                     lossType)
        current_size = protoNNtrainer.getModelSize()
        print("Model size:", current_size, "KB")
        result_dic = protoNNtrainer.train(
                              train_inputs.float(),
                              test_inputs.float(),
                              train_targets.float(),
                              test_targets.float(),
                              batchsize,
                              epochs,
                              printStep,
                              valStep,
                              verbose=False, history=False)
        correct = result_dic.get('correctPredictions').item()
        total = result_dic.get('totalPredictions')
        accuracy = 100 * correct / total

        # Print accuracy (the %d only truncates the display; the float is stored)
        print('Accuracy for fold %d with m = %f,d = %d: %d %%' % (fold, numPrototypes, projection_dimension, accuracy))
        results[fold][index_size] = accuracy

        # Release the model before building the next one
        del protoNNmodel
        del protoNNtrainer
        gc.collect()

    print('--------------------------------')

# Print fold results
print(f'K-FOLD CROSS VALIDATION RESULTS FOR {k_folds}')
print('--------------------------------')
# Average over the folds: one mean accuracy per tested size
results = np.sum(results, axis = 0) / k_folds
print("Average Accuracy")
for i in range (0, test_case):
    # size[i], not size[0]: each row must be labelled with its own size budget
    print("ProtoNN with size", size[i], "KB, prototype", structure[i][0], ", prj dim", structure[i][1],", Accuracy: \t:", results[i])

--------------------------------
FOLD 0
--------------------------------
num prototypes:	 81 
porjection dim:	 1
Model size: 1.996 KB
Accuracy for fold 0 with m = 81.000000,d = 1: 99 %
num prototypes:	 46 
porjection dim:	 3
Model size: 3.992 KB
Accuracy for fold 0 with m = 46.000000,d = 3: 98 %
num prototypes:	 23 
porjection dim:	 7
Model size: 7.996 KB
Accuracy for fold 0 with m = 23.000000,d = 7: 98 %
--------------------------------
FOLD 1
--------------------------------
num prototypes:	 81 
porjection dim:	 1
Model size: 1.996 KB
Accuracy for fold 1 with m = 81.000000,d = 1: 98 %
num prototypes:	 46 
porjection dim:	 3
Model size: 3.992 KB
Accuracy for fold 1 with m = 46.000000,d = 3: 98 %
num prototypes:	 23 
porjection dim:	 7
Model size: 7.996 KB
Accuracy for fold 1 with m = 23.000000,d = 7: 98 %
--------------------------------
FOLD 2
--------------------------------
num prototypes:	 81 
porjection dim:	 1
Model size: 1.996 KB
Accuracy for fold 2 with m = 81.000000,d = 1: 98

In [5]:
# FOR 16 KB
size = [16]
# Architectures to evaluate; each entry is [projection dim, number of prototypes]
size_parameters = [[15, 9], [10, 120], [10, 60], [12, 60]] # dim and proto
test_case = len(size_parameters)

# For fold results
results = np.zeros([k_folds, test_case])  # accuracy for each (fold, architecture) pair
structure = np.zeros([test_case, 2])
# structure [0]: numProto, [1]: prjdim

# Start print
print('--------------------------------')
# K-fold Cross Validation model evaluation
# Y supplies the class labels that StratifiedKFold stratifies the folds on.
for fold, (train_ids, test_ids) in enumerate(stratifiedk_fold.split(dataset.data, Y)):

    # Print
    print(f'FOLD {fold}')
    print('--------------------------------')

    # Sample elements randomly from a given list of ids, no replacement.
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)

    # Define data loaders for training and testing data in this fold.
    # batch_size equals the fold size, so one batch holds the whole split.
    trainloader = torch.utils.data.DataLoader(
                      dataset,
                      batch_size=train_ids.shape[0],
                      sampler=train_subsampler)
    testloader = torch.utils.data.DataLoader(
                      dataset,
                      batch_size=test_ids.shape[0],
                      sampler=test_subsampler)

    train_inputs, train_targets = next(iter(trainloader))
    test_inputs, test_targets = next(iter(testloader))

    # SET FIXED DIMENSION
    input_dimension = train_inputs.shape[1]
    numOutputLabels = dataset.numClasses

    for index_size, couple_elem in enumerate(size_parameters):
        # projection_dimension: HYPERPARAMETER
        projection_dimension = couple_elem[0]
        # numPrototypes: HYPERPARAMETER
        numPrototypes = couple_elem[1]

        structure[index_size][0] = numPrototypes
        structure[index_size][1] = projection_dimension

        # print structure of ProtoNN
        print("num prototypes:\t", numPrototypes,
              "\nporjection dim:\t", projection_dimension)

        # initialize W as random
        # NOTE(review): no seed is set in this cell, so W (and the sampler
        # order) presumably differ between runs -- confirm whether that is intended.
        W = np.random.rand(input_dimension, projection_dimension)

        # initialize protoNN
        protoNNmodel = protograph.ProtoNN(input_dimension, projection_dimension,
                                     numPrototypes, numOutputLabels,
                                     gamma=0.05, W=W)
        protoNNmodel.initializePrototypesAndGamma(train_inputs, train_targets,
                                     input_W=W, overrideGamma=True)
        protoNNtrainer = prototrainer.ProtoNNTrainer(protoNNmodel,
                                     sparcityW, sparcityB, sparcityZ,
                                     regW, regB, regZ,
                                     learningRate,
                                     lossType)
        current_size = protoNNtrainer.getModelSize()
        print("Model size:", current_size, "KB")
        result_dic = protoNNtrainer.train(
                              train_inputs.float(),
                              test_inputs.float(),
                              train_targets.float(),
                              test_targets.float(),
                              batchsize,
                              epochs,
                              printStep,
                              valStep,
                              verbose=False, history=False)
        correct = result_dic.get('correctPredictions').item()
        total = result_dic.get('totalPredictions')
        accuracy = 100 * correct / total

        # Print accuracy (the %d only truncates the display; the float is stored)
        print('Accuracy for fold %d with m = %f,d = %d: %d %%' % (fold, numPrototypes, projection_dimension, accuracy))
        results[fold][index_size] = accuracy

        # Release the model before building the next one
        del protoNNmodel
        del protoNNtrainer
        gc.collect()

    print('--------------------------------')

# Print fold results
print(f'K-FOLD CROSS VALIDATION RESULTS FOR {k_folds}')
print('--------------------------------')
# Average over the folds: one mean accuracy per tested architecture
results = np.sum(results, axis = 0) / k_folds
print("Average Accuracy")
for i in range (0, test_case):
    print("ProtoNN with size", size[0], "KB, prototype", structure[i][0], ", prj dim", structure[i][1],", Accuracy: \t:", results[i])

--------------------------------
FOLD 0
--------------------------------
num prototypes:	 9 
porjection dim:	 15
Model size: 15.972 KB
Accuracy for fold 0 with m = 9.000000,d = 15: 98 %
num prototypes:	 120 
porjection dim:	 10
Model size: 16.0 KB
Accuracy for fold 0 with m = 120.000000,d = 10: 99 %
num prototypes:	 60 
porjection dim:	 10
Model size: 13.12 KB
Accuracy for fold 0 with m = 60.000000,d = 10: 99 %
num prototypes:	 60 
porjection dim:	 12
Model size: 15.648 KB
Accuracy for fold 0 with m = 60.000000,d = 12: 99 %
--------------------------------
FOLD 1
--------------------------------
num prototypes:	 9 
porjection dim:	 15
Model size: 15.972 KB
Accuracy for fold 1 with m = 9.000000,d = 15: 99 %
num prototypes:	 120 
porjection dim:	 10
Model size: 16.0 KB
Accuracy for fold 1 with m = 120.000000,d = 10: 99 %
num prototypes:	 60 
porjection dim:	 10
Model size: 13.12 KB
Accuracy for fold 1 with m = 60.000000,d = 10: 99 %
num prototypes:	 60 
porjection dim:	 12
Model size: 15

In [6]:
# FOR 32 KB
size = [32]
# Architectures to evaluate; each entry is [projection dim, number of prototypes]
size_parameters = [[25, 59], [15, 240], [15, 120], [15, 50], [10, 450]] # dim and proto
test_case = len(size_parameters)

# For fold results
results = np.zeros([k_folds, test_case])  # accuracy for each (fold, architecture) pair
structure = np.zeros([test_case, 2])
# structure [0]: numProto, [1]: prjdim

# Start print
print('--------------------------------')
# K-fold Cross Validation model evaluation
# Y supplies the class labels that StratifiedKFold stratifies the folds on.
for fold, (train_ids, test_ids) in enumerate(stratifiedk_fold.split(dataset.data, Y)):

    # Print
    print(f'FOLD {fold}')
    print('--------------------------------')

    # Sample elements randomly from a given list of ids, no replacement.
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)

    # Define data loaders for training and testing data in this fold.
    # batch_size equals the fold size, so one batch holds the whole split.
    trainloader = torch.utils.data.DataLoader(
                      dataset,
                      batch_size=train_ids.shape[0],
                      sampler=train_subsampler)
    testloader = torch.utils.data.DataLoader(
                      dataset,
                      batch_size=test_ids.shape[0],
                      sampler=test_subsampler)

    train_inputs, train_targets = next(iter(trainloader))
    test_inputs, test_targets = next(iter(testloader))

    # SET FIXED DIMENSION
    input_dimension = train_inputs.shape[1]
    numOutputLabels = dataset.numClasses

    for index_size, couple_elem in enumerate(size_parameters):
        # projection_dimension: HYPERPARAMETER
        projection_dimension = couple_elem[0]
        # numPrototypes: HYPERPARAMETER
        numPrototypes = couple_elem[1]

        structure[index_size][0] = numPrototypes
        structure[index_size][1] = projection_dimension

        # print structure of ProtoNN
        print("num prototypes:\t", numPrototypes,
              "\nporjection dim:\t", projection_dimension)

        # initialize W as random
        # NOTE(review): no seed is set in this cell, so W (and the sampler
        # order) presumably differ between runs -- confirm whether that is intended.
        W = np.random.rand(input_dimension, projection_dimension)

        # initialize protoNN
        protoNNmodel = protograph.ProtoNN(input_dimension, projection_dimension,
                                     numPrototypes, numOutputLabels,
                                     gamma=0.05, W=W)
        protoNNmodel.initializePrototypesAndGamma(train_inputs, train_targets,
                                     input_W=W, overrideGamma=True)
        protoNNtrainer = prototrainer.ProtoNNTrainer(protoNNmodel,
                                     sparcityW, sparcityB, sparcityZ,
                                     regW, regB, regZ,
                                     learningRate,
                                     lossType)
        current_size = protoNNtrainer.getModelSize()
        print("Model size:", current_size, "KB")
        result_dic = protoNNtrainer.train(
                              train_inputs.float(),
                              test_inputs.float(),
                              train_targets.float(),
                              test_targets.float(),
                              batchsize,
                              epochs,
                              printStep,
                              valStep,
                              verbose=False, history=False)
        correct = result_dic.get('correctPredictions').item()
        total = result_dic.get('totalPredictions')
        accuracy = 100 * correct / total

        # Print accuracy (the %d only truncates the display; the float is stored)
        print('Accuracy for fold %d with m = %f,d = %d: %d %%' % (fold, numPrototypes, projection_dimension, accuracy))
        results[fold][index_size] = accuracy

        # Release the model before building the next one
        del protoNNmodel
        del protoNNtrainer
        gc.collect()

    print('--------------------------------')

# Print fold results
print(f'K-FOLD CROSS VALIDATION RESULTS FOR {k_folds}')
print('--------------------------------')
# Average over the folds: one mean accuracy per tested architecture
results = np.sum(results, axis = 0) / k_folds
print("Average Accuracy")
for i in range (0, test_case):
    print("ProtoNN with size", size[0], "KB, prototype", structure[i][0], ", prj dim", structure[i][1],", Accuracy: \t:", results[i])

--------------------------------
FOLD 0
--------------------------------
num prototypes:	 59 
porjection dim:	 25
Model size: 31.972 KB
Accuracy for fold 0 with m = 59.000000,d = 25: 98 %
num prototypes:	 240 
porjection dim:	 15
Model size: 31.68 KB
Accuracy for fold 0 with m = 240.000000,d = 15: 99 %
num prototypes:	 120 
porjection dim:	 15
Model size: 23.52 KB
Accuracy for fold 0 with m = 120.000000,d = 15: 99 %
num prototypes:	 50 
porjection dim:	 15
Model size: 18.76 KB
Accuracy for fold 0 with m = 50.000000,d = 15: 99 %
num prototypes:	 450 
porjection dim:	 10
Model size: 31.84 KB
Accuracy for fold 0 with m = 450.000000,d = 10: 94 %
--------------------------------
FOLD 1
--------------------------------
num prototypes:	 59 
porjection dim:	 25
Model size: 31.972 KB
Accuracy for fold 1 with m = 59.000000,d = 25: 99 %
num prototypes:	 240 
porjection dim:	 15
Model size: 31.68 KB
Accuracy for fold 1 with m = 240.000000,d = 15: 99 %
num prototypes:	 120 
porjection dim:	 15
Mode

In [7]:
# FOR 64 KB
size = [64]
# Architectures to evaluate; each entry is [projection dim, number of prototypes]
size_parameters = [[50, 60], [15, 715], [15,  350], [15,50], [25, 350]] # dim and proto
test_case = len(size_parameters)

# For fold results
results = np.zeros([k_folds, test_case])  # accuracy for each (fold, architecture) pair
structure = np.zeros([test_case, 2])
# structure [0]: numProto, [1]: prjdim

# Start print
print('--------------------------------')
# K-fold Cross Validation model evaluation
# Y supplies the class labels that StratifiedKFold stratifies the folds on.
for fold, (train_ids, test_ids) in enumerate(stratifiedk_fold.split(dataset.data, Y)):

    # Print
    print(f'FOLD {fold}')
    print('--------------------------------')

    # Sample elements randomly from a given list of ids, no replacement.
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)

    # Define data loaders for training and testing data in this fold.
    # batch_size equals the fold size, so one batch holds the whole split.
    trainloader = torch.utils.data.DataLoader(
                      dataset,
                      batch_size=train_ids.shape[0],
                      sampler=train_subsampler)
    testloader = torch.utils.data.DataLoader(
                      dataset,
                      batch_size=test_ids.shape[0],
                      sampler=test_subsampler)

    train_inputs, train_targets = next(iter(trainloader))
    test_inputs, test_targets = next(iter(testloader))

    # SET FIXED DIMENSION
    input_dimension = train_inputs.shape[1]
    numOutputLabels = dataset.numClasses

    for index_size, couple_elem in enumerate(size_parameters):
        # projection_dimension: HYPERPARAMETER
        projection_dimension = couple_elem[0]
        # numPrototypes: HYPERPARAMETER
        numPrototypes = couple_elem[1]

        structure[index_size][0] = numPrototypes
        structure[index_size][1] = projection_dimension

        # print structure of ProtoNN
        print("num prototypes:\t", numPrototypes,
              "\nporjection dim:\t", projection_dimension)

        # initialize W as random
        # NOTE(review): no seed is set in this cell, so W (and the sampler
        # order) presumably differ between runs -- confirm whether that is intended.
        W = np.random.rand(input_dimension, projection_dimension)

        # initialize protoNN
        protoNNmodel = protograph.ProtoNN(input_dimension, projection_dimension,
                                     numPrototypes, numOutputLabels,
                                     gamma=0.05, W=W)
        protoNNmodel.initializePrototypesAndGamma(train_inputs, train_targets,
                                     input_W=W, overrideGamma=True)
        protoNNtrainer = prototrainer.ProtoNNTrainer(protoNNmodel,
                                     sparcityW, sparcityB, sparcityZ,
                                     regW, regB, regZ,
                                     learningRate,
                                     lossType)
        current_size = protoNNtrainer.getModelSize()
        print("Model size:", current_size, "KB")
        result_dic = protoNNtrainer.train(
                              train_inputs.float(),
                              test_inputs.float(),
                              train_targets.float(),
                              test_targets.float(),
                              batchsize,
                              epochs,
                              printStep,
                              valStep,
                              verbose=False, history=False)
        correct = result_dic.get('correctPredictions').item()
        total = result_dic.get('totalPredictions')
        accuracy = 100 * correct / total

        # Print accuracy (the %d only truncates the display; the float is stored)
        print('Accuracy for fold %d with m = %f,d = %d: %d %%' % (fold, numPrototypes, projection_dimension, accuracy))
        results[fold][index_size] = accuracy

        # Release the model before building the next one
        del protoNNmodel
        del protoNNtrainer
        gc.collect()

    print('--------------------------------')

# Print fold results
print(f'K-FOLD CROSS VALIDATION RESULTS FOR {k_folds}')
print('--------------------------------')
# Average over the folds: one mean accuracy per tested architecture
results = np.sum(results, axis = 0) / k_folds
print("Average Accuracy")
for i in range (0, test_case):
    print("ProtoNN with size", size[0], "KB, prototype", structure[i][0], ", prj dim", structure[i][1],", Accuracy: \t:", results[i])

--------------------------------
FOLD 0
--------------------------------
num prototypes:	 60 
porjection dim:	 50
Model size: 63.68 KB
Accuracy for fold 0 with m = 60.000000,d = 50: 99 %
num prototypes:	 715 
porjection dim:	 15
Model size: 63.98 KB
Accuracy for fold 0 with m = 715.000000,d = 15: 98 %
num prototypes:	 350 
porjection dim:	 15
Model size: 39.16 KB
Accuracy for fold 0 with m = 350.000000,d = 15: 99 %
num prototypes:	 50 
porjection dim:	 15
Model size: 18.76 KB
Accuracy for fold 0 with m = 50.000000,d = 15: 99 %
num prototypes:	 350 
porjection dim:	 25
Model size: 63.4 KB
Accuracy for fold 0 with m = 350.000000,d = 25: 98 %
--------------------------------
FOLD 1
--------------------------------
num prototypes:	 60 
porjection dim:	 50
Model size: 63.68 KB
Accuracy for fold 1 with m = 60.000000,d = 50: 99 %
num prototypes:	 715 
porjection dim:	 15
Model size: 63.98 KB
Accuracy for fold 1 with m = 715.000000,d = 15: 98 %
num prototypes:	 350 
porjection dim:	 15
Model s

In [8]:
# k-NN baseline, scored on folds produced by the same stratified splitter
# that the ProtoNN cells use.
knn_results = {}  # fold index -> accuracy in percent
print('--------------------------------')
# K-fold Cross Validation model evaluation
# Y supplies the class labels that StratifiedKFold stratifies the folds on.
for fold, (train_ids, test_ids) in enumerate(stratifiedk_fold.split(dataset.data, Y)):

    # Print
    print(f'FOLD {fold}')
    print('--------------------------------')

    # Sample elements randomly from a given list of ids, no replacement.
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)

    # Define data loaders for training and testing data in this fold.
    # batch_size equals the fold size, so one batch holds the whole split.
    trainloader = torch.utils.data.DataLoader(
                      dataset,
                      batch_size=train_ids.shape[0],
                      sampler=train_subsampler)
    testloader = torch.utils.data.DataLoader(
                      dataset,
                      batch_size=test_ids.shape[0],
                      sampler=test_subsampler)

    train_inputs, train_targets = next(iter(trainloader))
    test_inputs, test_targets = next(iter(testloader))

    # K-NN
    # Fit on the training split, score on the held-out split
    knn.fit(train_inputs, train_targets)
    knn_foldscore = knn.score(test_inputs,test_targets)
    knn_results[fold] = knn_foldscore*100
    print("KNN accuracy:",knn_foldscore)
    print('--------------------------------')

# Accumulate under a dedicated name: the original rebound the builtin `sum`,
# which breaks any later `sum(...)` call in the same kernel.
knn_total = 0.0
for key, value in knn_results.items():
    print(f'Fold {key}: {value} %')
    knn_total += value
print(f'Average KNN: {knn_total/len(knn_results)} %')

--------------------------------
FOLD 0
--------------------------------
KNN accuracy: 0.9917751884852639
--------------------------------
FOLD 1
--------------------------------
KNN accuracy: 0.9869684499314129
--------------------------------
FOLD 2
--------------------------------
KNN accuracy: 0.9883401920438958
--------------------------------
FOLD 3
--------------------------------
KNN accuracy: 0.9924554183813443
--------------------------------
FOLD 4
--------------------------------
KNN accuracy: 0.9890260631001372
--------------------------------
Fold 0: 99.17751884852639 %
Fold 1: 98.69684499314128 %
Fold 2: 98.83401920438958 %
Fold 3: 99.24554183813443 %
Fold 4: 98.90260631001372 %
Average KNN: 98.97130623884108 %


In [10]:
# Audible notification that the notebook has finished running.
# winsound ships only with Python on Windows, so the import is guarded and a
# plain message is printed elsewhere instead of failing the final cell.
duration = 1000  # milliseconds
freq = 440  # Hz
try:
    import winsound
except ImportError:
    print("Run finished (no winsound on this platform, skipping beep)")
else:
    winsound.Beep(freq, duration)
    winsound.Beep(500, duration)