In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import csv
from tqdm import tqdm

In [2]:
if(os.path.split(os.getcwd())[1] == "misc"):
    os.chdir("..")
print("Current Working Directory: {}".format(os.path.split(os.getcwd())[1]))

cuda = True

Current Working Directory: progettoVIPM


In [38]:
from typing import Literal

_TYPES = Literal["highest", "lowest"]

class Confusion_Matrix_stats():
    def __init__(self, filename, classes):
        
        # Load the data from file

        self.filename = filename

        if ".npy" in filename:
            print("File is readable by numpy")
            self.cm = np.load(filename).astype("float32")
        if ".csv" in filename:
            print("File is readable by csv")
            self.cm = np.loadtxt(filename, delimiter=",", dtype=np.int64)
        
        
        self.classes = classes

        # Compute basic metrics

        self.accuracy = self.cm.diagonal()/self.cm.sum(axis=1)

        self.total_accuracy = self.cm.diagonal().sum() / self.cm.sum()
        
        # the following matrix is used to compute both the false negative and false positives by using dot prodct
        # the values will be retrieved from the diagonal of the obtained matrix
        prod_matrix = np.ones(self.cm.shape, dtype=np.int8)
        np.fill_diagonal(prod_matrix, 0)

        self.false_negative = np.dot(self.cm, prod_matrix).diagonal()

        self.false_positive = np.dot(np.transpose(self.cm), prod_matrix).diagonal()

        self.true_positive = self.cm.diagonal()

        self.real_positive = np.sum(self.cm, axis=1)

        self.real_negative = np.sum(self.cm, axis=0)

        # the true negative values are computed by excluding the rest of the values from the total sum of the matrix

        self.true_negative = -self.real_negative + self.cm.sum() - self.real_positive + self.true_positive

        self.predicted_positive = self.true_positive + self.false_positive

        self.predicted_negative = self.false_negative + self.true_negative

        # Compute advanced metrics

        self.precision = self.true_positive / self.predicted_positive

        self.recall = self.true_positive/self.real_positive

        self.f1 = 2*(self.precision * self.recall)/(self.precision + self.recall)

    def show_top_k_confused_classes(self, k):
        # This function returns the top k confused classes as a list of double values
        # the returned list is bicriterional in the sense that it returns the top k confused classes in both ways.
        # As an example: if the couple (1,2) is returned it means that the total amount of cases in which 1 was confused with 2
        # and vice versa was great

        
        # The following matrix "summed_mat" is initialized, it will eventually contain
        # the sums of the rows and columns of the confusion mat of each class
        # This is basically accomplished by summing the upper triangular part of the matrix with the lower triangular part
        # the height of the matrix is set as self.cm.shape[0]-1 because we do self.cm.shape[0]-1 iterations
        # since at every iteration the remaining parts of the matrix to confront shrinks when the last row
        # is reached there will be no confront to make
        summed_mat = np.zeros((self.cm.shape[0]-1, self.cm.shape[1]), dtype=np.int64)
        for i in range(self.cm.shape[0]-1):
            # sum the errors in the row and column relative to the current class i
            i_row = self.cm[i, i+1:]
            i_column = self.cm[i+1:, i]
            i_row = i_row + i_column
            # there is a padding of -1s in order to obtain a vector of correct lenght.
            # -1 are chosen because when doing argmax operation they will never be chosen, since in self.cm the minimum value is 0
            i_row = np.pad(i_row, (i+1,0), 'constant', constant_values=(-1))
            summed_mat[i] = i_row
        top_k= []
        print("These are the top k confused couples of classes")
        for i in range(k):
            # unravel_index is needed because np.argmax returns the position of the maximum value in the flattened array
            pos = np.unravel_index(np.argmax(summed_mat, axis=None), summed_mat.shape)
            print("#{}: {}".format(i+1, pos))
            top_k.append(pos)
            # after printing ad appending the couple we set the value at its position as -1 in order to not choose it again
            summed_mat[pos]=-1
        return top_k



    def show_metrics(self, index):
        
        print("---------------------------------------------------------")
        print("Showing metrics of class: {}; filename:{}".format(self.classes[index], self.filename))
        print("Total accuracy of given cm: {} %".format(self.total_accuracy * 100))
        print("-------------------basic metrics------------------------")

        print("Accuracy: {} %".format(self.accuracy[index] * 100))

        print("N° of real positives: {}".format(self.real_positive[index]))
        print("N° of true positives: {}".format(self.true_positive[index]))
        print("N° of false negatives: {}".format(self.false_negative[index]))

        print("N° of real negatives: {}".format(self.real_negative[index]))
        print("N° of true negatives: {}".format(self.true_negative[index]))
        print("N° of false positives: {}".format(self.false_positive[index]))
        
        print("N° of predicted positives: {}".format(self.predicted_positive[index]))
        print("N° of predicted negatives: {}".format(self.predicted_negative[index]))

        print("-------------------advanced metrics----------------------")

        print("Precision: {}".format(self.precision[index]))
        print("Recall: {}".format(self.recall[index]))
        print("F1-score: {}".format(self.f1[index]))

        print("---------------------------------------------------------")
    
    def show_metrics_k_acc(self, k, criterion: _TYPES ="highest"):
        index_best = np.argsort(self.accuracy)[:k]
        if criterion == "lowest":
            index_best = np.argsort(self.accuracy)[-k:]
        print("Showing metrics of {} {} accuracy classes".format(criterion, k))
        for index in index_best:
            self.show_metrics(index)
    
    def show_metrics_k_tp(self, k, criterion: _TYPES ="highest"):
        index_best = np.argsort(self.true_positive)[:k]
        if criterion == "lowest":
            index_best = np.argsort(self.true_positive)[-k:]
        print("Showing metrics of {} {} true positives classes".format(criterion, k))
        for index in index_best:
            self.show_metrics(index)
    
    def show_metrics_k_tn(self, k, criterion: _TYPES ="highest"):
        index_best = np.argsort(self.true_negative)[:k]
        if criterion == "lowest":
            index_best = np.argsort(self.true_negative)[-k:]
        print("Showing metrics of {} {} true negatives classes".format(criterion, k))
        for index in index_best:
            self.show_metrics(index)
    
    def show_metrics_k_precision(self, k, criterion: _TYPES ="highest"):
        index_best = np.argsort(self.precision)[:k]
        if criterion == "lowest":
            index_best = np.argsort(self.precision)[-k:]
        print("Showing metrics of {} {} precision classes".format(criterion, k))
        for index in index_best:
            self.show_metrics(index)
    
    def show_metrics_k_recall(self, k, criterion: _TYPES ="highest"):
        index_best = np.argsort(self.recall)[:k]
        if criterion == "lowest":
            index_best = np.argsort(self.recall)[-k:]
        print("Showing metrics of {} {} recall classes".format(criterion, k))
        for index in index_best:
            self.show_metrics(index)
    
    def show_metrics_k_f1(self, k, criterion: _TYPES ="highest"):
        index_best = np.argsort(self.f1)[:k]
        if criterion == "lowest":
            index_best = np.argsort(self.f1)[-k:]
        print("Showing metrics of {} {} f1-score classes".format(criterion, k))
        for index in index_best:
            self.show_metrics(index)

In [49]:
filename = "./Transfer_Learning/model_metrics/ConfM_{}_from_AlexNet_until7_pca.npy".format("Naive Bayes")
classes = np.loadtxt("disambiguation.csv", delimiter=",", dtype="str")[:,1]
# classes = [0,1,2,3]
confusion_matrix = Confusion_Matrix_stats(filename, classes)
confusion_matrix.show_metrics(1)


File is readable by numpy
---------------------------------------------------------
Showing metrics of class: beignet; filename:./Transfer_Learning/model_metrics/ConfM_Naive Bayes_from_AlexNet_until7_pca.npy
Total accuracy of given cm: 2.19123512506485 %
-------------------basic metrics------------------------
Accuracy: 0.0 %
N° of real positives: 4.0
N° of true positives: 0.0
N° of false negatives: 4.0
N° of real negatives: 0.0
N° of true negatives: 1000.0
N° of false positives: 0.0
N° of predicted positives: 0.0
N° of predicted negatives: 1004.0
-------------------advanced metrics----------------------
Precision: nan
Recall: 0.0
F1-score: nan
---------------------------------------------------------


  self.precision = self.true_positive / self.predicted_positive
  self.f1 = 2*(self.precision * self.recall)/(self.precision + self.recall)


In [30]:
classes = np.arange(251)

rows = ["{}".format(i) for i in classes] 
text = "\n".join(rows) 
  
with open('disambiguation.csv', 'w') as f: 
    f.write(text)