In [None]:
# NOTE: plot_confusion_matrix was removed in scikit-learn 1.2; on newer
# releases use ConfusionMatrixDisplay.from_estimator / from_predictions.
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    confusion_matrix,
    f1_score,
    plot_confusion_matrix,
    precision_score,
    recall_score,
)

class Evaluation:
    """Namespace for classification metric helpers built on scikit-learn.

    The class holds no state. Both helpers are static methods, so they can
    be called either on the class (``Evaluation.aggregate_metrics(y, y_hat)``)
    or on an instance (``Evaluation().aggregate_metrics(y, y_hat)``).
    """

    def __init__(self):
        """Create an evaluator; there is nothing to initialise."""
        pass

    @staticmethod
    def aggregate_metrics(y, y_hat) -> dict:
        """Compute accuracy, precision, recall and F1 over the whole set.

        Arguments:
            y     : ground-truth labels
            y_hat : predicted labels

        Calculations:
            accuracy  : (tp + tn) / (total samples = (p + n))
            precision : tp / (tp + fp)
            recall    : tp / (tp + fn)
            f1        : 2 tp / (2 tp + fp + fn)

        Returns:
            dict with the keys "Accuracy", "Precision", "Recall Score"
            and "F1 Score".

        Note:
            precision/recall/F1 are called with scikit-learn's default
            averaging (``average='binary'`` per the scikit-learn docs), so
            this helper is meant for binary targets; for multiclass labels
            see ``classwise_metrics``.
        """
        return {
            "Accuracy": accuracy_score(y, y_hat),
            "Precision": precision_score(y, y_hat),
            "Recall Score": recall_score(y, y_hat),
            "F1 Score": f1_score(y, y_hat),
        }

    @staticmethod
    def classwise_metrics(y, y_hat) -> dict:
        """Compute macro-averaged precision, recall and F1.

        With ``average='macro'`` the score is calculated for each label and
        the unweighted mean over labels is returned, so each entry in the
        result is a single number rather than one value per class. Label
        imbalance is not taken into account.

        Arguments:
            y     : ground-truth labels
            y_hat : predicted labels

        Calculations (per label, then averaged):
            precision : tp / (tp + fp)
            recall    : tp / (tp + fn)
            f1        : 2 tp / (2 tp + fp + fn)

        Returns:
            dict with the keys "Class-wise Precision",
            "Class-wise Recall Score" and "Class-wise F1 Score".
        """
        return {
            "Class-wise Precision": precision_score(y, y_hat, average='macro'),
            "Class-wise Recall Score": recall_score(y, y_hat, average='macro'),
            "Class-wise F1 Score": f1_score(y, y_hat, average='macro'),
        }