In [1]:
# Imports
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import subprocess
from IPython.core.debugger import set_trace

In [6]:
class MetricHelper():
    """
    Compute per-class and class-averaged classification metrics.

    Every metric is evaluated one-vs-rest for each label in ``self.classes``:
    ``(0, 1)`` when ``task == "Conviction"``, otherwise ``(0, 1, ..., 6)``.
    A ratio whose denominator is zero (e.g. precision of a class that is never
    predicted) is reported as ``0.0`` instead of ``nan``.
    """

    def __init__(self, trues:np.ndarray, preds:np.ndarray, task:str="Conviction", *metrics:str):
        """
        Params:
            trues (np.ndarray): True labels.
            preds (np.ndarray): Predicted labels, aligned element-wise with ``trues``.
            task (str, optional): Selects the label set. Defaults to "Conviction".
            *metrics (str): Names of the metrics to compute. Each name ``m`` is resolved to the
                method ``compute_<m>`` ('accuracy', 'precision', 'recall', 'f1', 'gmean').
                A single list/tuple of names is accepted as well.
        """
        self.trues_ = trues
        self.preds_ = preds
        self.task_ = task
        # Allow MetricHelper(t, p, task, ["f1", "recall"]) in addition to the variadic form;
        # without this a list would be treated as one (invalid) metric name.
        if len(metrics) == 1 and isinstance(metrics[0], (list, tuple)):
            metrics = tuple(metrics[0])
        self.metrics = metrics
        self.classes = (0, 1) if task == "Conviction" else (0,1,2,3,4,5,6)

    @staticmethod
    def _safe_div(numerator, denominator) -> float:
        """Return numerator / denominator as a float, or 0.0 when the denominator is zero."""
        return float(numerator) / float(denominator) if denominator else 0.0

    @staticmethod
    def _confusion_counts(trues, preds, c) -> tuple:
        """Return the one-vs-rest confusion counts (tp, fp, fn, tn) for class ``c``."""
        y_true = np.asarray(trues).ravel()
        y_pred = np.asarray(preds).ravel()
        if y_true.shape != y_pred.shape:
            raise ValueError(
                f"trues and preds must have the same number of elements, got {y_true.size} and {y_pred.size}."
            )
        true_is_c = y_true == c
        pred_is_c = y_pred == c
        tp = int(np.sum(true_is_c & pred_is_c))
        fp = int(np.sum(~true_is_c & pred_is_c))
        fn = int(np.sum(true_is_c & ~pred_is_c))
        tn = int(np.sum(~true_is_c & ~pred_is_c))
        return tp, fp, fn, tn

    def compute_average_metrics(self) -> dict:
        """
        Calculate the metrics' average over all classes (equal weight per class).

        Returns:
            dict: Dictionary containing the metric name as key and its averaged value as value.
        """
        metrics_per_class = self.compute_metrics_per_class()
        n_classes = len(self.classes)
        # Accumulate over ALL classes first, then divide once per metric.
        return {
            m: sum(metrics_per_class[c][m] for c in self.classes) / n_classes
            for m in self.metrics
        }

    def compute_metrics_per_class(self) -> dict:
        """
        Compute every metric named in ``self.metrics`` per class, using the labels
        and predictions stored on the instance.

        Returns:
            dict: Dictionary containing a class label as key and a dictionary with the metric name as key and its value as value.

        Raises:
            AttributeError: If a metric name has no matching ``compute_<name>`` method.
        """
        # Each compute_<metric> method returns {class: value}
        metric_values = {m: getattr(self, f"compute_{m}")(self.trues_, self.preds_) for m in self.metrics}
        # Re-key from metric -> class to class -> metric
        return {c: {m: values[c] for m, values in metric_values.items()} for c in self.classes}

    def compute_accuracy(self, trues, preds) -> dict:
        """
        Calculate, per class, the fraction of ALL samples that belong to the class
        and were predicted as that class (``tp / n_samples``).

        Note that this is the class's share of correctly classified samples, not the
        one-vs-rest accuracy ``(tp + tn) / n_samples``.

        Params:
            trues (np.ndarray): True labels.
            preds (np.ndarray): Predicted labels.

        Returns:
            dict: A dictionary containing the classes as key and the accuracy value for this class as value.
        """
        accuracy = {}
        for c in self.classes:
            tp, fp, fn, tn = self._confusion_counts(trues, preds, c)
            accuracy[c] = self._safe_div(tp, tp + fp + fn + tn)

        return accuracy

    def compute_precision(self, trues, preds) -> dict:
        """
        Calculate the precision metric ``tp / (tp + fp)`` for the given true and prediction values.

        Params:
            trues (np.ndarray): True labels.
            preds (np.ndarray): Predicted labels.

        Returns:
            dict: A dictionary containing the classes as key and the precision value for this class as value.
                  0.0 for a class that is never predicted.
        """
        precision = {}
        for c in self.classes:
            tp, fp, _, _ = self._confusion_counts(trues, preds, c)
            precision[c] = self._safe_div(tp, tp + fp)

        return precision

    def compute_recall(self, trues, preds) -> dict:
        """
        Calculate the recall metric ``tp / (tp + fn)`` for the given true and prediction values.

        Params:
            trues (np.ndarray): True labels.
            preds (np.ndarray): Predicted labels.

        Returns:
            dict: A dictionary containing the classes as key and the recall value for this class as value.
                  0.0 for a class that never occurs in ``trues``.
        """
        recall = {}
        for c in self.classes:
            tp, _, fn, _ = self._confusion_counts(trues, preds, c)
            recall[c] = self._safe_div(tp, tp + fn)

        return recall

    def compute_f1(self, trues:np.ndarray, preds:np.ndarray, beta:float=1.0) -> dict:
        """
        Calculate the F-beta score (F1 for the default ``beta=1``) for the given true and prediction values:
        ``(1 + beta**2) * precision * recall / (beta**2 * precision + recall)``.

        Params:
            trues (np.ndarray): True labels.
            preds (np.ndarray): Predicted labels.
            beta (float, optional): Weight of recall relative to precision. Must be non-negative. Defaults to 1.0.

        Returns:
            dict: A dictionary containing the classes as key and the f1 value for this class as value.
                  0.0 when the denominator is zero.
        """
        assert beta >= 0, "beta needs to be non-negative."
        recall = self.compute_recall(trues, preds)
        precision = self.compute_precision(trues, preds)
        beta_sq = beta ** 2
        f1 = {}
        for c in self.classes:
            p, r = precision[c], recall[c]
            f1[c] = self._safe_div((1 + beta_sq) * p * r, beta_sq * p + r)

        return f1

    def compute_gmean(self,trues, preds) -> dict:
        """
        Calculate the G-Mean, i.e. ``sqrt(recall * specificity)`` with
        ``specificity = tn / (tn + fp)``, for the given true and prediction values.

        Params:
            trues (np.ndarray): True labels.
            preds (np.ndarray): Predicted labels.

        Returns:
            dict: A dictionary containing the classes as key and the gmean value for this class as value.
        """
        gmean = {}
        for c in self.classes:
            tp, fp, fn, tn = self._confusion_counts(trues, preds, c)
            sensitivity = self._safe_div(tp, tp + fn)
            specificity = self._safe_div(tn, tn + fp)
            gmean[c] = float(np.sqrt(sensitivity * specificity))

        return gmean


In [None]:
# NOTE(review): `trues`, `preds`, `task` and `metrics` are not defined in any cell visible here — confirm an earlier cell creates them (Restart & Run All).
# MetricHelper.__init__ collects metric names variadically (`*metrics`), so if `metrics` is a list/tuple it presumably needs to be unpacked as `*metrics` — verify.
mh = MetricHelper(trues, preds, task, metrics)