In [1]:
from collections import OrderedDict

import torch
from torch import nn, optim

from ignite.engine import *
from ignite.handlers import *
from ignite.metrics import *
from ignite.utils import *
from ignite.contrib.metrics.regression import *
from ignite.contrib.metrics import *



In [2]:
def eval_step(engine, batch):
    """Process function of the default evaluator used by the examples.

    Performs no computation: the batch is handed back unchanged.

    Args:
        engine: the engine invoking this step (not used).
        batch: the current batch of data.

    Returns:
        The very same ``batch`` object that was passed in.
    """
    return batch

In [3]:
# create default evaluator for doctests: an Engine whose step is `eval_step`
default_evaluator = Engine(process_function=eval_step)

In [4]:
# create default optimizer for doctests:
# plain SGD over a single one-element, zero-initialised parameter that tracks gradients
param_tensor = torch.zeros(1, requires_grad=True)
default_optimizer = optim.SGD(params=[param_tensor], lr=0.1)

In [5]:
def get_default_trainer():
    """Create default trainer for doctests.

    As handlers could be attached to the trainer, each test must define its
    own trainer (using `.. testsetup:`), so every call builds a fresh Engine.

    Returns:
        A new ``Engine`` whose step returns the batch it receives unchanged.
    """

    def _identity_step(engine, batch):
        # No training is performed: the batch is passed straight through.
        return batch

    trainer = Engine(process_function=_identity_step)
    return trainer

In [6]:
# create default model for doctests
# Seed BEFORE building the model: the Linear layers are randomly initialised,
# so seeding only afterwards leaves the weights different on every fresh kernel.
manual_seed(666)

default_model = nn.Sequential(OrderedDict([
    ('base', nn.Linear(4, 2)),  # 4 input features -> 2 hidden features
    ('fc', nn.Linear(2, 1))     # 2 hidden features -> 1 output
]))

# Seed again so that the RNG state seen by the following cells does not depend
# on how many random numbers the model initialisation consumed.
manual_seed(666)

In [7]:
          
# Multiclass input tensors shared by the metric examples below.
# y_true: one integer class label (0, 1 or 2) for each of the 6 samples.
y_true = torch.tensor([2, 0, 2, 1, 0, 1])

# y_pred: one row of 3 scores per sample (6 rows x 3 columns).
y_pred = torch.tensor(
    [
        [0.0266, 0.1719, 0.3055],
        [0.6886, 0.3978, 0.8176],
        [0.9230, 0.0197, 0.8395],
        [0.1785, 0.2670, 0.6084],
        [0.8448, 0.7177, 0.7288],
        [0.7748, 0.9542, 0.8573],
    ]
)

In [8]:
# Accuracy metric: attach it to the shared evaluator, run the evaluator once
# over a single (y_pred, y_true) batch and print the resulting value.
metric = Accuracy()
metric.attach(engine=default_evaluator, name="accuracy")
state = default_evaluator.run(data=[[y_pred, y_true]])
print(state.metrics["accuracy"])

0.5


In [9]:
# Recall metric with average=False, so the result is left unaveraged
# (the printed tensor holds one value per class).
metric = Recall(average=False)
metric.attach(engine=default_evaluator, name="recall")
state = default_evaluator.run(data=[[y_pred, y_true]])
print(state.metrics["recall"])

tensor([0.5000, 0.5000, 0.5000], dtype=torch.float64)


In [10]:
# Precision metric with average=False, so the result is left unaveraged
# (the printed tensor holds one value per class).
metric = Precision(average=False)
metric.attach(engine=default_evaluator, name="precision")
state = default_evaluator.run(data=[[y_pred, y_true]])
print(state.metrics["precision"])

tensor([0.5000, 1.0000, 0.3333], dtype=torch.float64)


In [11]:
# F1 score, built by composing the Precision and Recall metrics.
# We have to be careful that average=False, i.e. use the unaveraged precision
# and recall, otherwise we will not be computing F-beta metrics.
precision = Precision(average=False)
recall = Recall(average=False)
F1 = precision * recall * 2 / (precision + recall + 1e-20)  # 1e-20 keeps the denominator non-zero
F1 = MetricsLambda(lambda t: torch.mean(t).item(), F1)      # reduce the per-class scores to their mean

# Attach and evaluate the metric like the other metric cells do; without this
# step F1 is only constructed and its value is never computed or shown.
F1.attach(default_evaluator, "f1")
state = default_evaluator.run([[y_pred, y_true]])
print(state.metrics["f1"])


In [13]:
# Classification report: precision, recall and f-score broken down per label.
# With output_dict=True the result is indexed below like a dictionary.
metric = ClassificationReport(output_dict=True)
metric.attach(engine=default_evaluator, name="cr")
state = default_evaluator.run(data=[[y_pred, y_true]])

print('Classes: ',state.metrics["cr"].keys())

# One line per class label, then the macro average.
# Macro averaging reduces your multiclass predictions down to multiple sets of
# binary predictions, calculates the corresponding metric for each of the
# binary cases, and then averages the results together.
print(
    *(f'{label}:  {state.metrics["cr"][label]}' for label in ("0", "1", "2", "macro avg")),
    sep="\n",
)

Classes:  dict_keys(['0', '1', '2', 'macro avg'])
0:  {'precision': 0.5, 'recall': 0.5, 'f1-score': 0.49999999999999944}
1:  {'precision': 1.0, 'recall': 0.5, 'f1-score': 0.6666666666666662}
2:  {'precision': 0.3333333333333333, 'recall': 0.5, 'f1-score': 0.3999999999999995}
macro avg:  {'precision': 0.611111111111111, 'recall': 0.5, 'f1-score': 0.5222222222222217}
