In [1]:
import pandas as pd
import torch
import numpy as np

from model.scores import AverageCoverageError, IntervalScore, IntervalScorePaper
from pathlib import Path

In [2]:
# Data directory, resolved against the current working directory, so the
# notebook has to be started from the folder that contains `data/files`.
PATH_DATA = Path.cwd().joinpath('data', 'files')

In [3]:
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
data = pd.read_pickle(PATH_DATA / 'tests.pkl')

# Predictions and the matching observations stored in the pickle.
y_pred, target = data['pred'], data['target']

# Quantile levels in ascending order. They form symmetric pairs
# (0.025/0.975, 0.05/0.95, 0.1/0.9, 0.15/0.85).
quantiles = torch.tensor([0.025, 0.05, 0.1, 0.15, 0.85, 0.9, 0.95, 0.975])

Reference implementations taken from the [GitHub repository](https://github.com/hatalis/Quantile_Detector/blob/master/evaluation/evaluate_results.py) of the paper's author:

In [4]:
def intervalScore(q_hat, tau, n_tau, n_test, y_test):
    """Reference interval score and sharpness, averaged over all prediction intervals.

    Quantile columns are paired from the outside in: column ``m`` is used as
    the lower bound and column ``-(m + 1)`` as the upper bound of interval
    ``m``, whose nominal coverage is ``tau[-(m + 1)] - tau[m]``. With an odd
    ``n_tau`` the middle column is not used.

    The per-sample Python loop is kept deliberately simple so this function
    stays an independent reference for checking other implementations.

    Parameters
    ----------
    q_hat : torch.Tensor
        Predicted quantiles, indexed as ``q_hat[:, m]``
        (presumably shaped ``(n_test, n_tau)`` -- TODO confirm with caller).
    tau : sequence or tensor
        Quantile levels, one per column of ``q_hat``, in ascending order.
    n_tau : int
        Number of quantile levels.
    n_test : int
        Number of samples to score.
    y_test : indexable
        Observed values; ``y_test[i]`` is compared with the bounds of sample ``i``.

    Returns
    -------
    interval_score : torch.Tensor
        0-dim tensor: mean over intervals of the mean per-sample score.
    sharp_score : torch.Tensor
        0-dim tensor: mean over intervals of the mean interval width.
    PINC : list
        Nominal coverage of each interval, outermost interval first.
    IS : torch.Tensor
        ``(n_test, 1)`` per-sample scores of the LAST interval processed
        (the innermost pair) only.

    Raises
    ------
    ValueError
        If ``n_tau`` is smaller than 2, i.e. no interval can be formed.
    """
    n_pi = int(n_tau / 2)  # number of prediction intervals
    if n_pi < 1:
        # Without this guard the function would compute NaN means and then
        # fail on the unbound ``IS`` in the return statement.
        raise ValueError(
            "intervalScore needs at least two quantile levels to form a "
            "prediction interval, got n_tau={}".format(n_tau)
        )

    interval_score = torch.zeros((n_pi, 1))
    sharp_score = torch.zeros((n_pi, 1))

    # nominal coverage (PINC) of each interval: upper level minus lower level
    PINC = [tau[-(m + 1)] - tau[m] for m in range(n_pi)]

    # interval score and sharpness, one interval at a time
    for m in range(n_pi):
        LB = q_hat[:, m]
        UB = q_hat[:, -(m + 1)]
        alpha = 1 - PINC[m]

        IS = torch.zeros((n_test, 1))
        sharpness = torch.zeros((n_test, 1))
        for i in range(n_test):
            L = LB[i]
            U = UB[i]
            y = y_test[i]
            delta = U - L  # interval width
            sharpness[i] = delta
            # The width term is always subtracted; a miss additionally
            # subtracts four times the distance to the violated bound.
            if y < L:
                IS[i] = -2 * alpha * delta - 4 * (L - y)
            elif y > U:
                IS[i] = -2 * alpha * delta - 4 * (y - U)
            else:
                IS[i] = -2 * alpha * delta

        sharp_score[m] = torch.mean(sharpness)
        interval_score[m] = torch.mean(IS)

    # average the per-interval values into a single score each
    interval_score = torch.mean(interval_score)
    sharp_score = torch.mean(sharp_score)

    return interval_score, sharp_score, PINC, IS


def coverageScore(q_hat, tau, n_tau, n_test, y_test):
    """Reference average coverage error (ACE), in percent.

    Column ``m`` of ``q_hat`` is paired with column ``-(m + 1)`` to form
    prediction interval ``m``. For each interval the empirical coverage
    (share of the first ``n_test`` samples with ``LB <= y <= UB``, bounds
    inclusive) is compared with the nominal coverage
    ``tau[-(m + 1)] - tau[m]``; the absolute deviations are averaged over
    all intervals and multiplied by 100.

    Parameters
    ----------
    q_hat : array
        Predicted quantiles, indexed as ``q_hat[:, m]``.
    tau : sequence
        Quantile levels, one per column of ``q_hat``.
    n_tau : int
        Number of quantile levels; ``int(n_tau / 2)`` intervals are evaluated.
    n_test : int
        Number of samples to evaluate.
    y_test : indexable
        Observed values; ``y_test[i]`` is checked against sample ``i``'s bounds.

    Returns
    -------
    Mean absolute coverage deviation over all intervals, times 100.
    """
    n_pi = int(n_tau / 2)  # number of prediction intervals
    PICP = np.zeros((n_pi, 1))
    ACE = np.zeros((n_pi, 1))

    # nominal coverage of every interval (upper level minus lower level)
    PINC = [tau[-(m + 1)] - tau[m] for m in range(n_pi)]

    for m, nominal in enumerate(PINC):
        lower = q_hat[:, m]
        upper = q_hat[:, -(m + 1)]

        # count the observations that fall inside [lower, upper]
        hits = sum(
            1
            for i in range(n_test)
            if y_test[i] <= upper[i] and y_test[i] >= lower[i]
        )

        PICP[m] = (1 / n_test) * hits
        ACE[m] = abs(PICP[m] - nominal)

    # collapse the per-interval errors into one percentage
    return np.mean(ACE) * 100

#### Interval Score 

In [5]:
# Reference interval score and sharpness on the loaded predictions and targets.
interval_score, sharp_score, PINC, IS = intervalScore(
    q_hat=y_pred,
    tau=quantiles,
    n_tau=len(quantiles),
    n_test=len(target),
    y_test=target,
)
interval_score.item(), sharp_score.item()

(-0.07624354213476181, 0.20293907821178436)

In [6]:
# Project implementation (model.scores.IntervalScore) evaluated on the same
# predictions and targets, for comparison with the reference intervalScore values.
iscore = IntervalScore(quantiles)
iscore.forward(y_pred, target)

(-0.07624353468418121, 0.20293907821178436)

Interval score as formulated in the paper (`IntervalScorePaper`):

In [7]:
# Variant imported from model.scores as IntervalScorePaper, evaluated on the same inputs.
# NOTE(review): presumably the score exactly as formulated in the paper -- confirm in model.scores.
# This rebinds `iscore`, replacing the IntervalScore instance created earlier.
iscore = IntervalScorePaper(quantiles)
iscore.forward(y_pred, target)

(0.2631121575832367, 0.20293907821178436)

#### Average Coverage Error

In [8]:
# Reference ACE: tensors are converted to NumPy arrays and the quantile
# levels to a plain list before calling coverageScore.
ace = coverageScore(
    q_hat=y_pred.detach().numpy(),
    tau=quantiles.tolist(),
    n_tau=len(quantiles),
    n_test=len(target),
    y_test=target.detach().numpy(),
)
ace

2.59999991618097

In [9]:
# Project implementation (model.scores.AverageCoverageError) evaluated on the same
# predictions and targets, for comparison with the reference coverageScore value.
acerror = AverageCoverageError(quantiles)
acerror.forward(y_pred, target)

tensor(2.6000)