## MultiTHUMOS Segmentation-map Eval

### Setup Environment

In [3]:
import os
import math
import torch
import pandas as pd
import numpy as np
import json
from sklearn.metrics import average_precision_score
from scipy.stats import norm
from tqdm import tqdm

In [4]:
def load_json(file):
    """Read a JSON document from disk and return the decoded object.

    Args:
        file: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` (dict, list, ...).
    """
    # JSON is UTF-8 by specification; do not depend on the platform's
    # default text encoding.
    with open(file, encoding='utf-8') as json_file:
        return json.load(json_file)

### Init Seg-mAP Evaluator

In [5]:
class Meter(object):
    """Minimal meter interface; subclasses override the three hooks below."""

    def reset(self):
        """Clear accumulated state (no-op in the base class)."""
        pass

    def add(self):
        """Record new observations (no-op in the base class)."""
        pass

    def value(self):
        """Return the current metric (the base class returns None)."""
        pass


class APMeter(Meter):
    """
    The APMeter measures the average precision per class.
    The APMeter is designed to operate on `NxK` Tensors `output` and
    `target`, and optionally a `Nx1` Tensor weight where (1) the `output`
    contains model output scores for `N` examples and `K` classes that ought to
    be higher when the model is more convinced that the example should be
    positively labeled, and smaller when the model believes the example should
    be negatively labeled (for instance, the output of a sigmoid function); (2)
    the `target` contains only values 0 (for negative examples) and 1
    (for positive examples); and (3) the `weight` (>= 0) represents weight for
    each sample.

    Only the ranking of the scores within each class matters; the scores do
    not have to be normalised across classes.
    """
    def __init__(self, weighted=False):
        super(APMeter, self).__init__()
        self.reset()
        # Stored for callers that inspect it; no method of this class reads it.
        self.weighted = weighted

    def reset(self):
        """Resets the meter with empty member variables"""
        # Empty 1-D tensors with the same dtypes as the legacy
        # FloatTensor/LongTensor constructors, without the deprecated
        # typed-storage constructors.
        self.scores = torch.empty(0, dtype=torch.float32)
        self.targets = torch.empty(0, dtype=torch.int64)
        self.weights = torch.empty(0, dtype=torch.float32)

    def add(self, output, target, weight=None):
        """Append a batch of scores and binary labels to the meter.

        Args:
            output (Tensor or ndarray): NxK scores; for each of the N examples
                a larger value means higher confidence that the example
                belongs to the corresponding class. A 1-D input is treated
                as Nx1.
            target (Tensor or ndarray): binary NxK tensor that encodes which
                of the K classes are associated with each example
                    (eg: a row [0, 1, 0, 1] indicates that the example is
                         associated with classes 2 and 4)
            weight (optional, Tensor or ndarray): Nx1 (or N) tensor
                representing the weight for each example (each weight >= 0).
                Must be supplied either for every batch or for none.

        Raises:
            AssertionError: if the shapes are inconsistent, the targets are
                not binary, a weight is negative, or weighted and unweighted
                batches are mixed.
        """
        if not torch.is_tensor(output):
            output = torch.from_numpy(output)
        if not torch.is_tensor(target):
            target = torch.from_numpy(target)

        if weight is not None:
            if not torch.is_tensor(weight):
                weight = torch.from_numpy(weight)
            # reshape(-1) instead of squeeze(): a single-example Nx1 weight
            # must stay 1-D rather than collapse to a 0-dim scalar.
            weight = weight.reshape(-1)
        if output.dim() == 1:
            output = output.view(-1, 1)
        else:
            assert output.dim() == 2, \
                'wrong output size (should be 1D or 2D with one column ' \
                'per class)'
        if target.dim() == 1:
            target = target.view(-1, 1)
        else:
            assert target.dim() == 2, \
                'wrong target size (should be 1D or 2D with one column ' \
                'per class)'
        # Scores and labels are stored side by side and indexed together in
        # value(); a shape mismatch would silently misalign them.
        assert output.size() == target.size(), \
            'output and target should have the same shape'
        if weight is not None:
            assert weight.numel() == target.size(0), \
                'Weight dimension 1 should be the same as that of target'
            assert torch.min(weight) >= 0, 'Weight should be non-negative only'
        assert torch.equal(target**2, target), \
            'targets should be binary (0 or 1)'
        if self.scores.numel() > 0:
            assert target.size(1) == self.targets.size(1), \
                'dimensions for output should match previously added examples.'
            # The weight buffer only lines up with the scores when every
            # batch (or no batch) carries weights.
            assert (weight is not None) == (self.weights.numel() > 0), \
                'weights should be given for every batch or for none'

        # make sure storage is of sufficient size (grow by 1.5x so repeated
        # add() calls do not reallocate every time)
        if self.scores.storage().size() < self.scores.numel() + output.numel():
            new_size = math.ceil(self.scores.storage().size() * 1.5)
            new_weight_size = math.ceil(self.weights.storage().size() * 1.5)
            self.scores.storage().resize_(int(new_size + output.numel()))
            self.targets.storage().resize_(int(new_size + output.numel()))
            if weight is not None:
                self.weights.storage().resize_(int(new_weight_size
                                               + output.size(0)))

        # store scores and targets
        offset = self.scores.size(0) if self.scores.dim() > 0 else 0
        self.scores.resize_(offset + output.size(0), output.size(1))
        self.targets.resize_(offset + target.size(0), target.size(1))
        self.scores.narrow(0, offset, output.size(0)).copy_(output)
        self.targets.narrow(0, offset, target.size(0)).copy_(target)

        if weight is not None:
            self.weights.resize_(offset + weight.size(0))
            self.weights.narrow(0, offset, weight.size(0)).copy_(weight)

    def value(self):
        """Returns the model's average precision for each class

        Return:
            ap (FloatTensor): 1-D tensor of length K with the average
                precision of each class k. The integer 0 is returned when
                nothing has been added yet.
        """
        if self.scores.numel() == 0:
            return 0

        num_examples, num_classes = self.scores.size()
        ap = torch.zeros(num_classes)
        has_weights = self.weights.numel() > 0
        # Unweighted precision denominators: rank positions 1..N.
        ranks = torch.arange(1, num_examples + 1).float()

        # compute average precision for each class
        for k in range(num_classes):
            # rank the examples of this class by decreasing score
            _, sortind = torch.sort(self.scores[:, k], dim=0, descending=True)
            truth = self.targets[:, k][sortind]

            # cumulative (weighted) true positives and retrieved mass
            if has_weights:
                weight = self.weights[sortind]
                tp = (truth.float() * weight).cumsum(0)
                retrieved = weight.cumsum(0)
            else:
                tp = truth.float().cumsum(0)
                retrieved = ranks

            # compute precision curve
            precision = tp.div(retrieved)

            # average the precision at the positions of the positives;
            # max(..., 1) keeps classes without positives at AP 0 instead of
            # dividing by zero
            ap[k] = precision[truth.bool()].sum() / max(truth.sum(), 1)

        return ap

  from .autonotebook import tqdm as notebook_tqdm


### Read Sparse Predictions and Dense Scores

In [6]:
# Sparse predictions table; later cells read its 'video-id', 'label',
# 'score', 't-start' and 't-end' columns.
pred = pd.read_csv('/path/to/Multithumos_results_eval.csv')
# File names of the dense-score windows; os.listdir returns them in
# arbitrary order.
dense_segs = os.listdir('/path/to/dense_results')


In [8]:
# Group the dense-score windows by video: the first three '_'-separated
# tokens of each file name form the video id, the fourth is the window's
# start offset.
dense_segments = {}
for i in dense_segs:
    parts = i.split('_')
    vid = '_'.join(parts[:3])
    base = float(parts[3])
    windows = dense_segments.setdefault(vid, [])
    # NOTE(review): torch.load unpickles the file; only point this at
    # trusted result directories.
    windows.append((base, torch.load(f'/path/to/dense_results/{i}', map_location='cpu')))
    

### Evaluate Dense Scores

In [22]:
frame_dict = load_json('/path/to/multithumos_frames.json')
annotations = load_json('/path/to/multithumos.json')
base_scores = {}
gt_dense = {}

# Merge sliding windows
n_feature_per_window = 64
interval = 4  # frames per feature step
n_classes = 65
for vid, windows in dense_segments.items():
    total_frames = int(frame_dict[vid])
    num_frames = total_frames // interval
    # Round up to a whole number of windows so the last window fits.
    num_features = (num_frames // n_feature_per_window + 1) * n_feature_per_window
    fps = float(total_frames / annotations[vid]['duration'])
    scores = np.zeros((num_features, n_classes))
    gt = np.zeros((num_features, n_classes))
    # The windows were collected in os.listdir order, which is arbitrary.
    # Sort by start offset so that, wherever windows overlap, the later
    # window deterministically overwrites the earlier one.
    for (base, seg) in sorted(windows, key=lambda window: window[0]):
        win_size = seg.shape[0]
        first = int(base // interval)
        scores[first:first + win_size, :] = seg
    base_scores[vid] = scores[:num_frames, :]
    # Each annotation is read as (1-based label, start, end); start and end
    # are multiplied by fps to obtain frame indices.
    for anno in annotations[vid]['actions']:
        start, end = int(anno[1] * fps), int(anno[2] * fps)
        label = anno[0] - 1
        gt[start // interval:end // interval + 1, label] = 1
    gt_dense[vid] = gt[:num_frames, :]

In [23]:
# Frame-level mAP of the merged dense scores on their own.
apm = APMeter()
for vid, logit in base_scores.items():
    apm.add(logit, gt_dense[vid])
per_class_ap = apm.value()
val_map = 100 * per_class_ap.mean()
print ("Test Frame-based map", val_map)

Test Frame-based map tensor(34.4330)


### Transform Sparse Preds to Dense Predictions with Gaussian Kernels

In [28]:
def sparse2dense(prediction, annotations, n_classes, scale=3,ratio=5, thresh=0.01):
    """Turn sparse segment predictions into dense per-frame class scores.

    For every video in the module-level ``base_scores`` dict, each predicted
    segment whose score exceeds ``thresh`` marks the frames it covers with
    its score (keeping the maximum where segments of one class overlap).
    Every marked frame then contributes a Gaussian bump of standard
    deviation ``scale`` whose height is scaled by ``ratio`` and the score.

    Args:
        prediction: DataFrame with the columns 'video-id', 'label', 'score',
            't-start' and 't-end'. 'label' is used directly as a column
            index (presumably 0-based; verify against the producer).
        annotations: dict keyed by video id with a 'duration' and a list of
            'actions', each read as (1-based label, start, end).
        n_classes: number of classes, i.e. columns of the returned arrays.
        scale: standard deviation of the Gaussian kernel, in frames.
        ratio: constant multiplier applied to every Gaussian.
        thresh: predictions with score <= thresh are ignored.

    Returns:
        (y_preds, y_gts): two dicts keyed by video id holding arrays of shape
        (num_frames, n_classes) with the dense scores and the binary ground
        truth, where num_frames is ``base_scores[vid].shape[0]``.
    """
    y_preds = {}
    y_gts = {}
    for vid in tqdm(base_scores.keys()):
        vlen = base_scores[vid].shape[0]
        # Dense frames per unit of annotation time (not the raw video fps).
        fps = vlen / annotations[vid]['duration']
        # NOTE(review): this grid is 1-based while the frame indices used
        # below are 0-based, so each bump lands one row early; kept as is to
        # preserve the reported numbers - confirm whether it is intended.
        x_d = np.linspace(1, vlen, vlen)
        preds = prediction[prediction['video-id']==vid]

        # Binary ground truth; the end frame is inclusive.
        y_true = np.zeros((vlen, n_classes))
        for act in annotations[vid]['actions']:
            label = act[0]-1
            first = max(int(act[1]*fps), 0)
            last = min(int(act[2]*fps) + 1, vlen)
            if first < last:
                y_true[first:last, label] = 1

        # Per class: frame index -> highest score of any segment covering it.
        peak_scores = [{} for _ in range(n_classes)]
        for _, p in preds.iterrows():
            label = int(p['label'])
            score = float(p['score'])
            if score > thresh:
                per_loc = peak_scores[label]
                for loc in range(int(p['t-start']*fps), int(p['t-end']*fps)):
                    per_loc[loc] = max(per_loc.get(loc, score), score)

        # Sum one scaled Gaussian per marked frame. norm.pdf is called
        # directly: building a frozen distribution per frame is very slow
        # and yields exactly the same values.
        y_scores = np.zeros((vlen, n_classes))
        for label, per_loc in enumerate(peak_scores):
            dense = np.zeros(vlen)
            for loc, score in per_loc.items():
                dense += norm.pdf(x_d, loc=loc, scale=scale)*ratio*score
            y_scores[:, label] = dense
        y_preds[vid]=y_scores
        y_gts[vid]=y_true
    return y_preds, y_gts


In [29]:
# Dense scores built from the sparse predictions, plus the matching ground
# truth, for all 65 classes.
extend_logits, gt_extend = sparse2dense(pred, annotations, n_classes=65)

100%|██████████| 213/213 [05:08<00:00,  1.45s/it]


### Fuse Sparse2Dense Scores and Dense Scores, and Evaluate

In [30]:
# Sweep the fusion weight: theta = 0 uses only the dense scores,
# theta = 1 only the sparse-to-dense scores.
for theta in np.arange(0,1.1,0.1):
    print(theta)
    apm = APMeter()
    for vid, sparse_scores in extend_logits.items():
        logit = theta * sparse_scores + (1-theta) * base_scores[vid]
        apm.add(logit, gt_extend[vid])
    per_class_ap = apm.value()
    val_map = 100 * per_class_ap.mean()
    print ("Test Frame-based map", val_map)

0.0
Test Frame-based map tensor(34.4291)
0.1
Test Frame-based map tensor(40.6733)
0.2
Test Frame-based map tensor(41.1709)
0.30000000000000004
Test Frame-based map tensor(40.6506)
0.4
Test Frame-based map tensor(39.9089)
0.5
Test Frame-based map tensor(39.2042)
0.6000000000000001
Test Frame-based map tensor(38.6092)
0.7000000000000001
Test Frame-based map tensor(38.0807)
0.8
Test Frame-based map tensor(37.6205)
0.9
Test Frame-based map tensor(37.2111)
1.0
Test Frame-based map tensor(36.3598)
