# [Fine-grained Incident Video Retrieval (FIVR)](https://arxiv.org/abs/1809.04094)
## Dataset
- 数据集特征目录：/home/camp/FIVR/features/vcms_v1
- Annotation目录：/home/camp/FIVR/annotation
- 描述：我们提供视频的帧级别特征
    - 每个h5文件有两个group: images和names
    - images group 保存了每个视频帧的特征，id是视频id，value是帧特征
    - names group 保存了每个视频帧的名字，id是视频id，value是帧的名字，例如\[1.jpg, 2.jpg,...\]
- 一些关键词的解释
    - vid: 和视频一一对应。
    - name: 和视频一一对应，annotation中使用的是name
    - 通过vid2name和name2vid可以确定他们之间的映射关系
- 三种相似的视频
    - Duplicate Scene Video (DSV)
    - Complementary Scene Video (CSV)，如果A、B两个视频描述的是同一个事件，且时间上有overlap，则认为彼此之间的相似关系是CSV
    - Incident Scene Video (ISV)，如果A、B两个视频描述的是同一个事件，但时间上没有overlap，则认为彼此之间的相似关系是ISV
- 三个任务
    - DSVR：负责检索出DSV的相似视频
    - CSVR：负责检索出DSV+CSV的相似视频
    - ISVR：负责检索出DSV+CSV+ISV的相似视频

In [1]:
import numpy as np
import os
import h5py
from tqdm import tqdm
from glob import glob
import pickle as pk
import json
import time
from scipy.spatial.distance import cdist
from future.utils import viewitems, lrange
from sklearn.metrics import precision_recall_curve

In [2]:
def read_h5file(path):
    """Open an HDF5 feature file read-only and hand back its two groups.

    The file is not closed here; the returned group objects refer to the
    still-open file, so closing it is left to the caller.

    Args:
        path: location of the ``.h5`` file.

    Returns:
        A 3-tuple ``(keys, images, names)``: the keys view of the
        ``'images'`` group, the ``'images'`` group itself and the
        ``'names'`` group.
    """
    h5_file = h5py.File(path, 'r')
    images_group, names_group = (h5_file.get(key) for key in ('images', 'names'))
    return images_group.keys(), images_group, names_group
def load_features(dataset_dir, is_gv=True):
    """
    Load per-video features from every ``*.h5`` file found in ``dataset_dir``.

    :param dataset_dir: directory that holds the feature files,
        e.g. /home/camp/FIVR/features/vcms_v1
    :param is_gv: True -> keep one global vector per video (mean of the
        frame features over axis 0, then divided by its L2 norm).
        False -> keep every frame feature and append the (un-normalised)
        frame mean as one extra trailing row.
    :return: when ``is_gv`` is True, a dict ``vid -> global vector``.
        Otherwise a tuple ``(final_vids, features, vid2features)`` where
        ``features`` is a flat list of feature rows over all videos,
        ``final_vids[i]`` is the vid that ``features[i]`` belongs to, and
        ``vid2features`` maps each vid to its array of rows.
    """
    h5_paths = glob(os.path.join(dataset_dir, '*.h5'))
    print(h5_paths)
    vid2features = {}
    final_vids = []
    features = []
    for h5_path in h5_paths:
        vids, g1, _ = read_h5file(h5_path)
        try:
            for vid in tqdm(vids):
                frame_features = g1.get(vid)
                if is_gv:
                    global_vector = np.mean(frame_features, axis=0, keepdims=False)
                    global_vector /= np.linalg.norm(global_vector, ord=2, axis=0)
                    vid2features[vid] = global_vector
                else:
                    frame_mean = np.mean(frame_features, axis=0, keepdims=True)
                    rows = np.concatenate([frame_features, frame_mean], axis=0)
                    vid2features[vid] = rows
                    final_vids.extend([vid] * len(rows))
                    features.extend(rows)
        finally:
            # Everything kept above is an in-memory numpy array, so the HDF5
            # file can be released now instead of staying open until the
            # interpreter exits (one leaked handle per feature file).
            g1.file.close()
    if is_gv:
        return vid2features
    return final_vids, features, vid2features
def calculate_similarities(query_features, all_features):
    """
      Rank every candidate feature against every query by cosine similarity.

      Args:
        query_features: query vectors, shape [N, D]
        all_features: candidate vectors, shape [M, D]
      Returns:
        A list with one entry per query. Each entry is a list of
        ``(candidate_index, similarity)`` tuples ordered from the most to
        the least similar candidate.
    """
    # Cosine distance between each query and each candidate; np.nan_to_num
    # turns a NaN distance into 0.0, i.e. a similarity of 1 below.
    distances = cdist(query_features, all_features, metric='cosine')
    distances = np.nan_to_num(distances)

    ranked = []
    for row in distances:
        scores = 1 - row
        # argsort is ascending, so reverse it to list the best match first
        order = scores.argsort()[::-1]
        ranked.append([(idx, scores[idx]) for idx in order if not np.isnan(scores[idx])])
    return ranked
def evaluateOfficial(annotations, results, relevant_labels, dataset, quiet):
    """
      Calculate per-query Average Precision and the interpolated PR-curve
      based on the FIVR evaluation process.

      Queries that are absent from ``results`` or from ``dataset``, or whose
      relevant set is empty after restricting it to ``dataset``, are skipped
      with a printed warning and contribute to neither output.

      Args:
        annotations: mapping query -> {label: [video, ...]} with the
          annotation labels for each query
        results: mapping query -> {video: similarity} with the similarities
          of each query with the videos in the dataset
        relevant_labels: labels that are considered positives
        dataset: video ids contained in the dataset
        quiet: if False, print the AP of every query; if True, wrap the
          iteration over the queries in a tqdm progress bar instead
      Returns:
        mAP: list with the Average Precision of every evaluated query (the
          caller averages it to obtain the mean Average Precision)
        pr_curve: precision averaged over the evaluated queries at the 21
          recall levels 0.00, 0.05, ..., 1.00 (in that order)
    """
    pr, mAP = [], []
    iterations = viewitems(annotations) if not quiet else tqdm(viewitems(annotations))
    for query, gt_sets in iterations:
        query = str(query)
        if query not in results: print('WARNING: Query {} is missing from the result file'.format(query)); continue
        if query not in dataset: print('WARNING: Query {} is not in the dataset'.format(query)); continue

        # set of relevant videos: union of the lists of all relevant labels,
        # restricted to the videos that are part of the dataset
        query_gt = set(sum([gt_sets[label] for label in relevant_labels if label in gt_sets], []))
        query_gt = query_gt.intersection(dataset)
        if not query_gt: print('WARNING: Empty annotation set for query {}'.format(query)); continue

        # calculation of mean Average Precision (Eq. 6)
        # i  = relevant videos seen so far, ri = rank among dataset videos,
        # s  = running sum of precision@rank taken at every relevant hit
        i, ri, s = 0.0, 0, 0.0
        y_target, y_score = [], []
        # walk the retrieved videos from the highest to the lowest similarity
        for video, sim in sorted(viewitems(results[query]), key=lambda x: x[1], reverse=True):
            if video in dataset:
                y_score.append(sim)
                y_target.append(1.0 if video in query_gt else 0.0)
                ri += 1
                if video in query_gt:
                    i += 1.0
                    s += i / ri
        # relevant videos that were never retrieved still count in the denominator
        mAP.append(s / len(query_gt))
        if not quiet:
            print('Query:{}\t\tAP={:.4f}'.format(query, s / len(query_gt)))

        # add the dataset videos that are missing from the result file
        # (both lists are padded up to len(dataset); padded entries score 0.0)
        missing = len(query_gt) - y_target.count(1)
        y_target += [1.0 for _ in lrange(missing)] # add 1. for the relevant videos
        y_target += [0.0 for _ in lrange(len(dataset) - len(y_target))] # add 0. for the irrelevant videos
        y_score += [0.0 for _ in lrange(len(dataset) - len(y_score))]

        # calculation of interpolate PR-curve (Eq. 5)
        precision, recall, thresholds = precision_recall_curve(y_target, y_score)
        p = []
        # recall levels 1.00 down to 0.00 in steps of 0.05; the interpolated
        # precision is the best precision reached at that recall or higher
        for i in lrange(20, -1, -1):
            idx = np.where((recall >= i * 0.05))[0]
            p.append(np.max(precision[idx]))
        pr.append(p)
    # average the curves over the queries and flip them so that index 0
    # corresponds to recall 0.0
    return mAP, np.mean(pr, axis=0)[::-1]
class GTOBJ:
    """Ground truth for the evaluation: query annotations plus dataset ids.

    Attributes are filled while constructing the object:
      annotations: the object parsed from the annotation JSON file.
      dataset: set of the video ids listed in the dataset id file.
    """

    def __init__(self,
                 annotation_path='/home/camp/FIVR/annotation/annotation.json',
                 dataset_path='/home/camp/FIVR/annotation/youtube_ids.txt'):
        """Read both ground-truth files.

        Args:
            annotation_path: JSON file with the annotations.
            dataset_path: text file with one video id per line.
        """
        with open(annotation_path, 'r') as f:
            self.annotations = json.load(f)
        # np.loadtxt yields a 0-d array for a one-line file, whose tolist()
        # is a bare str; set() would then split that id into characters.
        # atleast_1d keeps a single id as a single element.
        video_ids = np.atleast_1d(np.loadtxt(dataset_path, dtype=str))
        self.dataset = set(video_ids.tolist())
# Ground truth (annotations + dataset ids) is read from disk once, here.
gtobj = GTOBJ()
# Annotation labels that count as positives for each retrieval task; every
# task keeps the labels of the previous one and adds one more.
# NOTE(review): 'ND' presumably stands for near-duplicate - confirm against
# the annotation files.
relevant_labels_mapping = {
    'DSVR': ['ND','DS'],
    'CSVR': ['ND','DS','CS'],
    'ISVR': ['ND','DS','CS','IS'],
}

In [3]:
# is_gv=True: one frame-averaged, L2-normalised feature vector per video id
vid2features = load_features('/home/camp/FIVR/features/vcms_v1', is_gv=True)

 10%|▉         | 303/3165 [00:00<00:00, 3022.39it/s]

['/home/camp/FIVR/features/vcms_v1/26.h5', '/home/camp/FIVR/features/vcms_v1/29.h5', '/home/camp/FIVR/features/vcms_v1/15.h5', '/home/camp/FIVR/features/vcms_v1/3.h5', '/home/camp/FIVR/features/vcms_v1/12.h5', '/home/camp/FIVR/features/vcms_v1/10.h5', '/home/camp/FIVR/features/vcms_v1/18.h5', '/home/camp/FIVR/features/vcms_v1/7.h5', '/home/camp/FIVR/features/vcms_v1/8.h5', '/home/camp/FIVR/features/vcms_v1/13.h5', '/home/camp/FIVR/features/vcms_v1/11.h5', '/home/camp/FIVR/features/vcms_v1/23.h5', '/home/camp/FIVR/features/vcms_v1/5.h5', '/home/camp/FIVR/features/vcms_v1/27.h5', '/home/camp/FIVR/features/vcms_v1/16.h5', '/home/camp/FIVR/features/vcms_v1/17.h5', '/home/camp/FIVR/features/vcms_v1/19.h5', '/home/camp/FIVR/features/vcms_v1/25.h5', '/home/camp/FIVR/features/vcms_v1/22.h5', '/home/camp/FIVR/features/vcms_v1/30.h5', '/home/camp/FIVR/features/vcms_v1/6.h5', '/home/camp/FIVR/features/vcms_v1/0.h5', '/home/camp/FIVR/features/vcms_v1/28.h5', '/home/camp/FIVR/features/vcms_v1/20.h5

100%|██████████| 3165/3165 [00:01<00:00, 3015.83it/s]
100%|██████████| 859/859 [00:00<00:00, 3064.52it/s]
100%|██████████| 10868/10868 [00:03<00:00, 3126.71it/s]
100%|██████████| 9893/9893 [00:03<00:00, 3120.39it/s]
100%|██████████| 12856/12856 [00:04<00:00, 3099.00it/s]
100%|██████████| 6903/6903 [00:02<00:00, 3108.02it/s]
100%|██████████| 13833/13833 [00:04<00:00, 3072.88it/s]
100%|██████████| 9884/9884 [00:03<00:00, 3093.52it/s]
100%|██████████| 11870/11870 [00:03<00:00, 3071.39it/s]
100%|██████████| 11888/11888 [00:03<00:00, 3077.68it/s]
100%|██████████| 3959/3959 [00:01<00:00, 3127.46it/s]
100%|██████████| 3164/3164 [00:01<00:00, 2964.55it/s]
100%|██████████| 7923/7923 [00:02<00:00, 3072.69it/s]
100%|██████████| 2297/2297 [00:00<00:00, 2966.92it/s]
100%|██████████| 18810/18810 [00:06<00:00, 3043.03it/s]
100%|██████████| 11878/11878 [00:03<00:00, 2998.83it/s]
100%|██████████| 10861/10861 [00:03<00:00, 3007.57it/s]
100%|██████████| 3165/3165 [00:01<00:00, 2910.96it/s]
100%|█████████

In [4]:
# Stack the per-video vectors into one float32 matrix; row i belongs to vids[i]
vids = list(vid2features)
print(vids[:10])
global_features = np.asarray([vid2features[vid] for vid in vids], np.float32)
global_features = np.squeeze(global_features)
print(np.shape(global_features))

['v0202d060000bkl0g5s9hq5ohaiab7qg', 'v0202d060000bkl0ifs9hq5ohaiabjf0', 'v0202d060000bkl0jfk9hq5ohaiabor0', 'v0202d060000bkl0mdk9hq5ohaiac7j0', 'v0202d060000bkl0q549hq5ohaiacp70', 'v0202d060000bkl0rds9hq5ohaiacujg', 'v0202d060000bkl0rt49hq5ohaiad0ug', 'v0202d060000bkl0th49hq5ohaiad77g', 'v0202d060000bkl0tok9hq5ohaiad85g', 'v0202d060000bkl0vhc9hq5ohaiadfdg']
(225959, 512)


In [5]:
# Load the vid -> name and name -> vid lookup tables.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open('/home/camp/FIVR/vid2name.pk', 'rb') as pk_file:
    vid2names = pk.load(pk_file)
# name2vids must come from its own file; reading vid2name.pk a second time
# would only produce a copy of vid2names.
# NOTE(review): the file name name2vid.pk is assumed from the naming of
# vid2name.pk - confirm it exists next to it.
with open('/home/camp/FIVR/name2vid.pk', 'rb') as pk_file:
    name2vids = pk.load(pk_file)

In [6]:
# Run the evaluation for the three retrieval tasks
annotation_dir = '/home/camp/FIVR/annotation'
names = np.asarray([vid2names[vid][0] for vid in vids])
# First row index of every name, so that each query is located with a dict
# lookup instead of a full scan over `names`.
name2index = {}
for row_index, video_name in enumerate(names):
    name2index.setdefault(str(video_name), row_index)
query_names = None
results = None
for task_name in ['DSVR', 'CSVR', 'ISVR']:
    annotation_path = os.path.join(annotation_dir, task_name + '.json')
    with open(annotation_path, 'r') as annotation_file:
        json_obj = json.load(annotation_file)
    if results is None:
        # The similarities do not depend on the task, so they are computed
        # only once, with the queries listed in the first task file.
        query_names = json_obj.keys()
        query_names = [str(query_name) for query_name in query_names]
        # Only queries that have a feature row get a row in `similarities`;
        # their names are tracked separately so rows and names stay aligned
        # even when some queries are skipped.
        matched_query_names = []
        query_indexs = []
        for query_name in query_names:
            if query_name in name2index:
                matched_query_names.append(query_name)
                query_indexs.append(name2index[query_name])
            else:
                print('skip query: ', query_name)
        # Indexing with a list keeps the result 2-D even for a single query,
        # which is what cdist requires (no squeeze here).
        query_features = global_features[query_indexs]
        similarities = calculate_similarities(query_features, global_features)
        results = dict()
        for query_name, cur_sim in zip(matched_query_names, similarities):
            query_result = {names[index]: sim for index, sim in cur_sim}
            # a query must not be returned as a result of itself
            query_result.pop(query_name, None)
            results[query_name] = query_result
    mAPOffcial, precisions = evaluateOfficial(annotations=gtobj.annotations, results=results,
                                              relevant_labels=relevant_labels_mapping[task_name],
                                              dataset=gtobj.dataset,
                                              quiet=False)
    print('{} mAPOffcial is {}'.format(task_name, np.mean(mAPOffcial)))

Query:wrC_Uqk3juY		AP=0.2612
Query:Ql15rKfPtgE		AP=0.4428
Query:5MBA_7vDhII		AP=0.2473
Query:2ncF3Gfkbao		AP=0.8216
Query:Zb5gPorHKJc		AP=0.4739
Query:cDXTcNL1ebY		AP=0.7391
Query:_R7TYV5YxzU		AP=0.6957
Query:-VkKyuMhBTg		AP=0.4303
Query:hILt-pP83AU		AP=0.3728
Query:6at-OEesGXc		AP=0.5236
Query:fxtVHWmlP0Q		AP=0.5215
Query:-1t97fYWeyQ		AP=0.3109
Query:H6nSyV-EdLM		AP=0.8890
Query:xKI7asn80JY		AP=0.4014
Query:GH0NAHXwVCY		AP=0.8502
Query:2n30dbPBNKE		AP=0.4523
Query:RfgJ6VfFR64		AP=0.5390
Query:sxYUicqiZvw		AP=0.1728
Query:K8PckJMMpRU		AP=0.1515
Query:KPDQuEtKB0E		AP=0.7394
Query:kadmEBYEkxE		AP=0.0712
Query:CCMolsTugus		AP=0.2070
Query:n_dfai6F5lg		AP=0.5146
Query:Jsguw1rO08o		AP=0.3472
Query:GJij-4mt6SA		AP=0.1568
Query:FvcRE7-bnBM		AP=0.5613
Query:iKezL521mEI		AP=0.3368
Query:cJBA2PjbeJA		AP=0.2621
Query:8ja0i5wV-io		AP=0.4860
Query:KcwGKFrMNEU		AP=0.5431
Query:5CVZLZZqvzI		AP=0.4844
Query:TmD3SibKHmI		AP=0.3129
Query:iNO8LOBn73U		AP=0.3457
Query:k_NT43aJ_Jw		AP=0.2144
Query:8nYB60jF

Query:yVvVI9PLoOc		AP=0.6645
Query:rsJhjHGatXg		AP=0.1535
Query:GqmclzTXHqs		AP=0.5757
Query:NtmNO8pgXnE		AP=0.5395
Query:Mw_nlr5pZEI		AP=0.3405
Query:9aqU9UiDBD4		AP=0.0458
Query:4zL36PmWSOs		AP=0.5256
Query:7E-FUxOVmoI		AP=0.4198
Query:wSOMeS83c1o		AP=0.3694
Query:Zskb1cnH63Q		AP=0.4320
Query:cK4pSsWC7do		AP=0.2773
Query:CLqE1ylvSY0		AP=0.5874
Query:K_ZCc46lJBk		AP=0.0556
Query:3QV_zTsmr1Q		AP=0.5452
Query:mJbhNTmE-yQ		AP=0.2813
Query:NVc4lP3Rh70		AP=0.3368
Query:6dPo0jmeGZc		AP=0.4281
Query:D6oBg1vVQZo		AP=0.0557
Query:8jV2KxDjRCA		AP=0.2065
Query:resmD0PSzHs		AP=0.4462
ISVR mAPOffcial is 0.3547844331676474
