In [56]:
from pathlib import Path
import csv
import numpy as np
import pandas as pd
from mmaction.core import confusion_matrix

In [8]:
p_pkl_dir = Path('/data/hyogun/repos/haawron_mmaction2/data/features')
weight = 'k400'
task = 'sim2real'
probed_on = 'k400'
p_subtask_dir = p_pkl_dir / weight / task / probed_on
p_pkl_train = p_subtask_dir / 'train.pkl'
p_pkl_test = p_subtask_dir / 'test.pkl'
x_train = np.array(np.load(p_pkl_train, allow_pickle=True))
x_test = np.array(np.load(p_pkl_test, allow_pickle=True))
x_train.shape, x_test.shape

((23085, 768), (961, 768))

In [36]:
p_ann_dir = Path(f"/data/hyogun/repos/haawron_mmaction2/data/_filelists/{probed_on}{'/processed' if probed_on=='k400' else ''}")
ann_train = p_ann_dir / f'filelist_{probed_on}_train_open.txt'
ann_test = p_ann_dir / f'filelist_{probed_on}_test_open.txt'
with ann_train.open() as f1, ann_test.open() as f2:
    ann_train = np.array(list(csv.reader(f1, delimiter=' ')))
    ann_test = np.array(list(csv.reader(f2, delimiter=' ')))
print(ann_train.shape); print(ann_train[:5])

(23085, 3)
[['tap_dancing/--6q_33gNew_000132_000142' '300' '5']
 ['climbing_a_rope/--EaS9P7ZdQ_000013_000023' '300' '12']
 ['balloon_blowing/--Ntf6n-j9Q_000017_000027' '300' '12']
 ['javelin_throw/--_S9IDQPLg_000135_000145' '300' '2']
 ['balloon_blowing/--gx7yb1-x0_000298_000308' '300' '12']]


In [23]:
sim = x_train @ x_test.T  # [N_train, N_test]
idx_best = sim.argmax(axis=0)  # [N_test], for each test data find the best-matching train point
idx_best.shape, idx_best[:5]

((961,), array([21480,  7553, 11331,  3376, 17984]))

In [45]:
pred = np.take_along_axis(ann_train[:,-1], idx_best, axis=0).astype(int)
gt = ann_test[:,-1].astype(int)
np.vstack((pred[:5], gt[:5]))

array([[12, 11,  9, 12,  6],
       [ 8, 11,  9, 12,  6]])

In [48]:
conf = confusion_matrix(pred, gt)
conf

array([[ 58,   0,   0,   1,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,  38,   2,   1,   0,   0,   1,   0,   0,   0,   0,   1,   0],
       [  1,   1,  60,   8,   0,   2,   0,   0,   0,   2,   0,   0,   2],
       [  0,   0,   3,  57,   1,   0,   1,   0,   1,   1,   0,   0,   0],
       [  0,   0,   0,   0,  13,   1,   0,   0,   0,   0,   0,   0,   1],
       [  0,   0,   0,   0,   1,  95,   0,   0,   1,   0,   0,   0,   1],
       [  0,   0,   0,   0,   0,   1,  90,   0,   1,   1,   0,   0,   4],
       [  0,   0,   0,   0,   0,   0,   0,  19,   0,   0,   0,   0,   0],
       [  0,   1,   1,   0,   1,   1,   0,   0,  51,   1,   2,   1,  12],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,  15,   0,   0,   0],
       [  1,   0,   1,   1,   0,   3,   1,   0,   0,   2,  31,   0,   3],
       [  0,   0,   0,   0,   0,   0,   0,   1,   1,   0,   0,  48,   0],
       [  0,   0,   1,   4,   2,   5,   1,   0,   3,   1,   4,   0, 290]])

In [55]:
recalls = conf.diagonal() / conf.sum(axis=1)
os_star = recalls[:-1].mean()
unk = recalls[-1]
H = 2 / (1/os_star + 1/unk)
print(
    f'''H:   {100*H:.1f}
OS*: {100*os_star:.1f}
UNK: {100*unk:.1f}'''
)

H:   91.2
OS*: 89.3
UNK: 93.2


# Altogether

In [3]:
from pathlib import Path
import csv
import numpy as np
import pandas as pd
from mmaction.core import confusion_matrix
import inspect


def acc(pred=None, gt=None, conf=None):
    assert (pred is not None and gt is not None) or conf is not None
    if conf is None:
        conf = confusion_matrix(pred, gt)
    return conf.diagonal().sum() / conf.sum()

def topk(pred=None, gt=None, **kwargs):
    '''pred: [k, N]'''
    assert pred is not None and gt is not None
    return ((pred == gt[None,:]).sum(axis=0) > 0).sum() / gt.shape[0]

def mca(pred=None, gt=None, conf=None):
    assert (pred is not None and gt is not None) or conf is not None
    if conf is None:
        conf = confusion_matrix(pred, gt)
    den = conf.sum(axis=1)
    recalls = conf.diagonal()[den>0] / den[den>0]  # ek100 has 0-pop classes
    return recalls.mean()

weights = ['tsm', 'i3d', 'k400-SVT', 'k400', 'h100m', 'ssv2', 'in1k']
tasks = {
    'hello': ['ucf', 'hmdb'],
    'ek100': ['P02', 'P04', 'P22'],
    'simnreal': ['k400', 'babel']
}
real_dataset_name_mapping = {
    'ucf': ['ucf101', 'ucf'],
    'hmdb': ['hmdb51', 'hmdb'],
    'P02': ['ek100', 'P02'],
    'P04': ['ek100', 'P04'],
    'P22': ['ek100', 'P22'],
    'k400': ['k400', 'k400'],
    'babel': ['babel', 'babel'],
}
num_classes_mapping = {
    'ucf': [12, 22],
    'hmdb': [12, 22],
    'P02': [5, 15],
    'P04': [5, 15],
    'P22': [5, 15],
    'k400': [12, 27],
    'babel': [12, 20],
}

rows = []
p_pkl_dir = Path('/data/hyogun/repos/haawron_mmaction2/data/features')
for weight in weights:
    for task in tasks:
        for probed_on in tasks[task]:
            p_subtask_dir = p_pkl_dir / weight / task / probed_on
            print(f'{weight} --> {probed_on}')
            p_pkl_train = p_subtask_dir / 'train.pkl'
            p_pkl_test = p_subtask_dir / 'test_merged.pkl'
            if not p_pkl_train.is_file() or not p_pkl_test.is_file():
                rows.append(['0', '0', '0'])
                print(p_subtask_dir)
                print('\tpassed\n')
                continue
            x_train = np.array(np.load(p_pkl_train, allow_pickle=True))
            x_test = np.array(np.load(p_pkl_test, allow_pickle=True))

            dataset_dirname, dataset_filelist_name = real_dataset_name_mapping[probed_on]
            p_ann_dir = Path(f"/data/hyogun/repos/haawron_mmaction2/data/_filelists/{dataset_dirname}{'/processed' if task in ['ek100', 'simnreal'] else ''}")
            p_ann_train = p_ann_dir / f'filelist_{dataset_filelist_name}_train_open_all.txt'
            p_ann_test = p_ann_dir / f'filelist_{dataset_filelist_name}_test_merged_open_all.txt'
            with p_ann_train.open() as f1, p_ann_test.open() as f2:
                ann_train = np.array(list(csv.reader(f1, delimiter=' ')))
                ann_test = np.array(list(csv.reader(f2, delimiter=' ')))

            sim = x_train @ x_test.T  # [N_train, N_test]
            idxx_best = sim.argsort(axis=0)[::-1]  # [N_train, N_test]
            pred = np.take(ann_train[:,-1].astype(int), idxx_best[:5])  # [5, N_test]
            gt = ann_test[:,-1].astype(int)  # [N_test]
            conf = confusion_matrix(pred[0], gt)

            num_old_classes, num_all_classes = num_classes_mapping[probed_on]
            all_acc = acc(conf=conf)
            old_acc = acc(conf=conf[:num_old_classes])
            new_acc = acc(conf=np.vstack([np.zeros_like(conf[:num_old_classes]), conf[num_old_classes:]]))
            all_b_acc = mca(conf=conf)
            old_b_acc = mca(conf=conf[:num_old_classes])
            new_b_acc = mca(conf=np.vstack([np.zeros_like(conf[:num_old_classes]), conf[num_old_classes:]]))

            rows.append([f'{100*all_b_acc:.1f}', f'{100*old_b_acc:.1f}', f'{100*new_b_acc:.1f}'])

            with np.printoptions(linewidth=10000, threshold=np.inf):
                print(conf)
            print(
                inspect.cleandoc(
                    f'''
                    ALL(B):  {100*all_b_acc:.1f}
                    Old(B): {100*old_b_acc:.1f}
                    New(B): {100*new_b_acc:.1f}
                    ALL:  {100*all_acc:.1f}
                    Old: {100*old_acc:.1f}
                    New: {100*new_acc:.1f}
                    '''
                ), '\n'
            )
print(rows)

tsm --> ucf
/data/hyogun/repos/haawron_mmaction2/data/features/tsm/hello/ucf
	passed

tsm --> hmdb
/data/hyogun/repos/haawron_mmaction2/data/features/tsm/hello/hmdb
	passed

tsm --> P02
/data/hyogun/repos/haawron_mmaction2/data/features/tsm/ek100/P02
	passed

tsm --> P04
/data/hyogun/repos/haawron_mmaction2/data/features/tsm/ek100/P04
	passed

tsm --> P22
/data/hyogun/repos/haawron_mmaction2/data/features/tsm/ek100/P22
	passed

tsm --> k400
/data/hyogun/repos/haawron_mmaction2/data/features/tsm/simnreal/k400
	passed

tsm --> babel
[[ 82  14   4   0   0   0   0   0   1   1   0   0   0   0   0   1   0   1   7   0]
 [ 23 187   7   3   0   0   0   1   0   0   0   0   0   0   1   0   0   7   1   0]
 [ 21   3   8   3   0   0   0   1   1  17   0   0   0   0   0   4   0   1   0   0]
 [ 18  29   4  10   0   0   0   2   0   1   0   0   2   0   0   0   2   7   2   0]
 [  9  17   6   3   1   0   0   0   1   4   0   0   0   0   0   1   0   0   2   0]
 [  1   0   0  13   0   1   0   1   0   3   0   

In [5]:
all_subtasks = np.array(sum(map(list, tasks.values()), []) + ['Average'])
colnames = pd.MultiIndex.from_product([all_subtasks, ['ALL(B)', 'Old(B)', 'New(B)']])
rownames = np.array(weights)
print(colnames.shape, rownames.shape)
rows = np.array(rows).astype(float)
average = rows.reshape(len(weights), all_subtasks.shape[0]-1, 3).mean(axis=1).round(1)  # [# weights, 3]
result = np.hstack((rows.reshape(len(weights), -1), average))  # [# weights, # subtasks x 3] ++ [# weights, 3] -> [# weights, (# subtasks + 1) x 3]
df = pd.DataFrame(result, columns=colnames, index=rownames)
pd.set_option('display.colheader_justify', 'center')
with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.expand_frame_repr', None):
    print(df)
print()
print(df.to_latex(multicolumn_format='c'))

(24,) (7,)
          ucf                  hmdb                 P02                  P04                  P22                  k400                babel                Average              
         ALL(B) Old(B) New(B) ALL(B) Old(B) New(B) ALL(B) Old(B) New(B) ALL(B) Old(B) New(B) ALL(B) Old(B) New(B) ALL(B) Old(B) New(B) ALL(B) Old(B) New(B)  ALL(B) Old(B) New(B)
tsm         0.0    0.0    0.0   0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   16.2   20.5    9.8     2.3    2.9    1.4 
i3d         0.0    0.0    0.0   0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0    0.0   10.3   14.1    4.7     1.5    2.0    0.7 
k400-SVT    4.1    0.2    8.8   4.5    0.0   10.0    6.6   19.8    0.0    6.4   17.9    0.7    7.8   23.5    0.0    3.7    8.3    0.0    5.6    1.4   12.0     5.5   10.2    4.5 
k400      100.0  100.0  100.0  89.3   95.0   82.3   24.4   36.5   18.3   23.1   30.9   19.3   34.9 

{   } & MCA  & Top1 &  Top5 && MCA  & Top1 &  Top5 && MCA  &  Top1 &  Top5 && MCA  &  Top1 &  Top5 && MCA  &  Top1 &  Top5 &&  MCA  &  Top1 &  Top5 &&  MCA  &  Top1 &  Top5 &&   MCA   &  Top1 &  Top5 \\
\midrule
k400  &  5.8 &  7.0 &  24.7 &&  4.2 &  5.9 &  26.2 &&  1.1 &   9.6 &  35.5 &&  1.4 &   7.4 &  31.0 &&  2.0 &  14.7 &  47.3 &&  86.8 &  87.4 &  94.5 &&  55.2 &  65.6 &  85.3 &&   22.4  &  28.2 &  49.2 \\
h100m &  4.2 &  5.9 &  31.8 &&  4.7 &  4.4 &  19.4 &&  1.5 &  20.7 &  42.3 &&  1.3 &  16.0 &  41.9 &&  2.3 &  22.5 &  49.0 &&   4.0 &   9.4 &  16.0 &&   5.0 &  19.5 &  57.7 &&    3.3  &  14.1 &  36.9 \\
ssv2  &  4.5 &  5.3 &  26.4 &&  4.5 &  4.6 &  30.8 &&  1.5 &  20.8 &  45.4 &&  1.3 &   3.6 &   8.4 &&  2.3 &  23.1 &  53.6 &&   3.2 &   2.4 &  16.8 &&   5.0 &   1.6 &  48.6 &&    3.2  &   8.8 &  32.9 \\
in1k  &  4.3 &  4.5 &  22.2 &&  4.2 &  5.2 &  28.9 &&  1.4 &  18.4 &  33.8 &&  1.6 &  16.8 &  25.7 &&  2.2 &   4.7 &  27.5 &&   4.0 &   3.8 &  18.2 &&   5.0 &  19.5 &  19.5 &&    3.2  &  10.4 &  25.1 \\