In [44]:
from slurm.gcd4da.commons.kmeans import SSKMeansTrainer as KMeans
from mmaction.datasets.custom_metrics import split_cluster_acc_v2, split_cluster_acc_v2_balanced
import numpy as np
from pathlib import Path
import csv
import inspect
from itertools import permutations

In [60]:
# Task name -> domains that belong to that task.
tasks = dict(
    hello=['ucf', 'hmdb'],
    ek100=['P02', 'P04', 'P22'],
    simnreal=['k400', 'babel'],
)

# Domain -> [filelist directory name, name used inside the filelist filename].
# Every ek100 domain lives in the shared 'ek100' directory.
real_dataset_name_mapping = {
    'ucf': ['ucf101', 'ucf'],
    'hmdb': ['hmdb51', 'hmdb'],
    **{domain: ['ek100', domain] for domain in tasks['ek100']},
    'k400': ['k400', 'k400'],
    'babel': ['babel', 'babel'],
}

# Domain -> [number of old (known) classes, number of all classes].
num_classes_mapping = {
    **{domain: [12, 22] for domain in tasks['hello']},
    **{domain: [5, 15] for domain in tasks['ek100']},
    'k400': [12, 27],
    'babel': [12, 20],
}

def get_ann_from_domainname(domain:str):
    """Load the integer labels of a domain's ``train_open_all`` filelist.

    The filelist is located under the module-level ``p_filelist_base`` using
    the ``[directory name, filelist name]`` pair stored in
    ``real_dataset_name_mapping``. Every domain except ``'ucf'`` and ``'hmdb'``
    keeps its filelist in an extra ``processed`` sub-directory.

    Args:
        domain: A key of ``real_dataset_name_mapping`` (e.g. ``'ucf'``, ``'P02'``).

    Returns:
        ``np.ndarray`` holding one int per non-empty filelist row, parsed from
        the last space-separated field of that row.

    Raises:
        KeyError: if ``domain`` is not a key of ``real_dataset_name_mapping``.
        FileNotFoundError: if the expected filelist does not exist.
        ValueError: if the last field of a row is not an integer.
    """
    dataset_dirname, dataset_filelist_name = real_dataset_name_mapping[domain]
    p_ann_dir = p_filelist_base / dataset_dirname
    if domain not in ['ucf', 'hmdb']:
        p_ann_dir /= 'processed'
    p_ann = p_ann_dir / f'filelist_{dataset_filelist_name}_train_open_all.txt'
    # Explicit check instead of `assert`: asserts are stripped under `python -O`
    # and would not report which path was missing.
    if not p_ann.is_file():
        raise FileNotFoundError(f'annotation filelist not found: {p_ann}')
    # newline='' is the file mode the csv module documents for its readers.
    with p_ann.open(newline='') as f:
        # csv.reader yields [] for blank lines; skip them instead of failing
        # with IndexError on row[-1].
        ann = np.array([int(row[-1]) for row in csv.reader(f, delimiter=' ') if row])
    return ann

def get_features_from_domainname(task:str, domain:str):
    """Return the stored training features of ``domain`` within ``task`` as an array.

    The features are read from ``train.pkl`` in a directory keyed by a weight
    name: ``'in1k'`` for the ``'simnreal'`` task and ``'k400'`` for every other
    task.

    Args:
        task: Task name used as a path component (e.g. ``'hello'``, ``'ek100'``).
        domain: Domain name used as a path component (e.g. ``'ucf'``, ``'P02'``).

    Returns:
        ``np.ndarray`` built from the loaded file contents.
    """
    if task == 'simnreal':
        weight = 'in1k'
    else:
        weight = 'k400'
    feature_file = Path(f'/data/hyogun/repos/haawron_mmaction2/data/features/{weight}/{task}/{domain}/train.pkl')
    # NOTE: allow_pickle=True lets np.load unpickle the file; only use it on
    # feature files from a trusted source.
    loaded = np.load(feature_file, allow_pickle=True)
    return np.array(loaded)

# Build (but do not train) one KMeans trainer per (source, target) domain pair.
# `models` maps the key '<source>_<target>' to the prepared trainer.
models = {}
# Root directory of the annotation filelists; read by get_ann_from_domainname().
p_filelist_base = Path(f"/data/hyogun/repos/haawron_mmaction2/data/_filelists")
for task in tasks:
    # permutations(..., 2) yields every ordered pair of distinct domains of the task.
    for source, target in permutations(tasks[task], 2):
        if task == 'ek100':
            # Swap the roles for ek100. All ordered pairs are still visited,
            # only in a different order (e.g. 'P04 P02' is printed first).
            target, source = source, target
        ann_source = get_ann_from_domainname(source)
        ann_target = get_ann_from_domainname(target)
        pkl_source = get_features_from_domainname(task, source)
        pkl_target = get_features_from_domainname(task, target)
        # Sanity check: exactly one label per feature row in each domain.
        assert ann_source.shape[0] == pkl_source.shape[0]
        assert ann_target.shape[0] == pkl_target.shape[0]
        # Class counts are looked up by the *target* domain; num_all_classes is
        # not used further in this loop.
        num_old_classes, num_all_classes = num_classes_mapping[target]
        print(source, target)
        print('\t', ann_source.shape,  ann_target.shape)
        print('\t', pkl_source.shape,  pkl_target.shape)

        # Only construct the trainer and attach its data here; training happens later.
        # NOTE(review): autoinit=False presumably skips the trainer's own data
        # loading so that Xs/anns can be injected manually -- confirm in SSKMeansTrainer.
        kmeans = KMeans(autoinit=False, num_known_classes=num_old_classes)
        # Features per split: the source split keeps only samples whose label is
        # below num_old_classes; the same full target set is used for the
        # train_target, valid and test splits.
        kmeans.Xs = {
            'train_source': pkl_source[ann_source<num_old_classes],
            'train_target': pkl_target,
            'valid': pkl_target,
            'test': pkl_target,
        }
        # Labels per split, filtered/assigned exactly like the features above.
        kmeans.anns = {
            'train_source': ann_source[ann_source<num_old_classes],
            'train_target': ann_target,
            'valid': ann_target,
            'test': ann_target,
        }
        models[f'{source}_{target}'] = kmeans

# Train every prepared trainer in `models` and print its clustering accuracies
# (plain, then class-balanced) followed by the confusion matrix.
for task in tasks:
    for source, target in permutations(tasks[task], 2):
        if task == 'ek100':
            # Same role swap as when the models were built, so the keys match.
            target, source = source, target
        num_old_classes, num_all_classes = num_classes_mapping[target]
        print(source, target)
        kmeans = models[f'{source}_{target}']
        kmeans.train()

        # Take the ground truth from the labels attached to THIS trainer.
        # Reading the notebook-global `ann_target` here would reuse the labels
        # left over from the last pair of the build loop (wrong domain, and
        # usually a different length than the predictions).
        ann_target = kmeans.anns['test']
        pred_target = kmeans.predict(kmeans.model_best)['test']
        # True for samples whose label is below num_old_classes.
        old_mask = (ann_target < num_old_classes)
        # NOTE(review): the trailing True presumably requests the confusion
        # matrix as a fourth return value -- confirm in custom_metrics.
        total_acc, old_acc, new_acc, conf = split_cluster_acc_v2(ann_target, pred_target, old_mask, True)
        log = inspect.cleandoc(f'''
            ALL: {100*total_acc:4.1f}
            Old: {100*old_acc:4.1f}
            New: {100*new_acc:4.1f}
        ''') + '\n\n'
        # Second report: same three numbers from the balanced metric.
        total_acc, old_acc, new_acc = split_cluster_acc_v2_balanced(ann_target, pred_target, old_mask)
        log += inspect.cleandoc(f'''
            ALL: {100*total_acc:4.1f}
            Old: {100*old_acc:4.1f}
            New: {100*new_acc:4.1f}
        ''') + '\n'
        print(log)
        # Widen the print options so each confusion-matrix row stays on one line.
        with np.printoptions(linewidth=1000, threshold=1000):
            print(conf)

ucf hmdb
	 (2102,) (1904,)
	 (2102, 768) (1904, 768)
hmdb ucf
	 (1904,) (2102,)
	 (1904, 768) (2102, 768)
P04 P02
	 (4555,) (4810,)
	 (4555, 768) (4810, 768)
P22 P02
	 (8061,) (4810,)
	 (8061, 768) (4810, 768)
P02 P04
	 (4810,) (4555,)
	 (4810, 768) (4555, 768)
P22 P04
	 (8061,) (4555,)
	 (8061, 768) (4555, 768)
P02 P22
	 (4810,) (8061,)
	 (4810, 768) (8061, 768)
P04 P22
	 (4555,) (8061,)
	 (4555, 768) (8061, 768)
k400 babel
	 (23085,) (10176,)
	 (23085, 768) (10176, 768)
babel k400
	 (10176,) (23085,)
	 (10176, 768) (23085, 768)
ucf hmdb


  sim = X @ Y.T / (norm_X * norm_Y)**.5
  sim = X @ Y.T / (norm_X * norm_Y)**.5
  sim = X @ Y.T / (norm_X * norm_Y)**.5
  sim = X @ Y.T / (norm_X * norm_Y)**.5


hmdb ucf


  sim = X @ Y.T / (norm_X * norm_Y)**.5
  sim = X @ Y.T / (norm_X * norm_Y)**.5


KeyboardInterrupt: 

# LEGACY: original single-pair run (source: babel, target: k400), superseded by the task loop above

In [3]:
# Load the stored in1k/simnreal training features for one fixed pair:
# babel as source and k400 as target.
p_pkl_source = Path('/data/hyogun/repos/haawron_mmaction2/data/features/in1k/simnreal/babel/train.pkl')
p_pkl_target = Path('/data/hyogun/repos/haawron_mmaction2/data/features/in1k/simnreal/k400/train.pkl')

# NOTE: allow_pickle=True unpickles the files; only use on trusted feature files.
pkl_source = np.array(np.load(p_pkl_source, allow_pickle=True))
pkl_target = np.array(np.load(p_pkl_target, allow_pickle=True))
# Display both shapes as the cell output.
pkl_source.shape, pkl_target.shape

((10176, 768), (23085, 768))

In [4]:
# Load the labels of the babel (source) and k400 (target) train_open_all filelists.
p_ann_source = Path('/data/hyogun/repos/haawron_mmaction2/data/_filelists/babel/processed/filelist_babel_train_open_all.txt')
p_ann_target = Path('/data/hyogun/repos/haawron_mmaction2/data/_filelists/k400/processed/filelist_k400_train_open_all.txt')

with p_ann_source.open() as f1, p_ann_target.open() as f2:
    # The label is the last space-separated field of each row.
    ann_source = np.array([int(line[-1]) for line in csv.reader(f1, delimiter=' ')])
    ann_target = np.array([int(line[-1]) for line in csv.reader(f2, delimiter=' ')])
# Display both shapes as the cell output.
ann_source.shape, ann_target.shape

((10176,), (23085,))

In [19]:
# Build and train a single trainer from the notebook-global pkl_source/pkl_target
# and ann_source/ann_target arrays.
num_old_classes = 12
# NOTE(review): autoinit=False presumably skips the trainer's own data loading so
# that Xs/anns can be injected manually -- confirm in SSKMeansTrainer.
kmeans = KMeans(autoinit=False, num_known_classes=num_old_classes)
# Features per split: source keeps only samples with label < num_old_classes;
# the full target set is reused for train_target, valid and test.
kmeans.Xs = {
    'train_source': pkl_source[ann_source<num_old_classes],
    'train_target': pkl_target,
    'valid': pkl_target,
    'test': pkl_target,
}
# Labels per split, mirroring the feature splits above.
kmeans.anns = {
    'train_source': ann_source[ann_source<num_old_classes],
    'train_target': ann_target,
    'valid': ann_target,
    'test': ann_target,
}
kmeans.train() 

  sim = X @ Y.T / (norm_X * norm_Y)**.5
  sim = X @ Y.T / (norm_X * norm_Y)**.5
  sim = X @ Y.T / (norm_X * norm_Y)**.5


In [42]:
# Evaluate the trained trainer on its 'test' split. Relies on the notebook-global
# kmeans, ann_target and num_old_classes set by earlier cells.
pred_target = kmeans.predict(kmeans.model_best)['test']
# True for samples whose label is below num_old_classes.
old_mask = (ann_target < num_old_classes)
# NOTE(review): the trailing True presumably requests the confusion matrix as a
# fourth return value -- confirm in custom_metrics.
total_acc, old_acc, new_acc, conf = split_cluster_acc_v2(ann_target, pred_target, old_mask, True)
log = inspect.cleandoc(f'''
    ALL: {100*total_acc:4.1f}
    Old: {100*old_acc:4.1f}
    New: {100*new_acc:4.1f}
''') + '\n\n'
# Second report: the same three numbers from the balanced metric.
total_acc, old_acc, new_acc = split_cluster_acc_v2_balanced(ann_target, pred_target, old_mask)
log += inspect.cleandoc(f'''
    ALL: {100*total_acc:4.1f}
    Old: {100*old_acc:4.1f}
    New: {100*new_acc:4.1f}
''') + '\n'
print(log)
# Widen the print options so each confusion-matrix row stays on one line.
with np.printoptions(linewidth=1000, threshold=1000):
    print(conf)

ALL: 13.4
Old: 16.1
New:  8.5

ALL:  8.5
Old: 11.1
New:  6.5

[[ 239    0  127   78    0   59  139    0  249    0    0  182  167    0    0    0    0    0   82    0    0    0    0    0   49    0    0]
 [  20    0   39   16    0   63   25    0   67    0    0   95   72    0    0    0    0    0   30    0    0    0    0    0   23    0    0]
 [ 188    0  129  219    0  117  166    0  234    0    0  341  173    0    0    0    0    0  103    0    0    0    0    0   49    0    0]
 [  67    0   87  253    0  179   72    0  127    0    0  320  230    0    0    0    0    0   75    0    0    0    0    0   78    0    0]
 [  68    0   34   12    0   64   40    0   67    0    0   65   34    0    0    0    0    0   25    0    0    0    0    0    5    0    0]
 [ 246    0  111   38    0 1003  122    0  529    0    0  318  204    0    0    0    0    0   97    0    0    0    0    0   39    0    0]
 [ 202    0  134   48    0  116  180    0  204    0    0  240  222    0    0    0    0    0  206    0    0    