# Test evaluations

In [1]:
from os.path import realpath
from pathlib import Path
import pandas as pd
import itertools
import json
import shutil

def load_json(path):
    with open(path) as f:
        cfg = json.load(f)
    return cfg

RUNS_DIR = Path(realpath('.')) / 'runs'
if not 'domain-adaptation/runs' in str(RUNS_DIR):
    RUNS_DIR = Path(realpath('.')).parent / 'runs'
assert('domain-adaptation/runs' in str(RUNS_DIR))

In [119]:
def load_cls_rep_paths_new(
    method,
    experiment_id,
    prefix=None,
    config_key=None,
    config_value=None,
):
    d = RUNS_DIR / method / experiment_id
    paths = []
    for item in d.glob('*'):
        if (item.is_dir()):
            if config_key:
                if not (config_value == load_json(item / 'config.json')[config_key]):
                    continue
            if prefix:
                if not item.name.startswith(prefix):
                    continue
            paths.append( item / 'report.json' ) 
    return sorted(paths)

def get_score_new(
    report_paths,
    metric:str='precision',
    avg_types=['macro avg', 'weighted avg'],
    map_col_name= lambda n: n
):
    reports = [ pd.read_json(p) for p in report_paths ]
    score = pd.DataFrame(
        [[r[avgt][metric] for avgt in avg_types]
         for r in reports
        ],
        columns = [ map_col_name(avgt) for avgt in avg_types]
    ) 
    return score

def get_score_combinations_new(
    method,
    experiment_id,
    config_key=None,
    config_value=None,
    domains=['A','W','D'], 
    metric:str='recall',
    avg_types=['weighted avg'],
):
    combos = [c for c in itertools.product(domains, repeat=2) if c[0] != c[1]]
    scores = [
        get_score_new(
            report_paths=load_cls_rep_paths_new(method, experiment_id, '{}{}'.format(c[0],c[1]), config_key, config_value),
            metric=metric,
            avg_types=avg_types,
            map_col_name=lambda n: '{}->{}'.format(c[0],c[1]),
        )
        for c in combos
    ]
    return pd.concat(scores, sort=False)


def load_cls_rep_paths(
    suffix:str, 
    runs_dir:Path=RUNS_DIR, 
    from_date:str='19700101000000', 
    to_date:str='30001010000000'
):
    assert(len(from_date)==14 and len(to_date)==14)
    return sorted([
        item / 'report.json' 
        for item in runs_dir.glob('*') 
        if item.is_dir() 
        and item.name.endswith(suffix)
        and int(item.name.split('_')[0]) >= int(from_date)
        and int(item.name.split('_')[0]) <= int(to_date)
    ])

def get_score(
    suffix:str, 
    runs_dir:Path=RUNS_DIR, 
    metric:str='precision',
    avg_types=['macro avg', 'weighted avg'],
    from_date:str='19700101000000', 
    to_date:str='30001010000000',
    map_col_name= lambda n: n
):
    report_paths = load_cls_rep_paths(suffix, RUNS_DIR, from_date, to_date)
    reports = [ pd.read_json(p) for p in report_paths ]
    score = pd.DataFrame(
        [[r[avgt][metric] for avgt in avg_types]
         for r in reports
        ],
        columns = [ map_col_name(avgt) for avgt in avg_types]
    ) 
    return score


def get_score_combinations(
    suffix:str, 
    domains=['A','W','D'], 
    runs_dir:Path=RUNS_DIR, 
    metric:str='accuracy',
    avg_types=['macro avg', 'weighted avg'],
    from_date:str='19700101000000', 
    to_date:str='30001010000000',
):
    combos = [c for c in itertools.product(domains, repeat=2) if c[0] != c[1]]
    scores = [
        get_score(
            suffix='{}_{}_{}'.format(c[0],c[1],suffix),
            runs_dir=runs_dir,
            metric=metric,
            avg_types=avg_types,
            from_date=from_date,
            to_date=to_date,
            map_col_name=lambda n: '{}->{}'.format(c[0],c[1]),
        )
        for c in combos
    ]
    return pd.concat(scores, sort=False)
    

## Tune source only
In this experiment, we tune a VGG16-network pretrained on ImageNet with all available source data.
The target data is used for validation (during training) and test.

In [120]:
# without augmentation
# tune_source_no_aug_scores = get_score_combinations(
#     suffix='tune_source', 
#     domains=['A','W','D'], 
#     avg_types=['macro avg'],
#     from_date='20191014123846',
#     to_date='20191014162536'
# )*100
# tune_source_no_aug_scores.describe().T

tune_source_no_aug_scores = get_score_combinations_new(
    method='tune_source',
    experiment_id='tune_source_no_aug',
)*100
tune_source_no_aug_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,52.65625,1.25,51.09375,51.5625,53.125,53.75,53.75
A->D,5.0,56.964286,3.209598,53.571429,53.571429,58.035714,59.22619,60.416667
W->A,5.0,42.341867,1.474058,40.549699,41.114458,42.846386,43.072289,44.126506
W->D,5.0,98.571429,0.572482,97.916667,98.214286,98.511905,98.809524,99.404762
D->A,5.0,36.84488,2.337159,33.471386,35.61747,37.688253,37.951807,39.495482
D->W,5.0,91.03125,2.005851,87.96875,90.46875,91.09375,92.65625,92.96875


In [121]:
# with augmentation
# tune_source_scores = get_score_combinations(
#     suffix='tune_source', 
#     domains=['A','W','D'], 
#     avg_types=['macro avg'],
#     from_date='20191022103424',
#     to_date='20191022142437'
# )*100
# tune_source_scores.describe().T

tune_source_scores = get_score_combinations_new(
    method='tune_source',
    experiment_id='tune_source_with_aug',
)*100
tune_source_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,55.34375,1.856681,52.8125,53.90625,56.5625,56.5625,56.875
A->D,5.0,59.107143,1.691461,57.738095,58.035714,58.035714,60.119048,61.607143
W->A,5.0,40.911145,2.005155,38.441265,39.721386,40.662651,42.206325,43.524096
W->D,5.0,98.75,0.678676,97.916667,98.511905,98.511905,99.107143,99.702381
D->A,5.0,40.820783,1.713495,38.177711,40.173193,41.340361,41.86747,42.545181
D->W,5.0,93.625,1.202212,91.71875,93.28125,93.90625,94.53125,94.6875


## Tune source and target

In [122]:
# tune_both_scores = get_score_combinations(
#     suffix='tune_target', 
#     domains=['A','W','D'], 
#     avg_types=['macro avg'],
#     from_date='20191023072318',
#     to_date='20191023123426'
# )*100
# tune_both_scores.describe().T

tune_both_scores = get_score_combinations_new(
    method='tune_target',
    experiment_id='tune_target_with_aug',
)*100
tune_both_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,60.0625,3.517534,55.15625,57.65625,61.40625,62.96875,63.125
A->D,5.0,65.535714,3.102955,61.011905,64.285714,65.77381,67.559524,69.047619
W->A,5.0,51.438253,1.755988,50.0,50.150602,51.28012,51.393072,54.36747
W->D,5.0,99.22619,0.580166,98.511905,98.809524,99.404762,99.404762,100.0
D->A,5.0,52.771084,2.706081,49.284639,51.84488,51.920181,54.480422,56.325301
D->W,5.0,94.5625,2.707231,89.84375,95.15625,95.15625,96.09375,96.5625


## Multitask

In [123]:
multitask_scores = get_score_combinations_new(
    method='ccsa',
    experiment_id='ccsa_with_batchnorn_alpha_0',
)*100
multitask_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,81.6875,1.937122,78.75,81.25,81.5625,83.28125,83.59375
A->D,5.0,80.233236,3.690092,75.510204,78.425656,80.174927,81.632653,85.422741
W->A,5.0,62.907588,1.6662,60.66867,62.509391,62.69722,63.373403,65.289256
W->D,5.0,96.326531,1.907338,94.169096,95.043732,95.918367,97.667638,98.833819
D->A,5.0,62.14876,1.908953,58.90308,62.021037,62.96018,63.22314,63.636364
D->W,5.0,93.375,1.063189,92.03125,92.5,93.75,94.0625,94.53125


In [124]:
multitask_from_feat_scores = get_score_combinations_new(
    method='multitask',
    experiment_id='multitask_from_feat',
)*100
multitask_from_feat_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,77.34375,1.608692,75.3125,75.9375,78.125,78.4375,78.90625
A->D,5.0,77.201166,2.065655,74.635569,75.510204,77.842566,78.425656,79.591837
W->A,5.0,61.600301,1.392823,59.504132,61.495116,61.570248,62.058603,63.373403
W->D,5.0,93.002915,2.507967,90.670554,91.253644,93.002915,93.002915,97.084548
D->A,5.0,62.231405,0.682726,61.570248,61.758077,62.021037,62.546957,63.260706
D->W,5.0,91.34375,0.831215,90.46875,90.78125,91.09375,91.875,92.5


## CCSA

__Run 1__: The were some severe stability issues when introducting the CSA loss (alpha 0.25 as done by original author).
Employed parameters:
- alpha=0.01 
- freeze_base=true

In [125]:
# tune_ccsa_scores = get_score_combinations(
#     suffix='ccsa', 
#     domains=['A','W','D'], 
#     avg_types=['macro avg'],
#     from_date='20191025141713',
#     to_date='20191025170306'
# )*100
# tune_ccsa_scores.describe().T

tune_ccsa_scores = get_score_combinations_new(
    method='ccsa',
    experiment_id='ccsa_without_batchnorn',
)*100
tune_ccsa_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,75.375,3.392101,73.125,73.125,74.21875,75.15625,81.25
A->D,5.0,77.259475,3.873274,73.760933,74.927114,75.218659,79.300292,83.090379
W->A,5.0,58.790383,1.411842,57.287754,57.325319,59.316304,59.616829,60.40571
W->D,5.0,93.002915,2.803992,89.795918,91.836735,92.419825,93.586006,97.376093
D->A,5.0,58.827949,1.546368,57.550714,57.625845,58.414726,59.241172,61.307288
D->W,5.0,92.3125,1.955935,89.6875,91.40625,92.65625,92.8125,95.0


We observe a large performance increment for the domain adaptations where domains are far apart. However, for similar domains (W,D)

__Run 3__ multi-task learning (ccsa code with alpha=0)

In [127]:
# multitask_scores = get_score_combinations(
#     suffix='ccsa', 
#     domains=['A','W','D'], 
#     avg_types=['macro avg'],
#     from_date='20191101173414',
#     to_date='20191101191220'
# )*100
# multitask_scores.describe().T

multitask_scores = get_score_combinations_new(
    method='ccsa',
    experiment_id='ccsa_with_batchnorn_alpha_0',
)*100
multitask_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,81.6875,1.937122,78.75,81.25,81.5625,83.28125,83.59375
A->D,5.0,80.233236,3.690092,75.510204,78.425656,80.174927,81.632653,85.422741
W->A,5.0,62.907588,1.6662,60.66867,62.509391,62.69722,63.373403,65.289256
W->D,5.0,96.326531,1.907338,94.169096,95.043732,95.918367,97.667638,98.833819
D->A,5.0,62.14876,1.908953,58.90308,62.021037,62.96018,63.22314,63.636364
D->W,5.0,93.375,1.063189,92.03125,92.5,93.75,94.0625,94.53125


__Run 4__ ccsa with alpha = 0.25

In [128]:
# ccsa_scores = get_score_combinations(
#     suffix='ccsa', 
#     domains=['A','W','D'], 
#     avg_types=['macro avg'],
#     from_date='20191101150707',
#     to_date='20191101171905'
# )*100
# ccsa_scores.describe().T

ccsa_scores = get_score_combinations_new(
    method='ccsa',
    experiment_id='ccsa_with_batchnorn_alpha_0.25',
)*100
ccsa_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,82.5,2.715335,78.75,80.625,83.75,84.0625,85.3125
A->D,5.0,82.274052,3.614455,77.259475,80.174927,83.381924,83.965015,86.588921
W->A,5.0,59.939895,2.546253,56.235913,59.429001,60.14275,60.555973,63.335838
W->D,5.0,93.236152,2.519801,89.212828,92.419825,94.169096,95.043732,95.335277
D->A,5.0,60.112697,1.91519,58.302029,58.37716,59.87979,61.232156,62.772352
D->W,5.0,92.4375,2.334879,88.4375,92.65625,92.96875,93.75,94.375


In [129]:
# ccsa_uneven_scores = get_score_combinations(
#     suffix='ccsa', 
#     domains=['A','W','D'], 
#     avg_types=['macro avg'],
#     from_date='20191104152050',
#     to_date='20191104185818'
# )*100
# ccsa_uneven_scores.describe().T

ccsa_uneven_scores = get_score_combinations_new(
    method='ccsa',
    experiment_id='ccsa_uneven',
)*100
ccsa_uneven_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,83.46875,1.463254,81.875,82.65625,83.4375,83.59375,85.78125
A->D,5.0,82.915452,2.245087,79.591837,82.215743,82.798834,84.83965,85.131195
W->A,5.0,60.924117,3.093009,57.362885,57.963937,62.396694,62.471826,64.425244
W->D,5.0,93.93586,1.654374,92.12828,93.002915,93.294461,95.043732,96.209913
D->A,5.0,60.826446,2.820359,58.978212,59.128475,60.030053,60.217881,65.777611
D->W,5.0,92.53125,2.615496,89.0625,91.40625,92.1875,94.0625,95.9375


In [130]:
# ccsa_resnet_uneven_scores = get_score_combinations(
#     suffix='ccsa', 
#     domains=['A','W','D'], 
#     avg_types=['macro avg'],
#     from_date='20191106083058',
#     to_date='20191106144631'
# )*100
# ccsa_resnet_uneven_scores.describe().T

ccsa_resnet_uneven_scores = get_score_combinations_new(
    method='ccsa',
    experiment_id='ccsa_resnet_uneven',
)*100
ccsa_resnet_uneven_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,86.6875,1.843485,84.375,85.3125,86.875,88.125,88.75
A->D,6.0,84.985423,1.697484,83.090379,83.746356,84.693878,86.078717,87.463557
W->A,5.0,66.46882,2.010838,64.312547,64.800902,66.378663,67.693464,69.158527
W->D,5.0,93.35277,2.055342,89.795918,93.586006,93.877551,94.752187,94.752187
D->A,6.0,63.47984,4.039071,56.386176,62.190083,64.406461,66.115702,67.543201
D->W,5.0,92.1875,1.184824,90.9375,91.40625,91.875,92.8125,93.90625


### CCSA from features
Testing if batch size has an impact on CCSA (like seems to have on DAGE)

In [132]:
ccsa_from_feat_batch_size_16_uneven_scores = get_score_combinations_new(
    method='ccsa',
    experiment_id='ccsa_batch_size',
    config_key='batch_size',
    config_value=16,
)*100
ccsa_from_feat_batch_size_16_uneven_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,79.25,1.810074,76.875,77.96875,79.53125,80.78125,81.09375
A->D,5.0,78.483965,1.64148,76.093294,77.842566,78.717201,79.300292,80.466472
W->A,5.0,59.556724,0.794051,58.602554,58.940646,59.654395,60.030053,60.555973
W->D,5.0,90.029155,3.433556,86.880466,87.172012,89.212828,91.836735,95.043732
D->A,5.0,58.767844,2.074398,56.574005,57.175056,58.189331,60.93163,60.969196
D->W,5.0,90.53125,0.921743,89.6875,89.6875,90.46875,90.9375,91.875


In [133]:
ccsa_from_feat_batch_size_32_uneven_scores = get_score_combinations_new(
    method='ccsa',
    experiment_id='ccsa_batch_size',
    config_key='batch_size',
    config_value=32,
)*100
ccsa_from_feat_batch_size_32_uneven_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,79.3125,1.588072,77.34375,78.4375,78.90625,80.9375,80.9375
A->D,5.0,78.075802,2.494373,75.801749,76.96793,76.96793,78.425656,82.215743
W->A,5.0,59.47408,1.356378,58.37716,58.452292,59.090909,59.767092,61.682945
W->D,5.0,90.145773,2.156252,88.338192,89.212828,89.504373,89.795918,93.877551
D->A,5.0,59.120962,1.775632,56.048084,59.203606,59.804658,60.14275,60.40571
D->W,5.0,90.625,1.076879,89.21875,90.3125,90.46875,90.9375,92.1875


In [16]:
ccsa_from_feat_batch_size_64_uneven_scores = get_score_combinations_new(
    method='ccsa',
    experiment_id='ccsa_batch_size',
    avg_types=['macro avg'],
    config_key='batch_size',
    config_value=64,
)*100
ccsa_from_feat_batch_size_64_uneven_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,79.708751,2.307348,76.920129,78.636711,79.13654,80.872306,82.978071
A->D,5.0,79.141243,3.449395,74.629593,76.648884,80.174678,81.093436,83.159627
W->A,5.0,61.35665,1.105851,59.478968,61.317171,61.692201,62.124731,62.170181
W->D,5.0,88.773578,2.190124,85.960699,88.369675,88.541375,88.899718,92.096425
D->A,5.0,62.502549,1.694606,60.939045,61.115881,61.921053,63.746415,64.790349
D->W,5.0,90.691934,2.119183,88.559343,89.34639,89.796408,92.091719,93.665811


In [134]:
ccsa_from_feat_batch_size_128_uneven_scores = get_score_combinations_new(
    method='ccsa',
    experiment_id='ccsa_batch_size',
    config_key='batch_size',
    config_value=128,
)*100
ccsa_from_feat_batch_size_128_uneven_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,78.0,1.606414,75.625,77.5,78.28125,78.59375,80.0
A->D,5.0,76.151603,3.36479,71.720117,75.218659,75.801749,76.96793,81.049563
W->A,5.0,57.978963,2.099753,55.747558,55.935387,58.264463,59.504132,60.443276
W->D,5.0,87.696793,2.214592,85.714286,86.297376,86.880466,88.338192,91.253644
D->A,5.0,57.926371,1.20971,56.08565,57.325319,58.564989,58.64012,59.015778
D->W,5.0,88.15625,2.789402,86.09375,86.71875,86.875,88.125,92.96875


In [135]:
ccsa_from_feat_batch_size_256_uneven_scores = get_score_combinations_new(
    method='ccsa',
    experiment_id='ccsa_batch_size',
    config_key='batch_size',
    config_value=256,
)*100
ccsa_from_feat_batch_size_256_uneven_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,10.0,77.0,2.086976,73.59375,75.742188,77.421875,78.671875,79.375
A->D,10.0,76.38484,1.677617,72.886297,75.801749,76.239067,77.623907,78.717201
W->A,10.0,53.621337,3.489199,47.107438,52.037941,53.888054,56.41435,58.226897
W->D,10.0,86.209913,4.703134,79.883382,82.944606,85.860058,89.431487,94.169096
D->A,10.0,55.540947,2.568259,51.615327,53.878663,55.897821,57.334711,58.827949
D->W,10.0,84.796875,3.443847,78.4375,82.96875,85.46875,86.953125,89.53125


In [136]:
ccsa_from_feat_batch_size_4096_uneven_scores = get_score_combinations_new(
    method='ccsa',
    experiment_id='ccsa_batch_size',
    config_key='batch_size',
    config_value=4096,
)*100
ccsa_from_feat_batch_size_4096_uneven_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,72.21875,3.708099,65.78125,72.65625,73.59375,73.90625,75.15625
A->D,5.0,66.588921,5.836732,57.142857,65.306122,68.804665,69.387755,72.303207
W->A,5.0,49.917355,3.039507,46.882044,47.746056,49.286251,51.164538,54.507889
W->D,5.0,77.492711,5.188358,71.720117,74.344023,75.510204,82.215743,83.673469
D->A,5.0,48.384673,4.853837,42.261458,45.078888,48.271976,52.779865,53.53118
D->W,5.0,80.15625,5.3125,73.75,75.625,82.34375,82.5,86.5625


## d-SNE

In [137]:
# dsne_scores = get_score_combinations(
#     suffix='dsne', 
#     domains=['A','W','D'], 
#     avg_types=['macro avg'],
#     from_date='20191104094606',
#     to_date='20191104124943'
# )*100
# dsne_scores.describe().T

dsne_scores = get_score_combinations_new(
    method='dsne',
    experiment_id='dsne_even',
)*100
dsne_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,82.25,3.335855,76.875,82.03125,82.34375,84.53125,85.46875
A->D,5.0,81.574344,1.561877,79.591837,80.466472,81.632653,83.090379,83.090379
W->A,5.0,61.307288,1.935638,58.865515,60.217881,61.645379,61.758077,64.049587
W->D,5.0,93.819242,1.463545,91.836735,93.586006,93.586006,94.169096,95.918367
D->A,5.0,61.134485,1.847341,58.677686,60.66867,60.781367,61.795642,63.749061
D->W,5.0,93.15625,1.261664,91.71875,91.875,93.75,94.0625,94.375


In [138]:
# dsne_uneven_scores = get_score_combinations(
#     suffix='dsne', 
#     domains=['A','W','D'], 
#     avg_types=['macro avg'],
#     from_date='20191104152038',
#     to_date='20191104182550'
# )*100
# dsne_uneven_scores.describe().T

dsne_uneven_scores = get_score_combinations_new(
    method='dsne',
    experiment_id='dsne_uneven',
)*100
dsne_uneven_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,79.875,3.706453,75.78125,76.875,79.375,83.4375,83.90625
A->D,5.0,80.408163,3.775482,74.927114,78.134111,81.924198,83.381924,83.673469
W->A,5.0,61.111946,0.951237,60.067618,60.368144,61.194591,61.457551,62.471826
W->D,5.0,88.279883,3.891883,83.381924,87.172012,87.755102,88.921283,94.169096
D->A,5.0,59.804658,0.668839,58.90308,59.541698,59.917355,59.917355,60.743802
D->W,5.0,90.28125,1.560154,87.8125,90.0,90.78125,90.78125,92.03125


In [139]:
# dsne_large_uneven_scores = get_score_combinations(
#     suffix='dsne', 
#     domains=['A','W','D'], 
#     avg_types=['macro avg'],
#     from_date='20191105120356',
#     to_date='20191105154214'
# )*100
# dsne_large_uneven_scores.describe().T

dsne_large_uneven_scores = get_score_combinations_new(
    method='dsne',
    experiment_id='dsne_uneven_large',
)*100
dsne_large_uneven_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,80.0625,1.820832,77.03125,79.84375,80.78125,80.9375,81.71875
A->D,5.0,80.116618,2.511353,77.259475,78.425656,79.883382,81.341108,83.673469
W->A,5.0,58.219384,2.145921,55.409467,56.423742,59.429001,59.729527,60.105184
W->D,5.0,89.387755,3.725624,84.25656,88.046647,88.629738,92.419825,93.586006
D->A,5.0,58.422239,2.761963,55.785124,56.311044,57.325319,60.706236,61.983471
D->W,5.0,89.125,2.752396,84.84375,88.59375,89.375,90.625,92.1875


In [140]:
# dsne_resnet_uneven_scores = get_score_combinations(
#     suffix='dsne', 
#     domains=['A','W','D'], 
#     avg_types=['macro avg'],
#     from_date='20191107085855',
#     to_date='20191107131237'
# )*100
# dsne_resnet_uneven_scores.describe().T


dsne_resnet_uneven_scores = get_score_combinations_new(
    method='dsne',
    experiment_id='dsne_uneven_resnet',
)*100
dsne_resnet_uneven_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,84.03125,1.820162,81.71875,83.28125,83.4375,85.46875,86.25
A->D,5.0,85.072886,1.692469,83.673469,83.965015,84.25656,85.714286,87.755102
W->A,5.0,64.800902,2.708513,61.044328,64.425244,64.72577,65.138993,68.670173
W->D,5.0,91.020408,1.434213,89.795918,89.795918,90.962099,91.253644,93.294461
D->A,5.0,64.861007,1.711943,63.072878,63.486101,65.063862,65.289256,67.392938
D->W,5.0,89.40625,1.056278,88.28125,88.59375,89.375,89.84375,90.9375


### d-SNE from features

In [141]:
dsne_from_feat_batch_size_16_uneven_scores = get_score_combinations_new(
    method='dsne',
    experiment_id='dsne_from_features',
)*100
dsne_from_feat_batch_size_16_uneven_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,77.53125,1.101535,76.09375,77.03125,77.5,77.96875,79.0625
A->D,6.0,77.745384,2.512481,74.344023,76.457726,77.696793,78.717201,81.632653
W->A,5.0,58.534936,0.93213,57.513148,58.001503,58.489857,58.677686,59.992487
W->D,5.0,89.037901,3.725624,84.25656,86.588921,89.795918,90.670554,93.877551
D->A,5.0,58.309542,1.259265,56.649136,57.475582,58.489857,59.166041,59.767092
D->W,5.0,84.9375,1.172396,83.28125,84.375,85.0,85.78125,86.25


## Dage

In [142]:
# dage_resnet_uneven_scores = get_score_combinations(
#     suffix='homebrew', 
#     domains=['A','W','D'], 
#     avg_types=['macro avg'],
#     from_date='20191113093959',
#     to_date='20191113143542'
# )*100
# dage_resnet_uneven_scores.describe().T

dage_resnet_uneven_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_uneven_resnet',
)*100
dage_resnet_uneven_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,86.53125,2.469047,83.28125,84.53125,87.65625,88.4375,88.75
A->D,5.0,87.930029,1.951394,86.005831,86.588921,87.172012,89.212828,90.670554
W->A,5.0,63.080391,3.665057,57.400451,61.758077,63.93689,65.927874,66.378663
W->D,5.0,89.212828,3.022805,85.714286,87.755102,88.046647,91.253644,93.294461
D->A,5.0,62.193839,3.735912,55.634861,62.659654,63.861758,64.312547,64.500376
D->W,5.0,87.15625,1.989965,84.0625,86.25,88.28125,88.4375,88.75


### DAGE on logits
Varying batch_size

In [143]:
dage_logits_bs_16_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_logits_batch_size',
    config_key='batch_size',
    config_value=16,
)*100
dage_logits_bs_16_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,4.0,78.320312,1.136623,76.71875,78.007812,78.59375,78.90625,79.375
A->D,5.0,78.075802,3.101909,74.927114,75.510204,77.259475,81.341108,81.341108
W->A,5.0,57.610819,1.481967,56.123216,56.160781,58.001503,58.151766,59.616829
W->D,5.0,84.548105,4.493013,80.466472,80.758017,82.798834,88.921283,89.795918
D->A,5.0,58.557476,2.234672,55.860255,56.461307,59.504132,60.40571,60.555973
D->W,5.0,82.625,2.473986,78.4375,82.34375,83.75,84.21875,84.375


In [144]:
dage_logits_bs_64_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_logits_batch_size',
    config_key='batch_size',
    config_value=64,
)*100
dage_logits_bs_64_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,78.96875,1.039973,77.65625,78.59375,78.59375,79.6875,80.3125
A->D,5.0,77.842566,3.474162,73.760933,76.676385,76.676385,79.008746,83.090379
W->A,5.0,57.97145,1.724632,56.423742,56.536439,57.325319,59.316304,60.255447
W->D,5.0,80.816327,4.231927,76.38484,79.300292,79.591837,81.049563,87.755102
D->A,5.0,56.799399,1.581781,54.094666,56.912096,57.287754,57.513148,58.189331
D->W,5.0,79.09375,2.362942,77.65625,77.8125,78.28125,78.4375,83.28125


In [145]:
dage_logits_bs_256_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_logits_batch_size',
    config_key='batch_size',
    config_value=256,
)*100
dage_logits_bs_256_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,76.4375,1.044657,75.46875,75.46875,76.25,77.1875,77.8125
A->D,5.0,77.900875,3.730184,74.635569,74.927114,76.96793,79.300292,83.673469
W->A,5.0,56.829452,2.90818,51.652893,57.813674,57.926371,58.189331,58.564989
W->D,5.0,77.609329,2.326889,74.344023,76.38484,78.425656,78.425656,80.466472
D->A,5.0,56.070624,0.468142,55.296769,55.972953,56.311044,56.311044,56.461307
D->W,5.0,75.78125,1.589609,73.59375,75.3125,75.3125,77.34375,77.34375


In [146]:
dage_logits_bs_1024_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_logits_batch_size',
    config_key='batch_size',
    config_value=1024,
)*100
dage_logits_bs_1024_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,74.53125,2.679129,69.84375,75.0,75.3125,76.25,76.25
A->D,5.0,76.501458,4.042923,72.011662,72.886297,76.676385,79.883382,81.049563
W->A,5.0,56.446281,2.204344,53.005259,55.82269,56.912096,57.738542,58.752817
W->D,5.0,75.510204,4.930477,68.221574,73.469388,76.38484,78.425656,81.049563
D->A,5.0,55.567243,3.02543,50.826446,54.84598,56.08565,57.287754,58.790383
D->W,5.0,75.28125,0.920417,74.0625,74.53125,75.78125,75.9375,76.09375


In [147]:
dage_logits_bs_full_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_logits_batch_size',
    config_key='batch_size',
    config_value=4096,
)*100
dage_logits_bs_full_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,74.625,2.332264,71.5625,73.4375,74.375,76.25,77.5
A->D,5.0,73.586006,3.952561,67.055394,74.344023,74.344023,74.344023,77.842566
W->A,5.0,55.469572,1.8956,53.305785,54.432757,55.071375,56.273479,58.264463
W->D,5.0,75.335277,3.250435,72.886297,73.177843,73.469388,76.676385,80.466472
D->A,5.0,55.026296,2.934627,50.638618,53.643877,56.010518,56.761833,58.076634
D->W,5.0,74.0625,2.10503,70.9375,74.0625,74.21875,74.21875,76.875


### DAGE full on embeds
Varying batch_size

In [148]:
dage_embeds_bs_8_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_embeds_batch_size',
    config_key='batch_size',
    config_value=8,
)*100
dage_embeds_bs_8_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,79.78125,0.941398,78.75,79.21875,79.84375,79.84375,81.25
A->D,5.0,78.77551,2.381052,76.38484,77.259475,77.842566,80.174927,82.215743
W->A,5.0,58.362134,0.628371,57.663411,58.001503,58.1142,58.865515,59.166041
W->D,5.0,82.44898,6.582081,74.635569,77.259475,82.798834,87.172012,90.379009
D->A,5.0,58.730278,1.526066,57.738542,57.888805,57.963937,58.677686,61.382419
D->W,5.0,85.125,0.972021,84.21875,84.375,84.6875,86.09375,86.25


In [149]:
dage_embeds_bs_16_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_embeds_batch_size',
    config_key='batch_size',
    config_value=16,
)*100
dage_embeds_bs_16_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,79.8125,1.067772,78.90625,78.90625,79.375,80.625,81.25
A->D,5.0,80.233236,3.764209,75.510204,79.008746,79.300292,81.632653,85.714286
W->A,5.0,58.685199,1.004629,57.212622,58.414726,58.564989,59.466566,59.767092
W->D,5.0,85.714286,1.30383,83.673469,85.422741,86.005831,86.297376,87.172012
D->A,5.0,57.896319,2.357591,55.747558,55.860255,57.062359,60.105184,60.706236
D->W,5.0,86.28125,1.617772,83.59375,86.09375,86.71875,87.34375,87.65625


In [150]:
dage_embeds_bs_32_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_embeds_batch_size',
    config_key='batch_size',
    config_value=32,
)*100
dage_embeds_bs_32_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,77.53125,2.227316,74.21875,76.40625,78.59375,78.59375,79.84375
A->D,5.0,79.650146,3.209646,76.676385,77.842566,78.425656,80.466472,84.83965
W->A,5.0,57.084899,1.76571,54.883546,55.447032,58.226897,58.414726,58.452292
W->D,5.0,81.574344,4.181413,75.218659,80.466472,82.507289,83.090379,86.588921
D->A,5.0,57.888805,1.322026,56.160781,57.400451,57.475582,58.940646,59.466566
D->W,5.0,82.53125,2.996173,80.15625,80.78125,81.5625,82.5,87.65625


In [152]:
dage_embeds_bs_64_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_embeds_batch_size',
    config_key='batch_size',
    config_value=64,
)*100
dage_embeds_bs_64_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,76.75,1.407985,74.375,76.875,77.1875,77.1875,78.125
A->D,5.0,78.367347,2.799441,75.510204,76.676385,77.842566,79.008746,82.798834
W->A,5.0,57.137491,1.641111,54.958678,55.935387,57.85124,57.963937,58.978212
W->D,5.0,79.650146,2.776576,75.801749,78.717201,79.591837,80.758017,83.381924
D->A,5.0,57.768595,0.993328,56.498873,57.212622,57.700977,58.37716,59.053343
D->W,5.0,79.25,3.082524,73.90625,79.6875,80.15625,80.78125,81.71875


In [153]:
dage_embeds_bs_128_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_embeds_batch_size',
    config_key='batch_size',
    config_value=128,
)*100
dage_embeds_bs_128_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,76.625,0.845773,75.625,75.9375,76.71875,77.1875,77.65625
A->D,5.0,78.717201,3.498542,74.927114,75.801749,78.425656,81.341108,83.090379
W->A,5.0,56.746807,1.442642,55.409467,55.747558,56.273479,57.325319,58.978212
W->D,5.0,78.717201,4.714566,72.011662,75.801749,80.174927,82.798834,82.798834
D->A,5.0,56.574005,2.197194,54.019534,54.658152,57.212622,57.663411,59.316304
D->W,5.0,76.46875,1.850755,74.53125,75.3125,75.9375,77.34375,79.21875


In [154]:
dage_embeds_bs_full_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_embeds_batch_size',
    config_key='batch_size',
    config_value=4096,
)*100
dage_embeds_bs_full_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,73.5,1.14607,71.71875,73.28125,73.75,73.90625,74.84375
A->D,5.0,74.577259,1.419319,72.886297,73.760933,74.635569,74.927114,76.676385
W->A,5.0,56.49136,1.725041,53.831705,55.747558,57.212622,57.550714,58.1142
W->D,5.0,74.985423,2.354126,71.428571,73.760933,76.093294,76.676385,76.96793
D->A,5.0,56.453794,1.270977,54.32006,56.461307,56.649136,57.362885,57.475582
D->W,5.0,75.28125,2.180233,72.34375,74.53125,74.6875,77.03125,77.8125


### DAGE full across on embeds
Varying loss alpha

In [155]:
dage_embeds_full_across_025_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_full_across_alpha',
    config_key='loss_alpha',
    config_value=0.25,
)*100
dage_embeds_full_across_025_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,76.6875,0.940101,75.78125,75.9375,76.40625,77.34375,77.96875
A->D,5.0,78.19242,3.25566,76.093294,76.676385,76.96793,77.259475,83.965015
W->A,5.0,57.137491,0.97707,55.972953,56.536439,57.250188,57.362885,58.564989
W->D,5.0,78.425656,3.564729,73.760933,76.093294,79.008746,80.466472,82.798834
D->A,5.0,56.904583,1.584321,54.695718,56.123216,57.062359,57.813674,58.827949
D->W,5.0,79.4375,3.06903,77.1875,77.34375,77.34375,81.40625,83.90625


In [156]:
dage_embeds_full_across_05_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_full_across_alpha',
    config_key='loss_alpha',
    config_value=0.5,
)*100
dage_embeds_full_across_05_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,77.78125,1.227334,76.09375,76.875,78.4375,78.59375,78.90625
A->D,5.0,80.349854,2.731827,76.093294,80.174927,80.758017,81.049563,83.673469
W->A,5.0,57.85124,1.307536,55.597295,58.151766,58.226897,58.264463,59.015778
W->D,5.0,84.373178,4.575495,79.591837,81.341108,82.798834,87.463557,90.670554
D->A,5.0,57.385424,1.587036,55.597295,56.461307,56.761833,58.865515,59.241172
D->W,5.0,82.9375,2.082878,79.84375,82.03125,83.75,83.75,85.3125


In [157]:
dage_embeds_full_across_075_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_full_across_alpha',
    config_key='loss_alpha',
    config_value=0.75,
)*100
dage_embeds_full_across_075_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,80.1875,1.658754,77.96875,79.0625,80.46875,81.5625,81.875
A->D,5.0,80.349854,2.382836,77.842566,79.008746,79.300292,81.924198,83.673469
W->A,5.0,59.338843,1.61982,57.212622,58.339594,59.353869,60.631104,61.157025
W->D,5.0,85.655977,3.608571,80.466472,84.548105,85.131195,88.629738,89.504373
D->A,5.0,60.090158,1.318177,58.564989,59.541698,59.579264,60.781367,61.983471
D->W,5.0,85.90625,3.789505,80.625,85.0,85.0,88.28125,90.625


### DAGE pair across on embeds
Varying loss alpha

In [158]:
dage_embeds_pair_across_025_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_pair_across_alpha',
    config_key='loss_alpha',
    config_value=0.25,
)*100
dage_embeds_pair_across_025_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,80.3125,0.732877,79.21875,80.0,80.46875,80.78125,81.09375
A->D,5.0,80.816327,3.189723,76.093294,80.174927,80.466472,82.798834,84.548105
W->A,5.0,59.691961,3.706751,53.305785,59.691961,61.419985,61.607814,62.43426
W->D,5.0,90.204082,1.121597,88.629738,89.795918,90.087464,90.962099,91.54519
D->A,5.0,58.715252,1.54728,57.062359,57.700977,58.827949,58.865515,61.119459
D->W,5.0,88.96875,2.639192,84.84375,88.125,89.53125,90.9375,91.40625


In [159]:
dage_embeds_pair_across_05_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_pair_across_alpha',
    config_key='loss_alpha',
    config_value=0.5,
)*100
dage_embeds_pair_across_05_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,80.53125,1.863899,78.90625,78.90625,80.46875,80.9375,83.4375
A->D,5.0,79.650146,2.363135,78.134111,78.134111,78.425656,79.883382,83.673469
W->A,5.0,60.368144,1.94473,57.475582,59.87979,60.180316,61.90834,62.396694
W->D,5.0,90.379009,4.737049,82.215743,90.962099,92.12828,92.12828,94.460641
D->A,5.0,60.743802,2.89634,56.686702,59.87979,60.443276,62.208866,64.500376
D->W,5.0,89.4375,2.311233,86.40625,88.125,89.21875,91.71875,91.71875


In [160]:
dage_embeds_pair_across_075_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_pair_across_alpha',
    config_key='loss_alpha',
    config_value=0.75,
)*100
dage_embeds_pair_across_075_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,80.0625,1.182762,78.59375,79.6875,80.0,80.15625,81.875
A->D,5.0,79.125364,3.490029,74.344023,76.96793,79.883382,81.341108,83.090379
W->A,5.0,61.953418,0.422844,61.307288,61.758077,62.1713,62.1713,62.359128
W->D,5.0,91.603499,2.48584,89.795918,90.087464,90.962099,91.253644,95.918367
D->A,5.0,60.300526,1.88099,58.602554,58.64012,59.917355,61.344853,62.997746
D->W,5.0,91.1875,0.991911,90.3125,90.46875,90.625,92.1875,92.34375


### DAGE MFA-like
Varying k

In [161]:
dage_embeds_across_knn_knn_1_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_mfa',
    config_key='connection_filter_param',
    config_value=1,
)*100
dage_embeds_across_knn_knn_1_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,80.53125,0.785924,79.375,80.46875,80.46875,80.78125,81.5625
A->D,5.0,81.865889,1.717397,79.300292,81.341108,82.215743,82.507289,83.965015
W->A,5.0,61.810669,0.630053,61.081893,61.269722,62.021037,62.058603,62.622089
W->D,5.0,91.428571,1.407291,89.504373,90.379009,92.12828,92.419825,92.71137
D->A,5.0,61.555222,2.007501,59.579264,59.654395,61.682945,62.546957,64.312547
D->W,5.0,91.21875,0.984499,89.84375,90.78125,91.40625,91.5625,92.5


In [162]:
dage_embeds_across_knn_knn_2_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_mfa',
    config_key='connection_filter_param',
    config_value=2,
)*100
dage_embeds_across_knn_knn_2_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,79.0,2.249674,75.3125,79.0625,79.375,79.84375,81.40625
A->D,5.0,80.58309,1.495147,79.008746,79.883382,79.883382,81.341108,82.798834
W->A,5.0,61.07438,1.224609,59.579264,60.217881,61.081893,61.870774,62.622089
W->D,5.0,91.078717,3.130549,86.588921,89.795918,90.962099,93.877551,94.169096
D->A,5.0,62.268971,1.532756,60.180316,61.682945,61.983471,63.410969,64.087153
D->W,5.0,90.34375,1.980742,87.1875,89.6875,91.09375,91.71875,92.03125


### DAGE d-SNE-like
Varying k

In [163]:
dage_embeds_across_kfn_knn_1_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_dsne',
    config_key='connection_filter_param',
    config_value=1,
)*100
dage_embeds_across_kfn_knn_1_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,79.375,1.502764,78.125,78.28125,79.21875,79.375,81.875
A->D,5.0,81.574344,2.698959,77.55102,80.174927,82.507289,83.673469,83.965015
W->A,5.0,61.547708,1.028643,60.293013,60.743802,61.607814,62.471826,62.622089
W->D,5.0,91.661808,2.067711,89.504373,89.795918,91.54519,93.294461,94.169096
D->A,5.0,61.855748,1.27131,60.368144,61.232156,61.307288,63.072878,63.298272
D->W,5.0,89.65625,2.539723,87.1875,87.5,88.90625,92.1875,92.5


In [164]:
dage_embeds_across_kfn_knn_2_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_dsne',
    config_key='connection_filter_param',
    config_value=2,
)*100
dage_embeds_across_kfn_knn_2_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,79.34375,0.744446,78.125,79.21875,79.53125,79.84375,80.0
A->D,5.0,80.408163,0.696055,79.591837,79.883382,80.466472,80.758017,81.341108
W->A,4.0,62.10556,0.638526,61.607814,61.664162,61.90834,62.349737,62.997746
W->D,5.0,91.895044,2.086128,89.212828,91.54519,91.54519,92.12828,95.043732
D->A,5.0,61.006762,1.051841,59.278738,60.818933,61.344853,61.645379,61.945905
D->W,5.0,90.75,1.454888,89.21875,89.6875,90.46875,91.5625,92.8125


### DAGE on aux dense layer
Varying embedding size

In [165]:
dage_aux_dense_16_bs_full_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_vary_emb_size',
    config_key='aux_dense_size',
    config_value=16,
)*100
dage_aux_dense_16_bs_full_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,3.0,74.53125,0.869963,73.59375,74.140625,74.6875,75.0,75.3125
A->D,2.0,74.344023,1.649229,73.177843,73.760933,74.344023,74.927114,75.510204
W->A,3.0,57.450538,1.094791,56.423742,56.87453,57.325319,57.963937,58.602554
W->D,3.0,75.218659,2.314068,73.469388,73.906706,74.344023,76.093294,77.842566
D->A,3.0,57.763586,1.796229,55.860255,56.930879,58.001503,58.715252,59.429001
D->W,3.0,74.635417,1.301041,73.59375,73.90625,74.21875,75.15625,76.09375


In [166]:
dage_aux_dense_31_bs_full_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_vary_emb_size',
    config_key='aux_dense_size',
    config_value=31,
)*100
dage_aux_dense_31_bs_full_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,74.59375,1.430349,73.125,73.4375,74.375,75.46875,76.5625
A->D,5.0,75.568513,3.836894,71.428571,73.760933,73.760933,77.842566,81.049563
W->A,5.0,54.740796,5.062978,45.830203,55.634861,56.724267,57.438017,58.076634
W->D,5.0,75.16035,5.60571,67.055394,72.594752,76.96793,77.259475,81.924198
D->A,5.0,56.671675,1.745615,54.32006,56.311044,56.536439,56.987228,59.203606
D->W,5.0,73.5625,3.26895,67.96875,73.75,74.6875,75.0,76.40625


In [167]:
dage_aux_dense_64_bs_full_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_vary_emb_size',
    config_key='aux_dense_size',
    config_value=64,
)*100
dage_aux_dense_64_bs_full_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,73.1875,1.710206,71.5625,71.71875,72.65625,74.84375,75.15625
A->D,5.0,75.276968,3.752901,69.970845,73.177843,76.38484,77.259475,79.591837
W->A,5.0,56.829452,1.623692,54.733283,55.897821,56.836965,57.738542,58.940646
W->D,5.0,76.96793,3.7336,73.760933,74.052478,75.510204,79.008746,82.507289
D->A,5.0,56.46882,1.33752,54.58302,55.597295,56.949662,57.513148,57.700977
D->W,5.0,74.8125,1.067772,73.90625,74.0625,74.53125,75.0,76.5625


In [60]:
# The above experiments used all data every batch. We saw this to be suboptimal in the logits experiments, so here is a sredo with bs=16

In [168]:
dage_aux_dense_16_bs_16_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_aux_dense_low_bs',
    config_key='aux_dense_size',
    config_value=16,
)*100
dage_aux_dense_16_bs_16_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,76.46875,1.267456,75.0,75.3125,76.71875,77.5,77.8125
A->D,5.0,77.784257,2.889094,74.344023,76.38484,76.676385,80.174927,81.341108
W->A,5.0,57.145004,1.789289,54.244929,56.987228,57.287754,58.37716,58.827949
W->D,5.0,78.134111,3.220222,74.052478,76.96793,77.55102,79.300292,82.798834
D->A,5.0,57.129977,1.601376,54.770849,56.686702,56.987228,58.339594,58.865515
D->W,5.0,76.84375,1.186883,75.78125,75.9375,76.40625,77.5,78.59375


In [169]:
dage_aux_dense_31_bs_16_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_aux_dense_low_bs',
    config_key='aux_dense_size',
    config_value=31,
)*100
dage_aux_dense_31_bs_16_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,76.84375,0.719429,75.9375,76.5625,76.5625,77.5,77.65625
A->D,5.0,78.600583,2.660899,75.801749,76.96793,78.425656,79.008746,82.798834
W->A,5.0,57.520661,1.15462,56.123216,56.423742,58.1142,58.37716,58.564989
W->D,5.0,79.183673,5.286543,72.594752,76.093294,78.134111,84.25656,84.83965
D->A,5.0,56.784373,1.150088,54.84598,56.686702,57.175056,57.513148,57.700977
D->W,5.0,77.90625,1.773283,75.625,77.1875,77.8125,78.4375,80.46875


In [170]:
dage_aux_dense_64_bs_16_scores = get_score_combinations_new(
    method='dage',
    experiment_id='dage_aux_dense_low_bs',
    config_key='aux_dense_size',
    config_value=64,
)*100
dage_aux_dense_64_bs_16_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,76.4375,2.527679,72.8125,74.84375,77.5,78.28125,78.75
A->D,5.0,78.425656,3.46804,75.218659,76.38484,76.676385,80.174927,83.673469
W->A,5.0,56.49136,1.429326,54.507889,55.484598,57.099925,57.588279,57.776108
W->D,5.0,79.766764,2.800959,76.96793,77.55102,79.591837,80.758017,83.965015
D->A,5.0,57.44553,0.446385,56.912096,57.137491,57.438017,57.700977,58.039068
D->W,5.0,77.625,1.69515,75.3125,76.875,77.65625,78.4375,79.84375


### DAGE with class-wise attention
Even loss weights (source and target entropy both weighted)
Varying loss alpha

In [171]:
dage_cw_attention_025_scores = get_score_combinations_new(
    method='dage_a',
    experiment_id='dage_attention_even',
    config_key='loss_alpha',
    config_value=0.25,
)*100
dage_cw_attention_025_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,77.125,0.40745,76.71875,76.71875,77.1875,77.34375,77.65625
A->D,5.0,77.434402,1.280809,76.093294,76.676385,76.96793,78.134111,79.300292
W->A,5.0,60.646131,1.845621,58.414726,59.203606,60.894065,61.795642,62.922615
W->D,5.0,92.769679,2.425263,88.921283,92.12828,93.294461,94.460641,95.043732
D->A,5.0,61.029301,1.486721,59.579264,59.992487,60.330579,62.396694,62.847483
D->W,5.0,91.71875,2.058116,88.75,91.40625,91.875,92.03125,94.53125


In [172]:
dage_cw_attention_050_scores = get_score_combinations_new(
    method='dage_a',
    experiment_id='dage_attention_even',
    config_key='loss_alpha',
    config_value=0.5,
)*100
dage_cw_attention_050_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,77.03125,1.668293,75.0,76.09375,76.5625,78.75,78.75
A->D,5.0,76.443149,3.25566,71.137026,75.801749,77.259475,79.008746,79.008746
W->A,5.0,61.592787,1.211633,60.180316,60.555973,61.682945,62.69722,62.847483
W->D,5.0,92.069971,1.492301,89.504373,92.12828,92.71137,92.71137,93.294461
D->A,5.0,60.443276,1.843977,58.151766,58.865515,61.044328,61.795642,62.359128
D->W,5.0,91.125,1.709492,89.21875,90.15625,90.9375,91.5625,93.75


In [173]:
dage_cw_attention_075_scores = get_score_combinations_new(
    method='dage_a',
    experiment_id='dage_attention_even',
    config_key='loss_alpha',
    config_value=0.75,
)*100
dage_cw_attention_075_scores.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,78.53125,1.028168,77.65625,77.8125,78.125,78.90625,80.15625
A->D,5.0,76.96793,2.030377,73.760933,76.38484,77.55102,78.134111,79.008746
W->A,5.0,61.284748,1.651312,58.90308,60.518407,61.607814,62.1713,63.22314
W->D,5.0,91.195335,0.859935,90.379009,90.379009,91.253644,91.54519,92.419825
D->A,5.0,61.141998,1.892993,58.039068,61.006762,61.419985,62.283997,62.96018
D->W,5.0,90.71875,1.213329,89.6875,89.84375,90.3125,91.09375,92.65625


## Summary

In [174]:
def present(df, name):
    df = df.describe().T[['mean','std']]
    tot_mean = df.mean()[['mean']]
    
    toStr = lambda s: f'{s:.2f}'
    df = df['mean'].map(toStr) + ' ±' + df['std'].map(toStr)
    df = pd.concat([df,tot_mean[['mean']]])
    return pd.DataFrame(df, columns=[name]).T


def highlight_max(s):
    is_max = s == s.max()
    return ['font-weight: bold' if v else '' for v in is_max]

df_all_from_img_vgg16 = pd.concat([
    present(tune_source_no_aug_scores, 'FT (source only, w/o aug, VGG16)'),
    present(tune_source_scores, 'FT (source only, w. aug, VGG16)'),
    present(tune_both_scores, 'FT (source and target, w. aug, VGG16)'),
    present(multitask_scores, 'Multitask, w. aug, VGG16'),
    present(ccsa_scores, 'CCSA (even, w. aug, VGG16)'),
    present(ccsa_uneven_scores, 'CCSA (uneven, w. aug, VGG16)'),
    present(dsne_scores, 'd-SNE (even, w. aug, VGG16)'),
    present(dsne_uneven_scores, 'd-SNE (uneven, w. aug, VGG16)'),
#     present(dsne_large_uneven_scores, 'd-SNE (large, w. aug, uneven, VGG16)'),
#     present(dage_logits_bs_16_scores, 'DAGE (uneven, w/o. aug, VGG16, bs 16)'),
#     present(dage_logits_bs_full_scores, 'DAGE (uneven, w/o. aug, VGG16, bs full)'),
#     present(ccsa_resnet_uneven_scores, 'CCSA (uneven, ResNet101_v2)'),
#     present(dsne_resnet_uneven_scores, 'd-SNE (uneven, ResNet101_v2)'),
#     present(dage_resnet_uneven_scores, 'DAGE (uneven, ResNet101_v2)'),
])

df_all_from_feat_vgg16 = pd.concat([
    present(multitask_from_feat_scores, 'Multitask (w/o. aug, VGG16, bs 16)'),
    present(ccsa_from_feat_batch_size_16_uneven_scores, 'CCSA (uneven, w/o. aug, VGG16, bs 16)'),
    present(ccsa_from_feat_batch_size_32_uneven_scores, 'CCSA (uneven, w/o. aug, VGG16, bs 32)'),
    present(ccsa_from_feat_batch_size_64_uneven_scores, 'CCSA (uneven, w/o. aug, VGG16, bs 64)'),
    present(ccsa_from_feat_batch_size_128_uneven_scores, 'CCSA (uneven, w/o. aug, VGG16, bs 128)'),
    present(ccsa_from_feat_batch_size_256_uneven_scores, 'CCSA (uneven, w/o. aug, VGG16, bs 256)'),
    present(ccsa_from_feat_batch_size_4096_uneven_scores, 'CCSA (uneven, w/o. aug, VGG16, bs full)'),
    present(dsne_from_feat_batch_size_16_uneven_scores, 'd-SNE (uneven, w/o. aug, VGG16, bs 16)'),
    present(dage_aux_dense_16_bs_full_scores, 'DAGE (uneven, aux dense 16, w/o. aug, VGG16, bs full)'),
    present(dage_aux_dense_31_bs_full_scores, 'DAGE (uneven, aux dense 31, w/o. aug, VGG16, bs full)'),
    present(dage_aux_dense_64_bs_full_scores, 'DAGE (uneven, aux dense 64, w/o. aug, VGG16, bs full)'),
    present(dage_aux_dense_16_bs_16_scores, 'DAGE (uneven, aux dense 16, w/o. aug, VGG16, bs 16)'),
    present(dage_aux_dense_31_bs_16_scores, 'DAGE (uneven, aux dense 31, w/o. aug, VGG16, bs 16)'),
    present(dage_aux_dense_64_bs_16_scores, 'DAGE (uneven, aux dense 64, w/o. aug, VGG16, bs 16)'),
    present(dage_logits_bs_16_scores, 'DAGE (uneven, logits, w/o. aug, VGG16, bs 16)'),
    present(dage_logits_bs_64_scores, 'DAGE (uneven, logits, w/o. aug, VGG16, bs 64)'),
    present(dage_logits_bs_256_scores, 'DAGE (uneven, logits, w/o. aug, VGG16, bs 256)'),
    present(dage_logits_bs_1024_scores, 'DAGE (uneven, logits, w/o. aug, VGG16, bs 1024)'),
    present(dage_logits_bs_full_scores, 'DAGE (uneven, logits, w/o. aug, VGG16, bs full)'),
    present(dage_embeds_bs_8_scores, 'DAGE (uneven, embeds, full, w/o. aug, VGG16, bs 8)'),
    present(dage_embeds_bs_16_scores, 'DAGE (uneven, embeds, full, w/o. aug, VGG16, bs 16)'),
    present(dage_embeds_bs_32_scores, 'DAGE (uneven, embeds, full, w/o. aug, VGG16, bs 32)'),
    present(dage_embeds_bs_64_scores, 'DAGE (uneven, embeds, full, w/o. aug, VGG16, bs 64)'),
    present(dage_embeds_bs_128_scores, 'DAGE (uneven, embeds, full, w/o. aug, VGG16, bs 128)'),
    present(dage_embeds_bs_full_scores, 'DAGE (uneven, embeds, full, w/o. aug, VGG16, bs full)'),
    present(dage_embeds_full_across_025_scores, 'DAGE (uneven, embeds, full across, alpha=0.25 w/o. aug, VGG16, bs 16)'),
    present(dage_embeds_full_across_05_scores,  'DAGE (uneven, embeds, full across, alpha=0.50 w/o. aug, VGG16, bs 16)'),
    present(dage_embeds_full_across_075_scores, 'DAGE (uneven, embeds, full across, alpha=0.75 w/o. aug, VGG16, bs 16)'),
    present(dage_embeds_pair_across_025_scores, 'DAGE (uneven, embeds, pair across, alpha=0.25 w/o. aug, VGG16, bs 16)'),
    present(dage_embeds_pair_across_05_scores,  'DAGE (uneven, embeds, pair across, alpha=0.50 w/o. aug, VGG16, bs 16)'),
    present(dage_embeds_pair_across_075_scores, 'DAGE (uneven, embeds, pair across, alpha=0.75 w/o. aug, VGG16, bs 16)'),
    present(dage_embeds_across_knn_knn_1_scores, 'DAGE (uneven, embeds, full across, knn, knn, k=1, w/o. aug, VGG16, bs 16)'),
    present(dage_embeds_across_knn_knn_2_scores, 'DAGE (uneven, embeds, full across, knn, knn, k=2, w/o. aug, VGG16, bs 16)'),
    present(dage_embeds_across_kfn_knn_1_scores, 'DAGE (uneven, embeds, full across, kfn, knn, k=1, w/o. aug, VGG16, bs 16)'),
    present(dage_embeds_across_kfn_knn_2_scores, 'DAGE (uneven, embeds, full across, kfn, knn, k=2, w/o. aug, VGG16, bs 16)'),
    present(dage_cw_attention_025_scores, 'DAGE (even, embeds, full across, cw attention, alpha=0.25 w/o. aug, VGG16, bs 16)'),
    present(dage_cw_attention_050_scores, 'DAGE (even, embeds, full across, cw attention, alpha=0.5 w/o. aug, VGG16, bs 16)'),
    present(dage_cw_attention_075_scores, 'DAGE (even, embeds, full across, cw attention, alpha=0.75 w/o. aug, VGG16, bs 16)'),
])

df_all_from_img_renset101 = pd.concat([
    present(ccsa_resnet_uneven_scores, 'CCSA (uneven, ResNet101_v2)'),
    present(dsne_resnet_uneven_scores, 'd-SNE (uneven, ResNet101_v2)'),
    present(dage_resnet_uneven_scores, 'DAGE (uneven, ResNet101_v2)'),
])

In [175]:
df_all_from_img_vgg16.style.apply(highlight_max)

Unnamed: 0,A->W,A->D,W->A,W->D,D->A,D->W,mean
"FT (source only, w/o aug, VGG16)",52.66 ±1.25,56.96 ±3.21,42.34 ±1.47,98.57 ±0.57,36.84 ±2.34,91.03 ±2.01,63.0683
"FT (source only, w. aug, VGG16)",55.34 ±1.86,59.11 ±1.69,40.91 ±2.01,98.75 ±0.68,40.82 ±1.71,93.62 ±1.20,64.7596
"FT (source and target, w. aug, VGG16)",60.06 ±3.52,65.54 ±3.10,51.44 ±1.76,99.23 ±0.58,52.77 ±2.71,94.56 ±2.71,70.5994
"Multitask, w. aug, VGG16",81.69 ±1.94,80.23 ±3.69,62.91 ±1.67,96.33 ±1.91,62.15 ±1.91,93.38 ±1.06,79.4464
"CCSA (even, w. aug, VGG16)",82.50 ±2.72,82.27 ±3.61,59.94 ±2.55,93.24 ±2.52,60.11 ±1.92,92.44 ±2.33,78.4167
"CCSA (uneven, w. aug, VGG16)",83.47 ±1.46,82.92 ±2.25,60.92 ±3.09,93.94 ±1.65,60.83 ±2.82,92.53 ±2.62,79.1003
"d-SNE (even, w. aug, VGG16)",82.25 ±3.34,81.57 ±1.56,61.31 ±1.94,93.82 ±1.46,61.13 ±1.85,93.16 ±1.26,78.8736
"d-SNE (uneven, w. aug, VGG16)",79.88 ±3.71,80.41 ±3.78,61.11 ±0.95,88.28 ±3.89,59.80 ±0.67,90.28 ±1.56,76.6268


In [176]:
df_all_from_feat_vgg16.style.apply(highlight_max)

Unnamed: 0,A->W,A->D,W->A,W->D,D->A,D->W,mean
"Multitask (w/o. aug, VGG16, bs 16)",77.34 ±1.61,77.20 ±2.07,61.60 ±1.39,93.00 ±2.51,62.23 ±0.68,91.34 ±0.83,77.1205
"CCSA (uneven, w/o. aug, VGG16, bs 16)",79.25 ±1.81,78.48 ±1.64,59.56 ±0.79,90.03 ±3.43,58.77 ±2.07,90.53 ±0.92,76.1032
"CCSA (uneven, w/o. aug, VGG16, bs 32)",79.31 ±1.59,78.08 ±2.49,59.47 ±1.36,90.15 ±2.16,59.12 ±1.78,90.62 ±1.08,76.1257
"CCSA (uneven, w/o. aug, VGG16, bs 64)",79.71 ±2.31,79.14 ±3.45,61.36 ±1.11,88.77 ±2.19,62.50 ±1.69,90.69 ±2.12,77.0291
"CCSA (uneven, w/o. aug, VGG16, bs 128)",78.00 ±1.61,76.15 ±3.36,57.98 ±2.10,87.70 ±2.21,57.93 ±1.21,88.16 ±2.79,74.3183
"CCSA (uneven, w/o. aug, VGG16, bs 256)",77.00 ±2.09,76.38 ±1.68,53.62 ±3.49,86.21 ±4.70,55.54 ±2.57,84.80 ±3.44,72.259
"CCSA (uneven, w/o. aug, VGG16, bs full)",72.22 ±3.71,66.59 ±5.84,49.92 ±3.04,77.49 ±5.19,48.38 ±4.85,80.16 ±5.31,65.7931
"d-SNE (uneven, w/o. aug, VGG16, bs 16)",77.53 ±1.10,77.75 ±2.51,58.53 ±0.93,89.04 ±3.73,58.31 ±1.26,84.94 ±1.17,74.3494
"DAGE (uneven, aux dense 16, w/o. aug, VGG16, bs full)",74.53 ±0.87,74.34 ±1.65,57.45 ±1.09,75.22 ±2.31,57.76 ±1.80,74.64 ±1.30,68.9906
"DAGE (uneven, aux dense 31, w/o. aug, VGG16, bs full)",74.59 ±1.43,75.57 ±3.84,54.74 ±5.06,75.16 ±5.61,56.67 ±1.75,73.56 ±3.27,68.3829


In [177]:
df_all_from_img_renset101.style.apply(highlight_max)

Unnamed: 0,A->W,A->D,W->A,W->D,D->A,D->W,mean
"CCSA (uneven, ResNet101_v2)",86.69 ±1.84,84.99 ±1.70,66.47 ±2.01,93.35 ±2.06,63.48 ±4.04,92.19 ±1.18,81.1936
"d-SNE (uneven, ResNet101_v2)",84.03 ±1.82,85.07 ±1.69,64.80 ±2.71,91.02 ±1.43,64.86 ±1.71,89.41 ±1.06,79.8655
"DAGE (uneven, ResNet101_v2)",86.53 ±2.47,87.93 ±1.95,63.08 ±3.67,89.21 ±3.02,62.19 ±3.74,87.16 ±1.99,79.3508


# Utils

In [None]:
# restructure former runs
def reorganize(from_date, to_date, suffix, experiment_id='your_id', description='your_description_here'):

    RUNS_DIR_OLD = RUNS_DIR / '..' / 'runs_old'

    for item in RUNS_DIR_OLD.glob('*'):
        parts = item.name.split('_')
        if len(parts)<3:
            continue
        timestamp = parts[0]
        src = parts[1]
        tgt = parts[2]
        method = '_'.join(parts[3:])
        
        if (item.is_dir() 
            and item.name.endswith(suffix)
            and int(item.name.split('_')[0]) >= int(from_date)
            and int(item.name.split('_')[0]) <= int(to_date)):

            seed = load_json(item / 'config.json')['seed']

            new_method_dir = RUNS_DIR / method / experiment_id 
            new_run_dir = new_method_dir / '{}{}_{}_{}'.format(src,tgt,seed,timestamp)
            
            try:
                shutil.copytree(item, new_run_dir)
            except:
                pass
            
            new_run_dir.mkdir(parents=True, exist_ok=True)

            with open(new_method_dir / 'description.txt', 'a') as f:
                f.write(description)


In [None]:
reorganize(
    from_date='20191014123846',
    to_date='20191014162536',
    suffix='tune_source',
    experiment_id='tune_source_no_aug',
    description='In this experiment, we tune a VGG16-network pretrained on ImageNet with all available source data. The target data is used for validation (during training) and test. No augmentation was used'
)

reorganize(
    from_date='20191022103424',
    to_date='20191022142437',
    suffix='tune_source',
    experiment_id='tune_source_with_aug',
    description='In this experiment, we tune a VGG16-network pretrained on ImageNet with all available source data. The target data is used for validation (during training) and test. Augmentation was used'
)

reorganize(
    from_date='20191023072318',
    to_date='20191023123426',
    suffix='tune_target',
    experiment_id='tune_target_with_aug',
    description='Fine tune target from ImageNet model that was already fine-tuned on source. Augmentation was used. No batchnorm was used.'
)

reorganize(
    from_date='20191025141713',
    to_date='20191025170306',
    suffix='ccsa',
    experiment_id='ccsa_without_batchnorn',
    description='CCSA method without batchnorm. In this experiment, we saw stability issues for loss_alpha over 0.01.'
)

reorganize(
    from_date='20191101173414',
    to_date='20191101191220',
    suffix='ccsa',
    experiment_id='ccsa_with_batchnorn_alpha_0',
    description='CCSA method with batchnorm and loss_alpha=0. This amounts to a multi-task learning setup.'
)

reorganize(
    from_date='20191101150707',
    to_date='20191101171905',
    suffix='ccsa',
    experiment_id='ccsa_with_batchnorn_alpha_0.25',
    description='CCSA method with batchnorm and loss_alpha=0.25.'
)

reorganize(
    from_date='20191104152050',
    to_date='20191104185818',
    suffix='ccsa',
    experiment_id='ccsa_uneven',
    description='CCSA method with batchnorm and loss_alpha=0.25. Here, we only weight the target softmax (source softmax not used).'
)

reorganize(
    from_date='20191106083058',
    to_date='20191106144631',
    suffix='ccsa',
    experiment_id='ccsa_resnet_uneven',
    description='CCSA method with batchnorm and loss_alpha=0.25. Here, we only weight the target softmax (source softmax not used). ResNet features where used.'
)

reorganize(
    from_date='20191104094606',
    to_date='20191104124943',
    suffix='dsne',
    experiment_id='dsne_even',
    description='DSNE method using both source and target softmax loss'
)

reorganize(
    from_date='20191104152038',
    to_date='20191104182550',
    suffix='dsne',
    experiment_id='dsne_uneven',
    description='DSNE method using only target softmax loss'
)

reorganize(
    from_date='20191105120356',
    to_date='20191105154214',
    suffix='dsne',
    experiment_id='dsne_uneven_large',
    description='DSNE method using only target softmax loss. Larger dimensionality was used for dense and embedding layers'
)

reorganize(
    from_date='20191107085855',
    to_date='20191107131237',
    suffix='dsne',
    experiment_id='dsne_uneven_resnet',
    description='DSNE method using only target softmax loss. ResNet features where used.'
)

reorganize(
    from_date='20191113093959',
    to_date='20191113143542',
    suffix='homebrew',
    experiment_id='dage_uneven_resnet',
    description='DAGE method using only target softmax loss. ResNet features where used.'
)