# Test evaluations

In [2]:
from os.path import realpath
from pathlib import Path
import pandas as pd
import itertools

# Locate the `runs` directory: first try <cwd>/runs, and fall back to the
# sibling <cwd>/../runs when the first candidate is not inside the project.
RUNS_DIR = Path(realpath('.'), 'runs')
if 'domain-adaptation/runs' not in str(RUNS_DIR):
    RUNS_DIR = Path(realpath('.')).parent.joinpath('runs')
# Fail early when neither candidate points into the expected project tree.
assert 'domain-adaptation/runs' in str(RUNS_DIR)

In [3]:
def load_cls_rep_paths(
    suffix:str, 
    runs_dir:Path=RUNS_DIR, 
    from_date:str='19700101000000', 
    to_date:str='30001010000000'
):
    """Collect the `report.json` paths of the runs matching a suffix and a time window.

    A run is a sub-directory of `runs_dir` whose name ends with `suffix` and
    whose first `_`-separated token is an integer timestamp within
    `[from_date, to_date]` (both bounds inclusive).

    Args:
        suffix: Required ending of the run directory name.
        runs_dir: Directory that contains one sub-directory per run.
        from_date: Lower bound, a 14-character numeric string
            (presumably `YYYYMMDDhhmmss` -- TODO confirm).
        to_date: Upper bound, same format as `from_date`.

    Returns:
        Sorted list of `<run_dir>/report.json` paths. The existence of the
        report file itself is not checked.

    Raises:
        AssertionError: If either bound is not 14 characters long.
        ValueError: If either bound cannot be parsed as an integer.
    """
    assert len(from_date) == 14 and len(to_date) == 14, (
        'from_date and to_date must be 14-character timestamps'
    )
    # Parse the bounds once instead of once per directory.
    earliest, latest = int(from_date), int(to_date)

    def run_timestamp(run_dir):
        # Directories without a numeric prefix are not timestamped runs;
        # report them as None so they are skipped instead of raising.
        try:
            return int(run_dir.name.split('_')[0])
        except ValueError:
            return None

    report_paths = []
    for item in runs_dir.glob('*'):
        if not (item.is_dir() and item.name.endswith(suffix)):
            continue
        stamp = run_timestamp(item)
        if stamp is not None and earliest <= stamp <= latest:
            report_paths.append(item / 'report.json')
    return sorted(report_paths)

def get_score(
    suffix:str, 
    runs_dir:Path=RUNS_DIR, 
    metric:str='precision',
    avg_types=['macro avg', 'weighted avg'],
    from_date:str='19700101000000', 
    to_date:str='30001010000000',
    map_col_name= lambda n: n
):
    """Build a table of one metric across all matching runs.

    Every report found by `load_cls_rep_paths` is read with `pd.read_json`
    and indexed as `report[avg_type][metric]`.
    (Presumably the reports are scikit-learn classification reports -- TODO confirm.)

    Args:
        suffix: Required ending of the run directory name.
        runs_dir: Directory that contains one sub-directory per run.
        metric: Row label to read from each report.
        avg_types: Column labels to read from each report; not mutated.
        from_date: Inclusive lower timestamp bound (14-character string).
        to_date: Inclusive upper timestamp bound (14-character string).
        map_col_name: Callable mapping each entry of `avg_types` to the
            column name used in the result.

    Returns:
        DataFrame with one row per run (in sorted path order) and one column
        per entry of `avg_types`.
    """
    # Materialise once so any iterable can safely be traversed twice below.
    avg_types = list(avg_types)
    # Honour the caller's `runs_dir`; the global RUNS_DIR is only the default.
    report_paths = load_cls_rep_paths(suffix, runs_dir, from_date, to_date)
    reports = [pd.read_json(p) for p in report_paths]
    rows = [
        [report[avg_type][metric] for avg_type in avg_types]
        for report in reports
    ]
    return pd.DataFrame(
        rows,
        columns=[map_col_name(avg_type) for avg_type in avg_types],
    )

def get_score_combinations(
    suffix:str, 
    domains=['A','W','D'], 
    runs_dir:Path=RUNS_DIR, 
    metric:str='precision',
    avg_types=['macro avg', 'weighted avg'],
    from_date:str='19700101000000', 
    to_date:str='30001010000000',
):
    """Gather scores for every ordered pair of distinct domains.

    For each pair `(source, target)` the runs named
    `..._<source>_<target>_<suffix>` are loaded through `get_score`, and the
    per-pair tables are stacked row-wise with `pd.concat`.

    Args:
        suffix: Experiment suffix appended after the `<source>_<target>_` part.
        domains: Domain identifiers; all ordered pairs of unequal entries are used.
        runs_dir: Directory that contains one sub-directory per run.
        metric: Row label to read from each report.
        avg_types: Column labels to read from each report; not mutated.
        from_date: Inclusive lower timestamp bound (14-character string).
        to_date: Inclusive upper timestamp bound (14-character string).

    Returns:
        DataFrame whose columns are named `'<source>-><target>'` when a single
        average type is requested, or `'<source>-><target> (<avg type>)'` when
        several are, so that column names stay unique. Rows of different pairs
        are stacked, so every row holds values for one pair only. An empty
        DataFrame is returned when there is no pair of distinct domains.
    """
    avg_types = list(avg_types)
    combos = [
        (source, target)
        for source, target in itertools.product(domains, repeat=2)
        if source != target
    ]
    if not combos:
        # pd.concat raises on an empty list; an empty frame is easier to handle.
        return pd.DataFrame()

    def column_namer(source, target):
        # Bind the pair eagerly (no late-binding closure over the loop variable).
        label = '{}->{}'.format(source, target)
        if len(avg_types) == 1:
            return lambda avg_type: label
        # Several average types would otherwise all share the same column name.
        return lambda avg_type: '{} ({})'.format(label, avg_type)

    scores = [
        get_score(
            suffix='{}_{}_{}'.format(source, target, suffix),
            runs_dir=runs_dir,
            metric=metric,
            avg_types=avg_types,
            from_date=from_date,
            to_date=to_date,
            map_col_name=column_namer(source, target),
        )
        for source, target in combos
    ]
    return pd.concat(scores, sort=False)
    

# get_score(suffix='A_D_tune_source', avg_types=['macro avg']).describe()

# reports = [ pd.read_json(p) for p in load_cls_rep_paths(suffix='A_D_tune_source') ]
# reports[0]

## Tune source only
In this experiment, we tune a VGG16-network pretrained on ImageNet with all available source data.
The target data is used for validation (during training) and test.

In [4]:
# Source-only fine-tuning without augmentation: macro-avg precision, in percent.
tune_source_no_aug_scores = 100 * get_score_combinations(
    suffix='tune_source',
    domains=['A', 'W', 'D'],
    avg_types=['macro avg'],
    from_date='20191014123846',
    to_date='20191014162536',
)
tune_source_no_aug_scores.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,58.647708,1.675882,56.614427,57.265038,59.272086,59.371915,60.715072
A->D,5.0,66.119209,1.978536,63.9226,64.272422,66.324489,67.951688,68.124848
W->A,5.0,45.655557,1.135846,44.720362,45.216344,45.321017,45.387328,47.632732
W->D,5.0,98.714765,0.487872,98.14102,98.279352,98.85421,99.01609,99.283154
D->A,5.0,45.956431,2.777532,42.368585,44.673151,45.209599,48.745268,48.785551
D->W,5.0,91.561151,1.374618,89.826742,90.537983,91.810545,92.430816,93.199668


In [5]:
# Source-only fine-tuning with augmentation: macro-avg precision, in percent.
tune_source_scores = 100 * get_score_combinations(
    suffix='tune_source',
    domains=['A', 'W', 'D'],
    avg_types=['macro avg'],
    from_date='20191022103424',
    to_date='20191022142437',
)
tune_source_scores.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,59.522947,1.309548,57.809506,58.72371,59.741903,60.119134,61.220482
A->D,5.0,67.163203,1.622441,64.650273,66.852105,67.490647,67.742923,69.080065
W->A,5.0,45.297358,2.32515,42.970616,43.627089,44.308746,47.700264,47.880078
W->D,5.0,98.657229,0.771405,97.900666,98.197343,98.347653,99.010264,99.830221
D->A,5.0,46.943538,1.683987,44.351185,46.729477,46.780614,48.161656,48.694758
D->W,5.0,94.329697,1.313532,92.649473,93.192383,94.96945,95.372884,95.464293


In [6]:
# Mean/std of the element-wise difference: with augmentation minus without.
(tune_source_scores - tune_source_no_aug_scores).describe().loc[['mean', 'std']].T

Unnamed: 0,mean,std
A->W,0.875239,1.68049
A->D,1.043994,1.164266
W->A,-0.358198,2.102998
W->D,-0.057536,0.547688
D->A,0.987107,3.48028
D->W,2.768546,1.952751


## Tune source and target

In [7]:
# Runs with the `tune_target` suffix: macro-avg precision, in percent.
tune_both_scores = 100 * get_score_combinations(
    suffix='tune_target',
    domains=['A', 'W', 'D'],
    avg_types=['macro avg'],
    from_date='20191023072318',
    to_date='20191023123426',
)
tune_both_scores.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,61.447768,1.87253,58.935474,61.053916,61.301769,61.773919,64.173762
A->D,5.0,71.565021,2.735697,67.044792,71.531295,71.779077,73.594656,73.875283
W->A,5.0,52.377981,1.508269,50.675616,51.028661,52.672868,53.25945,54.253307
W->D,5.0,99.169865,0.620451,98.407462,98.785086,99.139785,99.516992,100.0
D->A,5.0,52.608226,2.532902,48.726655,52.347771,52.48521,53.898876,55.58262
D->W,5.0,95.052622,2.355281,90.955851,95.494887,95.627899,96.329072,96.855399


In [8]:
# Mean/std of the element-wise difference: source+target tuning minus source-only tuning.
(tune_both_scores - tune_source_scores).describe().loc[['mean', 'std']].T

Unnamed: 0,mean,std
A->W,1.924821,2.739708
A->D,4.401818,3.82653
W->A,7.080622,3.180762
W->D,0.512635,0.480969
D->A,5.664688,3.961753
D->W,0.722925,3.014821


## CCSA

__Run 1__: There were some severe stability issues when introducing the CSA loss (alpha 0.25, as done by the original author).
Employed parameters:
- alpha=0.01 
- freeze_base=true

In [9]:
# CCSA run 1: macro-avg precision, in percent.
tune_ccsa_scores = 100 * get_score_combinations(
    suffix='ccsa',
    domains=['A', 'W', 'D'],
    avg_types=['macro avg'],
    from_date='20191025141713',
    to_date='20191025170306',
)
tune_ccsa_scores.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,78.798048,2.45935,75.701057,76.806076,79.394747,80.737548,81.350812
A->D,5.0,79.63265,3.983204,75.801623,76.008044,79.133937,82.237858,84.98179
W->A,5.0,61.247778,2.383082,59.214011,59.410071,60.22722,62.646077,64.74151
W->D,5.0,93.912782,2.337585,90.781195,93.178093,93.841792,94.511284,97.251548
D->A,5.0,61.581135,1.017806,59.997092,61.487433,61.752556,61.853747,62.814847
D->W,5.0,93.393685,1.891096,91.173019,91.928614,93.604523,94.374407,95.887861


In [10]:
# Mean/std of the element-wise difference: CCSA run 1 minus source+target tuning.
(tune_ccsa_scores - tune_both_scores).describe().loc[['mean', 'std']].T

Unnamed: 0,mean,std
A->W,17.35028,3.81661
A->D,8.06763,3.528893
W->A,8.869797,2.056354
W->D,-5.257083,2.004682
D->A,8.972909,3.050441
D->W,-1.658937,2.960813


We observe a large performance increment for the domain adaptations where domains are far apart. However, for similar domains (W, D), performance decreased: on average by about 5.3 percentage points for W->D and 1.7 for D->W.

__Run 2__: Using batch-norm greatly increased the stability of the method. The choice of alpha is still unclear, though. We'll try out a couple of configurations for a single adaptation.

In [11]:
# First alpha configuration, restricted to domains A and D; scores in percent.
ccsa_scores_alpha0 = 100 * get_score_combinations(
    suffix='ccsa',
    domains=['A', 'D'],
    avg_types=['macro avg'],
    from_date='20191101122726',
    to_date='20191101123533',
)
ccsa_scores_alpha0.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->D,3.0,80.099791,5.141211,75.002485,77.507782,80.013079,82.648445,85.283811


In [12]:
# Second alpha configuration, restricted to domains A and D; scores in percent.
ccsa_scores_alpha025 = 100 * get_score_combinations(
    suffix='ccsa',
    domains=['A', 'D'],
    avg_types=['macro avg'],
    from_date='20191101123927',
    to_date='20191101125222',
)
ccsa_scores_alpha025.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->D,3.0,81.135601,2.974216,77.728031,80.098431,82.468832,82.839386,83.20994


In [13]:
# Third alpha configuration, restricted to domains A and D; scores in percent.
ccsa_scores_alpha05 = 100 * get_score_combinations(
    suffix='ccsa',
    domains=['A', 'D'],
    avg_types=['macro avg'],
    from_date='20191101130046',
    to_date='20191101132008',
)
ccsa_scores_alpha05.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->D,3.0,80.2884,2.492062,77.430716,79.427688,81.424659,81.717242,82.009825


__Run 3__ multi-task learning (ccsa code with alpha=0)

In [14]:
# Multi-task runs (stored under the `ccsa` suffix): macro-avg precision, in percent.
multitask_scores = 100 * get_score_combinations(
    suffix='ccsa',
    domains=['A', 'W', 'D'],
    avg_types=['macro avg'],
    from_date='20191101173414',
    to_date='20191101191220',
)
multitask_scores.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,82.786633,2.176073,79.688982,81.647784,83.031861,84.673918,84.890622
A->D,5.0,80.116118,3.575463,75.444807,78.19922,80.351172,81.64899,84.936403
W->A,5.0,63.620193,1.667376,61.608446,62.583484,63.262434,65.136835,65.509767
W->D,5.0,96.022428,1.805967,93.657375,95.365056,95.873795,96.615347,98.600569
D->A,5.0,63.539013,2.036433,60.171669,63.210951,64.276065,64.684695,65.351686
D->W,5.0,93.962518,0.803252,93.022606,93.299236,94.201545,94.277264,95.011937


__Run 4__ ccsa with alpha = 0.25

In [15]:
# CCSA run 4: macro-avg precision, in percent.
ccsa_scores = 100 * get_score_combinations(
    suffix='ccsa',
    domains=['A', 'W', 'D'],
    avg_types=['macro avg'],
    from_date='20191101150707',
    to_date='20191101171905',
)
ccsa_scores.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,84.123137,1.925269,81.226364,83.079057,85.104744,85.563238,85.642282
A->D,5.0,82.13117,3.735848,76.218028,80.769827,83.651851,84.741472,85.27467
W->A,5.0,61.107807,2.253307,58.095626,60.495632,60.917527,61.682899,64.347349
W->D,5.0,92.810065,2.506197,88.809418,92.114287,93.452229,94.792916,94.881476
D->A,5.0,62.383859,2.733972,58.223974,61.187359,63.625331,63.681493,65.201136
D->W,5.0,93.71217,1.357313,91.648125,93.074904,94.255764,94.591728,94.990329


In [16]:
# Mean/std of the element-wise difference: CCSA minus multi-task.
(ccsa_scores - multitask_scores).describe().loc[['mean', 'std']].T

Unnamed: 0,mean,std
A->W,1.336504,0.540338
A->D,2.015051,3.230577
W->A,-2.512387,3.196939
W->D,-3.212363,2.401068
D->A,-1.155155,1.416689
D->W,-0.250348,1.25738


In [17]:
# CCSA "uneven" runs: macro-avg precision, in percent.
ccsa_uneven_scores = 100 * get_score_combinations(
    suffix='ccsa',
    domains=['A', 'W', 'D'],
    avg_types=['macro avg'],
    from_date='20191104152050',
    to_date='20191104185818',
)
ccsa_uneven_scores.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,84.707488,1.425911,82.84051,83.521377,85.504465,85.662013,86.009077
A->D,5.0,82.596073,2.859945,78.051521,82.07346,82.891426,84.582776,85.381183
W->A,5.0,62.244928,3.06593,58.439242,59.682659,63.575644,63.740426,65.786667
W->D,5.0,93.793427,2.248753,90.650457,92.540323,94.144556,95.328581,96.303216
D->A,5.0,61.599173,3.129505,59.65982,59.679137,60.013532,61.639419,67.003957
D->W,5.0,93.238799,2.460498,90.460994,91.45336,92.842509,95.064086,96.373045


In [18]:
# Mean/std of the element-wise difference: CCSA uneven minus CCSA even.
(ccsa_uneven_scores - ccsa_scores).describe().loc[['mean', 'std']].T

Unnamed: 0,mean,std
A->W,0.584351,0.990042
A->D,0.464904,2.139264
W->A,1.137121,2.793617
W->D,0.983361,1.868095
D->A,-0.784686,2.81392
D->W,-0.473371,1.46145


In [19]:
# CCSA "uneven" runs with the ResNet backbone: macro-avg precision, in percent.
ccsa_resnet_uneven_scores = 100 * get_score_combinations(
    suffix='ccsa',
    domains=['A', 'W', 'D'],
    avg_types=['macro avg'],
    from_date='20191106083058',
    to_date='20191106144631',
)
ccsa_resnet_uneven_scores.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,88.515942,1.163446,87.404071,87.423183,88.734515,88.814687,90.203254
A->D,6.0,87.694157,2.361554,84.903802,85.647832,87.861207,89.694434,90.323178
W->A,5.0,68.780836,1.997994,66.350606,67.673645,68.219386,70.584884,71.075659
W->D,5.0,94.85447,1.429756,92.442928,94.860859,95.217836,95.606338,96.144388
D->A,6.0,67.163408,3.654978,61.017038,65.736368,67.754554,69.889443,70.789592
D->W,5.0,93.637002,0.672926,93.153521,93.254722,93.310533,93.678979,94.787255


In [20]:
# Mean/std of the element-wise difference: CCSA ResNet uneven minus CCSA uneven.
# NOTE(review): the two frames hold different numbers of runs for A->D and D->A,
# so the index alignment of this subtraction should be double-checked.
(ccsa_resnet_uneven_scores - ccsa_uneven_scores).describe().loc[['mean', 'std']].T

Unnamed: 0,mean,std
A->W,3.808454,0.858042
A->D,4.57228,3.420832
W->A,6.535909,1.967194
W->D,1.061043,1.537587
D->A,4.962897,6.470839
D->W,0.398203,2.228064


## d-SNE

In [21]:
# d-SNE runs: macro-avg precision, in percent.
dsne_scores = 100 * get_score_combinations(
    suffix='dsne',
    domains=['A', 'W', 'D'],
    avg_types=['macro avg'],
    from_date='20191104094606',
    to_date='20191104124943',
)
dsne_scores.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,83.59965,2.934409,78.691244,83.156612,84.848799,85.351201,85.950393
A->D,5.0,81.461538,1.801268,79.23711,79.839005,82.196868,82.928646,83.106061
W->A,5.0,62.476751,2.373655,58.803858,62.203546,62.429067,63.768399,65.178888
W->D,5.0,93.674039,2.514686,89.425243,93.501042,94.642239,95.006289,95.795379
D->A,5.0,63.597292,1.222746,61.85374,63.142117,63.479165,64.622647,64.88879
D->W,5.0,93.726503,1.360903,92.120308,92.483518,94.203878,94.564026,95.260785


In [22]:
# Mean/std of the element-wise difference: d-SNE minus CCSA.
(dsne_scores - ccsa_scores).describe().loc[['mean', 'std']].T

Unnamed: 0,mean,std
A->W,-0.523487,1.19674
A->D,-0.669632,2.424563
W->A,1.368945,3.476487
W->D,0.863973,1.527852
D->A,1.213433,2.917222
D->W,0.014333,0.74836


In [23]:
# d-SNE "uneven" runs: macro-avg precision, in percent.
dsne_uneven_scores = 100 * get_score_combinations(
    suffix='dsne',
    domains=['A', 'W', 'D'],
    avg_types=['macro avg'],
    from_date='20191104152038',
    to_date='20191104182550',
)
dsne_uneven_scores.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,81.331476,2.280087,77.81342,80.707778,81.545574,82.927621,83.662988
A->D,5.0,82.184021,4.724724,75.314405,79.355833,84.275841,85.354313,86.619711
W->A,5.0,63.370318,1.643826,61.438764,62.224592,63.638733,63.83983,65.709672
W->D,5.0,87.876722,4.714901,81.59864,85.661237,88.425745,89.325191,94.372796
D->A,5.0,62.785679,1.008744,61.346362,62.375043,63.042269,63.084983,64.079737
D->W,5.0,91.057538,1.481214,88.950284,90.322571,91.276593,91.977455,92.760784


In [24]:
# Mean/std of the element-wise difference: d-SNE uneven minus d-SNE even.
(dsne_uneven_scores - dsne_scores).describe().loc[['mean', 'std']].T

Unnamed: 0,mean,std
A->W,-2.268174,1.301751
A->D,0.722483,3.179148
W->A,0.893567,2.387081
W->D,-5.797317,3.108359
D->A,-0.811613,1.262002
D->W,-2.668965,1.594781


In [25]:
# d-SNE "large, uneven" runs: macro-avg precision, in percent.
dsne_large_uneven_scores = 100 * get_score_combinations(
    suffix='dsne',
    domains=['A', 'W', 'D'],
    avg_types=['macro avg'],
    from_date='20191105120356',
    to_date='20191105154214',
)
dsne_large_uneven_scores.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,82.006544,1.746592,80.447386,81.103619,81.287498,82.297248,84.896968
A->D,5.0,80.857453,3.553114,75.810612,80.170605,80.395155,82.351191,85.559702
W->A,5.0,64.476773,1.09209,63.534057,63.730474,64.034078,64.888069,66.197187
W->D,5.0,89.686751,3.566893,85.466601,88.014192,88.122183,93.292066,93.538713
D->A,5.0,63.997957,1.287688,62.607819,63.108509,64.067159,64.248378,65.957922
D->W,5.0,90.099385,2.351134,86.595243,89.745318,89.788602,91.505607,92.862155


In [26]:
# Mean/std of the element-wise difference: d-SNE large uneven minus d-SNE uneven.
(dsne_large_uneven_scores - dsne_uneven_scores).describe().loc[['mean', 'std']].T

Unnamed: 0,mean,std
A->W,0.675068,3.384591
A->D,-1.326568,2.461007
W->A,1.106455,1.617274
W->D,1.810029,2.271587
D->A,1.212278,1.573261
D->W,-0.958153,2.175731


In [27]:
# d-SNE "uneven" runs with the ResNet backbone: macro-avg precision, in percent.
dsne_resnet_uneven_scores = 100 * get_score_combinations(
    suffix='dsne',
    domains=['A', 'W', 'D'],
    avg_types=['macro avg'],
    from_date='20191107085855',
    to_date='20191107131237',
)
dsne_resnet_uneven_scores.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,87.113041,2.059469,84.446705,86.025125,86.758302,88.938416,89.396658
A->D,5.0,87.076223,2.464536,83.374793,85.919893,87.734352,89.119094,89.232982
W->A,5.0,68.358893,2.172218,66.098332,67.311365,67.603428,69.00508,71.776258
W->D,5.0,91.403295,1.552391,88.924168,91.046221,91.694993,92.527626,92.823467
D->A,5.0,69.11144,2.098614,65.398859,69.586397,69.975771,70.148826,70.447345
D->W,5.0,90.969955,0.464405,90.392922,90.578467,91.097682,91.332567,91.448135


## Homebrew

In [33]:
# Runs with the `homebrew` suffix (listed as DAGE in the summary): scores in percent.
dage_resnet_uneven_scores = 100 * get_score_combinations(
    suffix='homebrew',
    domains=['A', 'W', 'D'],
    avg_types=['macro avg'],
    from_date='20191113093959',
    to_date='20191113143542',
)
dage_resnet_uneven_scores.describe().transpose()



Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A->W,5.0,89.288269,1.675657,86.808312,88.916546,89.364825,89.938798,91.412862
A->D,5.0,88.8645,2.584009,85.228901,87.131043,89.950014,90.748162,91.264381
W->A,5.0,65.502206,2.647026,62.979116,63.671843,64.131791,68.203819,68.524459
W->D,5.0,90.300239,3.579583,86.463322,87.055836,90.563158,92.549446,94.869432
D->A,5.0,65.268226,1.128416,63.829532,64.651385,65.056321,66.284887,66.519007
D->W,5.0,89.287878,1.89665,86.551691,88.577565,89.257746,90.6058,91.44659


## Summary

In [31]:
def present(df, name):
    """Condense a score table into a single, labelled summary row.

    Each column of `df` becomes a `'<mean> ±<std>'` string with two decimals,
    and a trailing `'mean'` entry holds the unformatted average of the
    per-column means.

    Args:
        df: Score table with one column per adaptation.
        name: Index label of the returned one-row frame.

    Returns:
        One-row DataFrame indexed by `name`, with the columns of `df`
        followed by `'mean'`.
    """
    stats = df.describe().T[['mean', 'std']]
    # Average of the per-column means, kept as a one-element Series.
    overall = stats.mean()[['mean']]

    def two_decimals(value):
        return f'{value:.2f}'

    cells = stats['mean'].map(two_decimals) + ' ±' + stats['std'].map(two_decimals)
    row = pd.concat([cells, overall])
    return pd.DataFrame(row, columns=[name]).T


def highlight_max(s):
    """Return per-cell CSS that renders the largest entry of `s` in bold.

    Cells may be plain numbers or strings that start with a number, such as
    `'99.17 ±0.62'`. The comparison uses the leading number, because comparing
    the raw strings is lexicographic and ranks `'100.00 ...'` below `'99.17 ...'`.
    When no cell starts with a number, the raw values are compared instead.

    Args:
        s: A pandas Series (one column or row handed over by `Styler.apply`).

    Returns:
        List of CSS strings, `'font-weight: bold'` for every maximal cell and
        `''` for the others.
    """
    leading = s.astype(str).str.extract(
        r'^\s*([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)', expand=False
    )
    # Cells without a leading number become NaN and are never highlighted.
    numeric = pd.to_numeric(leading, errors='coerce').astype(float)
    if numeric.notna().any():
        is_max = numeric == numeric.max()
    else:
        # Nothing numeric to compare: keep the plain value comparison.
        is_max = s == s.max()
    return ['font-weight: bold' if v else '' for v in is_max]

# One summary row per experiment, stacked in presentation order.
df_all = pd.concat([
    present(scores, label)
    for scores, label in (
        (tune_source_no_aug_scores, 'FT (source only, w/o aug, VGG16)'),
        (tune_source_scores, 'FT (source only, w. aug, VGG16)'),
        (tune_both_scores, 'FT (source and target, VGG16)'),
        (multitask_scores, 'Multitask, VGG16'),
        (ccsa_scores, 'CCSA (even, VGG16)'),
        (ccsa_uneven_scores, 'CCSA (uneven, VGG16)'),
        (dsne_scores, 'd-SNE (even, VGG16)'),
        (dsne_uneven_scores, 'd-SNE (uneven, VGG16)'),
        (dsne_large_uneven_scores, 'd-SNE (large, uneven, VGG16)'),
        (ccsa_resnet_uneven_scores, 'CCSA (uneven, ResNet101_v2)'),
        (dsne_resnet_uneven_scores, 'd-SNE (uneven, ResNet101_v2)'),
        (dage_resnet_uneven_scores, 'DAGE (uneven, ResNet101_v2)'),
    )
])

# Render the table with `highlight_max` applied through the Styler.
df_all.style.apply(highlight_max)

Unnamed: 0,A->W,A->D,W->A,W->D,D->A,D->W,mean
"FT (source only, w/o aug, VGG16)",58.65 ±1.68,66.12 ±1.98,45.66 ±1.14,98.71 ±0.49,45.96 ±2.78,91.56 ±1.37,67.7758
"FT (source only, w. aug, VGG16)",59.52 ±1.31,67.16 ±1.62,45.30 ±2.33,98.66 ±0.77,46.94 ±1.68,94.33 ±1.31,68.6523
"FT (source and target, VGG16)",61.45 ±1.87,71.57 ±2.74,52.38 ±1.51,99.17 ±0.62,52.61 ±2.53,95.05 ±2.36,72.0369
"Multitask, VGG16",82.79 ±2.18,80.12 ±3.58,63.62 ±1.67,96.02 ±1.81,63.54 ±2.04,93.96 ±0.80,80.0078
"CCSA (even, VGG16)",84.12 ±1.93,82.13 ±3.74,61.11 ±2.25,92.81 ±2.51,62.38 ±2.73,93.71 ±1.36,79.378
"CCSA (uneven, VGG16)",84.71 ±1.43,82.60 ±2.86,62.24 ±3.07,93.79 ±2.25,61.60 ±3.13,93.24 ±2.46,79.6966
"d-SNE (even, VGG16)",83.60 ±2.93,81.46 ±1.80,62.48 ±2.37,93.67 ±2.51,63.60 ±1.22,93.73 ±1.36,79.756
"d-SNE (uneven, VGG16)",81.33 ±2.28,82.18 ±4.72,63.37 ±1.64,87.88 ±4.71,62.79 ±1.01,91.06 ±1.48,78.101
"d-SNE (large, uneven, VGG16)",82.01 ±1.75,80.86 ±3.55,64.48 ±1.09,89.69 ±3.57,64.00 ±1.29,90.10 ±2.35,78.5208
"CCSA (uneven, ResNet101_v2)",88.52 ±1.16,87.69 ±2.36,68.78 ±2.00,94.85 ±1.43,67.16 ±3.65,93.64 ±0.67,83.441


# Utils

In [41]:
# String paths of the `tune_target` reports inside the given timestamp window.
list(map(str, load_cls_rep_paths(
    suffix='tune_target',
    from_date='20191023072318',
    to_date='20191023120138',
)))

['/Users/lh/Projects/domain-adaptation/runs/20191023072318_A_D_tune_target/report.json',
 '/Users/lh/Projects/domain-adaptation/runs/20191023074355_D_W_tune_target/report.json',
 '/Users/lh/Projects/domain-adaptation/runs/20191023075250_W_A_tune_target/report.json',
 '/Users/lh/Projects/domain-adaptation/runs/20191023075252_W_D_tune_target/report.json',
 '/Users/lh/Projects/domain-adaptation/runs/20191023075407_A_D_tune_target/report.json',
 '/Users/lh/Projects/domain-adaptation/runs/20191023080731_A_W_tune_target/report.json',
 '/Users/lh/Projects/domain-adaptation/runs/20191023082327_D_A_tune_target/report.json',
 '/Users/lh/Projects/domain-adaptation/runs/20191023082714_D_W_tune_target/report.json',
 '/Users/lh/Projects/domain-adaptation/runs/20191023091519_A_W_tune_target/report.json',
 '/Users/lh/Projects/domain-adaptation/runs/20191023092508_D_A_tune_target/report.json',
 '/Users/lh/Projects/domain-adaptation/runs/20191023093053_D_W_tune_target/report.json',
 '/Users/lh/Projects/