# Test evaluations

In [112]:
from os.path import realpath
from pathlib import Path
import pandas as pd
import itertools
# Default location of the experiment runs: the `runs` folder located in the
# parent of the current working directory (resolved through `realpath`).
RUNS_DIR = Path(realpath('.')).parent.joinpath('runs')

In [144]:
def load_cls_rep_paths(
    suffix:str, 
    runs_dir:Path=RUNS_DIR, 
    from_date:str='19700101000000', 
    to_date:str='30001010000000'
):
    """Collect the `report.json` paths of the runs matching a suffix and date window.

    A directory directly below `runs_dir` is selected when its name ends with
    `suffix` and the part of its name before the first underscore parses as an
    integer lying strictly between `from_date` and `to_date`.

    Args:
        suffix: Required ending of the run directory name.
        runs_dir: Directory whose immediate sub-directories are scanned.
        from_date: Exclusive lower bound, a 14-character integer string
            (presumably a `YYYYmmddHHMMSS` timestamp -- confirm against the
            code that creates the run directories).
        to_date: Exclusive upper bound, same format as `from_date`.

    Returns:
        Sorted list of `<run directory>/report.json` paths. The existence of
        the report files themselves is not checked.

    Raises:
        AssertionError: If `from_date` or `to_date` is not 14 characters long.
        ValueError: If `from_date` or `to_date` is not an integer string.
    """
    assert len(from_date) == 14 and len(to_date) == 14, (
        'from_date and to_date must be 14 characters long'
    )
    # Parse the bounds once instead of once per candidate directory.
    lower, upper = int(from_date), int(to_date)

    report_paths = []
    for item in runs_dir.glob('*'):
        if not (item.is_dir() and item.name.endswith(suffix)):
            continue
        try:
            stamp = int(item.name.split('_')[0])
        except ValueError:
            # The directory matches the suffix but carries no integer prefix
            # (e.g. a hand-made folder); skip it instead of aborting the listing.
            continue
        # Both bounds are exclusive.
        if lower < stamp < upper:
            report_paths.append(item / 'report.json')
    return sorted(report_paths)

def get_score(
    suffix:str, 
    runs_dir:Path=RUNS_DIR, 
    metric:str='precision',
    avg_types=['macro avg', 'weighted avg'],
    from_date:str='19700101000000', 
    to_date:str='30001010000000',
    map_col_name= lambda n: n
):
    """Tabulate one metric from every report of the runs matching `suffix`.

    Each report found by `load_cls_rep_paths` is read with `pd.read_json` and
    contributes one row; each entry of `avg_types` contributes one column
    holding `report[avg_type][metric]`.

    Args:
        suffix: Required ending of the run directory name.
        runs_dir: Directory containing the run directories.
        metric: Row label looked up inside each averaging column of a report.
        avg_types: Column labels of the report to extract. The default list is
            only read, never mutated.
        from_date: Exclusive lower bound forwarded to `load_cls_rep_paths`.
        to_date: Exclusive upper bound forwarded to `load_cls_rep_paths`.
        map_col_name: Callable turning an averaging type into the name of the
            resulting column; the identity by default.

    Returns:
        A `pd.DataFrame` with one row per report and one column per averaging
        type. It has zero rows when no run matches.
    """
    # Use the caller-supplied `runs_dir`; the global RUNS_DIR was passed here
    # before, which silently ignored the parameter.
    report_paths = load_cls_rep_paths(suffix, runs_dir, from_date, to_date)
    # NOTE(review): the reports look like scikit-learn classification reports
    # dumped as JSON -- confirm against the training script.
    reports = [pd.read_json(p) for p in report_paths]
    score = pd.DataFrame(
        [[r[avgt][metric] for avgt in avg_types] for r in reports],
        columns=[map_col_name(avgt) for avgt in avg_types],
    )
    return score

def get_score_combinations(
    suffix:str, 
    domains=['A','W','D'], 
    runs_dir:Path=RUNS_DIR, 
    metric:str='precision',
    avg_types=['macro avg', 'weighted avg'],
    from_date:str='19700101000000', 
    to_date:str='30001010000000',
):
    """Gather the scores of every ordered pair of distinct domains.

    For each pair `(source, target)` drawn from `domains` with
    `source != target`, `get_score` is called with the suffix
    `'<source>_<target>_<suffix>'` and its columns are named
    `'<source>-><target> <avg type>'`.

    Args:
        suffix: Common ending of the run directory names, without the
            leading domain pair.
        domains: Domain identifiers to pair up.
        runs_dir: Forwarded to `get_score`.
        metric: Forwarded to `get_score`.
        avg_types: Forwarded to `get_score`.
        from_date: Forwarded to `get_score`.
        to_date: Forwarded to `get_score`.

    Returns:
        The per-pair frames stacked with `pd.concat(..., sort=False)`: rows
        are concatenated, and each pair keeps its own columns.
    """
    per_pair_scores = []
    for source, target in itertools.product(domains, repeat=2):
        if source != target:

            def label_column(avg_name):
                # Called by get_score within this iteration, so `source` and
                # `target` still refer to the current pair.
                return '{}->{} {}'.format(source, target, avg_name)

            pair_suffix = '{}_{}_{}'.format(source, target, suffix)
            per_pair_scores.append(
                get_score(
                    suffix=pair_suffix,
                    runs_dir=runs_dir,
                    metric=metric,
                    avg_types=avg_types,
                    from_date=from_date,
                    to_date=to_date,
                    map_col_name=label_column,
                )
            )
    return pd.concat(per_pair_scores, sort=False)
    

# get_score(suffix='A_D_tune_source', avg_types=['macro avg']).describe()

# reports = [ pd.read_json(p) for p in load_cls_rep_paths(suffix='A_D_tune_source') ]
# reports[0]

## Tune source only
In this experiment, we fine-tune a VGG16 network pretrained on ImageNet on all available source data.
The target data is used for validation (during training) and for testing.

In [151]:
# Summary statistics of the macro-averaged default metric (precision) for the
# `tune_source` runs, one column per ordered source->target domain pair.
(
    get_score_combinations(
        avg_types=['macro avg'],
        domains=['A','W','D'],
        suffix='tune_source',
    )
    .describe()
)

Unnamed: 0,A->D macro avg,D->A macro avg
count,2.0,1.0
mean,0.008309,0.008309
std,0.0,
min,0.008309,0.008309
25%,0.008309,0.008309
50%,0.008309,0.008309
75%,0.008309,0.008309
max,0.008309,0.008309


## Tune source and target

In [149]:
# Summary statistics of the macro-averaged default metric (precision) for the
# `tune_both` runs, one column per ordered source->target domain pair.
(
    get_score_combinations(
        avg_types=['macro avg'],
        domains=['A','W','D'],
        suffix='tune_both',
    )
    .describe()
)

Unnamed: 0,A->W macro avg,A->D macro avg,W->A macro avg,W->D macro avg,D->A macro avg,D->W macro avg
count,0.0,0.0,0.0,0.0,0.0,0.0
unique,0.0,0.0,0.0,0.0,0.0,0.0
top,,,,,,
freq,,,,,,
