In [1]:
import os
# Move the working directory one level up before anything else runs.
# NOTE(review): presumably this makes the relative "histories/..." paths used by the
# cells below resolve — confirm. The call is not idempotent: every re-run of this cell
# climbs one more directory, so run it exactly once per kernel.
os.chdir("..")
!pwd

/data/home/lanyanyan/xiongruibin/opendebias/examples


In [2]:
import pandas as pd 
from pandas import DataFrame
import numpy as np 
import pathlib
from pathlib import Path
import json
import os
import re
from copy import deepcopy
# Render floats with two decimal places whenever pandas displays them.
pd.options.display.float_format = '{:.2f}'.format
def load_one_file_metrics(file, arg, seed):
    """Load one JSON metrics file into a single-row DataFrame.

    Args:
        file: Path of a JSON file whose top level is an object mapping metric
            names to values.
        arg: Method description dict; only ``arg['name']`` is read.
        seed: Seed identifier stored in the ``seed`` column.

    Returns:
        A one-row ``pd.DataFrame`` with one column per metric, followed by the
        ``name`` and ``seed`` columns.
    """
    with open(file) as f:
        metrics = json.load(f)
    # Every column is a one-element list so the frame always has exactly one
    # row; with scalar 'name'/'seed' an empty metrics file made pandas raise
    # "If using all scalar values, you must pass an index".
    row = {key: [value] for key, value in metrics.items()}
    row['name'] = [arg['name']]
    row['seed'] = [seed]
    return pd.DataFrame(row)
        

def _seed_from_folder_name(folder_name):
    """Return the '-'-separated token that follows a 'seed' token, else the last four characters."""
    tokens = folder_name.split('-')
    lowered = [token.lower() for token in tokens]
    if 'seed' in lowered:
        seed_pos = lowered.index('seed') + 1
        if seed_pos < len(tokens):
            return tokens[seed_pos]
    return folder_name[-4:]


def collect_one_method(glob_pattern, folder, seeds, file_names, arg, must_include_files=None, verbose=True):
    """Collect the metrics of every run folder of one method into one DataFrame.

    Each folder under ``folder`` that matches ``glob_pattern`` is treated as one
    run (one seed). For every accepted run the requested metric files are loaded
    with ``load_one_file_metrics`` and merged on ``['name', 'seed']`` into a
    single row.

    Args:
        glob_pattern: Glob pattern, relative to ``folder``, selecting run folders.
        folder: Directory that contains the run folders.
        seeds: Container of accepted seed strings; other runs are skipped.
        file_names: Metric file names relative to a run folder. A name that
            contains ``'{}'`` is formatted with the ``best_epoch`` value read
            from the run's ``metrics.json``.
        arg: Method description dict; ``arg['name']`` labels the rows.
        must_include_files: Optional file names that must all exist in a run
            folder for the run to be considered.
        verbose: When true, print skipped seeds and missing files.

    Returns:
        A ``pd.DataFrame`` with one row per accepted run (the per-run index is
        kept as is), or an empty ``pd.DataFrame`` when no run was collected.
    """
    path = pathlib.Path(folder)
    if not os.path.exists(str(path)):
        print('Do Not Exist {}'.format(str(path)))
    file_names_template = file_names
    best_epoch_records = {}
    per_seed_frames = []
    for folder_p in path.glob(glob_pattern):  # every folder is one run of the method with its own seed
        file_names = deepcopy(file_names_template)

        # Skip runs that lack any of the mandatory files.
        if must_include_files is not None and not all(
                os.path.exists(str(folder_p / mif)) for mif in must_include_files):
            continue

        cur_seed = _seed_from_folder_name(folder_p.name)
        if cur_seed not in seeds:
            if verbose:
                print('in {}, skip {}'.format(folder_p, cur_seed))
            continue

        # Fill the best epoch into templated file names.
        if any('{}' in file for file in file_names):
            best_metrics_path = folder_p / 'metrics.json'
            if not os.path.exists(str(best_metrics_path)):
                print('Not Exists ', best_metrics_path)
                continue
            best_metrics = load_one_file_metrics(best_metrics_path, arg, cur_seed)
            best_epoch = best_metrics['best_epoch'].iloc[0]
            file_names = [file.format(best_epoch) if '{}' in file else file for file in file_names]
            best_epoch_records[cur_seed] = best_epoch

        cur_seed_df = None
        for file_path in file_names:
            fp = folder_p / file_path
            if not os.path.exists(str(fp)):
                if verbose:
                    print('Missing', str(fp))
                continue
            metrics = load_one_file_metrics(fp, arg, cur_seed)
            if cur_seed_df is None:
                cur_seed_df = metrics
            else:
                cur_seed_df = cur_seed_df.merge(metrics, on=['name', 'seed'], validate='one_to_one')
        if cur_seed_df is None:
            # None of the requested files exist for this run; nothing to report.
            if verbose:
                print('No metric files found in', str(folder_p))
            continue

        # merge() only produces the suffixed epoch_x/epoch_y pair when two files
        # both carry an 'epoch' column. With a single file the plain 'epoch'
        # column (if any) is already in place, so the rename must be conditional.
        if 'epoch_x' in cur_seed_df.columns:
            cur_seed_df['epoch'] = cur_seed_df['epoch_x']
            suffixed = [c for c in ('epoch_x', 'epoch_y') if c in cur_seed_df.columns]
            cur_seed_df = cur_seed_df.drop(columns=suffixed)
        per_seed_frames.append(cur_seed_df)

    print("{} best epochs: {}".format(arg['name'], ", ".join('{}: {}'.format(s, e) for s, e in sorted(best_epoch_records.items(), key=lambda x:x[0]))))
    if not per_seed_frames:
        return pd.DataFrame()
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    return pd.concat(per_seed_frames, sort=False)

def summary(args, filter_patterns=None, string=True, verbose=True):
    """Collect the metrics of several methods into one DataFrame.

    Args:
        args: List of method description dicts with the keys ``name``,
            ``glob_pattern``, ``seeds``, ``file_paths``, ``folder`` and,
            optionally, ``must_include_files``. Each dict is forwarded to
            ``collect_one_method``.
        filter_patterns: Optional list of regular expressions. A column is
            dropped when ``re.match`` (anchored at the start of the column
            name) succeeds for any pattern. ``None`` keeps every column.
        string: Unused; kept so existing callers keep working.
        verbose: Forwarded to ``collect_one_method``.

    Returns:
        A ``pd.DataFrame`` with ``name`` as the first column, every column whose
        name contains ``accuracy``, ``f1`` or ``ece`` multiplied by 100, and the
        substring ``-main`` removed from column names.

    Raises:
        ValueError: If no metrics were collected for any method.
    """
    patterns = [] if filter_patterns is None else list(filter_patterns)

    method_frames = []
    for arg in args:
        cur_method_df = collect_one_method(arg['glob_pattern'], arg['folder'], arg['seeds'],
                                           arg['file_paths'], arg,
                                           arg.get('must_include_files', None), verbose)
        if not cur_method_df.empty:
            method_frames.append(cur_method_df)
    if not method_frames:
        # Same exception type the old code raised via list.index('name'),
        # but with a message that names the actual problem.
        raise ValueError('no metrics were collected; check folders, glob patterns and seeds')
    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    df = pd.concat(method_frames, sort=False)

    kept_columns = [column for column in df.columns
                    if not any(re.match(pattern, column) for pattern in patterns)]
    # 'name' is the grouping key downstream, so it always leads and is never filtered out.
    new_columns = ['name'] + [column for column in kept_columns if column != 'name']
    # Copy so the scaling below writes to an independent frame, not a slice.
    df = df[new_columns].copy()

    for column in new_columns:
        if 'accuracy' in column or 'f1' in column or 'ece' in column:
            df[column] = df[column] * 100

    mapper = {column: column.replace('-main', '') for column in new_columns if '-main' in column}
    return df.rename(columns=mapper)


def statistical(df, std=True):
    """Aggregate per-run metrics into per-method mean (and std) rows.

    Args:
        df: DataFrame with a string ``name`` column identifying the method and
            any number of metric columns. Only the first of several columns
            sharing a name is used; only numeric columns are aggregated.
        std: When true, each ``<name>_mean`` row is followed by a
            ``<name>_std`` row holding the sample standard deviation.

    Returns:
        A ``pd.DataFrame`` with a fresh 0..n-1 index and ``name`` as the first
        column, followed by the aggregated metric columns. Methods appear in
        sorted ``name`` order.
    """
    output_std = std
    # Keep only the first of any duplicated column names.
    df = df.loc[:, ~df.columns.duplicated()]
    if df.empty:
        return pd.DataFrame(columns=['name'])

    # Select numeric columns explicitly: recent pandas raises TypeError when
    # mean()/std() meet a string column such as 'seed' instead of dropping it.
    metric_columns = [column for column in df.select_dtypes(include='number').columns
                      if column != 'name']
    grouped = df[['name'] + metric_columns].groupby('name')
    mean = grouped.mean()
    spread = grouped.std() if output_std else None

    # Build all rows first and create the frame once (DataFrame.append is
    # removed in pandas 2.0 and was quadratic anyway).
    records = []
    for index, row in mean.iterrows():
        mean_record = {'name': index + "_mean"}
        mean_record.update(row.to_dict())
        records.append(mean_record)
        if output_std:
            std_record = {'name': index + "_std"}
            std_record.update(spread.loc[index].to_dict())
            records.append(std_record)
    return pd.DataFrame(records, columns=['name'] + metric_columns)

# all_seeds = ['13214', '28987', '3420', '37462', '54324', '54673', '78541', '98732']

## HANS

In [60]:
# IRMv1 sweep on HANS: one entry per (penalty weight, rate) combination.
# Names containing "{}" are completed with each run's best epoch by collect_one_method.
file_paths = ['test_metrics_epoch_{}.json', 'metrics_epoch_{}.json', 'eval/new-hard/eval_metrics.json']
seeds = {'37462', '13214', '28987', '54324', '54673'}
# Column-name patterns that are dropped from every summary table.
UNIVERSAL_FILTER_PATTERN = ['.*?memory.*?', '.*?train.*?', '.*?best.*?']
base_folder = "histories/multi_env/"

args = [
    {
        "name": "IRM-{}-{}".format(wa, rate),
        "folder": base_folder + "IRMv1",
        "file_paths": file_paths,
        "glob_pattern": "basic_bert_lr_5_epoch_3_lambda_{}_{}-SEED-*".format(wa, rate,),
        "seeds": seeds,
        "must_include_files": ['best.th'],
    }
    for wa in "0 1e-3 1e-2 1e-1 1".split()
    for rate in "0.3 0.6".split()
]
print(args[0])

hans_filter_patterns = ['.*?ece.*?', '.*?hard.*?',  '.*?loss.*?', '.*?max.*?', '.*?sub.*?', '.*?lex.*?', '.*?cons.*?', 'hans-accuracy']
statistical(std=True, df=summary(args, filter_patterns=hans_filter_patterns + UNIVERSAL_FILTER_PATTERN))


{'name': 'IRM-0-0.3', 'folder': 'histories/multi_env/IRMv1', 'file_paths': ['test_metrics_epoch_{}.json', 'metrics_epoch_{}.json', 'eval/new-hard/eval_metrics.json'], 'glob_pattern': 'basic_bert_lr_5_epoch_3_lambda_0_0.3-SEED-*', 'seeds': {'54673', '13214', '28987', '54324', '37462'}, 'must_include_files': ['best.th']}
Missing histories/multi_env/IRMv1/basic_bert_lr_5_epoch_3_lambda_0_0.3-SEED-37462/eval/new-hard/eval_metrics.json
Missing histories/multi_env/IRMv1/basic_bert_lr_5_epoch_3_lambda_0_0.3-SEED-54324/eval/new-hard/eval_metrics.json
Missing histories/multi_env/IRMv1/basic_bert_lr_5_epoch_3_lambda_0_0.3-SEED-13214/eval/new-hard/eval_metrics.json
Missing histories/multi_env/IRMv1/basic_bert_lr_5_epoch_3_lambda_0_0.3-SEED-28987/eval/new-hard/eval_metrics.json
Missing histories/multi_env/IRMv1/basic_bert_lr_5_epoch_3_lambda_0_0.3-SEED-54673/eval/new-hard/eval_metrics.json
IRM-0-0.3 best epochs: 13214: 1, 28987: 2, 37462: 1, 54324: 2, 54673: 1
Missing histories/multi_env/IRMv1/bas

Unnamed: 0,name,epoch,hans-accuracy-sum,hans-entail-accuracy-sum,hans-non-entail-accuracy-sum,hans-penalty,validation_accuracy,validation_penalty
0,IRM-0-0.3_mean,1.4,64.68,98.19,31.18,0.0,84.02,0.0
1,IRM-0-0.3_std,0.55,2.07,1.05,5.17,0.0,0.28,0.0
2,IRM-0-0.6_mean,1.4,64.68,98.19,31.18,0.0,84.02,0.0
3,IRM-0-0.6_std,0.55,2.07,1.05,5.17,0.0,0.28,0.0
4,IRM-1-0.3_mean,1.5,64.76,98.16,31.36,0.0,83.72,0.0
5,IRM-1-0.3_std,0.71,2.28,0.47,4.1,0.0,0.35,0.0
6,IRM-1-0.6_mean,1.5,65.98,97.31,34.65,0.0,83.85,0.0
7,IRM-1-0.6_std,0.71,0.85,0.87,2.57,0.0,0.12,0.0
8,IRM-1e-1-0.3_mean,1.6,66.58,98.46,34.7,0.0,83.85,0.0
9,IRM-1e-1-0.3_std,0.55,1.91,1.07,4.81,0.0,0.23,0.0


In [64]:
# cIRMv1 sweep on HANS: one entry per (penalty weight, rate) combination.
# Names containing "{}" are completed with each run's best epoch by collect_one_method.
file_paths = ['test_metrics_epoch_{}.json', 'metrics_epoch_{}.json', 'eval/new-hard/eval_metrics.json']
seeds = {'37462', '13214', '54324', '28987'}
# Column-name patterns that are dropped from every summary table.
UNIVERSAL_FILTER_PATTERN = ['.*?memory.*?', '.*?train.*?', '.*?best.*?']
base_folder = "histories/multi_env/"

args = [
    {
        "name": "cIRM-{}-{}".format(wa, rate),
        "folder": base_folder + "cIRMv1",
        "file_paths": file_paths,
        "glob_pattern": "basic_bert_lr_5_epoch_3_lambda_{}_{}-SEED-*".format(wa, rate,),
        "seeds": seeds,
        "must_include_files": ['best.th'],
    }
    for wa in "1e-3 1e-2 1e-1 1".split()
    for rate in "0.3 0.6".split()
]
print(args[0])

hans_filter_patterns = ['.*?ece.*?', '.*?hard.*?',  '.*?loss.*?', '.*?max.*?', '.*?sub.*?', '.*?lex.*?', '.*?cons.*?', 'hans-accuracy']
statistical(std=True, df=summary(args, filter_patterns=hans_filter_patterns + UNIVERSAL_FILTER_PATTERN))


{'name': 'cIRM-1e-3-0.3', 'folder': 'histories/multi_env/cIRMv1', 'file_paths': ['test_metrics_epoch_{}.json', 'metrics_epoch_{}.json', 'eval/new-hard/eval_metrics.json'], 'glob_pattern': 'basic_bert_lr_5_epoch_3_lambda_1e-3_0.3-SEED-*', 'seeds': {'54324', '37462', '13214', '28987'}, 'must_include_files': ['best.th']}
Missing histories/multi_env/cIRMv1/basic_bert_lr_5_epoch_3_lambda_1e-3_0.3-SEED-13214/eval/new-hard/eval_metrics.json
Missing histories/multi_env/cIRMv1/basic_bert_lr_5_epoch_3_lambda_1e-3_0.3-SEED-28987/eval/new-hard/eval_metrics.json
Missing histories/multi_env/cIRMv1/basic_bert_lr_5_epoch_3_lambda_1e-3_0.3-SEED-54324/eval/new-hard/eval_metrics.json
Missing histories/multi_env/cIRMv1/basic_bert_lr_5_epoch_3_lambda_1e-3_0.3-SEED-37462/eval/new-hard/eval_metrics.json
cIRM-1e-3-0.3 best epochs: 13214: 1, 28987: 2, 37462: 2, 54324: 2
Missing histories/multi_env/cIRMv1/basic_bert_lr_5_epoch_3_lambda_1e-3_0.6-SEED-28987/eval/new-hard/eval_metrics.json
Missing histories/multi_

Unnamed: 0,name,epoch,hans-accuracy-sum,hans-entail-accuracy-sum,hans-non-entail-accuracy-sum,hans-penalty,validation_accuracy,validation_penalty
0,cIRM-1-0.3_mean,2.0,65.26,96.79,33.72,0.0,82.82,0.0
1,cIRM-1-0.3_std,0.0,1.62,0.7,3.04,0.0,0.2,0.0
2,cIRM-1-0.6_mean,2.0,64.57,96.82,32.31,0.0,83.11,0.0
3,cIRM-1-0.6_std,0.0,3.55,1.15,7.42,0.0,0.67,0.0
4,cIRM-1e-1-0.3_mean,1.75,66.64,98.55,34.72,0.0,83.83,0.0
5,cIRM-1e-1-0.3_std,0.5,1.66,0.77,3.5,0.0,0.22,0.0
6,cIRM-1e-1-0.6_mean,1.5,65.03,98.77,31.28,0.0,84.08,0.0
7,cIRM-1e-1-0.6_std,0.58,0.9,0.55,2.27,0.0,0.16,0.0
8,cIRM-1e-2-0.3_mean,1.5,64.57,98.94,30.19,0.0,84.01,0.0
9,cIRM-1e-2-0.3_std,0.58,2.03,0.59,4.54,0.0,0.25,0.0


In [3]:
# pgi sweep on HANS: one entry per (penalty weight, rate) combination.
# Names containing "{}" are completed with each run's best epoch by collect_one_method.
file_paths = ['test_metrics_epoch_{}.json', 'metrics_epoch_{}.json', 'eval/new-hard/eval_metrics.json']
seeds = {'37462', '13214', '54324', '28987'}
# Column-name patterns that are dropped from every summary table.
UNIVERSAL_FILTER_PATTERN = ['.*?memory.*?', '.*?train.*?', '.*?best.*?']
base_folder = "histories/multi_env/"

# `r` selects the run folders (and labels) carrying the "-r" marker; only the
# plain variant is collected here.
args = [
    {
        "name": ("pgi-{}-{}-r" if r else "pgi-{}-{}").format(wa, rate),
        "folder": base_folder + "pgi",
        "file_paths": file_paths,
        "glob_pattern": (
            "basic_bert_lr_5_epoch_3_lambda_{}_{}-r-SEED-*" if r
            else "basic_bert_lr_5_epoch_3_lambda_{}_{}-SEED-*"
        ).format(wa, rate,),
        "seeds": seeds,
        "must_include_files": ['best.th'],
    }
    for r in (False,)
    for wa in "1e-3 1e-2 1e-1 1".split()
    for rate in "0.3 0.6".split()
]
print(args[0])

hans_filter_patterns = ['.*?ece.*?', '.*?hard.*?',  '.*?loss.*?', '.*?max.*?', '.*?sub.*?', '.*?lex.*?', '.*?cons.*?', 'hans-accuracy']
statistical(std=True, df=summary(args, filter_patterns=hans_filter_patterns + UNIVERSAL_FILTER_PATTERN))


{'name': 'pgi-1e-3-0.3', 'folder': 'histories/multi_env/pgi', 'file_paths': ['test_metrics_epoch_{}.json', 'metrics_epoch_{}.json', 'eval/new-hard/eval_metrics.json'], 'glob_pattern': 'basic_bert_lr_5_epoch_3_lambda_1e-3_0.3-SEED-*', 'seeds': {'28987', '37462', '54324', '13214'}, 'must_include_files': ['best.th']}
Missing histories/multi_env/pgi/basic_bert_lr_5_epoch_3_lambda_1e-3_0.3-SEED-13214/eval/new-hard/eval_metrics.json
Missing histories/multi_env/pgi/basic_bert_lr_5_epoch_3_lambda_1e-3_0.3-SEED-37462/eval/new-hard/eval_metrics.json
pgi-1e-3-0.3 best epochs: 13214: 1, 37462: 1
Missing histories/multi_env/pgi/basic_bert_lr_5_epoch_3_lambda_1e-3_0.6-SEED-13214/eval/new-hard/eval_metrics.json
Missing histories/multi_env/pgi/basic_bert_lr_5_epoch_3_lambda_1e-3_0.6-SEED-37462/eval/new-hard/eval_metrics.json
pgi-1e-3-0.6 best epochs: 13214: 1, 37462: 1
Missing histories/multi_env/pgi/basic_bert_lr_5_epoch_3_lambda_1e-2_0.3-SEED-13214/eval/new-hard/eval_metrics.json
Missing histories/m

Unnamed: 0,name,epoch,hans-accuracy-sum,hans-entail-accuracy-sum,hans-non-entail-accuracy-sum,hans-penalty,validation_accuracy,validation_penalty
0,pgi-1e-1-0.3_mean,1.5,67.5,98.97,36.04,0.0,83.92,0.0
1,pgi-1e-1-0.3_std,0.71,0.13,0.08,0.34,0.0,0.21,0.0
2,pgi-1e-1-0.6_mean,1.0,69.95,95.9,44.0,0.0,84.03,0.0
3,pgi-1e-1-0.6_std,0.0,1.15,3.63,5.93,0.0,0.02,0.0
4,pgi-1e-2-0.3_mean,1.5,65.31,98.59,32.04,0.0,83.98,0.0
5,pgi-1e-2-0.3_std,0.71,2.41,0.39,4.43,0.0,0.19,0.0
6,pgi-1e-2-0.6_mean,1.0,67.12,98.22,36.03,0.0,84.01,0.0
7,pgi-1e-2-0.6_std,0.0,2.07,0.46,3.67,0.0,0.2,0.0
8,pgi-1e-3-0.3_mean,1.0,64.58,99.18,29.97,0.0,84.0,0.0
9,pgi-1e-3-0.3_std,0.0,2.6,0.31,5.52,0.0,0.19,0.0


In [74]:
# cMMD sweep on HANS: one entry per (penalty weight, rate) combination.
# Names containing "{}" are completed with each run's best epoch by collect_one_method.
file_paths = ['test_metrics_epoch_{}.json', 'metrics_epoch_{}.json', 'eval/new-hard/eval_metrics.json']
seeds = {'37462', '13214', '54324', '28987'}
# Column-name patterns that are dropped from every summary table.
UNIVERSAL_FILTER_PATTERN = ['.*?memory.*?', '.*?train.*?', '.*?best.*?']
base_folder = "histories/multi_env/"

args = [
    {
        "name": "cMMD-{}-{}".format(wa, rate),
        "folder": base_folder + "cmmd",
        "file_paths": file_paths,
        "glob_pattern": "basic_bert_lr_5_epoch_3_lambda_{}_{}-SEED-*".format(wa, rate,),
        "seeds": seeds,
        "must_include_files": ['best.th'],
    }
    for wa in "1e-3 1e-2 1e-1 1".split()
    for rate in "0.3 0.6".split()
]
print(args[0])

hans_filter_patterns = ['.*?ece.*?', '.*?hard.*?',  '.*?loss.*?', '.*?max.*?', '.*?sub.*?', '.*?lex.*?', '.*?cons.*?', 'hans-accuracy']
statistical(std=True, df=summary(args, filter_patterns=hans_filter_patterns + UNIVERSAL_FILTER_PATTERN))


{'name': 'cMMD-1e-3-0.3', 'folder': 'histories/multi_env/cmmd', 'file_paths': ['test_metrics_epoch_{}.json', 'metrics_epoch_{}.json', 'eval/new-hard/eval_metrics.json'], 'glob_pattern': 'basic_bert_lr_5_epoch_3_lambda_1e-3_0.3-SEED-*', 'seeds': {'54324', '37462', '13214', '28987'}, 'must_include_files': ['best.th']}
Missing histories/multi_env/cmmd/basic_bert_lr_5_epoch_3_lambda_1e-3_0.3-SEED-13214/eval/new-hard/eval_metrics.json
Missing histories/multi_env/cmmd/basic_bert_lr_5_epoch_3_lambda_1e-3_0.3-SEED-37462/eval/new-hard/eval_metrics.json
cMMD-1e-3-0.3 best epochs: 13214: 1, 37462: 2
Missing histories/multi_env/cmmd/basic_bert_lr_5_epoch_3_lambda_1e-3_0.6-SEED-13214/eval/new-hard/eval_metrics.json
Missing histories/multi_env/cmmd/basic_bert_lr_5_epoch_3_lambda_1e-3_0.6-SEED-37462/eval/new-hard/eval_metrics.json
cMMD-1e-3-0.6 best epochs: 13214: 2, 37462: 2
Missing histories/multi_env/cmmd/basic_bert_lr_5_epoch_3_lambda_1e-2_0.3-SEED-13214/eval/new-hard/eval_metrics.json
Missing hi

Unnamed: 0,name,epoch,hans-accuracy-sum,hans-entail-accuracy-sum,hans-non-entail-accuracy-sum,hans-penalty,validation_accuracy,validation_penalty
0,cMMD-1-0.3_mean,1.5,64.5,98.87,30.14,0.0,83.93,0.0
1,cMMD-1-0.3_std,0.58,1.11,0.58,1.97,0.0,0.23,0.0
2,cMMD-1-0.6_mean,2.0,65.17,98.47,31.87,0.0,83.88,0.0
3,cMMD-1-0.6_std,0.0,0.67,0.45,1.71,0.0,0.19,0.0
4,cMMD-1e-1-0.3_mean,1.25,66.12,97.84,34.4,0.0,84.08,0.0
5,cMMD-1e-1-0.3_std,0.5,1.56,0.58,3.62,0.0,0.38,0.0
6,cMMD-1e-1-0.6_mean,1.75,65.59,98.3,32.89,0.0,84.0,0.0
7,cMMD-1e-1-0.6_std,0.5,0.72,0.84,1.94,0.0,0.24,0.0
8,cMMD-1e-2-0.3_mean,1.25,66.41,98.33,34.49,0.0,84.14,0.0
9,cMMD-1e-2-0.3_std,0.5,1.59,0.54,3.54,0.0,0.07,0.0


In [None]:
# GroupCL
# NOTE(review): this cell is headed "GroupCL", yet the rows are labelled "cMMD-..." and
# are read from the "pgi" folder — it looks like an unfinished copy of the cells above.
# Confirm the intended label and folder before trusting its output.
# Names containing "{}" are completed with each run's best epoch by collect_one_method.
file_paths = ['test_metrics_epoch_{}.json', 'metrics_epoch_{}.json', 'eval/new-hard/eval_metrics.json']
seeds = {'37462', '13214', '54324', '28987'}
# Column-name patterns that are dropped from every summary table.
UNIVERSAL_FILTER_PATTERN = ['.*?memory.*?', '.*?train.*?', '.*?best.*?']
base_folder = "histories/multi_env/"

args = [
    {
        "name": "cMMD-{}-{}".format(wa, rate),
        "folder": base_folder + "pgi",
        "file_paths": file_paths,
        "glob_pattern": "basic_bert_lr_5_epoch_3_lambda_{}_{}-SEED-*".format(wa, rate,),
        "seeds": seeds,
        "must_include_files": ['best.th'],
    }
    for wa in "1e-3 1e-2 1e-1 1".split()
    for rate in "0.3 0.6".split()
]
print(args[0])

hans_filter_patterns = ['.*?ece.*?', '.*?hard.*?',  '.*?loss.*?', '.*?max.*?', '.*?sub.*?', '.*?lex.*?', '.*?cons.*?', 'hans-accuracy']
statistical(std=True, df=summary(args, filter_patterns=hans_filter_patterns + UNIVERSAL_FILTER_PATTERN))
