In [1]:
import sys
import os
import json
import pandas as pd

# Root directory handed to gather_data() below, which walks it for run outputs.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
output_directory = "/dfs/scratch0/maxlam/mmtl_share/feb_16_output"

In [2]:
# Module-level name `nan`: float reprs print NaN as the bare token `nan`, so
# text evaluated with eval() against this module's globals can resolve it.
nan = float("nan")

def get_filepath_with_substring(filenames, substring_match):
    """Return the first entry of ``filenames`` that contains ``substring_match``.

    Entries are tested in iteration order. ``None`` is returned when no entry
    contains the substring (including when ``filenames`` is empty).
    """
    matching = (candidate for candidate in filenames if substring_match in candidate)
    return next(matching, None)

def extract_text(filepath):
    """Read the file at ``filepath`` in text mode and return its whole contents."""
    with open(filepath, "r") as handle:
        contents = handle.read()
    return contents
    
def extract_scores_from_stdout(stdout):
    """Parse the scores dict from the second-to-last line of a run's stdout.

    The line is evaluated as a Python expression (it is expected to be a dict
    literal, possibly containing the bare float tokens ``nan`` / ``inf``).

    Args:
        stdout: Full captured stdout text of a run.

    Returns:
        The parsed dict, or ``None`` when there are fewer than two lines, the
        line cannot be evaluated, or it does not evaluate to a dict.
    """
    lines = stdout.splitlines()
    if len(lines) < 2:
        return None

    # NOTE(security): eval() on log content executes whatever that line holds.
    # Builtins are removed and only the float tokens are exposed, which blocks
    # the obvious abuses but is NOT a sandbox -- only use on trusted logs.
    allowed_names = {"nan": float("nan"), "inf": float("inf")}
    try:
        scores = eval(lines[-2], {"__builtins__": {}}, allowed_names)
    except Exception:
        # Best effort: an unparsable line means the run produced no scores.
        # (Exception, not a bare except, so Ctrl-C / SystemExit still propagate.)
        return None

    # Callers iterate the result with .items(); anything else is not a score dict.
    return scores if isinstance(scores, dict) else None

def gather_data(working_dir):
    """Walk ``working_dir`` and collect the artifacts of every run directory.

    A directory counts as a run when it directly contains a file whose name
    includes "stderr", one whose name includes "stdout" and one whose name
    includes "config". Directories missing any of the three are skipped.

    Args:
        working_dir: Root directory to walk recursively.

    Returns:
        A list with one dict per run directory, holding the keys "stderr" and
        "stdout" (file contents), "config" (the JSON-decoded config file),
        "config_path" (path of that file) and "scores" (result of
        ``extract_scores_from_stdout`` on the stdout text).

    Raises:
        OSError: if one of the matched files cannot be read.
        ValueError: if the config file is not valid JSON.
    """
    data = []
    for dirname, _subdirs, filenames in os.walk(working_dir):
        # Match against bare file names. Matching against the joined path would
        # let a directory component (e.g. ".../config_sweep/") satisfy the
        # search for every file underneath it.
        stderr_name = get_filepath_with_substring(filenames, "stderr")
        stdout_name = get_filepath_with_substring(filenames, "stdout")
        config_name = get_filepath_with_substring(filenames, "config")
        if stderr_name is None or stdout_name is None or config_name is None:
            continue

        print("Processing: %s" % dirname)
        stderr_filepath = os.path.join(dirname, stderr_name)
        stdout_filepath = os.path.join(dirname, stdout_name)
        config_filepath = os.path.join(dirname, config_name)

        stdout_text = extract_text(stdout_filepath)
        stderr_text = extract_text(stderr_filepath)
        data.append({
            "stderr" : stderr_text,
            "stdout" : stdout_text,
            "config" : json.loads(extract_text(config_filepath)),
            "config_path" : config_filepath,
            "scores" : extract_scores_from_stdout(stdout_text)
        })
    return data
  
# Collect one record per run directory found under output_directory.
data = gather_data(output_directory)

Processing: /dfs/scratch0/maxlam/mmtl_share/feb_16_output/1
Processing: /dfs/scratch0/maxlam/mmtl_share/feb_16_output/6
Processing: /dfs/scratch0/maxlam/mmtl_share/feb_16_output/2
Processing: /dfs/scratch0/maxlam/mmtl_share/feb_16_output/8
Processing: /dfs/scratch0/maxlam/mmtl_share/feb_16_output/5
Processing: /dfs/scratch0/maxlam/mmtl_share/feb_16_output/7
Processing: /dfs/scratch0/maxlam/mmtl_share/feb_16_output/0
Processing: /dfs/scratch0/maxlam/mmtl_share/feb_16_output/4
Processing: /dfs/scratch0/maxlam/mmtl_share/feb_16_output/9
Processing: /dfs/scratch0/maxlam/mmtl_share/feb_16_output/3


In [9]:
def extract_keys(score):
    """Flatten a two-level score dict into a list of (outer_key, inner_key) pairs.

    Pairs appear in the iteration order of the outer dict, then of each inner
    dict.
    """
    return [
        (outer_key, inner_key)
        for outer_key, inner in score.items()
        for inner_key, _ in inner.items()
    ]

def extract_score(key, data_element):
    """Look up one score in a two-level score dict.

    Args:
        key: ``(high_level_key, inner_key)`` pair, as produced by
            ``extract_keys``.
        data_element: Two-level score dict of a run, or ``None`` when the run
            has no scores.

    Returns:
        ``data_element[high_level_key][inner_key]``, or ``None`` when
        ``data_element`` is ``None`` or does not contain the requested entry.
    """
    if data_element is None:
        return None
    high_level_key, inner_key = key
    try:
        return data_element[high_level_key][inner_key]
    except KeyError:
        # The key list is derived from a different run; a run that lacks this
        # metric contributes a missing value instead of aborting aggregation.
        return None

def extract_extra_keys(data_dict):
    """Return the names of the config entries whose value is exactly a ``float``.

    Only ``data_dict["config"]`` is inspected; ints, bools, strings and other
    value types are left out.
    """
    float_keys = []
    for name, value in data_dict["config"].items():
        if type(value) is float:
            float_keys.append(name)
    return float_keys

def extract_extra_value(k, data_dict):
    """Return the config value stored under ``k`` in ``data_dict["config"]``.

    Asserts that ``k`` is present in the config before reading it.
    """
    config = data_dict["config"]
    assert k in config
    value = config[k]
    return value
    
def aggregate_data(data):
    """Turn a list of run records into column lists suitable for a DataFrame.

    Columns are the (outer_key, inner_key) score pairs of the first run that
    has scores, followed by the float-valued config keys of the first run.
    Every run contributes one value per column; runs without scores
    contribute ``None`` for every score column.

    Args:
        data: List of run records, each with a "scores" entry (two-level dict
            or ``None``) and a "config" dict.

    Returns:
        Dict mapping each column key to a list with one entry per run, or an
        empty dict when ``data`` is empty.
    """
    if not data:
        return {}

    # Take the metric names from the first run that actually produced scores.
    # Records arrive in directory-walk order, so the first one may be a failed
    # run whose "scores" is None.
    reference_scores = next(
        (d["scores"] for d in data if d["scores"] is not None), None
    )
    keys = extract_keys(reference_scores) if reference_scores is not None else []
    extra_keys = extract_extra_keys(data[0])

    aggregated_data = {k: [] for k in keys + extra_keys}
    for d in data:
        for k in keys:
            aggregated_data[k].append(extract_score(k, d["scores"]))
        for k in extra_keys:
            aggregated_data[k].append(extract_extra_value(k, d))
    return aggregated_data
        
# Display the aggregated columns as a table: one row per run record.
pd.DataFrame.from_dict(aggregate_data(data))

Unnamed: 0,"(COLA, COLA/train/accuracy)","(COLA, COLA/train/matthews_corr)","(COLA, COLA/valid/accuracy)","(COLA, COLA/valid/matthews_corr)","(COLA, COLA/test/accuracy)","(COLA, COLA/test/matthews_corr)","(SST2, SST2/train/accuracy)","(SST2, SST2/valid/accuracy)","(SST2, SST2/test/accuracy)","(MNLI, MNLI/train/accuracy)",...,"(STSB, STSB/test/pearson_spearman)","(QNLI, QNLI/train/accuracy)","(QNLI, QNLI/valid/accuracy)","(QNLI, QNLI/test/accuracy)",log_every,score_every,lr,l2,split_prop,warmup_steps
0,0.499854,0.002202,0.487434,-0.02358,0.503356,0.006073,0.498692,0.500074,0.501147,0.334001,...,,0.497995,0.501122,0.505583,0.25,0.25,0.0003,0.923528,0.8,0.5
1,0.706725,0.0,0.694915,0.0,0.691275,0.0,0.55723,0.560208,0.509174,0.3331,...,0.005751,0.499248,0.502745,0.5054,0.25,0.25,3.5e-05,0.008235,0.8,0.5
2,0.706725,0.0,0.694915,0.0,0.691275,0.0,0.77906,0.767409,0.736239,0.623276,...,0.010969,0.735244,0.731013,0.749771,0.25,0.25,5.3e-05,0.001804,0.8,0.5
3,,,,,,,,,,,...,,,,,0.25,0.25,0.009309,0.007896,0.8,0.5
4,,,,,,,,,,,...,,,,,0.25,0.25,1.7e-05,0.000106,0.8,0.5
5,0.708772,0.066222,0.696084,0.05163,0.694151,0.080368,0.9175,0.912324,0.899083,0.789474,...,0.117864,0.860742,0.851306,0.866191,0.25,0.25,2e-06,4.4e-05,0.8,0.5
6,0.706725,0.0,0.694915,0.0,0.691275,0.0,0.55723,0.560208,0.509174,0.33353,...,0.083576,0.500095,0.496539,0.494783,0.25,0.25,1.7e-05,0.010732,0.8,0.5
7,0.500731,0.004128,0.500292,0.004346,0.503356,0.006073,0.50374,0.494581,0.501147,0.332482,...,,0.499737,0.50179,0.505583,0.25,0.25,0.007581,0.373688,0.8,0.5
8,0.706725,0.0,0.694915,0.0,0.691275,0.0,0.55723,0.560208,0.509174,0.33353,...,,0.500752,0.497255,0.4946,0.25,0.25,0.000662,0.026825,0.8,0.5
9,0.705848,0.041885,0.691993,-0.013086,0.686481,-0.008045,0.773158,0.753972,0.693807,0.466633,...,0.039714,0.597752,0.569621,0.583196,0.25,0.25,0.000334,0.001638,0.8,0.5
