In [51]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd 

In [52]:
def parse_accs(fname):
    """Extract the final end-model test accuracy from a result log.

    Scans every line of ``fname`` for the marker text
    ``'end model (LeNet) test acc: '`` and converts the last
    whitespace-separated token of each matching line to ``float``.
    When several lines match, the value from the last one is kept.

    Parameters
    ----------
    fname : str
        Path of the log file to read.

    Returns
    -------
    float or int
        The parsed accuracy, or the sentinel ``-1`` if no line in the
        file contains the marker.
    """
    marker = 'end model (LeNet) test acc: '
    final_accuracy = -1  # sentinel: marker never seen
    with open(fname, 'r') as log_file:
        for log_line in log_file:
            if marker not in log_line:
                continue
            # Later matches overwrite earlier ones, so the last one wins.
            final_accuracy = float(log_line.split()[-1])
    return final_accuracy

In [53]:
# Root of the result tree and the labeling-function class string that is
# held fixed in every path built below.
resdir = 'results/snubasweep'
lf_class_options = 'DecisionTreeClassifier,LogisticRegression'

# Values taken by each swept setting; the nesting order below determines
# the order of entries in `fnames` / `accs`.
embedding_choices = ['raw', 'pca', 'resnet18', 'vae']
em_hard_label_choices = ['True', 'False']
snuba_cardinality_choices = [1, 2]
n_labeled_point_choices = [100, 500, 1000, 5000]

fnames = []
accs = []

for embedding in embedding_choices:
    for em_hard_labels in em_hard_label_choices:
        for snuba_cardinality in snuba_cardinality_choices:
            for n_labeled_points in n_labeled_point_choices:
                # One directory level per setting, encoded as `<name>_<value>`.
                fname = '/'.join([
                    f'{resdir}',
                    f'embedding_{embedding}',
                    f'lf_class_options_{lf_class_options}',
                    f'em_hard_labels_{em_hard_labels}',
                    f'snuba_cardinality_{snuba_cardinality}',
                    f'n_labeled_points_{n_labeled_points}',
                    'res.log',
                ])
                acc = parse_accs(fname)
                fnames.append(fname)
                accs.append(acc)

# Convert to arrays so later cells can index both with the same argsort result.
fnames = np.array(fnames)
accs = np.array(accs)

In [54]:
# Log paths of the 20 entries with the largest parsed accuracy, best first.
descending_order = np.argsort(accs)[::-1]
fnames[descending_order[:20]]

array(['results/snubasweep/embedding_vae/lf_class_options_DecisionTreeClassifier,LogisticRegression/em_hard_labels_False/snuba_cardinality_2/n_labeled_points_100/res.log',
       'results/snubasweep/embedding_vae/lf_class_options_DecisionTreeClassifier,LogisticRegression/em_hard_labels_False/snuba_cardinality_2/n_labeled_points_500/res.log',
       'results/snubasweep/embedding_vae/lf_class_options_DecisionTreeClassifier,LogisticRegression/em_hard_labels_True/snuba_cardinality_2/n_labeled_points_100/res.log',
       'results/snubasweep/embedding_pca/lf_class_options_DecisionTreeClassifier,LogisticRegression/em_hard_labels_True/snuba_cardinality_2/n_labeled_points_100/res.log',
       'results/snubasweep/embedding_pca/lf_class_options_DecisionTreeClassifier,LogisticRegression/em_hard_labels_True/snuba_cardinality_2/n_labeled_points_500/res.log',
       'results/snubasweep/embedding_pca/lf_class_options_DecisionTreeClassifier,LogisticRegression/em_hard_labels_True/snuba_cardinality_2/n_l

In [55]:
# The 25 largest parsed accuracies in descending order. A value of -1 is the
# sentinel returned by parse_accs when a log has no final-accuracy line.
np.sort(accs)[::-1][:25]

array([ 0.4924,  0.4395,  0.4366,  0.4112,  0.3965,  0.3912,  0.3893,
        0.3878,  0.3835,  0.3712,  0.3637,  0.2983,  0.298 ,  0.2821,
        0.2249,  0.2106,  0.2097,  0.2085,  0.2083,  0.2069,  0.1425,
       -1.    , -1.    , -1.    , -1.    ])

In [57]:
# Build one record per log path: every directory level between the two
# leading path components and the trailing file name is `<key>_<value>`,
# split on its last underscore. Values stay as strings.
fmeta = []
for log_path, accuracy in zip(fnames, accs):
    record = {}
    for segment in log_path.split('/')[2:-1]:
        pieces = segment.rsplit('_', 1)
        record[pieces[0]] = pieces[1]
    record['acc'] = accuracy
    fmeta.append(record)

df = pd.DataFrame(fmeta)

In [63]:
# The 21 rows with the highest accuracy, best first.
df.sort_values('acc', ascending=False).head(21)

Unnamed: 0,embedding,lf_class_options,em_hard_labels,snuba_cardinality,n_labeled_points,acc
60,vae,"DecisionTreeClassifier,LogisticRegression",False,2,100,0.4924
61,vae,"DecisionTreeClassifier,LogisticRegression",False,2,500,0.4395
52,vae,"DecisionTreeClassifier,LogisticRegression",True,2,100,0.4366
20,pca,"DecisionTreeClassifier,LogisticRegression",True,2,100,0.4112
21,pca,"DecisionTreeClassifier,LogisticRegression",True,2,500,0.3965
22,pca,"DecisionTreeClassifier,LogisticRegression",True,2,1000,0.3912
10,raw,"DecisionTreeClassifier,LogisticRegression",False,1,1000,0.3893
30,pca,"DecisionTreeClassifier,LogisticRegression",False,2,1000,0.3878
2,raw,"DecisionTreeClassifier,LogisticRegression",True,1,1000,0.3835
50,vae,"DecisionTreeClassifier,LogisticRegression",True,1,1000,0.3712
