In [3]:
import sys
# Extend the module search path with two directories outside this notebook's
# folder so their packages can be imported below (presumably `expemb` lives in
# the parent directory - confirm). Both entries are relative, so they resolve
# against the kernel's current working directory.
sys.path.append("..")
sys.path.append("../../eqnet")

In [26]:
from expemb import TestingResults
import glob
import gzip

## EqNet and EqNet-L Results

In [5]:
# One row per benchmark dataset:
#   (dataset key, LaTeX display name, EqNet score, EqNet-L score)
# The two score columns are kept as strings because they are printed verbatim
# into the results table; "-" presumably marks a dataset without a reported
# EqNet-L number (confirm against the source of these figures).
_baseline_rows = [
    ("simpleBoolean8",      "\\textsc{SimpBool8}",   "97.4", "-"),
    ("simpleBoolean10",     "\\textsc{SimpBool10}",  "99.1", "-"),
    ("boolean5",            "\\textsc{Bool5}",       "65.8", "73.7"),
    ("boolean8",            "\\textsc{Bool8}",       "58.1", "-"),
    ("boolean10",           "\\textsc{Bool10}",      "71.4", "-"),
    ("largeSimpleBoolean5", "\\textsc{SimpBoolL5}",  "85.0", "72.1"),
    ("largeBoolean5",       "\\textsc{BoolL5}",      "75.2", "-"),
    ("simplepoly5",         "\\textsc{SimpPoly5}",   "65.6", "56.3"),
    ("simplepoly8",         "\\textsc{SimpPoly8}",   "98.9", "98.0"),
    ("simplepoly10",        "\\textsc{SimpPoly10}",  "99.3", "-"),
    ("oneVarPoly10",        "\\textsc{oneV-Poly10}", "81.3", "80.0"),
    ("oneVarPoly13",        "\\textsc{oneV-Poly13}", "90.4", "-"),
    ("poly5",               "\\textsc{Poly5}",       "55.3", "-"),
    ("poly8",               "\\textsc{Poly8}",       "86.2", "87.1"),
]

# Dataset key -> LaTeX display name (insertion order follows the rows above).
datasets = {key: label for key, label, _, _ in _baseline_rows}

# Hidden sizes; not referenced by the cells visible in this part of the notebook.
all_hidden_sizes = [64, 128, 256, 512, 1024]

# Dataset key -> EqNet score, as a printable string.
eqnet_results = {key: eqnet for key, _, eqnet, _ in _baseline_rows}

# Dataset key -> EqNet-L score, as a printable string ("-" where absent).
eqnetl_results = {key: eqnetl for key, _, _, eqnetl in _baseline_rows}

In [27]:
def read_expemb_results(dataset, autoencoder, hidden = 64):
    """Load the saved test results for ``dataset`` and return its score@5.

    Args:
        dataset: Dataset key; used as a directory name under ``../models/semvec``
            and to validate the file names recorded in the loaded results.
        autoencoder: If truthy, look under ``autoencoder/h{hidden}/{dataset}``;
            otherwise under ``equivexp/{dataset}``.
        hidden: Selects the ``h{hidden}`` directory. Only used when
            ``autoencoder`` is truthy.

    Returns:
        ``results.accuracy[0]["val/score@5/max"]`` of the loaded results.

    Raises:
        AssertionError: If the glob pattern does not match exactly one file, or
            if the recorded test/full file names do not belong to ``dataset``.
    """
    if autoencoder:
        resultfilepattern = f"../models/semvec/autoencoder/h{hidden}/{dataset}/results*"
    else:
        resultfilepattern = f"../models/semvec/equivexp/{dataset}/results*"

    resultfilelist = glob.glob(resultfilepattern)
    # Exactly one results file is expected per run directory. Report the pattern
    # and the matches so a missing or duplicated run can be diagnosed directly.
    assert len(resultfilelist) == 1, (
        f"Expected exactly one results file matching {resultfilepattern!r}, "
        f"found {len(resultfilelist)}: {resultfilelist}"
    )
    resultfile = resultfilelist[0]

    results = TestingResults.load(resultfile)

    # Validate the test files: guard against results that were produced with a
    # different dataset's files.
    assert results.args.test_file.endswith(f"{dataset}-neweqtestset.json.gz"), f"Invalid test file {results.args.test_file}"
    assert results.args.full_file.endswith(f"{dataset}.json.gz"), f"Invalid full file {results.args.full_file}"

    return results.accuracy[0]["val/score@5/max"]


def get_training_set_size(dataset, autoencoder):
    """Return the number of lines in the gzipped training file of ``dataset``.

    Args:
        dataset: Dataset key; selects the file under ``../data/semvec``.
        autoencoder: If truthy, count ``{dataset}_autoenc.train.gz``; otherwise
            count ``{dataset}.train.gz``.

    Returns:
        The line count of the decompressed file as an ``int``.

    Raises:
        OSError: If the training file cannot be opened (e.g. it is missing).
    """
    if autoencoder:
        trainfile = f"../data/semvec/{dataset}_autoenc.train.gz"
    else:
        trainfile = f"../data/semvec/{dataset}.train.gz"

    with gzip.open(trainfile, "rt") as file:
        # Count while streaming rather than materialising every line in a
        # list; only the count is needed and the files can be very large.
        n_lines = sum(1 for _ in file)

    return n_lines

In [45]:
def print_main_results_table():
    """Print the main comparison table as LaTeX rows on stdout.

    One row is emitted per entry of ``datasets``: the stored EqNet and EqNet-L
    scores, followed by the score and training-set size of the autoencoder run
    and of the equivalent-expression run. Rows are ordered by the
    equivalent-expression training-set size, largest first.
    """
    def as_percent(value):
        # Scale by 100 and keep one decimal, matching the (%) column headers.
        return round(value * 100, 1)

    rows = []
    for key, label in datasets.items():
        baseline = eqnet_results[key]
        baseline_l = eqnetl_results[key]
        equiv_raw = float(read_expemb_results(key, autoencoder = False))
        auto_raw = float(read_expemb_results(key, autoencoder = True))
        equiv_size = get_training_set_size(key, autoencoder = False)
        auto_size = get_training_set_size(key, autoencoder = True)

        auto_pct = as_percent(auto_raw)
        equiv_pct = as_percent(equiv_raw)

        # Column order of the printed row; the last field is the sort key.
        rows.append((label, baseline, baseline_l, auto_pct, auto_size, equiv_pct, equiv_size))

    # Largest ExpEmb-E training set first; ties keep the order of `datasets`.
    rows.sort(key = lambda row: int(row[-1]), reverse = True)

    header_lines = (
        "\\toprule",
        "\\multirow{2}{*}{Dataset} & \\eqnet{} & \\eqnetl{} & \\multicolumn{2}{c}{\\expemba{}} & \\multicolumn{2}{c}{\\expembe{}} \\\\",
        "& $score_5(\\%)$ & $score_5(\\%)$ & $score_5(\\%)$ & Training Set Size & $score_5(\\%)$ & Training Set Size \\\\",
        "\\midrule",
    )
    for line in header_lines:
        print(line)

    for label, baseline, baseline_l, auto_pct, auto_size, equiv_pct, equiv_size in rows:
        cells = [
            f"{label}",
            f"{baseline}",
            f"{baseline_l}",
            f"{auto_pct}",
            f"{auto_size:,}",      # thousands separators for readability
            f"{equiv_pct}",
            f"{equiv_size:,}",
        ]
        print(" & ".join(cells) + " \\\\")

    print("\\bottomrule")
    
# Emit the main results table; the LaTeX rows are written to stdout.
print_main_results_table()

\toprule
\multirow{2}{*}{Dataset} & \eqnet{} & \eqnetl{} & \multicolumn{2}{c}{\expemba{}} & \multicolumn{2}{c}{\expembe{}} \\
& $score_5(\%)$ & $score_5(\%)$ & $score_5(\%)$ & Training Set Size & $score_5(\%)$ & Training Set Size \\
\midrule
\textsc{Bool8} & 58.1 & - & 30.6 & 146,488 & 100.0 & 16,143,072 \\
\textsc{oneV-Poly13} & 90.4 & - & 38.7 & 60,128 & 99.6 & 9,958,406 \\
\textsc{SimpPoly10} & 99.3 & - & 40.1 & 31,143 & 99.8 & 6,731,858 \\
\textsc{SimpBool8} & 97.4 & - & 36.9 & 21,604 & 99.4 & 4,440,450 \\
\textsc{Bool10} & 71.4 & - & 10.8 & 25,560 & 91.3 & 3,041,640 \\
\textsc{SimpBool10} & 99.1 & - & 24.4 & 13,081 & 95.5 & 1,448,804 \\
\textsc{BoolL5} & 75.2 & - & 38.3 & 23,219 & 36.0 & 552,642 \\
\textsc{Poly8} & 86.2 & 87.1 & 32.7 & 6,785 & 87.3 & 257,190 \\
\textsc{SimpPoly8} & 98.9 & 98.0 & 47.6 & 1,934 & 98.9 & 113,660 \\
\textsc{SimpBoolL5} & 85.0 & 72.1 & 55.1 & 6,009 & 71.1 & 66,876 \\
\textsc{oneV-Poly10} & 81.3 & 80.0 & 59.8 & 767 & 74.1 & 25,590 \\
\textsc{Bool5} & 65.

In [50]:
def print_expemba_all_results(hidden_sizes = (32, 64, 128)):
    """Print a LaTeX table of autoencoder scores per dataset and hidden size.

    Args:
        hidden_sizes: Hidden sizes to report, one table column each, in the
            given order. Defaults to ``(32, 64, 128)``, which reproduces the
            original three-column table.

    One row is printed per entry of ``datasets`` (in its insertion order). Each
    cell is the value returned by ``read_expemb_results`` for the autoencoder
    run with that hidden size, scaled by 100 and rounded to one decimal.
    """
    hidden_sizes = list(hidden_sizes)

    rows = []
    for dataset, displayname in datasets.items():
        # One lookup per requested hidden size, replacing the previously
        # copy-pasted calls for 32, 64 and 128.
        percentages = [
            round(float(read_expemb_results(dataset, autoencoder = True, hidden = hidden)) * 100, 1)
            for hidden in hidden_sizes
        ]
        rows.append((displayname, percentages))

    print("\\toprule")
    print(" & ".join(["Dataset"] + [f"$H = {hidden}$" for hidden in hidden_sizes]) + " \\\\")
    print("\\midrule")
    for displayname, percentages in rows:
        print(" & ".join([f"{displayname}"] + [f"{value}" for value in percentages]) + " \\\\")

    print("\\bottomrule")
        
# Emit the per-hidden-size autoencoder table; the LaTeX rows are written to stdout.
print_expemba_all_results()

\toprule
Dataset & $H = 32$ & $H = 64$ & $H = 128$ \\
\midrule
\textsc{SimpBool8} & 33.1 & 36.9 & 34.1 \\
\textsc{SimpBool10} & 20.5 & 24.4 & 25.4 \\
\textsc{Bool5} & 36.7 & 36.4 & 28.1 \\
\textsc{Bool8} & 31.1 & 30.6 & 25.9 \\
\textsc{Bool10} & 10.7 & 10.8 & 8.6 \\
\textsc{SimpBoolL5} & 55.2 & 55.1 & 54.9 \\
\textsc{BoolL5} & 35.6 & 38.3 & 38.2 \\
\textsc{SimpPoly5} & 18.8 & 28.1 & 17.7 \\
\textsc{SimpPoly8} & 50.4 & 47.6 & 47.3 \\
\textsc{SimpPoly10} & 47.4 & 40.1 & 44.9 \\
\textsc{oneV-Poly10} & 58.3 & 59.8 & 59.7 \\
\textsc{oneV-Poly13} & 37.7 & 38.7 & 38.4 \\
\textsc{Poly5} & 14.9 & 5.7 & 6.6 \\
\textsc{Poly8} & 32.2 & 32.7 & 32.1 \\
\bottomrule
