In [43]:
import json
import pandas as pd
import numpy as np
import os
from itertools import product

In [44]:
# Options for each axis of the experiment grid; they are combined further down
# in this cell with itertools.product.
scorer = [None, "tanh", "sigmoid"]
encoders = ["average_glove", "sbert"]
tasks = ["discrimination", "insertion"]
bidirect = [False, True]


def scorer_name(scorer):
    """Return a printable name for a scorer option.

    ``None`` is mapped to the string ``"None"``; any other value is returned
    unchanged.
    """
    return "None" if scorer is None else scorer


# IMPORTANT: the order of the arguments to product() matters. It determines
# which index each (tasks, bidirect, scorer, encoders) combination receives in
# the enumeration displayed below.
# NOTE(review): presumably these indices identify experiment runs elsewhere — confirm.
list(enumerate(product(tasks, bidirect, scorer, encoders)))

[(0, ('discrimination', False, None, 'average_glove')),
 (1, ('discrimination', False, None, 'sbert')),
 (2, ('discrimination', False, 'tanh', 'average_glove')),
 (3, ('discrimination', False, 'tanh', 'sbert')),
 (4, ('discrimination', False, 'sigmoid', 'average_glove')),
 (5, ('discrimination', False, 'sigmoid', 'sbert')),
 (6, ('discrimination', True, None, 'average_glove')),
 (7, ('discrimination', True, None, 'sbert')),
 (8, ('discrimination', True, 'tanh', 'average_glove')),
 (9, ('discrimination', True, 'tanh', 'sbert')),
 (10, ('discrimination', True, 'sigmoid', 'average_glove')),
 (11, ('discrimination', True, 'sigmoid', 'sbert')),
 (12, ('insertion', False, None, 'average_glove')),
 (13, ('insertion', False, None, 'sbert')),
 (14, ('insertion', False, 'tanh', 'average_glove')),
 (15, ('insertion', False, 'tanh', 'sbert')),
 (16, ('insertion', False, 'sigmoid', 'average_glove')),
 (17, ('insertion', False, 'sigmoid', 'sbert')),
 (18, ('insertion', True, None, 'average_glove')),

In [46]:
def print_list(l):
    """Print every item of ``l`` on a single line, each prefixed by `` |  ``.

    The line always ends with a newline, even when ``l`` is empty.
    """
    cells = [" |  " + str(item) for item in l]
    print("".join(cells))
# Print one line per grid combination: its index followed by the fields of the
# combination, each separated by " | " (via print_list).
for i, x in enumerate(product(tasks, bidirect, scorer, encoders)):
    print(i, end="")
    print_list(x)

0 |  discrimination |  False |  None |  average_glove
1 |  discrimination |  False |  None |  sbert
2 |  discrimination |  False |  tanh |  average_glove
3 |  discrimination |  False |  tanh |  sbert
4 |  discrimination |  False |  sigmoid |  average_glove
5 |  discrimination |  False |  sigmoid |  sbert
6 |  discrimination |  True |  None |  average_glove
7 |  discrimination |  True |  None |  sbert
8 |  discrimination |  True |  tanh |  average_glove
9 |  discrimination |  True |  tanh |  sbert
10 |  discrimination |  True |  sigmoid |  average_glove
11 |  discrimination |  True |  sigmoid |  sbert
12 |  insertion |  False |  None |  average_glove
13 |  insertion |  False |  None |  sbert
14 |  insertion |  False |  tanh |  average_glove
15 |  insertion |  False |  tanh |  sbert
16 |  insertion |  False |  sigmoid |  average_glove
17 |  insertion |  False |  sigmoid |  sbert
18 |  insertion |  True |  None |  average_glove
19 |  insertion |  True |  None |  sbert
20 |  insertion |  T

In [8]:
# Candidate values for each hyperparameter of the grid search.
# The np.arange stop values (0.71, 0.41, 6.1, 0.11) sit just past the last
# wanted value so that the endpoint is included in the result.
# np.arange with a float step can return inexact values, e.g.
# 0.30000000000000004 and 0.4000000000000001 for hidden_dropout (visible in
# the printed output of this cell).
input_dropout = np.arange(0.5, 0.71, 0.1).tolist()
hidden_layers = np.arange(1, 3).tolist()
hidden_dropout = np.arange(0.2, 0.41, 0.1).tolist()
margin = np.arange(4, 6.1, 1).tolist()
l2_reg_lambda = np.arange(0, 0.11, 0.1).tolist()
dpout_model = np.arange(0, 0.11, 0.05).tolist()
task = ["insertion"]

# Show all value lists on one line as a quick sanity check.
print(
        input_dropout,
        hidden_layers,
        hidden_dropout,
        margin,
        l2_reg_lambda,
        dpout_model,
        task,
    )

[0.5, 0.6, 0.7] [1, 2] [0.2, 0.30000000000000004, 0.4000000000000001] [4.0, 5.0, 6.0] [0.0, 0.1] [0.0, 0.05, 0.1] ['insertion']


In [9]:
# Look up the hyperparameter combination at index 60 of the enumerated grid.
# The argument order of product() fixes the index -> combination mapping.
list(enumerate(
    product(
        input_dropout,
        hidden_layers,
        hidden_dropout,
        margin,
        l2_reg_lambda,
        dpout_model,
        task,
    )
))[60]

(60, (0.5, 2, 0.2, 5.0, 0.0, 0.0, 'insertion'))

In [10]:
# Directory whose files are listed and loaded as JSON results further down.
RESULTS_DIR = "results/insertion_experiments/data/"

In [11]:
# os.listdir() returns entries in arbitrary order. Sorting the names makes the
# load order, and therefore the relative order of equally scored results in
# the later stable sorts, reproducible across runs and machines.
files = sorted(os.listdir(RESULTS_DIR))

In [115]:
# files.remove("all_results.json")
# files.remove("changename.txt")

In [12]:
# Load every file in RESULTS_DIR as JSON. Files that cannot be parsed are
# skipped and their names printed so they can be inspected afterwards.
all_results = []
for file in files:
    # JSON is read explicitly as UTF-8 instead of the locale default encoding.
    with open(os.path.join(RESULTS_DIR, file), encoding="utf-8") as f:
        try:
            results = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Only parse/decoding failures are expected here. A bare ``except``
            # would also hide KeyboardInterrupt and genuine programming errors.
            print(file)
        else:
            all_results.append(results)

In [13]:
# Rank all loaded results by the first element of each task entry, highest
# first. Both lists contain the same result dicts, only in a different order.
disc_models = sorted(all_results, key=lambda x: x["discrimination"][0], reverse=True)
ins_models = sorted(all_results, key=lambda x: x["insertion"][0], reverse=True)

In [74]:
# def trim_models(l, key, cutoff):
#     temp = []
#     for x in l:
#         if x[key][0] > cutoff:
#             temp.append(x)
#         else:
#             return temp
#     return temp

# trimmed_disc_models = trim_models(disc_models, "discrimination", 0.925)
# trimmed_ins_models = trim_models(ins_models, "insertion", 0.3)

# print(len(trimmed_disc_models), len(trimmed_ins_models))

In [14]:
# Keep only the 20 highest-ranked results for each task.
trimmed_ins_models = ins_models[:20]
trimmed_disc_models = disc_models[:20]

In [15]:
# Show the top-ranked result for each task.
print(trimmed_disc_models[0])
print(trimmed_ins_models[0])

{'hparams': {'loss': 'margin', 'input_dropout': 0.5, 'hidden_state': 500, 'hidden_layers': 1, 'hidden_dropout': 0.4000000000000001, 'num_epochs': 50, 'margin': 6.0, 'lr': 0.001, 'l2_reg_lambda': 0.0, 'use_bn': False, 'task': 'insertion', 'bidirectional': False, 'dpout_model': 0.05}, 'discrimination': [0.9303130929791271, [0.8889121338912134, 0.9290502793296089, 0.9807304785894206]], 'insertion': [0.3124241023790151, [0.4978423656770935, 0.21919521886637955, 0.13121009807745407]]}
{'hparams': {'loss': 'margin', 'input_dropout': 0.5, 'hidden_state': 500, 'hidden_layers': 1, 'hidden_dropout': 0.4000000000000001, 'num_epochs': 50, 'margin': 6.0, 'lr': 0.001, 'l2_reg_lambda': 0.0, 'use_bn': False, 'task': 'insertion', 'bidirectional': False, 'dpout_model': 0.05}, 'discrimination': [0.9303130929791271, [0.8889121338912134, 0.9290502793296089, 0.9807304785894206]], 'insertion': [0.3124241023790151, [0.4978423656770935, 0.21919521886637955, 0.13121009807745407]]}


In [22]:
# Check whether the same result is ranked first for both tasks.
print(trimmed_disc_models[0]==trimmed_ins_models[0])

True


In [16]:
def get_params(l):
    """Collect the distinct values of every hyperparameter across ``l``.

    Args:
        l: iterable of result dicts, each with an ``"hparams"`` mapping of
            hyperparameter name to value.

    Returns:
        A dict mapping each hyperparameter name to the list of distinct values
        seen, in order of first appearance. Names appear in the order they are
        first encountered. An empty ``l`` yields an empty dict.
    """
    params = {}
    for x in l:
        for k, v in x["hparams"].items():
            # setdefault also covers names that are missing from the first
            # entry, which previously raised KeyError.
            seen = params.setdefault(k, [])
            if v not in seen:
                seen.append(v)
    return params

In [17]:
disc_params = get_params(trimmed_disc_models)
ins_params = get_params(trimmed_ins_models)
print(disc_params)
print(ins_params)

{'loss': ['margin'], 'input_dropout': [0.5], 'hidden_state': [500], 'hidden_layers': [1, 2], 'hidden_dropout': [0.4000000000000001, 0.2, 0.30000000000000004], 'num_epochs': [50], 'margin': [6.0, 5.0, 4.0], 'lr': [0.001], 'l2_reg_lambda': [0.0], 'use_bn': [False], 'task': ['insertion'], 'bidirectional': [False], 'dpout_model': [0.05, 0.1, 0.0]}
{'loss': ['margin'], 'input_dropout': [0.5], 'hidden_state': [500], 'hidden_layers': [1, 2], 'hidden_dropout': [0.4000000000000001, 0.2, 0.30000000000000004], 'num_epochs': [50], 'margin': [6.0, 4.0, 5.0], 'lr': [0.001], 'l2_reg_lambda': [0.0], 'use_bn': [False], 'task': ['insertion'], 'bidirectional': [False], 'dpout_model': [0.05, 0.1, 0.0]}


In [18]:
# Names of the hyperparameters to report; print_params only prints entries
# whose key is in this list. They are the same names, in the same order, as
# the value lists passed to product() in the grid cell above.
params_exp = [
    "input_dropout",
    "hidden_layers",
    "hidden_dropout",
    "margin",
    "l2_reg_lambda",
    "dpout_model",
    "task",
]

In [19]:
def print_params(params, d):
    """Print ``name: v1 > v2 > ...`` for each entry of ``d`` whose key is in ``params``.

    Lines are emitted in the iteration order of ``d`` (not the order of
    ``params``). Each value is converted with ``str`` and the values are
    joined with ``" > "``.
    """
    for name, values in d.items():
        if name not in params:
            continue
        chain = " > ".join(str(value) for value in values)
        print(f"{name}: {chain}")

In [20]:
# For each reported hyperparameter, show the distinct values found among the
# top discrimination models and then among the top insertion models. Values
# are listed in order of first appearance in the respective ranking.
print_params(params_exp, disc_params)
print()
print_params(params_exp, ins_params)

input_dropout: 0.5
hidden_layers: 1 > 2
hidden_dropout: 0.4000000000000001 > 0.2 > 0.30000000000000004
margin: 6.0 > 5.0 > 4.0
l2_reg_lambda: 0.0
task: insertion
dpout_model: 0.05 > 0.1 > 0.0

input_dropout: 0.5
hidden_layers: 1 > 2
hidden_dropout: 0.4000000000000001 > 0.2 > 0.30000000000000004
margin: 6.0 > 4.0 > 5.0
l2_reg_lambda: 0.0
task: insertion
dpout_model: 0.05 > 0.1 > 0.0


In [21]:
def print_scores(l, key):
    """Print a list with the first element of ``x[key]`` for each ``x`` in ``l``.

    Every value is formatted with four decimal places.
    """
    formatted = []
    for entry in l:
        score = entry[key][0]
        formatted.append(f"{score:.4f}")
    print(formatted)


# Show the scores of the kept models for each task, in ranked order.
print_scores(trimmed_disc_models, "discrimination")
print()
print_scores(trimmed_ins_models, "insertion")

['0.9303', '0.9289', '0.9288', '0.9286', '0.9284', '0.9271', '0.9265', '0.9263', '0.9263', '0.9259', '0.9259', '0.9258', '0.9258', '0.9258', '0.9251', '0.9251', '0.9251', '0.9245', '0.9241', '0.9239']

['0.3124', '0.3093', '0.3085', '0.3056', '0.3050', '0.3050', '0.3050', '0.3040', '0.3039', '0.3035', '0.3031', '0.3030', '0.3028', '0.3025', '0.3025', '0.3021', '0.3019', '0.3016', '0.3011', '0.3010']


In [88]:
# Print the hyperparameter grid again, one labelled line per parameter. The
# value expressions repeat those of the grid-definition cell above; the task
# shown here is "discrimination" rather than "insertion".
print("input_dropout", np.arange(0.5, 0.71, 0.1).tolist())
print("hidden_layers", np.arange(1, 3).tolist())
print("hidden_dropout", np.arange(0.2, 0.41, 0.1).tolist())
print("margin", np.arange(4, 6.1, 1).tolist())
print("l2_reg_lambda", np.arange(0, 0.11, 0.1).tolist())
print("dpout_model", np.arange(0, 0.11, 0.05).tolist())
print("task", ["discrimination"])

input_dropout [0.5, 0.6, 0.7]
hidden_layers [1, 2]
hidden_dropout [0.2, 0.30000000000000004, 0.4000000000000001]
margin [4.0, 5.0, 6.0]
l2_reg_lambda [0.0, 0.1]
dpout_model [0.0, 0.05, 0.1]
task ['discrimination']


In [99]:
def read_json(path):
    """Load and return the JSON document stored at ``path``.

    The file is read as UTF-8 rather than with the platform's locale
    encoding, so the result does not depend on the machine's locale.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)

In [107]:
def print_json(d):
    """Print the first element of ``d["discrimination"]`` and ``d["insertion"]``.

    The two values are written on one line, separated and surrounded by
    `` | `` markers.
    """
    disc_score = d["discrimination"][0]
    ins_score = d["insertion"][0]
    cells = (" | ", str(disc_score), " | ", str(ins_score), " | ")
    print(" ".join(cells))

In [110]:
# Load a single stored result file for inspection.
json_data = read_json("results/sigmoid/sigmoid-0.7988.json")

In [111]:
# Show the discrimination and insertion scores of the loaded result.
print_json(json_data)

 |  0.8039373814041746  |  0.2146964623152086  | 


In [40]:
# Hard-coded score strings, converted to floats and summarised with describe().
scores = [
    '0.3091', '0.3082', '0.3072', '0.3069', '0.3069',
    '0.3068', '0.3064', '0.3062', '0.3059', '0.3054',
    '0.3054', '0.3048', '0.3048', '0.3046', '0.3043',
    '0.3042', '0.3041', '0.3040', '0.3036', '0.3036',
]
t = pd.DataFrame({"scores": scores}).astype({"scores": float})
x = t.describe()

In [41]:
# Print each summary statistic of the "scores" column as a " | "-delimited row.
for k,v in x["scores"].to_dict().items():
    print(" | ", k, " | ", v, " | ")

 |  count  |  20.0  | 
 |  mean  |  0.30562000000000006  | 
 |  std  |  0.001560903921652955  | 
 |  min  |  0.3036  | 
 |  25%  |  0.304275  | 
 |  50%  |  0.3054  | 
 |  75%  |  0.306825  | 
 |  max  |  0.3091  | 
