In [2]:
import json
import os
import math

# Absolute path of the JSON experiment config used as the template for the
# generated configs (the relative path is resolved against the current
# working directory when this cell runs).
template_fn = os.path.abspath("../experiments/exp-3_uni-1-layer_32-32.json")

def save_config(filename, config):
    """Serialize ``config`` as JSON into ``filename``.

    The target file is opened in write mode (created or truncated) and the
    JSON is pretty-printed with a four-space indent.

    Args:
        filename: Path of the file to write.
        config: JSON-serializable object to store.
    """
    with open(filename, mode="w") as json_out:
        json.dump(config, json_out, indent=4)

def load_template(filename):
    """Load a JSON experiment template and return the parsed content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The object parsed from the file (callers in this notebook index it
        as a nested dict).
    """
    # Read the file named by the argument. Previously the module-level
    # ``template_fn`` was opened no matter which path the caller passed.
    with open(filename, "r") as template_file:
        return json.load(template_file)

def build_bash(bash_fn, config_list):
    """Write a shell script that trains one experiment per config file.

    The script starts with a fixed preamble (``set -x`` and activation of the
    ``allennlp`` environment) followed by one ``allennlp train`` command per
    entry of ``config_list``. Each run is serialized to
    ``../results/<config basename without ".json">`` (made absolute against
    the current working directory).

    Args:
        bash_fn: Path of the script file to create or overwrite.
        config_list: Iterable of config file paths.
    """
    preamble = "#Run the experiments\n    set -x\n    source activate allennlp\n    "
    command_template = "allennlp train {conf} -s {serial} --include-package allen_linguo\n    "
    results_root = os.path.abspath("../results")

    def render_command(config_path):
        # The serialization directory is named after the config file.
        run_name = os.path.basename(config_path).replace(".json", "")
        return command_template.format(
            conf=config_path,
            serial=os.path.join(results_root, run_name),
        )

    with open(bash_fn, "w") as script:
        script.write(preamble)
        script.writelines(render_command(path) for path in config_list)
            
def calculate_model_parameters(e,h,L, uni=True):
    """Return the parameter count for the given model dimensions.

    Args:
        e: Input size of the first layer (callers pass the embedding size).
        h: Hidden size.
        L: Number of stacked layers.
        uni: True for the unidirectional formula, False for the
            bidirectional one.

    Returns:
        The parameter count given by the closed-form expression below.

    NOTE(review): the ``4 * (...)`` terms look like per-gate LSTM weights and
    the ``2*h + e + 2`` tail like a small head on top; confirm against the
    actual model definition.
    """
    # The first layer consumes the e-dimensional input.
    first_layer = 4*((e+1)*h + h**2 + h)
    if uni:
        # Deeper layers consume the h-dimensional output of the layer below.
        deeper_layers = (L-1) * 4*((h+1)*h + h**2 + h)
        return first_layer + deeper_layers + 2*h + e + 2
    # Bidirectional: deeper layers consume a 2h-dimensional input and the
    # whole stack is counted twice.
    deeper_layers = (L-1) * 4*((2*h+1)*h + h**2 + h )
    return 2*(first_layer + deeper_layers + 2*h ) + e + 2

def get_hidden(e,L,p,uni=True):
    """Find the hidden size whose parameter count is closest to ``p``.

    Solves ``calculate_model_parameters(e, h, L, uni) == p`` for ``h`` (a
    quadratic in ``h``), then rounds the positive real root to whichever
    neighbouring integer gives a parameter count nearer to ``p``.

    Args:
        e: Input size of the first layer (callers pass the embedding size).
        L: Number of stacked layers.
        p: Target parameter count.
        uni: True for the unidirectional formula, False for bidirectional.

    Returns:
        An integer hidden size, always at least 1.

    Raises:
        ValueError: If the quadratic has no real solution for these inputs.
    """
    # Discriminant of the quadratic, already divided by the common factor.
    if uni:
        discriminant = 4*e**2 + 8*e*(L + 1) + 16*L**2 + 8*L*(p - 1) - 4*p + 9
    else:
        discriminant = 16*L**2 + 6*L*p + 10*e*L - 4*L - 4*p + 4*e**2 + 8*e + 9
    if discriminant < 0:
        # math.sqrt would raise ValueError("math domain error"); keep the
        # exception type but say what actually went wrong.
        raise ValueError(
            f"no real hidden size gives {p} parameters for e={e}, L={L}, uni={uni}"
        )
    sqrt_disc = math.sqrt(discriminant)

    if uni:
        root1= (sqrt_disc + 2*e + 4*L + 1)/(4 - 8*L)
        root2= (sqrt_disc - 2*e - 4*L - 1)/(8*L - 4)
    else:
        root1=(sqrt_disc + 4*L + 2*e + 1)/(8 - 12*L)
        root2=(-sqrt_disc + 4*L + 2*e + 1)/(8 - 12*L)

    # Prefer the positive root; if both are positive take the smaller one.
    if root1<=0:
        root = root2
    elif root2<=0:
        root = root1
    else:
        root= min(root1,root2)

    # A hidden size of 0 (or less) is not a usable layer width, so clamp both
    # rounding candidates to 1. For roots >= 1 this changes nothing.
    h_floor = max(1, math.floor(root))
    h_ceil = max(1, math.ceil(root))

    #Choose whichever lands you the closest to the original number of params
    error_ceil = abs(p-calculate_model_parameters(e,h_ceil,L,uni=uni))
    error_floor = abs(p-calculate_model_parameters(e,h_floor,L,uni=uni))
    if error_ceil<error_floor:
        return h_ceil
    return h_floor

In [3]:
# Scratch check of math.ceil on a fractional value (get_hidden relies on
# math.ceil / math.floor to pick the candidate hidden sizes).
math.ceil(0.2)

1

In [4]:
# Round-trip sanity check for the bidirectional formulas: compute the
# parameter count for each (hidden size, depth) pair, feed it back through
# get_hidden(), and report how far the recovered size is from the original.
e = 32
h_list = [32,64,]
L_list = [1,2,3]
for h in h_list:
    for L in L_list:
        #uni = calculate_model_parameters(e,h,L)
        params = calculate_model_parameters(e,h,L,uni=False)
        print(f"{e}-{h}, {L} layers bi: {params}")
        # Invert the count back into a hidden size.
        estimated_h = get_hidden(e,L,params,uni=False)
        print(f"You predicted, {estimated_h} for {h}")
        # Recompute the count for the recovered size; a difference of 0 means
        # the inversion was exact.
        new_params = calculate_model_parameters(e,estimated_h,L,uni=False)
        dif= new_params-params
        print(f"That cost you a difference of: {dif}")




32-32, 1 layers bi: 17058
You predicted, 32 for 32
That cost you a difference of: 0
32-32, 2 layers bi: 42146
You predicted, 32 for 32
That cost you a difference of: 0
32-32, 3 layers bi: 67234
You predicted, 32 for 32
That cost you a difference of: 0
32-64, 1 layers bi: 50466
You predicted, 64 for 64
That cost you a difference of: 0
32-64, 2 layers bi: 149794
You predicted, 64 for 64
That cost you a difference of: 0
32-64, 3 layers bi: 249122
You predicted, 64 for 64
That cost you a difference of: 0


In [5]:
# Build the hidden-size grids used by the config-generation cell: for every
# depth and direction, pick hidden sizes whose parameter counts are as close
# as possible to those of the 1-layer unidirectional baseline.
emb = 64
h_options_uni={}
# Baseline: hidden sizes for the 1-layer unidirectional model.
h_options_uni[1] = [16,32,64,96,122,144,164,182,200,216]
h_options_bi = {}
param_range = []
range_differences = []
# Parameter count of each baseline size, plus the step between consecutive
# counts.
for h in h_options_uni[1]:
    param_range.append(calculate_model_parameters(emb,h,1,uni=True))
    if len(param_range)>=2:
        range_differences.append(param_range[-1]-param_range[-2])
print(f"parameters:{param_range}")
print(f"Differences:{range_differences}")
# Deeper unidirectional models: match each baseline parameter count.
for L in [2,3,4]:
    h_options_uni[L] = []
    for target_params in param_range:
        hidden = get_hidden(emb,L,target_params)
        h_options_uni[L].append(hidden)
        # Printed value is (achieved count - target count).
        new_params = calculate_model_parameters(emb,hidden,L)
        print(f"Difference of {new_params-target_params}")
    print(h_options_uni[L])

# Bidirectional models at every depth, matched against the same targets.
print("Bi-directional")
for L in [1,2,3,4]:
    h_options_bi[L] = []
    for target_params in param_range:
        hidden = get_hidden(emb,L,target_params,uni=False)
        h_options_bi[L].append(hidden)
        # Printed value is (achieved count - target count).
        new_params = calculate_model_parameters(emb,hidden,L,uni=False)
        print(f"Difference of {new_params-target_params}")
    print(h_options_bi[L])

    

parameters:[5346, 12674, 33474, 62466, 92054, 121314, 151274, 180974, 213266, 244146]
Differences:[7328, 20800, 28992, 29588, 29260, 29960, 29700, 32292, 30880]
Difference of -264
Difference of 42
Difference of 562
Difference of 716
Difference of 258
Difference of 612
Difference of -1122
Difference of 308
Difference of -1164
Difference of 1176
[12, 23, 43, 62, 77, 90, 101, 112, 122, 132]
Difference of 242
Difference of -30
Difference of -700
Difference of -562
Difference of -366
Difference of -406
Difference of -648
Difference of -1212
Difference of -1808
Difference of 1568
[11, 19, 34, 49, 61, 71, 80, 88, 96, 104]
Difference of -402
Difference of 414
Difference of 492
Difference of -828
Difference of -1196
Difference of 630
Difference of -2016
Difference of -1658
Difference of -1148
Difference of -1728
[9, 17, 30, 42, 52, 61, 68, 75, 82, 88]
Bi-directional
Difference of 156
Difference of 388
Difference of -492
Difference of -180
Difference of -32
Difference of -552
Difference of -440


In [6]:
# Spot check: parameter counts for e=64, h=16, one layer, first with the
# unidirectional formula and then with the bidirectional one.
print(calculate_model_parameters(64,16,1))
print(calculate_model_parameters(64,16,1,uni=False))

5346
10626


In [11]:
# For experiment 3.2 (varying encoder depth and directionality on smaller dimensions)
#
# Writes one config file per (direction, depth, hidden size) combination, based
# on the template config, and prints the experiment names as a bash array
# literal. Requires h_options_uni / h_options_bi from the grid cell above.

config = load_template(template_fn)
exp_list = []
base_filename = os.path.abspath("../experiments/exp-3.2_{}.json")

# common settings
e_dim = 64
# Set once up front: both directions use it, so it must not depend on the
# body of either loop having run.
embedding_dim = e_dim
layer_counts = [1, 2, 3, 4]

# Show the hidden sizes that will be used at each depth.
for num_layers in layer_counts:
    print(f"Uni {num_layers} layers")
    print(h_options_uni[num_layers])
    print(f"Bi {num_layers} layers")
    print(h_options_bi[num_layers])

# All unidirectional configs first (depths 1-4), then all bidirectional ones
# (depths 1-4); exp_list keeps that order.
for direction, bidirectional, h_options in (
    ("uni", False, h_options_uni),
    ("bi", True, h_options_bi),
):
    for num_layers in layer_counts:
        for hidden_dim in h_options[num_layers]:
            # The same config dict is reused; every field that varies between
            # experiments is overwritten before each save.
            config["model"]["word_embeddings"]["token_embedders"]["tokens"]["embedding_dim"] = embedding_dim
            config["model"]["encoder"]["hidden_size"] = hidden_dim
            config["model"]["encoder"]["input_size"] = embedding_dim
            config["model"]["encoder"]["num_layers"] = num_layers
            config["model"]["encoder"]["bidirectional"] = bidirectional
            condition = "{}-{}-layer{}-{}".format(direction, num_layers, embedding_dim, hidden_dim)
            fn = base_filename.format(condition)
            exp_list.append(os.path.basename(fn.replace(".json","")))
            save_config(fn, config)

# Bash array literal listing every generated experiment name.
element_list = " ".join(exp_list)
bash_line = f"({element_list})"
print(bash_line)

Uni 1 layers
[16, 32, 64, 96, 122, 144, 164, 182, 200, 216]
Bi 1 layers
[9, 19, 39, 61, 79, 94, 108, 121, 133, 145]
Uni 2 layers
[12, 23, 43, 62, 77, 90, 101, 112, 122, 132]
Bi 2 layers
[7, 13, 25, 36, 46, 54, 61, 67, 74, 79]
Uni 3 layers
[11, 19, 34, 49, 61, 71, 80, 88, 96, 104]
Bi 3 layers
[6, 11, 20, 29, 36, 42, 47, 52, 57, 61]
Uni 4 layers
[9, 17, 30, 42, 52, 61, 68, 75, 82, 88]
Bi 4 layers
[5, 9, 17, 25, 30, 35, 40, 44, 48, 52]
(exp-3.2_uni-1-layer64-16 exp-3.2_uni-1-layer64-32 exp-3.2_uni-1-layer64-64 exp-3.2_uni-1-layer64-96 exp-3.2_uni-1-layer64-122 exp-3.2_uni-1-layer64-144 exp-3.2_uni-1-layer64-164 exp-3.2_uni-1-layer64-182 exp-3.2_uni-1-layer64-200 exp-3.2_uni-1-layer64-216 exp-3.2_uni-2-layer64-12 exp-3.2_uni-2-layer64-23 exp-3.2_uni-2-layer64-43 exp-3.2_uni-2-layer64-62 exp-3.2_uni-2-layer64-77 exp-3.2_uni-2-layer64-90 exp-3.2_uni-2-layer64-101 exp-3.2_uni-2-layer64-112 exp-3.2_uni-2-layer64-122 exp-3.2_uni-2-layer64-132 exp-3.2_uni-3-layer64-11 exp-3.2_uni-3-layer64-19 ex

In [None]:
# NOTE(review): unexecuted cell holding a bare literal with no visible
# context; confirm whether it can be removed.
8463072

In [None]:
# NOTE(review): `mystr` is not defined in any visible cell, so this would
# raise NameError on a fresh kernel; define it or remove the cell.
mystr