In [1]:
import json
import os

# Path of the template experiment config that the cells below pass to load_template().
# The relative path is resolved against the current working directory.
template_fn = os.path.abspath("../experiments/exp_1_no-WS_350-1000.json")

def save_config(filename, config):
    """Write ``config`` to ``filename`` as JSON with a 4-space indent.

    The target file is opened in write mode, so any existing content is
    replaced.
    """
    encoder = json.JSONEncoder(indent=4)
    with open(filename, "w") as handle:
        # Stream the encoded chunks straight to the file.
        handle.writelines(encoder.iterencode(config))

def load_template(filename):
    """Load a JSON experiment config from ``filename`` and return it.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The parsed JSON content (a fresh object on every call, so callers
        may mutate it freely).
    """
    # Read the file named by the argument; previously the global
    # ``template_fn`` was opened and ``filename`` was silently ignored.
    with open(filename, "r") as template_file:
        config = json.load(template_file)
    return config

def build_bash(bash_fn, config_list):
    """Write a bash script that trains one AllenNLP model per config file.

    Args:
        bash_fn: Path of the script to create (overwritten if it exists).
        config_list: Iterable of config file paths. Each one produces an
            ``allennlp train`` line whose serialization directory is
            ``../results/<config basename without ".json">`` (resolved
            against the current working directory).
    """
    # Function-scope import so this block does not depend on the import cell.
    import shlex

    # Plain single-line literals: a triple-quoted string inside the function
    # body would carry the function's indentation into the generated script.
    header = "#Run the experiments\nset -x\nsource activate allennlp\n"
    single_exp = "allennlp train {conf} -s {serial} --include-package allen_linguo\n"
    serial_base = os.path.abspath("../results")
    with open(bash_fn, "w") as bash_file:
        bash_file.write(header)
        for conf_path in config_list:
            exp_name = os.path.basename(conf_path).replace(".json", "")
            serial = os.path.join(serial_base, exp_name)
            # Quote both paths so spaces or shell metacharacters do not split
            # the command; paths made of safe characters are left unchanged.
            bash_file.write(
                single_exp.format(conf=shlex.quote(conf_path), serial=shlex.quote(serial))
            )


In [42]:
# Experiment 1.1: embedding size and encoder hidden size grow together
# (32, 64, ..., 512), plus one final 512-embedding / 1024-hidden configuration.

# Load the template here so the cell does not depend on a `config` left in the
# kernel by whichever cell happened to run before it.
config = load_template(template_fn)

dim = 32
base_fn = os.path.abspath("../experiments/")
base_fn = os.path.join(base_fn, "exp-1.1_no-WS_{}-{}.json")
exp_files = []
while dim < 1024:
    config["model"]["word_embeddings"]["token_embedders"]["tokens"]["embedding_dim"] = dim
    config["model"]["encoder"]["input_size"] = dim
    config["model"]["encoder"]["hidden_size"] = dim
    filename = base_fn.format(dim,dim)
    exp_files.append(filename)
    save_config(filename, config)
    dim = dim*2

# Last configuration: the embedding/input size is still 512 from the final
# loop iteration; only the hidden size is raised to 1024.
config["model"]["encoder"]["hidden_size"] = 1024
filename = base_fn.format(512,1024)
exp_files.append(filename)
save_config(filename, config)

bash_fn = os.path.abspath("../pbs_scripts/run_all-1.1.sh")

build_bash(bash_fn, exp_files)

In [35]:
# Experiment 1.1 (embedding size == hidden size, 32 ... 512, then 512/1024).
# NOTE(review): this cell writes the same exp-1.1 config files and the same
# run_all-1.1.sh as another cell in this notebook; one of the two is redundant.

# Load the template explicitly instead of relying on a `config` object that
# an earlier (possibly unrelated) cell left in the kernel.
config = load_template(template_fn)

dim = 32
base_fn = os.path.abspath("../experiments/")
base_fn = os.path.join(base_fn, "exp-1.1_no-WS_{}-{}.json")
exp_files = []
while dim < 1024:
    config["model"]["word_embeddings"]["token_embedders"]["tokens"]["embedding_dim"] = dim
    config["model"]["encoder"]["input_size"] = dim
    config["model"]["encoder"]["hidden_size"] = dim
    filename = base_fn.format(dim,dim)
    exp_files.append(filename)
    save_config(filename, config)
    dim = dim*2

# Final configuration keeps the 512 embedding/input size set by the last loop
# iteration and only changes the hidden size.
config["model"]["encoder"]["hidden_size"] = 1024
filename = base_fn.format(512,1024)
exp_files.append(filename)
save_config(filename, config)

bash_fn = os.path.abspath("../pbs_scripts/run_all-1.1.sh")

build_bash(bash_fn, exp_files)

In [51]:
# Experiment 1.2: three embedding conditions written from one template
# (random initialisation, pretrained + trainable, pretrained + frozen).
config = load_template(template_fn)
exp_list = []

# Shorthand for the two nested sections edited below; these are the same
# dict objects that live inside `config`.
token_embedder = config["model"]["word_embeddings"]["token_embedders"]["tokens"]
encoder = config["model"]["encoder"]

# Condition 1: random initialization
token_embedder["embedding_dim"] = 300
encoder["hidden_size"] = 512
encoder["input_size"] = 300
random_fn = os.path.abspath("../experiments/exp-1.2_random.json")
exp_list.append(random_fn)
save_config(random_fn, config)

# Condition 2: pretrained vectors with refinement (trainable)
token_embedder["pretrained_file"] = "/home/lab/Pablo/darth_linguo/resources/SBW-vectors-300-min5.txt"
token_embedder["trainable"] = True
pretrained_cont_fn = os.path.abspath("../experiments/exp-1.2_pretrained-cont.json")
exp_list.append(pretrained_cont_fn)
save_config(pretrained_cont_fn, config)

# Condition 3: pretrained vectors with frozen weights
pretrained_freeze_fn = os.path.abspath("../experiments/exp-1.2_pretrained-freeze.json")
token_embedder["trainable"] = False
exp_list.append(pretrained_freeze_fn)
save_config(pretrained_freeze_fn, config)

# One script that runs all three conditions
script_fn = os.path.abspath("../pbs_scripts/run_all-1.2.sh")
build_bash(script_fn, exp_list)


In [53]:

# For experiment 2 (varying encoder depth and directionality)

config = load_template(template_fn)
exp_list = []
base_filename = os.path.abspath("../experiments/exp-2.1_{}.json")
# common settings
config["model"]["word_embeddings"]["token_embedders"]["tokens"]["embedding_dim"] = 512
config["model"]["encoder"]["hidden_size"] = 1024
config["model"]["encoder"]["input_size"] = 512
# Running counter that prefixes each condition name (1..6 across both loops).
expnum = 1

# Unidirectional (1-2-3 layer) Encoder
for num_layers in [1,2,3]:
    condition = "{}uni-{}-layer".format(expnum, num_layers)
    config["model"]["encoder"]["num_layers"] = num_layers
    # Pin the direction explicitly so the "uni" configs do not silently
    # inherit whatever value the template happens to carry.
    config["model"]["encoder"]["bidirectional"] = False
    fn = base_filename.format(condition)
    exp_list.append(fn)
    save_config(fn, config)
    expnum += 1

# Bidirectional (1-2-3 layer)
for num_layers in [1,2,3]:
    condition = "{}bi-{}-layer".format(expnum, num_layers)
    config["model"]["encoder"]["num_layers"] = num_layers
    config["model"]["encoder"]["bidirectional"] = True
    fn = base_filename.format(condition)
    exp_list.append(fn)
    save_config(fn, config)
    expnum += 1

script_fn = os.path.abspath("../pbs_scripts/run_all-2.1.sh")

build_bash(script_fn, exp_list)



In [4]:
# For experiment 3.1 (varying encoder depth and directionality on smaller dimensions)

config = load_template(template_fn)
exp_list = []
# NOTE(review): the files keep the "exp-2.1_" prefix although this cell is
# labelled experiment 3.1 and writes run_all-3.sh — looks like a copy-paste
# leftover; confirm before renaming, since result directories derive from it.
base_filename = os.path.abspath("../experiments/exp-2.1_{}.json")
# The script path does not depend on the loop, so it is set once up front.
script_fn = os.path.abspath("../pbs_scripts/run_all-3.sh")

# (embedding_dim, hidden_dim) pairs to sweep
encoder_dimensions = [(32,32),(32,64),(64,64),(64,128),(128,128)]

for embedding_dim, hidden_dim in encoder_dimensions:
    config["model"]["word_embeddings"]["token_embedders"]["tokens"]["embedding_dim"] = embedding_dim
    config["model"]["encoder"]["hidden_size"] = hidden_dim
    config["model"]["encoder"]["input_size"] = embedding_dim

    # Unidirectional (1-2-3 layer) first, then bidirectional (1-2-3 layer).
    for label, bidirectional in (("uni", False), ("bi", True)):
        for num_layers in [1,2,3]:
            condition = "{}-{}-layer{}-{}".format(label, num_layers, embedding_dim, hidden_dim)
            config["model"]["encoder"]["num_layers"] = num_layers
            # Set the direction for every config: `config` is reused across
            # dimension pairs, so without this the True written by the "bi"
            # pass would leak into the "uni" configs of all later pairs.
            config["model"]["encoder"]["bidirectional"] = bidirectional
            fn = base_filename.format(condition)
            exp_list.append(fn)
            save_config(fn, config)

build_bash(script_fn, exp_list)

In [37]:
# Write the run-all script for experiment 1.1 directly (no helper call).
# One `allennlp train` line is emitted per path in `exp_files`, each with a
# serialization directory under ../results named after the config file.
bash_fn = os.path.abspath("../pbs_scripts/run_all-1.1.sh")
header = "#Run the experiments\nset -x\nsource activate allennlp\n"
single_exp = "allennlp train {conf} -s {serial} --include-package allen_linguo\n"
serial_base = os.path.abspath("../results")
with open(bash_fn, "w") as bash_file:
    bash_file.write(header)
    bash_file.writelines(
        single_exp.format(
            conf=conf_path,
            serial=os.path.join(serial_base, os.path.basename(conf_path).replace(".json","")),
        )
        for conf_path in exp_files
    )


TypeError: dump() missing 1 required positional argument: 'fp'

In [5]:
# Experiment 1.3: sweep the embedding size (16, 32, ..., 512) while the
# encoder hidden size stays fixed at 512.
config = load_template(template_fn)

# Aliases for the nested sections edited below (same objects as in `config`).
encoder_cfg = config["model"]["encoder"]
tokens_cfg = config["model"]["word_embeddings"]["token_embedders"]["tokens"]

base_fn = os.path.join(os.path.abspath("../experiments/"), "exp-1.3_{}-512.json")
exp_files = []

# Hidden size is constant across the whole sweep
encoder_cfg["hidden_size"] = 512

dim = 16
while dim < 1024:
    # The encoder input must match the embedding width
    tokens_cfg["embedding_dim"] = dim
    encoder_cfg["input_size"] = dim
    filename = base_fn.format(dim)
    exp_files.append(filename)
    save_config(filename, config)
    dim *= 2

bash_fn = os.path.abspath("../pbs_scripts/run_all-1.3.sh")
build_bash(bash_fn, exp_files)

'Pablo hola'

In [12]:
# NOTE(review): leftover scratch cell — `mystr` is not assigned in any visible
# cell, so this raises NameError on a fresh kernel; candidate for removal.
mystr

'{} hola'