In [18]:
import sys, os
import datetime


### Thursday 241205. Fragments. Hidden dimensions sweep [1-4096]

In [None]:
# Shell snippet that activates the conda environment in front of every task.
env_cmd = "source activate cleo"

# Project checkout and the training entry point inside it.
home_path = '/home/jq01/projects/active_learning/cleo'
script_name = 'train_surrogate.py'
script_path = os.path.join(home_path, script_name)

# Sweep definition: the base config is reused and only the hidden dimension changes.
base_config = "train_super_fragment"
run_prefix = "241205_super_fragment"
hidden_dims = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096]

# One training command per hidden dimension; the value is also encoded in run_name.
commands_list = [
    f"python {script_path} -cn {base_config} "
    f"run_name={run_prefix}_hidden{dim} "
    f"model.base_model.hidden_dim={dim}"
    for dim in hidden_dims
]


In [23]:
# Turn each training command into a self-contained shell line: activate the
# environment, cd into the project, then run the command. One line per task.
tasks_list = [f"{env_cmd}; cd {home_path}; {cmd}\n" for cmd in commands_list]

# Persist the task lines under the project's cmds directory.
tasks_file = f"{home_path}/cmds/{run_prefix}_hidden_tasks"
with open(tasks_file, "w") as f:
    f.writelines(tasks_list)

### Sunday 241208. Fragment. Batchsize sweep [1-64]

In [16]:
# Shell snippet that activates the conda environment in front of every task.
env_cmd = "source activate cleo"

# Project checkout and the training entry point inside it.
home_path = '/home/jq01/projects/active_learning/cleo'
script_name = 'train_surrogate.py'
script_path = os.path.join(home_path, script_name)

# Sweep definition: only the training batch size is overridden.
base_config = "241208_train_batchsize_frag"
run_prefix = "241208_batchsize_frag"
# NOTE(review): 8 is absent from this otherwise power-of-two list — confirm intentional.
train_batch_size = [1, 2, 4, 16, 32, 64]

# One training command per batch size; the value is also encoded in run_name.
commands_list = [
    f"python {script_path} -cn {base_config} "
    f"run_name={run_prefix}_batchsize{batch_size} "
    f"data.train_batch_size={batch_size}"
    for batch_size in train_batch_size
]


In [17]:
# Turn each training command into a self-contained shell line: activate the
# environment, cd into the project, then run the command. One line per task.
tasks_list = [f"{env_cmd}; cd {home_path}; {cmd}\n" for cmd in commands_list]

# Persist the task lines under the project's cmds directory.
tasks_file = f"{home_path}/cmds/{run_prefix}_tasks"
with open(tasks_file, "w") as f:
    f.writelines(tasks_list)

### Sunday 241208 Fragments. Learning rate sweep [1e-1 - 1e-5]

In [20]:
# Shell snippet that activates the conda environment in front of every task.
env_cmd = "source activate cleo"

# Project checkout and the training entry point inside it.
home_path = '/home/jq01/projects/active_learning/cleo'
script_name = 'train_surrogate.py'
script_path = os.path.join(home_path, script_name)

# Sweep definition: only the learning rate is overridden.
base_config = "241208_train_lr_frag"
run_prefix = "241208_learningrate_frag"
learning_rate = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]

# One training command per learning rate. The float is interpolated with
# Python's default formatting, so names look like "1e-05", "0.0001", "0.1".
commands_list = [
    f"python {script_path} -cn {base_config} "
    f"run_name={run_prefix}_learningrate{rate} "
    f"model.lr={rate}"
    for rate in learning_rate
]


In [21]:
# Turn each training command into a self-contained shell line: activate the
# environment, cd into the project, then run the command. One line per task.
tasks_list = [f"{env_cmd}; cd {home_path}; {cmd}\n" for cmd in commands_list]

# Persist the task lines under the project's cmds directory.
tasks_file = f"{home_path}/cmds/{run_prefix}_tasks"
with open(tasks_file, "w") as f:
    f.writelines(tasks_list)

### Monday 241209 Fragments. Learning rate constant 1e-4. Batch size sweep [1-64]

In [3]:
# Shell snippet that activates the conda environment in front of every task.
env_cmd = "source activate cleo"

# Project checkout and the training entry point inside it.
home_path = '/home/jq01/projects/active_learning/cleo'
script_name = 'train_surrogate.py'
script_path = os.path.join(home_path, script_name)

# Sweep definition: only the training batch size is overridden here.
base_config = "241209_train_lr1e-4_batchsize_sweep"
run_prefix = "241209_batchsize_lr1e-4_frag"
# NOTE(review): 8 is absent from this otherwise power-of-two list — confirm intentional.
train_batch_size = [1, 2, 4, 16, 32, 64]

# One training command per batch size; the value is also encoded in run_name.
commands_list = [
    f"python {script_path} -cn {base_config} "
    f"run_name={run_prefix}_batchsize{batch_size} "
    f"data.train_batch_size={batch_size}"
    for batch_size in train_batch_size
]


In [4]:
# Turn each training command into a self-contained shell line: activate the
# environment, cd into the project, then run the command. One line per task.
tasks_list = [f"{env_cmd}; cd {home_path}; {cmd}\n" for cmd in commands_list]

# Persist the task lines under the project's cmds directory.
tasks_file = f"{home_path}/cmds/{run_prefix}_tasks"
with open(tasks_file, "w") as f:
    f.writelines(tasks_list)

### Thursday 241212 Fragments. Learning rate: 1e-4, hidden dimension: 16, batch size: 16. Number of models sweep [1,5,10,20,40,80,160]

In [9]:
# Shell snippet that activates the conda environment in front of every task.
env_cmd = "source activate cleo"

# Project checkout and the training entry point inside it.
home_path = '/home/jq01/projects/active_learning/cleo'
script_name = 'train_surrogate.py'
script_path = os.path.join(home_path, script_name)

# Sweep definition: only model.num_models is overridden.
base_config = "241212_train_model_sweep_frag"
run_prefix = "241212_model_sweep_frag"
train_models = [1, 5, 10, 20, 40, 80, 160]

# One training command per model count; the count is also encoded in run_name.
commands_list = [
    f"python {script_path} -cn {base_config} "
    f"run_name={run_prefix}_num_models_{n_models} "
    f"model.num_models={n_models}"
    for n_models in train_models
]


In [10]:
# Turn each training command into a self-contained shell line: activate the
# environment, cd into the project, then run the command. One line per task.
tasks_list = [f"{env_cmd}; cd {home_path}; {cmd}\n" for cmd in commands_list]

# Persist the task lines under the project's cmds directory.
tasks_file = f"{home_path}/cmds/{run_prefix}_tasks"
with open(tasks_file, "w") as f:
    f.writelines(tasks_list)

### ESM embedding comparison training (ESM Cambrian and ESM-2)

In [15]:

# Environment activation snippet and repository locations used by every task.
env_cmd = "source activate /home/jq01/.conda/envs/cleo"
script_name = 'train_surrogate.py'
home_path = '/home/jq01/git/cleo_jason_working_branch'

# The config name is stamped with today's date (yymmdd).
# NOTE(review): re-running on a later day yields a different config name — confirm intended.
now = datetime.datetime.now()
date_str = now.strftime("%y%m%d")

# Embedding directories to compare; each one becomes a separate training run.
esm_model = ['/projects/ml/itopt/datasets/esm_embeddings','/projects/ml/itopt/datasets/esmc_600m_embeddings']

sweep_var = esm_model  # this is the hyperparameter to sweep
base_config = f"{date_str}_train_super_esm_vs_esmc"   # this is the yaml file name

cmds_path = os.path.join(home_path, 'cmds')
script_path = os.path.join(home_path, script_name)

# Model overrides are chosen per embeddings directory inside the loop below.
# This cell previously also carried pasted YAML-style lines
# (input_dim: 1280 / hidden_dim: 16 / kernel_size: 5 and
#  input_dim: 1152 / hidden_dim: 32 / kernel_size: 162); Python parsed them as
# no-op annotations and they never affected the generated commands, so they were
# removed. Note the second set disagrees with the 16 / 5 actually used for ESM-C.
commands_list = []

for sweep_content in sweep_var:

    if 'esm_embeddings' in sweep_content:
        input_dim = 1280
        hidden_dim = 16
        kernel_size = 5
    elif 'esmc_600m_embeddings' in sweep_content:
        input_dim = 1152
        hidden_dim = 16
        kernel_size = 5
    else:
        # Fail loudly instead of silently reusing values from a previous
        # iteration (or from stale kernel state) for an unknown directory.
        raise ValueError(f"No model settings defined for embeddings path: {sweep_content}")

    # The run name is suffixed with the last path component of the embeddings directory.
    run_name = f"{base_config}_model_{sweep_content.split('/')[-1]}"
    cmd = f"python {script_path} -cn {base_config} run_name={run_name} data.dataset_cfg.path_to_embeddings={sweep_content} model.base_model.input_dim={input_dim} model.base_model.hidden_dim={hidden_dim} model.base_model.kernel_size={kernel_size}"
    commands_list.append(cmd)

In [16]:
# Turn each training command into a self-contained shell line: activate the
# environment, cd into the repository, then run the command. One line per task.
tasks_list = [f"{env_cmd}; cd {home_path}; {cmd}\n" for cmd in commands_list]

# Persist the task lines; the file is named after the config.
tasks_file = f"{cmds_path}/{base_config}_tasks"
with open(tasks_file, "w") as f:
    f.writelines(tasks_list)

# Display the generated task lines as the cell output.
tasks_list

['source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241216_train_super_esm_vs_esmc run_name=241216_train_super_esm_vs_esmc_model_esm_embeddings data.dataset_cfg.path_to_embeddings=/projects/ml/itopt/datasets/esm_embeddings model.base_model.input_dim=1280 model.base_model.hidden_dim=16 model.base_model.kernel_size=5\n',
 'source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241216_train_super_esm_vs_esmc run_name=241216_train_super_esm_vs_esmc_model_esmc_600m_embeddings data.dataset_cfg.path_to_embeddings=/projects/ml/itopt/datasets/esmc_600m_embeddings model.base_model.input_dim=1152 model.base_model.hidden_dim=16 model.base_model.kernel_size=5\n']

In [17]:
# Write the Slurm array wrapper for this sweep: array task N runs line N of the
# tasks file. Log and submit locations are derived from home_path so they follow
# the repository checkout instead of being hard-coded.
logs_path = os.path.join(home_path, 'logs')
submit_dir = os.path.join(home_path, 'submit')

# Create both directories up front; open() below fails on a missing submit directory.
# NOTE(review): Slurm is not expected to create a missing -o directory — confirm on this cluster.
os.makedirs(logs_path, exist_ok=True)
os.makedirs(submit_dir, exist_ok=True)

submit_script_content = f"""#!/bin/bash
#SBATCH -p gpu
#SBATCH --mem=8g
#SBATCH --gres=gpu:a4000:1
#SBATCH -c 1
#SBATCH -o {logs_path}/ajob_%j_%a.out
#SBATCH -J {base_config}
#SBATCH -t 10:00:00

# get line number ${{SLURM_ARRAY_TASK_ID}} from tasks file
CMD=$(sed -n "${{SLURM_ARRAY_TASK_ID}}p" {cmds_path}/{base_config}_tasks)
# tell bash to run $CMD
echo "${{CMD}}" | bash
"""

submit_script_path = os.path.join(submit_dir, f'{base_config}_array.sh')

with open(submit_script_path, 'w') as f:
    f.write(submit_script_content)

# Print the sbatch invocation; the array range is sized from the tasks file's line count.
print(f"Submit scriptt: \nsbatch -a 1-$(cat {cmds_path}/{base_config}_tasks|wc -l) {submit_script_path}")



Submit scriptt: 
sbatch -a 1-$(cat /home/jq01/git/cleo_jason_working_branch/cmds/241216_train_super_esm_vs_esmc_tasks|wc -l) /home/jq01/git/cleo_jason_working_branch/submit/241216_train_super_esm_vs_esmc_array.sh


### 241216: Fragment training 5 sequential training runs. 
#### hidden 16, lr 1e-4, batch size: 64, epoch: 500

In [18]:

# Environment activation snippet and repository locations used by every task.
env_cmd = "source activate /home/jq01/.conda/envs/cleo"
home_path = '/home/jq01/git/cleo_jason_working_branch'
script_name = 'train_surrogate.py'
script_path = os.path.join(home_path, script_name)
cmds_path = os.path.join(home_path, 'cmds')

# The config name is stamped with today's date (yymmdd).
# NOTE(review): re-running on a later day yields a different config name — confirm intended.
now = datetime.datetime.now()
date_str = now.strftime("%y%m%d")
base_config = f"{date_str}_train_final_fragment"   # this is the yaml file name

# Five repeats of the same config; the run index only changes run_name.
training_runs = list(range(1, 6))
sweep_var = training_runs  # this is the hyperparameter to sweep

commands_list = [
    f"python {script_path} -cn {base_config} run_name={base_config}_run{run_idx}"
    for run_idx in sweep_var
]

# Turn each training command into a self-contained shell line: activate the
# environment, cd into the repository, then run the command. One line per task.
tasks_list = [f"{env_cmd}; cd {home_path}; {cmd}\n" for cmd in commands_list]

# Persist the task lines; the file is named after the config.
tasks_file = f"{cmds_path}/{base_config}_tasks"
with open(tasks_file, "w") as f:
    f.writelines(tasks_list)

# Display the generated task lines as the cell output.
tasks_list

['source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241216_train_final_fragment run_name=241216_train_final_fragment_run1\n',
 'source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241216_train_final_fragment run_name=241216_train_final_fragment_run2\n',
 'source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241216_train_final_fragment run_name=241216_train_final_fragment_run3\n',
 'source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241216_train_final_fragment run_name=241216_train_final_fragment_run4\n',
 'source activate /home/jq01/.conda/envs/cleo; cd /h

In [19]:
# Write the Slurm array wrapper for this sweep: array task N runs line N of the
# tasks file. Log and submit locations are derived from home_path so they follow
# the repository checkout instead of being hard-coded.
logs_path = os.path.join(home_path, 'logs')
submit_dir = os.path.join(home_path, 'submit')

# Create both directories up front; open() below fails on a missing submit directory.
# NOTE(review): Slurm is not expected to create a missing -o directory — confirm on this cluster.
os.makedirs(logs_path, exist_ok=True)
os.makedirs(submit_dir, exist_ok=True)

submit_script_content = f"""#!/bin/bash
#SBATCH -p gpu
#SBATCH --mem=8g
#SBATCH --gres=gpu:a4000:1
#SBATCH -c 1
#SBATCH -o {logs_path}/ajob_%j_%a.out
#SBATCH -J {base_config}
#SBATCH -t 10:00:00

# get line number ${{SLURM_ARRAY_TASK_ID}} from tasks file
CMD=$(sed -n "${{SLURM_ARRAY_TASK_ID}}p" {cmds_path}/{base_config}_tasks)
# tell bash to run $CMD
echo "${{CMD}}" | bash
"""

submit_script_path = os.path.join(submit_dir, f'{base_config}_array.sh')

with open(submit_script_path, 'w') as f:
    f.write(submit_script_content)

# Print the sbatch invocation; the array range is sized from the tasks file's line count.
print(f"Submit scriptt: \nsbatch -a 1-$(cat {cmds_path}/{base_config}_tasks|wc -l) {submit_script_path}")



Submit scriptt: 
sbatch -a 1-$(cat /home/jq01/git/cleo_jason_working_branch/cmds/241216_train_final_fragment_tasks|wc -l) /home/jq01/git/cleo_jason_working_branch/submit/241216_train_final_fragment_array.sh


### 241216: Sequence training 5 sequential training runs.
### hidden 16, lr 1e-4, batch size: 64, epoch: 500

In [3]:

# Environment activation snippet and repository locations used by every task.
env_cmd = "source activate /home/jq01/.conda/envs/cleo"
home_path = '/home/jq01/git/cleo_jason_working_branch'
script_name = 'train_surrogate.py'
script_path = os.path.join(home_path, script_name)
cmds_path = os.path.join(home_path, 'cmds')

# The config name is stamped with today's date (yymmdd).
# NOTE(review): re-running on a later day yields a different config name — confirm intended.
now = datetime.datetime.now()
date_str = now.strftime("%y%m%d")
base_config = f"{date_str}_train_final_sequence"   # this is the yaml file name

# Five repeats of the same config; the run index only changes run_name.
training_runs = list(range(1, 6))
sweep_var = training_runs  # this is the hyperparameter to sweep

commands_list = [
    f"python {script_path} -cn {base_config} run_name={base_config}_run{run_idx}"
    for run_idx in sweep_var
]

# Turn each training command into a self-contained shell line: activate the
# environment, cd into the repository, then run the command. One line per task.
tasks_list = [f"{env_cmd}; cd {home_path}; {cmd}\n" for cmd in commands_list]

# Persist the task lines; the file is named after the config.
tasks_file = f"{cmds_path}/{base_config}_tasks"
with open(tasks_file, "w") as f:
    f.writelines(tasks_list)

# Display the generated task lines as the cell output.
tasks_list

['source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241217_train_final_sequence run_name=241217_train_final_sequence_run1\n',
 'source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241217_train_final_sequence run_name=241217_train_final_sequence_run2\n',
 'source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241217_train_final_sequence run_name=241217_train_final_sequence_run3\n',
 'source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241217_train_final_sequence run_name=241217_train_final_sequence_run4\n',
 'source activate /home/jq01/.conda/envs/cleo; cd /h

In [4]:
# Write the Slurm array wrapper for this sweep: array task N runs line N of the
# tasks file. Log and submit locations are derived from home_path so they follow
# the repository checkout instead of being hard-coded.
logs_path = os.path.join(home_path, 'logs')
submit_dir = os.path.join(home_path, 'submit')

# Create both directories up front; open() below fails on a missing submit directory.
# NOTE(review): Slurm is not expected to create a missing -o directory — confirm on this cluster.
os.makedirs(logs_path, exist_ok=True)
os.makedirs(submit_dir, exist_ok=True)

submit_script_content = f"""#!/bin/bash
#SBATCH -p gpu
#SBATCH --mem=8g
#SBATCH --gres=gpu:a4000:1
#SBATCH -c 1
#SBATCH -o {logs_path}/ajob_%j_%a.out
#SBATCH -J {base_config}
#SBATCH -t 10:00:00

# get line number ${{SLURM_ARRAY_TASK_ID}} from tasks file
CMD=$(sed -n "${{SLURM_ARRAY_TASK_ID}}p" {cmds_path}/{base_config}_tasks)
# tell bash to run $CMD
echo "${{CMD}}" | bash
"""

submit_script_path = os.path.join(submit_dir, f'{base_config}_array.sh')

with open(submit_script_path, 'w') as f:
    f.write(submit_script_content)

# Print the sbatch invocation; the array range is sized from the tasks file's line count.
print(f"Submit scriptt: \nsbatch -a 1-$(cat {cmds_path}/{base_config}_tasks|wc -l) {submit_script_path}")

Submit scriptt: 
sbatch -a 1-$(cat /home/jq01/git/cleo_jason_working_branch/cmds/241217_train_final_sequence_tasks|wc -l) /home/jq01/git/cleo_jason_working_branch/submit/241217_train_final_sequence_array.sh


### 241217: FRAGMENT training 5 sequential training runs WITHOUT VALIDATION
 
#### hidden 16, lr 1e-4, batch size: 64, epoch: 500

In [14]:
# Environment activation snippet and repository locations used by every task.
env_cmd = "source activate /home/jq01/.conda/envs/cleo"
home_path = '/home/jq01/git/cleo_jason_working_branch'
script_name = 'train_surrogate.py'
script_path = os.path.join(home_path, script_name)
cmds_path = os.path.join(home_path, 'cmds')

# The config name is stamped with today's date (yymmdd).
# NOTE(review): re-running on a later day yields a different config name — confirm intended.
now = datetime.datetime.now()
date_str = now.strftime("%y%m%d")
base_config = f"{date_str}_train_final_fragment_wo_val"   # this is the yaml file name

# Five repeats of the same config; the run index only changes run_name.
training_runs = list(range(1, 6))
sweep_var = training_runs  # this is the hyperparameter to sweep

commands_list = [
    f"python {script_path} -cn {base_config} run_name={base_config}_run{run_idx}"
    for run_idx in sweep_var
]

# Turn each training command into a self-contained shell line: activate the
# environment, cd into the repository, then run the command. One line per task.
tasks_list = [f"{env_cmd}; cd {home_path}; {cmd}\n" for cmd in commands_list]

# Persist the task lines; the file is named after the config.
tasks_file = f"{cmds_path}/{base_config}_tasks"
with open(tasks_file, "w") as f:
    f.writelines(tasks_list)

# Display the generated task lines as the cell output.
tasks_list

['source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241217_train_final_fragment_wo_val run_name=241217_train_final_fragment_wo_val_run1\n',
 'source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241217_train_final_fragment_wo_val run_name=241217_train_final_fragment_wo_val_run2\n',
 'source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241217_train_final_fragment_wo_val run_name=241217_train_final_fragment_wo_val_run3\n',
 'source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241217_train_final_fragment_wo_val run_name=241217_train_final_fragment_wo_val_run4\

In [15]:
# Write the Slurm array wrapper for this sweep: array task N runs line N of the
# tasks file. Log and submit locations are derived from home_path so they follow
# the repository checkout instead of being hard-coded.
logs_path = os.path.join(home_path, 'logs')
submit_dir = os.path.join(home_path, 'submit')

# Create both directories up front; open() below fails on a missing submit directory.
# NOTE(review): Slurm is not expected to create a missing -o directory — confirm on this cluster.
os.makedirs(logs_path, exist_ok=True)
os.makedirs(submit_dir, exist_ok=True)

submit_script_content = f"""#!/bin/bash
#SBATCH -p gpu
#SBATCH --mem=8g
#SBATCH --gres=gpu:a6000:1
#SBATCH -c 1
#SBATCH -o {logs_path}/ajob_%j_%a.out
#SBATCH -J {base_config}
#SBATCH -t 10:00:00

# get line number ${{SLURM_ARRAY_TASK_ID}} from tasks file
CMD=$(sed -n "${{SLURM_ARRAY_TASK_ID}}p" {cmds_path}/{base_config}_tasks)
# tell bash to run $CMD
echo "${{CMD}}" | bash
"""

submit_script_path = os.path.join(submit_dir, f'{base_config}_array.sh')

with open(submit_script_path, 'w') as f:
    f.write(submit_script_content)

# Print the sbatch invocation; the array range is sized from the tasks file's line count.
print(f"Submit scriptt: \nsbatch -a 1-$(cat {cmds_path}/{base_config}_tasks|wc -l) {submit_script_path}")



Submit scriptt: 
sbatch -a 1-$(cat /home/jq01/git/cleo_jason_working_branch/cmds/241217_train_final_fragment_wo_val_tasks|wc -l) /home/jq01/git/cleo_jason_working_branch/submit/241217_train_final_fragment_wo_val_array.sh


### 241216: SEQUENCE training 5 sequential training runs WITHOUT VALIDATION
 
#### hidden 16, lr 1e-4, batch size: 64, epoch: 500

In [19]:

# Environment activation snippet and repository locations used by every task.
env_cmd = "source activate /home/jq01/.conda/envs/cleo"
home_path = '/home/jq01/git/cleo_jason_working_branch'
script_name = 'train_surrogate.py'
script_path = os.path.join(home_path, script_name)
cmds_path = os.path.join(home_path, 'cmds')

# The config name is stamped with today's date (yymmdd).
# NOTE(review): re-running on a later day yields a different config name — confirm intended.
now = datetime.datetime.now()
date_str = now.strftime("%y%m%d")
base_config = f"{date_str}_train_final_sequence_wo_val"   # this is the yaml file name

# Five repeats of the same config; the run index only changes run_name.
training_runs = list(range(1, 6))
sweep_var = training_runs  # this is the hyperparameter to sweep

commands_list = [
    f"python {script_path} -cn {base_config} run_name={base_config}_run{run_idx}"
    for run_idx in sweep_var
]

# Turn each training command into a self-contained shell line: activate the
# environment, cd into the repository, then run the command. One line per task.
tasks_list = [f"{env_cmd}; cd {home_path}; {cmd}\n" for cmd in commands_list]

# Persist the task lines; the file is named after the config.
tasks_file = f"{cmds_path}/{base_config}_tasks"
with open(tasks_file, "w") as f:
    f.writelines(tasks_list)

# Display the generated task lines as the cell output.
tasks_list

['source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241217_train_final_sequence_wo_val run_name=241217_train_final_sequence_wo_val_run1\n',
 'source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241217_train_final_sequence_wo_val run_name=241217_train_final_sequence_wo_val_run2\n',
 'source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241217_train_final_sequence_wo_val run_name=241217_train_final_sequence_wo_val_run3\n',
 'source activate /home/jq01/.conda/envs/cleo; cd /home/jq01/git/cleo_jason_working_branch; python /home/jq01/git/cleo_jason_working_branch/train_surrogate.py -cn 241217_train_final_sequence_wo_val run_name=241217_train_final_sequence_wo_val_run4\

In [20]:
# Write the Slurm array wrapper for this sweep: array task N runs line N of the
# tasks file. Log and submit locations are derived from home_path so they follow
# the repository checkout instead of being hard-coded.
logs_path = os.path.join(home_path, 'logs')
submit_dir = os.path.join(home_path, 'submit')

# Create both directories up front; open() below fails on a missing submit directory.
# NOTE(review): Slurm is not expected to create a missing -o directory — confirm on this cluster.
os.makedirs(logs_path, exist_ok=True)
os.makedirs(submit_dir, exist_ok=True)

submit_script_content = f"""#!/bin/bash
#SBATCH -p gpu
#SBATCH --mem=8g
#SBATCH --gres=gpu:a6000:1
#SBATCH -c 1
#SBATCH -o {logs_path}/ajob_%j_%a.out
#SBATCH -J {base_config}
#SBATCH -t 10:00:00

# get line number ${{SLURM_ARRAY_TASK_ID}} from tasks file
CMD=$(sed -n "${{SLURM_ARRAY_TASK_ID}}p" {cmds_path}/{base_config}_tasks)
# tell bash to run $CMD
echo "${{CMD}}" | bash
"""

submit_script_path = os.path.join(submit_dir, f'{base_config}_array.sh')

with open(submit_script_path, 'w') as f:
    f.write(submit_script_content)

# Print the sbatch invocation; the array range is sized from the tasks file's line count.
print(f"Submit scriptt: \nsbatch -a 1-$(cat {cmds_path}/{base_config}_tasks|wc -l) {submit_script_path}")

Submit scriptt: 
sbatch -a 1-$(cat /home/jq01/git/cleo_jason_working_branch/cmds/241217_train_final_sequence_wo_val_tasks|wc -l) /home/jq01/git/cleo_jason_working_branch/submit/241217_train_final_sequence_wo_val_array.sh
