In [37]:
import os
import itertools

In [38]:
def mkdir(dir):
    if not os.path.exists(dir):
        os.mkdir(dir)

In [39]:
# global job parameters

job_directory = f"math"
out_dir = f'{job_directory}/.out'
time_str = '00-24:00:00'
partition = 'gpu'
ntasks = 1
nodes = 1
cpu_per_task = 8
mem_per_cpu = 2
n_gpus = 1
# gpus_constraints = '"a100|rtx3090|v100|rtx2080ti"' # all gpus are pretty good now
project_dir = "/home/ma2393/project/abstract_transformer/experiments/math"

mkdir(job_directory)
mkdir(out_dir)

In [40]:
# define params of individual jobs

model_params = [
    # dict(e_sa=8, e_rca=0, d_sa=8, d_rca=0, d_cross=8, e_n_layers=2, d_n_layers=2, d_model=144, dff=144*2, symbol_type='NA', rca_type='NA'),
    dict(e_sa=4, e_rca=4, d_sa=4, d_rca=4, d_cross=8, e_n_layers=2, d_n_layers=2, d_model=128, dff=256, symbol_type='symbolic_attention', rca_type='relational_attention'),
    dict(e_sa=4, e_rca=4, d_sa=8, d_rca=0, d_cross=8, e_n_layers=2, d_n_layers=2, d_model=128, dff=256, symbol_type='symbolic_attention', rca_type='relational_attention'),
    dict(e_sa=4, e_rca=4, d_sa=4, d_rca=4, d_cross=8, e_n_layers=2, d_n_layers=2, d_model=128, dff=256, symbol_type='position_relative', rca_type='relational_attention'),
    dict(e_sa=4, e_rca=4, d_sa=8, d_rca=0, d_cross=8, e_n_layers=2, d_n_layers=2, d_model=128, dff=256, symbol_type='position_relative', rca_type='relational_attention'),
]

In [41]:
tasks =  ['algebra__linear_1d', 'polynomials__add', 'polynomials__expand', 'calculus__differentiate', 'algebra__sequence_next_term']
n_epochs = 100
batch_size = 512

In [42]:
jobs_params = []
for model_p, task in itertools.product(model_params, tasks):
    jobs_params.append({**model_p, 'task': task, 'n_epochs': n_epochs, 'batch_size': batch_size})
print(jobs_params)
print(len(jobs_params))

[{'e_sa': 4, 'e_rca': 4, 'd_sa': 4, 'd_rca': 4, 'd_cross': 8, 'e_n_layers': 2, 'd_n_layers': 2, 'd_model': 128, 'dff': 256, 'symbol_type': 'symbolic_attention', 'rca_type': 'relational_attention', 'task': 'algebra__linear_1d', 'n_epochs': 100, 'batch_size': 512}, {'e_sa': 4, 'e_rca': 4, 'd_sa': 4, 'd_rca': 4, 'd_cross': 8, 'e_n_layers': 2, 'd_n_layers': 2, 'd_model': 128, 'dff': 256, 'symbol_type': 'symbolic_attention', 'rca_type': 'relational_attention', 'task': 'polynomials__add', 'n_epochs': 100, 'batch_size': 512}, {'e_sa': 4, 'e_rca': 4, 'd_sa': 4, 'd_rca': 4, 'd_cross': 8, 'e_n_layers': 2, 'd_n_layers': 2, 'd_model': 128, 'dff': 256, 'symbol_type': 'symbolic_attention', 'rca_type': 'relational_attention', 'task': 'polynomials__expand', 'n_epochs': 100, 'batch_size': 512}, {'e_sa': 4, 'e_rca': 4, 'd_sa': 4, 'd_rca': 4, 'd_cross': 8, 'e_n_layers': 2, 'd_n_layers': 2, 'd_model': 128, 'dff': 256, 'symbol_type': 'symbolic_attention', 'rca_type': 'relational_attention', 'task': 'calcul

In [43]:
# create jobs
created_jobs = []
for params in jobs_params:

    job_name = f"math--{params['task']}-e_sa{params['e_sa']}-e_rca{params['e_rca']}-d_sa{params['d_sa']}-d_rca{params['d_rca']}-el{params['e_n_layers']}-dl{params['d_n_layers']}-rca_type_{params['rca_type']}"

    job_file = os.path.join(job_directory, f"{job_name}.job")

    with open(job_file, 'w') as fh:
        fh.writelines(f"#!/bin/bash\n")
        fh.writelines(f"#SBATCH --partition={partition}\n")
        fh.writelines(f"#SBATCH --job-name={job_name}\n")
        fh.writelines(f"#SBATCH --output={out_dir}/%j-{job_name}.out\n")
        fh.writelines(f"#SBATCH --ntasks={ntasks} --nodes={nodes}\n")
        fh.writelines(f"#SBATCH --cpus-per-task={cpu_per_task}\n")
        fh.writelines(f"#SBATCH --mem-per-cpu={mem_per_cpu}G\n")
        fh.writelines(f"#SBATCH --time={time_str}\n")
        fh.writelines(f"#SBATCH --mail-type=ALL\n")
        fh.writelines(f"#SBATCH --gpus={n_gpus}\n")
        # fh.writelines(f"#SBATCH -C {gpus_constraints}\n")# --gpus={n_gpus}\n")

        fh.writelines('\n')
        fh.writelines('module load StdEnv\n')
        fh.writelines('export SLURM_EXPORT_ENV=ALL\n')
        fh.writelines('\n')

        # fh.writelines(f"module restore python_env\n") # load modules i need
        fh.writelines(f"module load miniconda\n") # load modules i need
        # fh.writelines(f"conda init\n")
        fh.writelines(f"conda activate abstract_transformer\n") # activate conda environment
        fh.writelines(f"conda info --envs\n") # activate conda environment

        fh.writelines('\n')
        fh.writelines(f"nvidia-smi -L\n") # print gpu information
        fh.writelines('\n')

        fh.writelines(f"cd {project_dir}\n") # navigate to project directory
        # run python script
        fh.writelines(f"python train_model.py ")
        fh.writelines(f"--task {params['task']} --n_epochs {params['n_epochs']} --batch_size {params['batch_size']} ")
        fh.writelines(f"--e_sa {params['e_sa']} --e_rca {params['e_rca']} --d_sa {params['d_sa']} --d_rca {params['d_rca']} --d_cross {params['d_cross']} ")
        fh.writelines(f"--d_model {params['d_model']} --dff {params['dff']} --rca_type {params['rca_type']} --symbol_type {params['symbol_type']} ")
        fh.writelines(f"--e_n_layers {params['e_n_layers']} --d_n_layers {params['d_n_layers']}\n")

    created_jobs.append(job_file)

In [44]:
proceed = input("Run all jobs? [Y/y or N/n]")
if proceed in ('Y', 'y'):
    for job in created_jobs:
        os.system(f'sbatch {job}')

Submitted batch job 8130
Submitted batch job 8131
Submitted batch job 8132
Submitted batch job 8133
Submitted batch job 8134
Submitted batch job 8135
Submitted batch job 8136
Submitted batch job 8137
Submitted batch job 8138
Submitted batch job 8139
Submitted batch job 8140
Submitted batch job 8141
Submitted batch job 8142
Submitted batch job 8143
Submitted batch job 8144
Submitted batch job 8145
Submitted batch job 8146
Submitted batch job 8147
Submitted batch job 8148
Submitted batch job 8149


In [45]:
created_jobs

['math/math--algebra__linear_1d-e_sa4-e_rca4-d_sa4-d_rca4-el2-dl2-rca_type_disentangled_v2.job',
 'math/math--polynomials__add-e_sa4-e_rca4-d_sa4-d_rca4-el2-dl2-rca_type_disentangled_v2.job',
 'math/math--polynomials__expand-e_sa4-e_rca4-d_sa4-d_rca4-el2-dl2-rca_type_disentangled_v2.job',
 'math/math--calculus__differentiate-e_sa4-e_rca4-d_sa4-d_rca4-el2-dl2-rca_type_disentangled_v2.job',
 'math/math--algebra__sequence_next_term-e_sa4-e_rca4-d_sa4-d_rca4-el2-dl2-rca_type_disentangled_v2.job',
 'math/math--algebra__linear_1d-e_sa4-e_rca4-d_sa8-d_rca0-el2-dl2-rca_type_disentangled_v2.job',
 'math/math--polynomials__add-e_sa4-e_rca4-d_sa8-d_rca0-el2-dl2-rca_type_disentangled_v2.job',
 'math/math--polynomials__expand-e_sa4-e_rca4-d_sa8-d_rca0-el2-dl2-rca_type_disentangled_v2.job',
 'math/math--calculus__differentiate-e_sa4-e_rca4-d_sa8-d_rca0-el2-dl2-rca_type_disentangled_v2.job',
 'math/math--algebra__sequence_next_term-e_sa4-e_rca4-d_sa8-d_rca0-el2-dl2-rca_type_disentangled_v2.job',
 'ma