In [9]:
import os
import itertools

In [10]:
def mkdir(dir):
    """Create directory ``dir`` if nothing exists at that path yet.

    Missing parent directories are created as well, and a directory that
    appears between the existence check and the creation (e.g. another process
    creating it concurrently) is tolerated instead of raising.

    Parameters
    ----------
    dir : str or os.PathLike
        Path of the directory to create. (The name shadows the ``dir`` builtin
        inside this function; it is kept so keyword callers keep working.)

    Returns
    -------
    None
    """
    # Keep the original guard: an already-existing path (even a regular file)
    # is left untouched and no error is raised.
    if not os.path.exists(dir):
        # makedirs + exist_ok handles nested paths and the check/create race.
        os.makedirs(dir, exist_ok=True)

In [11]:
# global job parameters

# Where things live: generated .job files go into `job_directory`, SLURM
# stdout/stderr logs into `out_dir`, and each job `cd`s into `project_dir`
# before launching training.
project_dir = "/home/ma2393/scratch/abstract_transformer/experiments/math"
job_directory = "math"
out_dir = f"{job_directory}/.out"

# Resources requested through the #SBATCH header of every job.
partition = "gpu"
time_str = "00-24:00:00"  # written to --time
nodes = 1
ntasks = 1
cpu_per_task = 8
mem_per_cpu = 2  # written to --mem-per-cpu with a "G" suffix
n_gpus = 1
# gpus_constraints = '"a100|rtx3090|v100|rtx2080ti"' # all gpus are pretty good now

# Make sure both output locations exist (parent first, then the log folder).
mkdir(job_directory)
mkdir(out_dir)

In [12]:
# define params of individual jobs

# Each model config combines a head allocation
# (e_sa, e_rca, d_sa, d_rca, d_cross) with a `disentangled_rca` flag; depth
# and width are the same for every config. The outer loop is the flag, so all
# flag=0 configs come first, followed by the flag=1 configs.
model_params = [
    dict(
        e_sa=e_sa, e_rca=e_rca, d_sa=d_sa, d_rca=d_rca, d_cross=d_cross,
        e_n_layers=2, d_n_layers=2, d_model=128, dff=256,
        disentangled_rca=disentangled_rca,
    )
    for disentangled_rca in (0, 1)
    for (e_sa, e_rca, d_sa, d_rca, d_cross) in [
        # (4, 4, 8, 0, 8),  # currently disabled
        (4, 4, 4, 4, 8),
        # (4, 0, 4, 0, 4),  # currently disabled
        (8, 0, 8, 0, 8),
    ]
]

In [13]:
# Task names to train on; every model configuration is run on each of them.
tasks = [
    "algebra__linear_1d",
    "polynomials__add",
    "polynomials__expand",
    "calculus__differentiate",
    "algebra__sequence_next_term",
]

# Training settings shared by every job.
n_epochs = 100
batch_size = 512

In [14]:
# One job per (model config, task) pair, with the model config as the outer
# loop. Each job dict is a copy of the model config extended with the task
# name and the shared training settings.
jobs_params = []
for model_p, task in itertools.product(model_params, tasks):
    jobs_params.append(
        dict(model_p, task=task, n_epochs=n_epochs, batch_size=batch_size)
    )

# Show the full job list followed by the number of jobs, one per line.
print(jobs_params, len(jobs_params), sep="\n")

[{'e_sa': 4, 'e_rca': 4, 'd_sa': 4, 'd_rca': 4, 'd_cross': 8, 'e_n_layers': 2, 'd_n_layers': 2, 'd_model': 128, 'dff': 256, 'disentangled_rca': 0, 'task': 'algebra__linear_1d', 'n_epochs': 100, 'batch_size': 512}, {'e_sa': 4, 'e_rca': 4, 'd_sa': 4, 'd_rca': 4, 'd_cross': 8, 'e_n_layers': 2, 'd_n_layers': 2, 'd_model': 128, 'dff': 256, 'disentangled_rca': 0, 'task': 'polynomials__add', 'n_epochs': 100, 'batch_size': 512}, {'e_sa': 4, 'e_rca': 4, 'd_sa': 4, 'd_rca': 4, 'd_cross': 8, 'e_n_layers': 2, 'd_n_layers': 2, 'd_model': 128, 'dff': 256, 'disentangled_rca': 0, 'task': 'polynomials__expand', 'n_epochs': 100, 'batch_size': 512}, {'e_sa': 4, 'e_rca': 4, 'd_sa': 4, 'd_rca': 4, 'd_cross': 8, 'e_n_layers': 2, 'd_n_layers': 2, 'd_model': 128, 'dff': 256, 'disentangled_rca': 0, 'task': 'calculus__differentiate', 'n_epochs': 100, 'batch_size': 512}, {'e_sa': 4, 'e_rca': 4, 'd_sa': 4, 'd_rca': 4, 'd_cross': 8, 'e_n_layers': 2, 'd_n_layers': 2, 'd_model': 128, 'dff': 256, 'disentangled_rca': 

In [15]:
# create jobs
#
# For every entry of `jobs_params`, write one SLURM batch script into
# `job_directory` and record its path in `created_jobs`. Each script requests
# the resources from the global job parameters, activates the conda
# environment, changes into `project_dir` and runs `train_model.py` with the
# job's hyper-parameters as command-line flags.
created_jobs = []
for params in jobs_params:

    # NOTE: d_cross, d_model and dff are not encoded in the name, so two
    # configs that differ only in those values would map to the same file and
    # the later one would overwrite the earlier one.
    job_name = (
        f"math--{params['task']}"
        f"-e_sa{params['e_sa']}-e_rca{params['e_rca']}"
        f"-d_sa{params['d_sa']}-d_rca{params['d_rca']}"
        f"-el{params['e_n_layers']}-dl{params['d_n_layers']}"
        f"-dis_rca{params['disentangled_rca']}"
    )

    job_file = os.path.join(job_directory, f"{job_name}.job")

    # newline="\n" disables newline translation so the script always has LF
    # line endings, regardless of the platform that generates it.
    with open(job_file, 'w', newline="\n") as fh:
        # --- SLURM header ---
        fh.write("#!/bin/bash\n")
        fh.write(f"#SBATCH --partition={partition}\n")
        fh.write(f"#SBATCH --job-name={job_name}\n")
        # %j is left literal here; it is a placeholder in the SLURM output path.
        fh.write(f"#SBATCH --output={out_dir}/%j-{job_name}.out\n")
        fh.write(f"#SBATCH --ntasks={ntasks} --nodes={nodes}\n")
        fh.write(f"#SBATCH --cpus-per-task={cpu_per_task}\n")
        fh.write(f"#SBATCH --mem-per-cpu={mem_per_cpu}G\n")
        fh.write(f"#SBATCH --time={time_str}\n")
        fh.write("#SBATCH --mail-type=ALL\n")
        fh.write(f"#SBATCH --gpus={n_gpus}\n")
        # fh.write(f"#SBATCH -C {gpus_constraints}\n")  # GPU type constraint, currently disabled

        # --- environment setup ---
        fh.write('\n')
        fh.write('module load StdEnv\n')
        fh.write('export SLURM_EXPORT_ENV=ALL\n')
        fh.write('\n')

        # fh.write("module restore python_env\n")  # alternative module setup, currently disabled
        fh.write("module load miniconda\n")  # load modules i need
        # fh.write("conda init\n")
        fh.write("conda activate abstract_transformer\n")  # activate conda environment
        fh.write("conda info --envs\n")  # list conda environments in the job log

        fh.write('\n')
        fh.write("nvidia-smi -L\n")  # print gpu information
        fh.write('\n')

        # --- training command ---
        fh.write(f"cd {project_dir}\n")  # navigate to project directory
        # run python script; the pieces below form one single command line
        fh.write("python train_model.py ")
        fh.write(f"--task {params['task']} --n_epochs {params['n_epochs']} --batch_size {params['batch_size']} ")
        fh.write(f"--e_sa {params['e_sa']} --e_rca {params['e_rca']} --d_sa {params['d_sa']} --d_rca {params['d_rca']} --d_cross {params['d_cross']} ")
        fh.write(f"--d_model {params['d_model']} --dff {params['dff']} --disentangled_rca {params['disentangled_rca']} ")
        fh.write(f"--e_n_layers {params['e_n_layers']} --d_n_layers {params['d_n_layers']}\n")

    created_jobs.append(job_file)

In [16]:
created_jobs

['math/math--algebra__linear_1d-e_sa4-e_rca4-d_sa4-d_rca4-el2-dl2-dis_rca0.job',
 'math/math--polynomials__add-e_sa4-e_rca4-d_sa4-d_rca4-el2-dl2-dis_rca0.job',
 'math/math--polynomials__expand-e_sa4-e_rca4-d_sa4-d_rca4-el2-dl2-dis_rca0.job',
 'math/math--calculus__differentiate-e_sa4-e_rca4-d_sa4-d_rca4-el2-dl2-dis_rca0.job',
 'math/math--algebra__sequence_next_term-e_sa4-e_rca4-d_sa4-d_rca4-el2-dl2-dis_rca0.job',
 'math/math--algebra__linear_1d-e_sa8-e_rca0-d_sa8-d_rca0-el2-dl2-dis_rca0.job',
 'math/math--polynomials__add-e_sa8-e_rca0-d_sa8-d_rca0-el2-dl2-dis_rca0.job',
 'math/math--polynomials__expand-e_sa8-e_rca0-d_sa8-d_rca0-el2-dl2-dis_rca0.job',
 'math/math--calculus__differentiate-e_sa8-e_rca0-d_sa8-d_rca0-el2-dl2-dis_rca0.job',
 'math/math--algebra__sequence_next_term-e_sa8-e_rca0-d_sa8-d_rca0-el2-dl2-dis_rca0.job',
 'math/math--algebra__linear_1d-e_sa4-e_rca4-d_sa4-d_rca4-el2-dl2-dis_rca1.job',
 'math/math--polynomials__add-e_sa4-e_rca4-d_sa4-d_rca4-el2-dl2-dis_rca1.job',
 'ma