In [1]:
import os
import itertools

In [2]:
def mkdir(dir):
    """Create the directory `dir` if it does not already exist.

    Missing parent directories are created as well, and calling this on a
    directory that already exists is a no-op.

    Args:
        dir: path of the directory to create.

    Raises:
        FileExistsError: if `dir` exists but is not a directory.
    """
    # makedirs(exist_ok=True) replaces the racy "check, then create" pattern:
    # nothing can create the directory between the existence test and the mkdir.
    os.makedirs(dir, exist_ok=True)

In [3]:
# Settings shared by every generated job.

# --- where the .job files and the SLURM log files are written (relative paths)
job_directory = "object_sorting"
out_dir = f'{job_directory}/.out'

# --- SLURM resource request (each value is written into an #SBATCH line)
partition = 'gpu'
time_str = '00-12:00:00'  # value passed to --time
nodes = 1
ntasks = 1
cpu_per_task = 8
mem_per_cpu = 2  # written to the script with a "G" suffix
n_gpus = 1
# gpus_constraints = '"a100|rtx3090|v100|rtx2080ti"' # all gpus are pretty good now

# --- directory each job changes into before launching the python script
project_dir = "/home/ma2393/scratch/abstract_transformer/experiments/object_sorting"

# make sure both output locations exist before any job file is written
for directory in (job_directory, out_dir):
    mkdir(directory)

In [4]:
# Parameter sets for the individual jobs: one dict per job.
# Each dict carries the values later passed as the --ee/--ea/--de/--da and
# --e_n_layers/--d_n_layers flags of eval_learning_curves.py.

jobs_params = [
    dict(ee=ee, ea=ea, de=de, da=da, e_n_layers=e_n_layers, d_n_layers=d_n_layers)
    for ee, ea, de, da, e_n_layers, d_n_layers in [
        # ee, ea, de, da, e_n_layers, d_n_layers
        (2, 2, 2, 2, 2, 2),
        (2, 2, 2, 0, 2, 2),
        (0, 2, 2, 0, 2, 2),
        (0, 4, 4, 0, 2, 2),
        (2, 0, 2, 0, 2, 2),
        (4, 0, 4, 0, 2, 2),
    ]
]

In [5]:
# Display the parameter sets that will be turned into job files.
# NOTE(review): the output saved with this cell was produced by an earlier
# parameter list (5 entries, different values, extra d_model/dff keys) and does
# not match the definition above -- restart the kernel and re-run before relying on it.
jobs_params

[{'ee': 4,
  'ea': 4,
  'de': 4,
  'da': 4,
  'e_n_layers': 1,
  'd_n_layers': 1,
  'd_model': 512,
  'dff': 2048},
 {'ee': 4,
  'ea': 4,
  'de': 8,
  'da': 0,
  'e_n_layers': 1,
  'd_n_layers': 1,
  'd_model': 512,
  'dff': 2048},
 {'ee': 0,
  'ea': 8,
  'de': 8,
  'da': 0,
  'e_n_layers': 1,
  'd_n_layers': 1,
  'd_model': 512,
  'dff': 2048},
 {'ee': 4,
  'ea': 0,
  'de': 4,
  'da': 0,
  'e_n_layers': 1,
  'd_n_layers': 1,
  'd_model': 512,
  'dff': 2048},
 {'ee': 8,
  'ea': 0,
  'de': 8,
  'da': 0,
  'e_n_layers': 1,
  'd_n_layers': 1,
  'd_model': 512,
  'dff': 2048}]

In [6]:
# Create one SLURM batch script per entry of `jobs_params`.
# Each script is written to `<job_directory>/<job_name>.job`; the paths of all
# written scripts are collected in `created_jobs`.
created_jobs = []
for params in jobs_params:

    # the job name encodes the parameter set, so every job gets its own file
    job_name = (
        f"object_sorting-ee{params['ee']}-ea{params['ea']}"
        f"-de{params['de']}-da{params['da']}"
        f"-el{params['e_n_layers']}-dl{params['d_n_layers']}"
    )

    job_file = os.path.join(job_directory, f"{job_name}.job")

    # the experiment command: a single shell line assembled from several pieces
    python_command = (
        "python eval_learning_curves.py "
        "--num_trials 5 --n_epochs 2500 "
        f"--ee {params['ee']} --ea {params['ea']} --de {params['de']} --da {params['da']} "
        f"--e_n_layers {params['e_n_layers']} --d_n_layers {params['d_n_layers']}"
    )

    # one list entry per line of the generated script ("" gives a blank line)
    script_lines = [
        "#!/bin/bash",
        f"#SBATCH --partition={partition}",
        f"#SBATCH --job-name={job_name}",
        f"#SBATCH --output={out_dir}/%j-{job_name}.out",  # %j is SLURM's job-id placeholder
        f"#SBATCH --ntasks={ntasks} --nodes={nodes}",
        f"#SBATCH --cpus-per-task={cpu_per_task}",
        f"#SBATCH --mem-per-cpu={mem_per_cpu}G",
        f"#SBATCH --time={time_str}",
        "#SBATCH --mail-type=ALL",
        f"#SBATCH --gpus={n_gpus}",
        # f"#SBATCH -C {gpus_constraints}",  # optional GPU-type constraint (disabled)
        "",
        "module load StdEnv",
        "export SLURM_EXPORT_ENV=ALL",
        "",
        # "module restore python_env",  # alternative way to load modules (disabled)
        "module load miniconda",  # load modules i need
        # "conda init",
        "conda activate abstract_transformer",  # activate conda environment
        "conda info --envs",  # list conda environments in the job log
        "",
        "nvidia-smi -L",  # print gpu information
        "",
        f"cd {project_dir}",  # navigate to project directory
        python_command,  # run python script
    ]

    # write the whole script in one call; write() is the correct API for a
    # single string (writelines() would iterate it character by character)
    with open(job_file, 'w') as fh:
        fh.write("\n".join(script_lines) + "\n")

    created_jobs.append(job_file)

In [7]:
# Display the paths of the job files written by the loop above.
# NOTE(review): the output saved with this cell lists 5 files with "el1-dl1"
# names, which does not match the 6 "el2-dl2" parameter sets currently defined
# -- restart the kernel and re-run before submitting.
created_jobs

['object_sorting/object_sorting-ee4-ea4-de4-da4-el1-dl1.job',
 'object_sorting/object_sorting-ee4-ea4-de8-da0-el1-dl1.job',
 'object_sorting/object_sorting-ee0-ea8-de8-da0-el1-dl1.job',
 'object_sorting/object_sorting-ee4-ea0-de4-da0-el1-dl1.job',
 'object_sorting/object_sorting-ee8-ea0-de8-da0-el1-dl1.job']