# Create jobs for the "contains SET" task

In [28]:
import itertools
import os
import subprocess

In [29]:
def mkdir(dir):
    """Create the directory `dir` if it does not already exist.

    Missing parent directories are created as well, and an existing
    directory is left untouched, so the call is safe to repeat.

    Parameters
    ----------
    dir : str or path-like
        Path of the directory to create.

    Raises
    ------
    FileExistsError
        If `dir` exists but is not a directory.
    """
    # A single makedirs(exist_ok=True) call avoids the check-then-create race
    # of `os.path.exists` followed by `os.mkdir`, and also handles nested paths.
    os.makedirs(dir, exist_ok=True)

In [30]:
# Settings shared by every generated SLURM job.

# Where the job scripts (job_directory) and their stdout logs (out_dir) go.
job_directory = "set"
out_dir = f'{job_directory}/.out'

# Directory each job script changes into before launching training.
project_dir = "/gpfs/gibbs/project/lafferty/ma2393/relational-neural-networks/experiments/set"

# Resource request; these values are substituted into the #SBATCH directives.
partition = 'gpu'
time_str = '00-01:00:00'
nodes = 1
ntasks = 1
cpu_per_task = 8
mem_per_cpu = 4  # emitted with a "G" suffix in the job script
n_gpus = 1
gpus_constraints = '"a100|rtx3090|a5000|v100|rtx2080ti"'  # passed to `#SBATCH -C`


# Make sure both output locations exist before any script is written.
mkdir(job_directory)
mkdir(out_dir)

In [31]:
# Parameter grid: one job per (model, train_size, start_trial) combination.

models = [
    'relconvnet',
    'relconvnet_maxpooling',
    'corelnet',
    'nosoftmax_corelnet',
    'predinet',
    'transformer',
]
train_sizes = [-1]
n_epochs = 100

# Every job is passed num_trials=num_trials_per_run and its own start_trial,
# with start_trial ranging over 0 .. num_trials - 1.
num_trials = 10
num_trials_per_run = 1
start_trials = list(range(num_trials))

# Cartesian product of the three axes, in the same order as nested loops
# over models, then train_sizes, then start_trials.
jobs_params = [
    {
        'model': model_name,
        'n_epochs': n_epochs,
        'num_trials': num_trials_per_run,
        'start_trial': first_trial,
        'train_size': size,
    }
    for model_name, size, first_trial in itertools.product(models, train_sizes, start_trials)
]

In [32]:
# Sanity check: number of jobs = len(models) * len(train_sizes) * len(start_trials).
len(jobs_params)

30

In [33]:
# Peek at the first five parameter dicts to verify the grid looks right.
jobs_params[:5]

[{'model': 'relconvnet',
  'n_epochs': 100,
  'num_trials': 1,
  'start_trial': 5,
  'train_size': -1},
 {'model': 'relconvnet',
  'n_epochs': 100,
  'num_trials': 1,
  'start_trial': 6,
  'train_size': -1},
 {'model': 'relconvnet',
  'n_epochs': 100,
  'num_trials': 1,
  'start_trial': 7,
  'train_size': -1},
 {'model': 'relconvnet',
  'n_epochs': 100,
  'num_trials': 1,
  'start_trial': 8,
  'train_size': -1},
 {'model': 'relconvnet',
  'n_epochs': 100,
  'num_trials': 1,
  'start_trial': 9,
  'train_size': -1}]

In [34]:
# Write one SLURM batch script per parameter set and collect the script paths.
job_files = []

for params in jobs_params:

    job_name = f"set-{params['model']}-{params['start_trial']}"
    job_file = os.path.join(job_directory, f"{job_name}.job")

    with open(job_file, 'w') as script:
        # Scheduler directives: resources, log location, notifications.
        sbatch_header = [
            "#!/bin/bash\n",
            f"#SBATCH --partition={partition}\n",
            f"#SBATCH --job-name={job_name}\n",
            f"#SBATCH --output={out_dir}/{job_name}-%j.out\n",
            f"#SBATCH --ntasks={ntasks} --nodes={nodes}\n",
            f"#SBATCH --cpus-per-task={cpu_per_task}\n",
            f"#SBATCH --mem-per-cpu={mem_per_cpu}G\n",
            f"#SBATCH --time={time_str}\n",
            "#SBATCH --mail-type=ALL\n",
            f"#SBATCH -C {gpus_constraints} --gpus={n_gpus}\n",
            "\n",
        ]

        # Environment setup executed on the compute node.
        environment_setup = [
            f"cd {project_dir}\n",          # navigate to project directory
            "module restore python_env\n",  # restore the saved module collection
            "conda activate tf\n",          # activate conda environment
            "nvidia-smi -L\n",              # print gpu information
            "\n",
        ]

        # Training command, split over two lines with a shell line continuation.
        train_command = [
            f"python train_model.py --model '{params['model']}' \\\n",
            f"\t --n_epochs {params['n_epochs']} --train_size {params['train_size']} --start_trial {params['start_trial']} --num_trials {params['num_trials']}\n",
        ]

        script.write("".join(sbatch_header + environment_setup + train_command))

    job_files.append(job_file)

In [35]:
# run jobs
# Submitting queues real jobs on the cluster, so ask for confirmation first.
# Pressing Enter (or any reply other than "n"/"no") proceeds; an explicit
# "n"/"no" aborts instead of being silently ignored.
answer = input('confirm that you would like to run this [Y/n]: ')

if answer.strip().lower() in ('n', 'no'):
    print('aborted: no jobs were submitted')
else:
    for job_file in job_files:
        print(f'submitting {job_file}')
        try:
            # Argument list (no shell) so the path reaches sbatch verbatim.
            result = subprocess.run(['sbatch', job_file], capture_output=True, text=True)
        except FileNotFoundError:
            # sbatch is not installed or not on PATH: later submissions would fail too.
            print('sbatch not found on PATH; stopping submission')
            break

        # Echo sbatch's own message (e.g. the assigned job id).
        print(result.stdout, end='')

        # Surface failures instead of discarding the exit status.
        if result.returncode != 0:
            print(f'FAILED to submit {job_file} (exit code {result.returncode}): {result.stderr.strip()}')

submitting set/set-relconvnet-5.job
Submitted batch job 26025290
submitting set/set-relconvnet-6.job
Submitted batch job 26025291
submitting set/set-relconvnet-7.job
Submitted batch job 26025292
submitting set/set-relconvnet-8.job
Submitted batch job 26025293
submitting set/set-relconvnet-9.job
Submitted batch job 26025294
submitting set/set-relconvnet_maxpooling-5.job
Submitted batch job 26025295
submitting set/set-relconvnet_maxpooling-6.job
Submitted batch job 26025296
submitting set/set-relconvnet_maxpooling-7.job
Submitted batch job 26025297
submitting set/set-relconvnet_maxpooling-8.job
Submitted batch job 26025298
submitting set/set-relconvnet_maxpooling-9.job
Submitted batch job 26025299
submitting set/set-corelnet-5.job
Submitted batch job 26025300
submitting set/set-corelnet-6.job
Submitted batch job 26025301
submitting set/set-corelnet-7.job
Submitted batch job 26025302
submitting set/set-corelnet-8.job
Submitted batch job 26025303
submitting set/set-corelnet-9.job
Submitted