# Create jobs for the "contains SET" task

In [4]:
import os
import itertools

In [5]:
def mkdir(dir):
    """Create the directory `dir` if it does not already exist.

    Missing parent directories are created as well, and calling this on an
    existing directory is a no-op.

    Args:
        dir: path of the directory to create.

    Raises:
        FileExistsError: if `dir` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of
    # `if not os.path.exists(dir): os.mkdir(dir)`.
    os.makedirs(dir, exist_ok=True)

In [6]:
# global job parameters

# Directory that receives the generated job scripts, and the sub-directory
# used for the `--output` log path of each job.
job_directory = "set"
out_dir = f"{job_directory}/.out"

# Resource request written into the #SBATCH header of every job script.
partition = "gpu"
time_str = "00-12:00:00"
nodes = 1
ntasks = 1
cpu_per_task = 8
mem_per_cpu = 4  # written with a "G" suffix in the job script
n_gpus = 1
# The inner double quotes are part of the value passed to `-C`.
gpus_constraints = '"a100|rtx3090|a5000|v100|rtx2080ti"'

# Directory each job changes into before launching the training script.
project_dir = "/gpfs/gibbs/project/lafferty/ma2393/relational-neural-networks/experiments/set"

# Make sure both output locations exist (parent first, then the log folder).
for _path in (job_directory, out_dir):
    mkdir(_path)

In [7]:
# define params of individual jobs

# Values shared by every job.
n_epochs = 100
num_trials_per_run = 1

# Axes of the job grid: one job per (model, train size, start trial).
models = ['relconvnet', 'relconvnet_maxpoooling', 'corelnet', 'predinet', 'transformer']
train_sizes = [-1]
num_trials = 5
start_trials = [trial for trial in range(num_trials)]

# Each job runs `num_trials_per_run` trial(s) beginning at its own start trial.
jobs_params = [
    {
        'model': model_name,
        'n_epochs': n_epochs,
        'num_trials': num_trials_per_run,
        'start_trial': first_trial,
        'train_size': size,
    }
    for model_name, size, first_trial in itertools.product(models, train_sizes, start_trials)
]

In [8]:
# number of job configurations generated (one per model x train size x start trial)
len(jobs_params)

25

In [9]:
# preview the first five job configurations
jobs_params[:5]

[{'model': 'relconvnet',
  'n_epochs': 100,
  'num_trials': 1,
  'start_trial': 0,
  'train_size': -1},
 {'model': 'relconvnet',
  'n_epochs': 100,
  'num_trials': 1,
  'start_trial': 1,
  'train_size': -1},
 {'model': 'relconvnet',
  'n_epochs': 100,
  'num_trials': 1,
  'start_trial': 2,
  'train_size': -1},
 {'model': 'relconvnet',
  'n_epochs': 100,
  'num_trials': 1,
  'start_trial': 3,
  'train_size': -1},
 {'model': 'relconvnet',
  'n_epochs': 100,
  'num_trials': 1,
  'start_trial': 4,
  'train_size': -1}]

In [10]:
# create jobs
# Write one Slurm batch script per entry of `jobs_params` into `job_directory`
# and collect the script paths in `job_files`.
job_files = []

for params in jobs_params:

    # NOTE: the name encodes only the model and the start trial, so entries that
    # differ only in another field (e.g. train_size) map to the same file and the
    # later one overwrites the earlier one.
    job_name = f"set-{params['model']}-{params['start_trial']}"

    job_file = os.path.join(job_directory, f"{job_name}.job")

    script_lines = [
        # --- Slurm header ---
        "#!/bin/bash",
        f"#SBATCH --partition={partition}",
        f"#SBATCH --job-name={job_name}",
        f"#SBATCH --output={out_dir}/{job_name}-%j.out",
        f"#SBATCH --ntasks={ntasks} --nodes={nodes}",
        f"#SBATCH --cpus-per-task={cpu_per_task}",
        f"#SBATCH --mem-per-cpu={mem_per_cpu}G",
        f"#SBATCH --time={time_str}",
        "#SBATCH --mail-type=ALL",
        f"#SBATCH -C {gpus_constraints} --gpus={n_gpus}",
        "",
        # --- environment setup ---
        f"cd {project_dir}",  # navigate to project directory
        "module restore python_env",  # load modules i need
        "conda activate tf",  # activate conda environment
        "nvidia-smi -L",  # print gpu information
        "",
        # --- run python script ---
        # The trailing backslash continues the shell command onto the next line.
        f"python train_model.py --model '{params['model']}' \\",
        f"\t --n_epochs {params['n_epochs']} --train_size {params['train_size']} "
        f"--start_trial {params['start_trial']} --num_trials {params['num_trials']}",
    ]

    # A single write of the joined text; `writelines` expects an iterable of
    # lines and would iterate a plain string character by character.
    with open(job_file, 'w') as fh:
        fh.write("\n".join(script_lines) + "\n")

    job_files.append(job_file)

In [11]:
# run jobs
# Submitting queues real cluster jobs, so only proceed on an explicit "y"/"yes";
# any other answer (including just pressing Enter) cancels.
answer = input('confirm that you would like to run this [y/N]: ')

if answer.strip().lower() in ('y', 'yes'):
    failed_submissions = []

    for job_file in job_files:
        print(f'submitting {job_file}')
        # os.system returns a non-zero status when the command fails.
        status = os.system(f"sbatch {job_file}")
        if status != 0:
            failed_submissions.append(job_file)

    # Surface failures instead of silently dropping them.
    if failed_submissions:
        print(f'{len(failed_submissions)} submission(s) failed:')
        for job_file in failed_submissions:
            print(f'  {job_file}')
else:
    print('submission cancelled; no jobs were submitted')

submitting set/set-relconvnet-0.job
Submitted batch job 26024583
submitting set/set-relconvnet-1.job
Submitted batch job 26024584
submitting set/set-relconvnet-2.job
Submitted batch job 26024585
submitting set/set-relconvnet-3.job
Submitted batch job 26024586
submitting set/set-relconvnet-4.job
Submitted batch job 26024587
submitting set/set-relconvnet_maxpoooling-0.job
Submitted batch job 26024588
submitting set/set-relconvnet_maxpoooling-1.job
Submitted batch job 26024589
submitting set/set-relconvnet_maxpoooling-2.job
Submitted batch job 26024590
submitting set/set-relconvnet_maxpoooling-3.job
Submitted batch job 26024591
submitting set/set-relconvnet_maxpoooling-4.job
Submitted batch job 26024592
submitting set/set-corelnet-0.job
Submitted batch job 26024593
submitting set/set-corelnet-1.job
Submitted batch job 26024594
submitting set/set-corelnet-2.job
Submitted batch job 26024595
submitting set/set-corelnet-3.job
Submitted batch job 26024596
submitting set/set-corelnet-4.job
Subm