In [8]:
import os
import itertools

In [9]:
def mkdir(dir):
    if not os.path.exists(dir):
        os.mkdir(dir)

In [10]:
# global job parameters

job_directory = f"relational_games"
out_dir = f'{job_directory}/.out'
time_str = '00-4:00:00'
partition = 'gpu'
ntasks = 1
nodes = 1
cpu_per_task = 8
mem_per_cpu = 4
n_gpus = 1
gpus_constraints = '"rtx3090|v100|a100|rtx2080ti"'
project_dir = "/gpfs/gibbs/project/lafferty/ma2393/relational-neural-networks/experiments/relational_games"


mkdir(job_directory)
mkdir(out_dir)

In [11]:
# define params of individual jobs

models = ['tcn_corelnet']
tasks = ['1task_match_patt']
train_splits = ['pentos']
n_epochs = 50
train_sizes = [-1]
num_trials_per_run = 1
start_trials = list(range(5))

jobs_params = []

for model, task, train_size, train_split, start_trial in itertools.product(models, tasks, train_sizes, train_splits, start_trials):
    jobs_params.append(dict(model=model, task=task, n_epochs=n_epochs, num_trials=num_trials_per_run, start_trial=start_trial, train_size=train_size, train_split=train_split))

In [12]:
jobs_params

[{'model': 'tcn_corelnet',
  'task': '1task_match_patt',
  'n_epochs': 50,
  'num_trials': 1,
  'start_trial': 0,
  'train_size': -1,
  'train_split': 'pentos'},
 {'model': 'tcn_corelnet',
  'task': '1task_match_patt',
  'n_epochs': 50,
  'num_trials': 1,
  'start_trial': 1,
  'train_size': -1,
  'train_split': 'pentos'},
 {'model': 'tcn_corelnet',
  'task': '1task_match_patt',
  'n_epochs': 50,
  'num_trials': 1,
  'start_trial': 2,
  'train_size': -1,
  'train_split': 'pentos'},
 {'model': 'tcn_corelnet',
  'task': '1task_match_patt',
  'n_epochs': 50,
  'num_trials': 1,
  'start_trial': 3,
  'train_size': -1,
  'train_split': 'pentos'},
 {'model': 'tcn_corelnet',
  'task': '1task_match_patt',
  'n_epochs': 50,
  'num_trials': 1,
  'start_trial': 4,
  'train_size': -1,
  'train_split': 'pentos'}]

In [13]:
# create jobs
job_files = []

for params in jobs_params:

    job_name = f"relational_games-{params['task']}-{params['model']}-{params['start_trial']}"

    job_file = os.path.join(job_directory, f"{job_name}.job")

    with open(job_file, 'w') as fh:
        fh.writelines(f"#!/bin/bash\n")
        fh.writelines(f"#SBATCH --partition={partition}\n")
        fh.writelines(f"#SBATCH --job-name={job_name}\n")
        fh.writelines(f"#SBATCH --output={out_dir}/{job_name}-%j.out\n")
        fh.writelines(f"#SBATCH --ntasks={ntasks} --nodes={nodes}\n")
        fh.writelines(f"#SBATCH --cpus-per-task={cpu_per_task}\n")
        fh.writelines(f"#SBATCH --mem-per-cpu={mem_per_cpu}G\n")
        fh.writelines(f"#SBATCH --time={time_str}\n")
        fh.writelines(f"#SBATCH --mail-type=ALL\n")
        fh.writelines(f"#SBATCH -C {gpus_constraints} --gpus={n_gpus}\n")

        fh.writelines(f"cd {project_dir}\n") # navigate to project directory

        fh.writelines(f"module restore python_env\n") # load modules i need
        fh.writelines(f"conda activate tf\n") # activate conda environment

        fh.writelines(f"nvidia-smi -L\n") # print gpu information

        # run python script
        fh.writelines(f"python train_model.py --model '{params['model']}' --task '{params['task']}' --train_split '{params['train_split']}' ")
        fh.writelines(f"--n_epochs {params['n_epochs']} --train_size {params['train_size']} --start_trial {params['start_trial']} --num_trials {params['num_trials']}\n")

    job_files.append(job_file)

In [14]:
# run jobs
input('confirm that you would like to run this')

for job_file in job_files:
    os.system(f"sbatch {job_directory}/{job_file}")

sbatch: error: Unable to open file relational_games/relational_games/relational_games-1task_match_patt-tcn_corelnet-0.job
sbatch: error: Unable to open file relational_games/relational_games/relational_games-1task_match_patt-tcn_corelnet-1.job
sbatch: error: Unable to open file relational_games/relational_games/relational_games-1task_match_patt-tcn_corelnet-2.job
sbatch: error: Unable to open file relational_games/relational_games/relational_games-1task_match_patt-tcn_corelnet-3.job
sbatch: error: Unable to open file relational_games/relational_games/relational_games-1task_match_patt-tcn_corelnet-4.job
