In [1]:
import itertools
import os
import subprocess

In [2]:
def mkdir(dir):
    """Create the directory `dir` if it does not already exist.

    Missing parent directories are created as well, and an already existing
    directory is left untouched, so the call is safe to repeat (e.g. when the
    notebook cell is re-run).

    Parameters
    ----------
    dir : str or os.PathLike
        Path of the directory to create.

    Raises
    ------
    FileExistsError
        If `dir` exists but is not a directory.
    """
    # exist_ok=True avoids the race between a separate existence check and
    # the creation call, which could fail if the directory appeared in between.
    os.makedirs(dir, exist_ok=True)

In [3]:
# global job parameters

# local folder that receives the generated job scripts, and a hidden
# sub-folder inside it that the job scripts point their output files at
job_directory = "relational_games"
out_dir = job_directory + '/.out'

# values interpolated into the #SBATCH header of every generated job script
partition = 'test'
time_str = '00-48:00:00'
nodes = 1
ntasks = 1
cpu_per_task = 16
mem_per_cpu = 2  # written with a "G" suffix in the job script
gpus_constraints = '"rtx4090:1"'  # the inner double quotes are part of the value

# directory each job changes into before launching the training script
project_dir = "/diskarray/home/awni/projects/relational-neural-networks/experiments/relational_games"


# make sure both folders exist before any job file is written
for folder in (job_directory, out_dir):
    mkdir(folder)

In [4]:
# define params of individual jobs

# values that are swept: one job is created per combination of these lists
models = ['relconvnet', 'corelnet', 'transformer']
tasks = ['1task_match_patt']
train_sizes = [-1]
train_splits = ['pentos']

# values shared by every job
n_epochs = 50
num_trials = 5

# one dict of parameters per (model, task, train_size, train_split) combination
jobs_params = [
    dict(
        model=model_name,
        task=task_name,
        n_epochs=n_epochs,
        num_trials=num_trials,
        train_size=size,
        train_split=split,
    )
    for model_name, task_name, size, split in itertools.product(
        models, tasks, train_sizes, train_splits
    )
]

In [5]:
# show the generated list of per-job parameter dicts as the cell output
jobs_params

[{'model': 'relconvnet',
  'task': '1task_match_patt',
  'n_epochs': 50,
  'num_trials': 5,
  'train_size': -1,
  'train_split': 'pentos'},
 {'model': 'corelnet',
  'task': '1task_match_patt',
  'n_epochs': 50,
  'num_trials': 5,
  'train_size': -1,
  'train_split': 'pentos'},
 {'model': 'transformer',
  'task': '1task_match_patt',
  'n_epochs': 50,
  'num_trials': 5,
  'train_size': -1,
  'train_split': 'pentos'}]

In [6]:
# create jobs
# Writes one batch script per entry of `jobs_params` into `job_directory`
# and collects the script paths in `job_files` for submission.
job_files = []

for params in jobs_params:

    job_name = f"relational_games-{params['task']}-{params['model']}"

    job_file = os.path.join(job_directory, f"{job_name}.job")

    # The file name is derived from task and model only. Two parameter sets
    # that differ only in another field (train_size, train_split, ...) would
    # therefore overwrite each other's script and the surviving script would
    # be listed (and later submitted) twice. Fail loudly instead.
    if job_file in job_files:
        raise ValueError(
            f"job file {job_file} would be generated more than once; "
            "the job name does not distinguish these parameter sets"
        )

    # run python script (emitted as a single line of the job script)
    train_command = (
        f"python train_model.py --model '{params['model']}' --task '{params['task']}' "
        f"--train_split '{params['train_split']}' "
        f"--n_epochs {params['n_epochs']} --train_size {params['train_size']} "
        f"--num_trials {params['num_trials']}"
    )

    script_lines = [
        "#!/bin/bash",
        # scheduler directives
        f"#SBATCH --partition={partition}",
        f"#SBATCH --job-name={job_name}",
        f"#SBATCH --output={out_dir}/{job_name}-%j.out",
        f"#SBATCH --ntasks={ntasks} --nodes={nodes}",
        f"#SBATCH --cpus-per-task={cpu_per_task}",
        f"#SBATCH --mem-per-cpu={mem_per_cpu}G",
        f"#SBATCH --time={time_str}",
        "#SBATCH --mail-type=ALL",
        f"#SBATCH --gpus={gpus_constraints}",
        # environment setup
        f"cd {project_dir}",  # navigate to project directory
        "module load miniconda",  # load modules i need
        # NOTE(review): `conda init` is documented as editing shell startup
        # files rather than setting up the running shell; kept unchanged here,
        # but confirm that the following `conda activate` works in the job.
        "conda init bash",
        "conda activate relconvnet",  # activate conda environment
        "nvidia-smi -L",  # print gpu information
        train_command,
    ]

    # Write the whole script in one call. (Passing a plain string to
    # `writelines` iterates over it character by character.)
    with open(job_file, 'w') as fh:
        fh.write("\n".join(script_lines) + "\n")

    job_files.append(job_file)

In [7]:
# run jobs
# Ask before submitting. Pressing Enter or answering y/yes submits every
# script in `job_files`; any other answer cancels without submitting.
answer = input('confirm that you would like to run this [Y/n]: ').strip().lower()

if answer in ('', 'y', 'yes'):
    failed_submissions = []

    for job_file in job_files:
        # Argument list instead of a shell string: the path is handed to
        # sbatch verbatim, with no shell interpretation of its characters.
        result = subprocess.run(["sbatch", job_file], capture_output=True, text=True)

        # echo what sbatch printed so the submission messages stay visible
        print(result.stdout, end="")

        if result.returncode != 0:
            print(result.stderr, end="")
            failed_submissions.append(job_file)

    # keep submitting the remaining jobs after a failure, but do not let
    # failures pass silently
    if failed_submissions:
        raise RuntimeError(
            "sbatch failed for: " + ", ".join(failed_submissions)
        )
else:
    print('submission cancelled; no jobs were submitted')

Submitted batch job 239
Submitted batch job 240
Submitted batch job 241
