In [1]:
import itertools
import os
import subprocess
import time

In [2]:
def mkdir(dir):
    """Create directory `dir` if nothing exists at that path yet.

    Missing parent directories are created as well, so nested paths such as
    "jobs/.out" work even when "jobs" does not exist yet.

    Args:
        dir: path of the directory to create.

    Returns:
        None. If something already exists at `dir`, the call is a no-op.
    """
    if not os.path.exists(dir):
        # exist_ok=True covers the case where another process creates the
        # directory between the check above and this call.
        os.makedirs(dir, exist_ok=True)

In [3]:
# global job parameters
# Settings shared by every generated SLURM job script.

# Locations (relative to the notebook's working directory) for the generated
# .job files and for the SLURM output logs.
job_directory = "relational_games"
out_dir = job_directory + '/.out'

# Resource request written into the "#SBATCH" header of each job script.
partition = 'gpu'
time_str = '00-04:00:00'  # value passed to "#SBATCH --time"
nodes = 1
ntasks = 1
cpu_per_task = 8
mem_per_cpu = 2  # emitted as "<mem_per_cpu>G"
n_gpus = 1
# gpus_constraints = '"a100|rtx3090|v100|rtx2080ti"' # all gpus are pretty good now

# Directory each job changes into before launching the python script.
project_dir = "/home/ma2393/project/abstract_transformer/experiments/relational_games"

# Make sure both locations exist before any job file is written
# (the parent directory first, then the log directory inside it).
mkdir(job_directory)
mkdir(out_dir)

In [4]:
# model params
# One dict per model configuration to evaluate. Commented-out entries are
# configurations that are currently disabled.
model_params = [
    # dict(d_model=128, dff=256, n_layers=1, sa=0, ra=4, ra_type='relational_attention', symbol_type='positional_symbols'),
    # dict(d_model=128, dff=256, n_layers=1, sa=2, ra=2, ra_type='relational_attention', symbol_type='positional_symbols'),
    # dict(d_model=128, dff=256, n_layers=1, sa=0, ra=4, ra_type='rca', symbol_type='positional_symbols'),
    # dict(d_model=128, dff=256, n_layers=1, sa=2, ra=2, ra_type='rca', symbol_type='positional_symbols'),
    # dict(d_model=128, dff=256, n_layers=1, sa=4, ra=0, ra_type='NA', symbol_type='NA'),
    # dict(d_model=128, dff=256, n_layers=2, sa=0, ra=4, ra_type='relational_attention', symbol_type='positional_symbols'),
    # dict(d_model=128, dff=256, n_layers=2, sa=2, ra=2, ra_type='relational_attention', symbol_type='positional_symbols'),
    # dict(d_model=128, dff=256, n_layers=2, sa=0, ra=4, ra_type='rca', symbol_type='positional_symbols'),
    # dict(d_model=128, dff=256, n_layers=2, sa=2, ra=2, ra_type='rca', symbol_type='positional_symbols'),
    # dict(d_model=144, dff=144*2, n_layers=2, sa=2, ra=0, ra_type='NA', symbol_type='NA'),
    # dict(d_model=144, dff=144*2, n_layers=2, sa=4, ra=0, ra_type='NA', symbol_type='NA'),
    # dict(d_model=144, dff=144*2, n_layers=2, sa=8, ra=0, ra_type='NA', symbol_type='NA'),
    {
        'd_model': 128, 'dff': 256, 'n_layers': 2, 'sa': 0, 'ra': 2,
        'n_relations': 8, 'symmetric_rels': 1,
        'ra_type': 'relational_attention', 'symbol_type': 'positional_symbols',
    },
    {
        'd_model': 128, 'dff': 256, 'n_layers': 2, 'sa': 0, 'ra': 2,
        'n_relations': 8, 'symmetric_rels': 1,
        'ra_type': 'relational_attention', 'symbol_type': 'symbolic_attention',
    },
    # dict(d_model=128, dff=256, n_layers=2, sa=2, ra=2, ra_type='relational_attention', symbol_type='positional_symbols'),
]

# global config parameters
# These values are passed unchanged to every job's command line.
patch_size = 12
n_epochs = 50
# max_steps = -1
log_to_wandb = 1

# tasks
tasks = ['same', 'occurs', 'xoccurs', '1task_between', '1task_match_patt']

# Space-separated training-set sizes per task, forwarded as the value of
# --train_sizes. '1task_match_patt' gets its own, larger schedule; every other
# task shares the smaller one.
train_sizes = {'1task_match_patt': '10_000 12_500 15_000 17_500 20_000'}
train_sizes.update(dict.fromkeys(
    [task for task in tasks if task != '1task_match_patt'],
    '250 500 750 1_000 1_250 1_500 1_750 2_000 2_250 2_500',
))

In [5]:
# Build one parameter dict per (task, model configuration) pair, iterating
# tasks in the outer position and model configurations in the inner one.
jobs_params = []
for task, mparams in itertools.product(tasks, model_params):
    # Job-level keys come first; keys from the model configuration are merged
    # last, so they take precedence if a name clashes.
    jobs_params.append({
        **{'task': task, 'compile': 1, 'train_sizes': train_sizes[task]},
        **mparams,
    })

In [6]:
# Display the assembled per-job parameter dicts for a visual sanity check.
jobs_params

[{'task': 'same',
  'compile': 1,
  'train_sizes': '250 500 750 1_000 1_250 1_500 1_750 2_000 2_250 2_500',
  'd_model': 128,
  'dff': 256,
  'n_layers': 2,
  'sa': 0,
  'ra': 2,
  'n_relations': 8,
  'symmetric_rels': 1,
  'ra_type': 'relational_attention',
  'symbol_type': 'positional_symbols'},
 {'task': 'same',
  'compile': 1,
  'train_sizes': '250 500 750 1_000 1_250 1_500 1_750 2_000 2_250 2_500',
  'd_model': 128,
  'dff': 256,
  'n_layers': 2,
  'sa': 0,
  'ra': 2,
  'n_relations': 8,
  'symmetric_rels': 1,
  'ra_type': 'relational_attention',
  'symbol_type': 'symbolic_attention'},
 {'task': 'occurs',
  'compile': 1,
  'train_sizes': '250 500 750 1_000 1_250 1_500 1_750 2_000 2_250 2_500',
  'd_model': 128,
  'dff': 256,
  'n_layers': 2,
  'sa': 0,
  'ra': 2,
  'n_relations': 8,
  'symmetric_rels': 1,
  'ra_type': 'relational_attention',
  'symbol_type': 'positional_symbols'},
 {'task': 'occurs',
  'compile': 1,
  'train_sizes': '250 500 750 1_000 1_250 1_500 1_750 2_000 2_250

In [7]:
# Number of job configurations (one per task x model-configuration pair).
len(jobs_params)

10

In [8]:
# create jobs
# For every parameter dict in `jobs_params`, write one SLURM batch script to
# "<job_directory>/<job_name>.job" and record its path in `created_jobs`.
created_jobs = []
for params in jobs_params:

    job_name = (f"relational_games-{params['task']}-d{params['d_model']}-sa{params['sa']}-ra{params['ra']}-L{params['n_layers']}"
        f"-ra_type_{params['ra_type']}-symbol_type_{params['symbol_type']}")

    job_file = os.path.join(job_directory, f"{job_name}.job")

    # The job name does not encode every parameter (e.g. n_relations,
    # symmetric_rels), so two different configurations can map to the same
    # file. Fail loudly rather than silently overwriting the earlier script
    # and ending up with the same path twice in `created_jobs`.
    if job_file in created_jobs:
        raise ValueError(
            f"job name collision: {job_file!r} was already generated by another "
            f"configuration; colliding parameters: {params!r}")

    # Optional command-line flags are emitted only when the configuration defines them.
    optional_args = ''.join(
        f"--{flag} {params[flag]} "
        for flag in ('n_relations', 'symmetric_rels')
        if flag in params)

    with open(job_file, 'w') as fh:
        # SLURM directives
        fh.write("#!/bin/bash\n")
        fh.write(f"#SBATCH --partition={partition}\n")
        fh.write(f"#SBATCH --job-name={job_name}\n")
        fh.write(f"#SBATCH --output={out_dir}/%j-{job_name}.out\n")
        fh.write(f"#SBATCH --ntasks={ntasks} --nodes={nodes}\n")
        fh.write(f"#SBATCH --cpus-per-task={cpu_per_task}\n")
        fh.write(f"#SBATCH --mem-per-cpu={mem_per_cpu}G\n")
        fh.write(f"#SBATCH --time={time_str}\n")
        fh.write("#SBATCH --mail-type=ALL\n")
        fh.write(f"#SBATCH --gpus={n_gpus}\n")
        # fh.write(f"#SBATCH --reservation=h100\n") # NOTE: using h100 reservation for now
        # fh.write(f"#SBATCH -C {gpus_constraints}\n")# --gpus={n_gpus}\n")

        fh.write('\n')
        fh.write('module load StdEnv\n')
        fh.write('export SLURM_EXPORT_ENV=ALL\n')
        fh.write('\n')

        # fh.write(f"module restore python_env\n") # load modules i need
        fh.write("module load miniconda\n") # load modules i need
        # fh.write(f"conda init\n")
        fh.write("conda activate abstract_transformer\n") # activate conda environment
        fh.write("conda info --envs\n") # list conda environments in the job log

        fh.write('\n')
        fh.write("nvidia-smi -L\n") # print gpu information
        fh.write('\n')

        fh.write(f"cd {project_dir}\n") # navigate to project directory
        # run python script (the whole command is written as a single line)
        fh.write(f"python eval_relational_games_learning_curve.py --task {params['task']} ")
        fh.write(f"--d_model {params['d_model']} --dff {params['dff']} --sa {params['sa']} --ra {params['ra']} --n_layers {params['n_layers']} ")
        fh.write(optional_args)
        fh.write(f"--ra_type {params['ra_type']} --symbol_type {params['symbol_type']} --patch_size {patch_size} ")
        fh.write(f"--train_sizes {params['train_sizes']} --n_epochs {n_epochs} --log_to_wandb {log_to_wandb} --compile {params['compile']} \n")

    created_jobs.append(job_file)

In [9]:
# Display the paths of the job files that were just written.
created_jobs

['relational_games/relational_games-same-d128-sa0-ra2-L2-ra_type_relational_attention-symbol_type_positional_symbols.job',
 'relational_games/relational_games-same-d128-sa0-ra2-L2-ra_type_relational_attention-symbol_type_symbolic_attention.job',
 'relational_games/relational_games-occurs-d128-sa0-ra2-L2-ra_type_relational_attention-symbol_type_positional_symbols.job',
 'relational_games/relational_games-occurs-d128-sa0-ra2-L2-ra_type_relational_attention-symbol_type_symbolic_attention.job',
 'relational_games/relational_games-xoccurs-d128-sa0-ra2-L2-ra_type_relational_attention-symbol_type_positional_symbols.job',
 'relational_games/relational_games-xoccurs-d128-sa0-ra2-L2-ra_type_relational_attention-symbol_type_symbolic_attention.job',
 'relational_games/relational_games-1task_between-d128-sa0-ra2-L2-ra_type_relational_attention-symbol_type_positional_symbols.job',
 'relational_games/relational_games-1task_between-d128-sa0-ra2-L2-ra_type_relational_attention-symbol_type_symbolic_atte

In [10]:
# Number of times the full list of job files is submitted by the submission cell.
n_trials = 5

In [11]:
# Preview the job files whose path contains one of the listed task names.
# This is a plain substring test (so 'occurs' also matches 'xoccurs' paths), and
# the result is only displayed -- it is not assigned to any variable.
[j for j in created_jobs if (any((task in j) for task in ['occurs', 'same', 'xoccurs']))]

['relational_games/relational_games-same-d128-sa0-ra2-L2-ra_type_relational_attention-symbol_type_positional_symbols.job',
 'relational_games/relational_games-same-d128-sa0-ra2-L2-ra_type_relational_attention-symbol_type_symbolic_attention.job',
 'relational_games/relational_games-occurs-d128-sa0-ra2-L2-ra_type_relational_attention-symbol_type_positional_symbols.job',
 'relational_games/relational_games-occurs-d128-sa0-ra2-L2-ra_type_relational_attention-symbol_type_symbolic_attention.job',
 'relational_games/relational_games-xoccurs-d128-sa0-ra2-L2-ra_type_relational_attention-symbol_type_positional_symbols.job',
 'relational_games/relational_games-xoccurs-d128-sa0-ra2-L2-ra_type_relational_attention-symbol_type_symbolic_attention.job']

In [12]:
# Submit every generated job file `n_trials` times, but only after the user
# explicitly confirms at the prompt.
confirm = input("CONTINUE TO RUN ALL JOBS? (enter 'Y' or 'y')")
if confirm.strip() in ("Y", "y"):
    failed_submissions = []  # (trial, job) pairs whose sbatch call did not succeed
    for trial in range(n_trials):
        for job in created_jobs:
            try:
                # Argument list instead of a shell string: the path is handed to
                # sbatch verbatim, with no shell parsing of its contents.
                result = subprocess.run(['sbatch', job], capture_output=True, text=True)
            except OSError as err:
                # e.g. sbatch is not installed / not on PATH
                failed_submissions.append((trial, job))
                print(f"could not run sbatch for {job}: {err}")
                continue

            # Echo sbatch's own message (e.g. the submitted job id) into the cell output.
            if result.stdout:
                print(result.stdout, end='')
            if result.returncode != 0:
                failed_submissions.append((trial, job))
                print(f"sbatch failed for {job} (exit code {result.returncode}): {result.stderr.strip()}")
        # Pause after each full round of submissions.
        time.sleep(5)

    if failed_submissions:
        print(f"{len(failed_submissions)} submission(s) failed.")
else:
    print("did not run jobs since you did not confirm.")

Submitted batch job 25387
Submitted batch job 25388
Submitted batch job 25389
Submitted batch job 25390
Submitted batch job 25391
Submitted batch job 25392
Submitted batch job 25393
Submitted batch job 25394
Submitted batch job 25395
Submitted batch job 25396
Submitted batch job 25397
Submitted batch job 25398
Submitted batch job 25399
Submitted batch job 25400
Submitted batch job 25401
Submitted batch job 25402
Submitted batch job 25403
Submitted batch job 25404
Submitted batch job 25405
Submitted batch job 25406
Submitted batch job 25407
Submitted batch job 25408
Submitted batch job 25409
Submitted batch job 25410
Submitted batch job 25411
Submitted batch job 25412
Submitted batch job 25413
Submitted batch job 25414
Submitted batch job 25415
Submitted batch job 25416
Submitted batch job 25417
Submitted batch job 25418
Submitted batch job 25419
Submitted batch job 25420
Submitted batch job 25421
Submitted batch job 25422
Submitted batch job 25423
Submitted batch job 25424
Submitted ba