In [1]:
import os
import itertools

In [2]:
def mkdir(dir):
    if not os.path.exists(dir):
        os.mkdir(dir)

In [3]:
# global job parameters

job_directory = f"tiny_stories"
out_dir = f'{job_directory}/.out'
time_str = '00-12:00:00'
partition = 'gpu'
ntasks = 1
nodes = 1
cpu_per_task = 8
mem_per_cpu = 2
n_gpus = 1
# gpus_constraints = '"a100|rtx3090|v100|rtx2080ti"' # all gpus are pretty good now
project_dir = "/home/ma2393/scratch/abstract_transformer/experiments/tiny_stories"

mkdir(job_directory)
mkdir(out_dir)

In [4]:
# model params
pos_enc_types = ['RoPE', 'pos_emb']
symbol_types = ['position_relative', 'symbolic_attention']
rca_type = [0, 1]

model_params = [
    dict(d_model=768, n_layers=2, sa=12, rca=0),
    dict(d_model=768, n_layers=2, sa=8, rca=4),
    dict(d_model=768, n_layers=2, sa=6, rca=6),
]

jobs_params = []
for mparams in model_params:
    for pos_enc_type in pos_enc_types:
        if mparams['rca'] != 0:
            for disrca in disentangled_rcas:
                for symbol_type in symbol_types:
                    jobs_params.append({**mparams, 'pos_enc_type': pos_enc_type, 'disentangled_rca': disrca, "symbol_type": symbol_type})
        else:
            jobs_params.append({**mparams, 'pos_enc_type': pos_enc_type, 'disentangled_rca': -1, 'symbol_type': 'NA'})

In [5]:
jobs_params

[{'d_model': 768,
  'n_layers': 2,
  'sa': 8,
  'rca': 4,
  'pos_enc_type': 'RoPE',
  'disentangled_rca': 0,
  'symbol_type': 'position_relative'},
 {'d_model': 768,
  'n_layers': 2,
  'sa': 8,
  'rca': 4,
  'pos_enc_type': 'RoPE',
  'disentangled_rca': 0,
  'symbol_type': 'symbolic_attention'},
 {'d_model': 768,
  'n_layers': 2,
  'sa': 8,
  'rca': 4,
  'pos_enc_type': 'RoPE',
  'disentangled_rca': 1,
  'symbol_type': 'position_relative'},
 {'d_model': 768,
  'n_layers': 2,
  'sa': 8,
  'rca': 4,
  'pos_enc_type': 'RoPE',
  'disentangled_rca': 1,
  'symbol_type': 'symbolic_attention'},
 {'d_model': 768,
  'n_layers': 2,
  'sa': 8,
  'rca': 4,
  'pos_enc_type': 'pos_emb',
  'disentangled_rca': 0,
  'symbol_type': 'position_relative'},
 {'d_model': 768,
  'n_layers': 2,
  'sa': 8,
  'rca': 4,
  'pos_enc_type': 'pos_emb',
  'disentangled_rca': 0,
  'symbol_type': 'symbolic_attention'},
 {'d_model': 768,
  'n_layers': 2,
  'sa': 8,
  'rca': 4,
  'pos_enc_type': 'pos_emb',
  'disentangled_

In [6]:
len(jobs_params)

16

In [7]:
# global config parameters
n_epochs = 1
max_steps = -1
log_to_wandb = 1

In [8]:
# create jobs
created_jobs = []
for params in jobs_params:

    job_name = (f"tiny_stories_LM-d{params['d_model']}-sa{params['sa']}-rca{params['rca']}-L{params['n_layers']}"
        f"-{params['pos_enc_type']}-dis_rca{params['disentangled_rca']}-{params['symbol_type']}")

    job_file = os.path.join(job_directory, f"{job_name}.job")

    with open(job_file, 'w') as fh:
        fh.writelines(f"#!/bin/bash\n")
        fh.writelines(f"#SBATCH --partition={partition}\n")
        fh.writelines(f"#SBATCH --job-name={job_name}\n")
        fh.writelines(f"#SBATCH --output={out_dir}/%j-{job_name}.out\n")
        fh.writelines(f"#SBATCH --ntasks={ntasks} --nodes={nodes}\n")
        fh.writelines(f"#SBATCH --cpus-per-task={cpu_per_task}\n")
        fh.writelines(f"#SBATCH --mem-per-cpu={mem_per_cpu}G\n")
        fh.writelines(f"#SBATCH --time={time_str}\n")
        fh.writelines(f"#SBATCH --mail-type=ALL\n")
        fh.writelines(f"#SBATCH --gpus={n_gpus}\n")
        # fh.writelines(f"#SBATCH -C {gpus_constraints}\n")# --gpus={n_gpus}\n")

        fh.writelines('\n')
        fh.writelines('module load StdEnv\n')
        fh.writelines('export SLURM_EXPORT_ENV=ALL\n')
        fh.writelines('\n')

        # fh.writelines(f"module restore python_env\n") # load modules i need
        fh.writelines(f"module load miniconda\n") # load modules i need
        # fh.writelines(f"conda init\n")
        fh.writelines(f"conda activate abstract_transformer\n") # activate conda environment
        fh.writelines(f"conda info --envs\n") # activate conda environment

        fh.writelines('\n')
        fh.writelines(f"nvidia-smi -L\n") # print gpu information
        fh.writelines('\n')

        fh.writelines(f"cd {project_dir}\n") # navigate to project directory
        # run python script
        fh.writelines(f"python train_tiny_stories_language_model.py ")
        fh.writelines(f"--d_model {params['d_model']} --sa {params['sa']} --rca {params['rca']} --n_layers {params['n_layers']} ")
        fh.writelines(f"--pos_enc_type {params['pos_enc_type']} --disentangled_rca {params['disentangled_rca']} --symbol_type {params['symbol_type']} ")
        fh.writelines(f"--n_epochs {n_epochs} --max_steps {max_steps} --log_to_wandb {log_to_wandb}\n")

    created_jobs.append(job_file)

In [9]:
created_jobs

['tiny_stories/tiny_stories_LM-d768-sa8-rca4-L2-RoPE-dis_rca0-pos_relative.job',
 'tiny_stories/tiny_stories_LM-d768-sa8-rca4-L2-RoPE-dis_rca0-sym_attn.job',
 'tiny_stories/tiny_stories_LM-d768-sa8-rca4-L2-RoPE-dis_rca1-pos_relative.job',
 'tiny_stories/tiny_stories_LM-d768-sa8-rca4-L2-RoPE-dis_rca1-sym_attn.job',
 'tiny_stories/tiny_stories_LM-d768-sa8-rca4-L2-pos_emb-dis_rca0-pos_relative.job',
 'tiny_stories/tiny_stories_LM-d768-sa8-rca4-L2-pos_emb-dis_rca0-sym_attn.job',
 'tiny_stories/tiny_stories_LM-d768-sa8-rca4-L2-pos_emb-dis_rca1-pos_relative.job',
 'tiny_stories/tiny_stories_LM-d768-sa8-rca4-L2-pos_emb-dis_rca1-sym_attn.job',
 'tiny_stories/tiny_stories_LM-d768-sa6-rca6-L2-RoPE-dis_rca0-pos_relative.job',
 'tiny_stories/tiny_stories_LM-d768-sa6-rca6-L2-RoPE-dis_rca0-sym_attn.job',
 'tiny_stories/tiny_stories_LM-d768-sa6-rca6-L2-RoPE-dis_rca1-pos_relative.job',
 'tiny_stories/tiny_stories_LM-d768-sa6-rca6-L2-RoPE-dis_rca1-sym_attn.job',
 'tiny_stories/tiny_stories_LM-d768-sa6-

In [10]:
input("CONTINUE TO RUN ALL JOBS?")
for job in created_jobs:
    os.system(f'sbatch {job}')

Submitted batch job 3374
Submitted batch job 3375
Submitted batch job 3376
Submitted batch job 3377
Submitted batch job 3378
Submitted batch job 3379
Submitted batch job 3380
Submitted batch job 3381
Submitted batch job 3382
Submitted batch job 3383
Submitted batch job 3384
Submitted batch job 3385
Submitted batch job 3386
Submitted batch job 3387
Submitted batch job 3388
Submitted batch job 3389
