# Set global parameters

In [67]:
# Launch mode: when True, compose_instruction emits an `accelerate launch ...`
# command; when False, a plain `python ...` command prefixed with env variables.
use_accelerate = False
# File mode passed to open() when the instruction and config files are written.
WRITE_MODE = 'w' # 'w' overwrites (sweeps), 'a' appends (tests)
# NOTE(review): num_devices is not referenced anywhere else in the visible notebook.
num_devices = 2 if use_accelerate else 1
# GPU index exported as CUDA_VISIBLE_DEVICES in the non-accelerate commands.
DEVICE = 1
# Root of the per-run --output_dir paths written into the generated instructions.
# NOTE(review): absolute, machine-specific path.
base_dir = '/home/lange/fantastic-umbrella/runs'
# Value-less command line switches: compose_instruction appends `--<key>` for
# every entry it considers enabled.
boolean_args = {
    'with_tracking' : [True],
}

# Set run(s) parameters

In [68]:
# Preset "1024 sweeps": grid over insert_dropout x learning_rate with training_size 1024.

# Generation label: part of the instruction file names and output directory,
# and passed to every run as --run_generation.
GENERATION = "19_sweeps"
# Sweep number within the generation (zero-padded to two digits in file names).
SWEEP_ID = '1'
# Run name template; "{run_id}" is deliberately left as a literal placeholder
# here and filled in per run when the instructions are generated.
RUN_NAME = f"19_S{SWEEP_ID}" + "_ID_{run_id}"
# Tags handed to compose_instruction (the detected sweep type is appended later).
TAGS = [f'SWP{SWEEP_ID}','NEW_GRAD_OUTPUT']
# Parameter grid: every key becomes a `--<key> <value>` argument. Each value is
# a list of candidates; the Cartesian product of all lists defines the sweep.
value_args = {
    'task_name' : ['qnli'],#["qnli"], #["cola","sst2","mrpc","stsb","qqp","mnli","rte"],
    'model_name_or_path' : ['bert-base-uncased'],
    'per_device_train_batch_size' : [32],
    'per_device_eval_batch_size' : [128],    
    'weight_decay' : [0.01], 
    'lr_scheduler_type' : ['linear'],
    'warmup_steps_fraction' : [0.1],
    'report_to' : ['all'],
    'insert_dropout' : [0.04, 0.08, 0.1, 0.12, 0.16, 0.2, 0.24, 0.28, 0.32],
    # The mere presence of the 'catch_dropout' key makes determine_run_type
    # report "modded" instead of "vanilla".
    # 'catch_dropout' : [0],
    'training_size' : [1024],
    'beta1' : [0.9],
    'beta2' : [0.999],
    'early_stopping_patience' : [5],
    'num_train_epochs' : [25], 
    'learning_rate': [6e-5, 4e-5, 2e-5, 1e-5, 8e-6],
    'evaluation_steps': [32],
    'dataset_seed' : [1]
}
# Every parameter combination is emitted once per seed (passed as --seed).
seeds = [1,2,3]

# Util functions

In [69]:
def compose_instruction(value_args, boolean_args, run_name, use_accelerate=False, device=0, tags=None):
    """Build the shell command line for a single training run.

    Parameters
    ----------
    value_args : dict
        Mapping of argument name to value. Every item is appended as
        ``--<name> <value>`` and is also available as a ``{name}``
        placeholder inside ``run_name``.
    boolean_args : dict
        Mapping of flag name to a switch. A flag is appended as ``--<name>``
        when its switch is enabled. A switch may be a plain truthy value or
        a list/tuple, in which case it is enabled if any element is truthy
        (so ``[False]`` does not emit the flag).
    run_name : str
        ``str.format`` template for the run name.
    use_accelerate : bool
        If True the command starts with ``accelerate launch`` and no
        environment variables are emitted (``device``, ``tags`` and the run
        name are then unused). Otherwise the command is a ``python`` call
        prefixed with environment variables.
    device : int
        Value exported as ``CUDA_VISIBLE_DEVICES``.
    tags : sequence of str, optional
        Tags serialised as a JSON-style list into the tag environment
        variable. The first three characters of the last tag, upper-cased,
        are appended to the run name. ``None`` or empty means no tags and no
        suffix.

    Returns
    -------
    str
        The complete command line.

    Raises
    ------
    KeyError
        If ``run_name`` contains a placeholder that is not a key of
        ``value_args``.
    """
    # None instead of a mutable default; normalise to a list for indexing.
    tags = list(tags) if tags else []

    run_name = run_name.format(**value_args)
    if tags:
        # Suffix derived from the last tag (the sweep type in this notebook).
        run_name += f'_{str(tags[-1][0:3]).upper()}'

    if use_accelerate:
        instruction = 'accelerate launch run_glue_no_trainer_modded.py'
    else:
        quoted_tags = ','.join(f'"{s}"' for s in tags)
        tag_string = f"'[{quoted_tags}]'"
        # NOTE(review): 'WANBD_TAGS' looks like a misspelling of 'WANDB_TAGS'.
        # It is left unchanged because the code that reads this variable is
        # not visible here - confirm against the training script.
        env_variables = f'CUDA_VISIBLE_DEVICES={device} WANBD_TAGS={tag_string} WANDB_RUNNAME="{run_name}" '
        instruction = env_variables + 'python run_glue_no_trainer_modded.py'

    for k, v in value_args.items():
        instruction += f' --{k} {v}'

    for k, v in boolean_args.items():
        # A non-empty list such as [False] is truthy, so look inside containers.
        enabled = any(v) if isinstance(v, (list, tuple)) else bool(v)
        if enabled:
            instruction += f' --{k}'

    return instruction

In [70]:
import pandas as pd
# CSV file that maps parameter configurations to integer ids (index column 'id').
INDEX_FILE = 'run_index.csv'
# Width to which the numeric id is zero-padded in the returned string.
zfill = 4
def get_param_config_id(param_dict, insert_new=True):
    """Return the id string of a parameter configuration, registering it if new.

    The configuration is looked up in ``INDEX_FILE``. If an identical row
    already exists its id is reused; otherwise the next free id
    (``max id + 1``, or 0 for an empty or missing index) is assigned.

    Parameters
    ----------
    param_dict : dict
        Mapping of parameter name to scalar value; becomes one row of the index.
    insert_new : bool
        If True the (de-duplicated) index is written back to ``INDEX_FILE``.
        If False the file is left untouched.

    Returns
    -------
    str
        ``'pcid_'`` followed by the id zero-padded to ``zfill`` digits.
    """
    try:
        pc_index = pd.read_csv(INDEX_FILE, index_col='id')
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # A missing file and an existing-but-empty file both mean "no entries yet".
        pc_index = pd.DataFrame()

    if len(pc_index) == 0:
        # First entry: start the index from this row instead of concatenating
        # with an empty frame (deprecated pandas behaviour).
        pcid = 0
        pc_index = pd.DataFrame(param_dict, index=[pcid])
        pc_index.index.name = 'id'
    else:
        # Take the next id from the stored index before de-duplicating, so an
        # id that was ever handed out is not reassigned.
        next_id = pc_index.index.max() + 1
        new_row = pd.DataFrame(param_dict, index=[next_id])
        new_row.index.name = 'id'

        # De-duplicate the stored rows first: any duplicate that remains after
        # appending must involve the new row, so rows that were already
        # duplicated in the file cannot hijack the returned id.
        pc_index = pd.concat([pc_index.drop_duplicates(), new_row], join='outer')

        duplicate_rows = pc_index.duplicated(keep=False)
        if duplicate_rows.any():
            # First flagged row is the stored configuration the new row matches.
            pcid = duplicate_rows.idxmax()
            pc_index = pc_index.drop_duplicates()
        else:
            pcid = next_id

    if insert_new:
        pc_index.to_csv(INDEX_FILE)

    return f'pcid_{str(pcid).zfill(zfill)}'

In [71]:
def determine_run_type(list_of_value_args):
    """Classify a sweep as "modded" or "vanilla".

    Only the first configuration is inspected: the sweep counts as "modded"
    when that configuration has a 'catch_dropout' key, otherwise "vanilla".
    """
    first_config = list_of_value_args[0]
    return "modded" if 'catch_dropout' in first_config else "vanilla"

# Create combinations

In [72]:
from itertools import product
# Split the grid into parallel tuples: parameter names and their candidate lists.
keys, values = zip(*value_args.items())
# One dict per element of the Cartesian product of all candidate lists.
list_of_value_args = [dict(zip(keys, v)) for v in product(*values)]
# Number of parameter combinations (seeds are not multiplied in yet).
print(len(list_of_value_args))
# Display the first combination as a sanity check.
list_of_value_args[0]

45


{'task_name': 'qnli',
 'model_name_or_path': 'bert-base-uncased',
 'per_device_train_batch_size': 32,
 'per_device_eval_batch_size': 128,
 'weight_decay': 0.01,
 'lr_scheduler_type': 'linear',
 'warmup_steps_fraction': 0.1,
 'report_to': 'all',
 'insert_dropout': 0.04,
 'training_size': 1024,
 'beta1': 0.9,
 'beta2': 0.999,
 'early_stopping_patience': 5,
 'num_train_epochs': 25,
 'learning_rate': 6e-05,
 'evaluation_steps': 32,
 'dataset_seed': 1}

# Generate instructions

In [73]:
from math import log, ceil
import json
# Generate one command line per (parameter combination, seed) pair and write
# them to an instruction file, together with a JSON dump of the grid.
sweep_type = determine_run_type(list_of_value_args)
print(f'Detected sweep type: {sweep_type}')
n_runs = len(list_of_value_args)*len(seeds)
# Zero-padding width for run ids; log(0) would raise, so guard the empty case.
magnitude = ceil(log(n_runs,10)) if n_runs > 0 else 0
print(f'Number of experiments in this run in the range of 10^{magnitude} ({n_runs})')
sweep_identifier = f'{GENERATION}_{str(SWEEP_ID).zfill(2)}'

# Record the grid that produced this sweep.
# NOTE: with WRITE_MODE 'a' successive dumps are appended to the same file,
# which then holds several concatenated JSON documents.
with open(f'instructions_{sweep_identifier}_config.json',WRITE_MODE) as f:
    json.dump(value_args | {"seeds":seeds},f)

with open(f'instructions_{sweep_identifier}.txt',WRITE_MODE, newline='\n') as f:
    # The file name uses the plain identifier; output directories additionally
    # carry the sweep type.
    sweep_identifier += f'_{sweep_type}'
    for idx, param_combo in enumerate(list_of_value_args): # Iterate over param constellations
        # Work on a copy under a separate name. This keeps the global
        # `value_args` grid intact and leaves the dicts in list_of_value_args
        # unmodified, so re-running this cell does not feed a previous run's
        # seed/output_dir/param_config_id into the config-id lookup.
        run_args = dict(param_combo)
        run_args["run_generation"] = GENERATION # set generation arg
        param_config_id = get_param_config_id(run_args) # get pcid
        run_args["param_config_id"] = param_config_id # set pcid to args
        for idy, seed in enumerate(seeds):
            run_args["seed"] = seed # update current seed to args

            run_id = str(idx*len(seeds)+idy).zfill(magnitude)
            output_dir = f'{base_dir}/{sweep_identifier}/run_{run_id}'
            run_args["output_dir"] = output_dir

            # Fill {run_id} and any argument placeholders in one pass, so a
            # template may use either kind or both. An unknown placeholder
            # raises KeyError here instead of being hidden by a bare except.
            run_name = RUN_NAME.format_map({**run_args, 'run_id': run_id})

            instruction = compose_instruction(
                value_args = run_args,
                boolean_args = boolean_args,
                run_name = run_name,
                use_accelerate = use_accelerate,
                device=DEVICE,
                tags=TAGS+[sweep_type]
            )
            f.write(instruction + '\n')
            print(instruction)

Detected sweep type: vanilla
Number of experiments in this run in the range of 10^3 (135)
CUDA_VISIBLE_DEVICES=1 WANBD_TAGS='["SWP1","NEW_GRAD_OUTPUT","vanilla"]' WANDB_RUNNAME="19_S1_ID_000_VAN" python run_glue_no_trainer_modded.py --task_name qnli --model_name_or_path bert-base-uncased --per_device_train_batch_size 32 --per_device_eval_batch_size 128 --weight_decay 0.01 --lr_scheduler_type linear --warmup_steps_fraction 0.1 --report_to all --insert_dropout 0.04 --training_size 1024 --beta1 0.9 --beta2 0.999 --early_stopping_patience 5 --num_train_epochs 25 --learning_rate 6e-05 --evaluation_steps 32 --dataset_seed 1 --run_generation 19_sweeps --param_config_id pcid_1368 --seed 1 --output_dir /home/lange/fantastic-umbrella/runs/19_sweeps_01_vanilla/run_000 --with_tracking
CUDA_VISIBLE_DEVICES=1 WANBD_TAGS='["SWP1","NEW_GRAD_OUTPUT","vanilla"]' WANDB_RUNNAME="19_S1_ID_001_VAN" python run_glue_no_trainer_modded.py --task_name qnli --model_name_or_path bert-base-uncased --per_device_trai

CUDA_VISIBLE_DEVICES=1 WANBD_TAGS='["SWP1","NEW_GRAD_OUTPUT","vanilla"]' WANDB_RUNNAME="19_S1_ID_006_VAN" python run_glue_no_trainer_modded.py --task_name qnli --model_name_or_path bert-base-uncased --per_device_train_batch_size 32 --per_device_eval_batch_size 128 --weight_decay 0.01 --lr_scheduler_type linear --warmup_steps_fraction 0.1 --report_to all --insert_dropout 0.04 --training_size 1024 --beta1 0.9 --beta2 0.999 --early_stopping_patience 5 --num_train_epochs 25 --learning_rate 2e-05 --evaluation_steps 32 --dataset_seed 1 --run_generation 19_sweeps --param_config_id pcid_1370 --seed 1 --output_dir /home/lange/fantastic-umbrella/runs/19_sweeps_01_vanilla/run_006 --with_tracking
CUDA_VISIBLE_DEVICES=1 WANBD_TAGS='["SWP1","NEW_GRAD_OUTPUT","vanilla"]' WANDB_RUNNAME="19_S1_ID_007_VAN" python run_glue_no_trainer_modded.py --task_name qnli --model_name_or_path bert-base-uncased --per_device_train_batch_size 32 --per_device_eval_batch_size 128 --weight_decay 0.01 --lr_scheduler_type l

In [74]:
# # Archived preset (kept commented out): tests 256 base - generation 16_sweeps, training_size 256
# GENERATION = "16_sweeps"
# SWEEP_ID = '0'
# RUN_NAME = f"16_S{SWEEP_ID}" + "_ID_{run_id}"
# TAGS = [f'SWP{SWEEP_ID}','NEW_GRAD_OUTPUT']
# value_args = {
#     'task_name' : ['qnli'],#["qnli"], #["cola","sst2","mrpc","stsb","qqp","mnli","rte"],
#     'model_name_or_path' : ['bert-base-uncased'],
#     'per_device_train_batch_size' : [32],
#     'per_device_eval_batch_size' : [128],    
#     'weight_decay' : [0.01], 
#     'lr_scheduler_type' : ['linear'],
#     'warmup_steps_fraction' : [0.1],
#     'report_to' : ['all'],
#     'insert_dropout' : [0.04, 0.08, 0.1, 0.12, 0.16, 0.2, 0.24, 0.28, 0.32],
#     'catch_dropout' : [0],
#     'training_size' : [256],
#     'beta1' : [0.9],
#     'beta2' : [0.999],
#     'early_stopping_patience' : [10],
#     'num_train_epochs' : [100], 
#     'learning_rate': [6e-5, 4e-5, 2e-5, 1e-5, 8e-6],
#     'evaluation_steps': [16],
#     'dataset_seed' : [1]
# }
# seeds = [1,2,3]

In [75]:
# # Archived preset (kept commented out): tests 1024 base - generation 19_experiments, training_size 1024
# GENERATION = "19_experiments"
# SWEEP_ID = '0'
# RUN_NAME = "19_E0_EP_{num_train_epochs}"
# TAGS = [f'EXPT{SWEEP_ID}','NEW_GRAD_OUTPUT']
# value_args = {
#     'task_name' : ['qnli'],#["qnli"], #["cola","sst2","mrpc","stsb","qqp","mnli","rte"],
#     'model_name_or_path' : ['bert-base-uncased'],
#     'per_device_train_batch_size' : [32],
#     'per_device_eval_batch_size' : [128],    
#     'weight_decay' : [0.01], 
#     'lr_scheduler_type' : ['linear'],
#     'warmup_steps_fraction' : [0.1],
#     'report_to' : ['all'],
#     'insert_dropout' : [0.1],
#     'catch_dropout' : [0],
#     'training_size' : [1024],
#     'beta1' : [0.9],
#     'beta2' : [0.999],
#     'early_stopping_patience' : [200],
#     'num_train_epochs' : [6,12,25, 50, 100, 200], 
#     'learning_rate': [3e-5],
#     'evaluation_steps': [32],
#     'dataset_seed' : [1]
# }
# seeds = [1,2,3]

In [76]:
# # Archived preset (kept commented out): sweep 64 base - generation 18_experiments, training_size 64
# GENERATION = "18_experiments"
# SWEEP_ID = '2C'
# RUN_NAME = "18_E2C_EP_{num_train_epochs}_LR_{learning_rate}"
# TAGS = [f'EXPT{SWEEP_ID}','NEW_GRAD_OUTPUT'] # ,'alternateGrad'
# value_args = {
#     'task_name' : ['qnli'], #["qnli","cola","sst2","mrpc","stsb","qqp","mnli","rte"],
#     'model_name_or_path' : ['bert-base-uncased'],
#     'per_device_train_batch_size' : [32],
#     'per_device_eval_batch_size' : [128],    
#     'weight_decay' : [0.01], 
#     'lr_scheduler_type' : ['linear'],
#     'warmup_steps_fraction' : [0.1],
#     'report_to' : ['all'],
#     'insert_dropout' : [0.1],
#     'catch_dropout' : [0],
#     'training_size' : [64],
#     'beta1' : [0.9],
#     'beta2' : [0.999],
#     'early_stopping_patience' : [6400],
#     'num_train_epochs' : [100,200,400,800,1600,3200], 
#     'learning_rate': [3e-5],
#     'evaluation_steps': [4],
#     "dataset_seed" : [1]
# }
# seeds = [2,3]