# Set global parameters

In [10]:
# Launch mode: True composes `accelerate launch ...` commands, False plain `python ...`.
use_accelerate = False
# Device count implied by the launch mode (2 with accelerate, otherwise 1).
num_devices = 2 if use_accelerate else 1

# Generation label and sweep number; together they name the sweep and its instruction file.
GENERATION = "13_bert_base_paper"
SWEEP_ID = 0
# Root directory under which every run's output_dir is placed.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
base_dir = '/home/lange/fantastic-umbrella/runs'

# Switch-style CLI flags (emitted as a bare `--name`, without a value).
boolean_args = {
    'with_tracking' : [True],
}

# Set run(s) parameters

In [11]:
# Sweep grid: every key is a CLI option of the training script and every value
# is the list of settings to try. The Cartesian product over all lists yields
# one parameter constellation each; commented-out keys are currently disabled.
value_args ={
    # "stsb" used to be listed twice, which generated every STS-B constellation
    # twice while QNLI was missing from the 8-task GLUE suite.
    # NOTE(review): "qnli" is the presumed intended entry — confirm.
    'task_name' : ["cola","sst2","mrpc","stsb","qnli","qqp","mnli","rte"],
    'model_name_or_path' : ['bert-base-uncased'],
    'per_device_train_batch_size' : [32],
    'per_device_eval_batch_size' : [32],
    
    'weight_decay' : [0.01],
    
    # 'max_train_steps' : [20000],
    'lr_scheduler_type' : ['linear'],
    # 'num_warmup_steps' : [4000],
    'warmup_steps_fraction' : [0.1],
    # 'seed' : [1,2,3,4,5],
    'report_to' : ['all'],
    'insert_dropout' : [0.1],
    # 'catch_dropout' : [0,0.02],
    'training_size' : [1],
    'beta1' : [0.9],
    'beta2' : [0.999],
    'early_stopping_patience' : [5],
    'early_stopping_min_delta' : [0],
    'original_gradient_fraction' : [0],
    'num_train_epochs' : [3],
    'learning_rate': [5e-5,4e-5,3e-5,2e-5]
}
# Seeds are kept outside the grid so that all seeds of one constellation share
# the same parameter-config id.
seeds = [1,2,3,4,5]

In [12]:
# import wandb
# from pprint import pprint
# api = wandb.Api()

# pcids = []
# run_names = []
# for run in api.runs("ricu/fantastic-umbrella"):
#     if ('sweep_12a_00' in run.name) | ('sweep_12a_01' in run.name):
#         run.config["run_generation"] = GENERATION
#         run.config["learning_rate"] /= 2
#         pcid = get_param_config_id(
#             {k: run.config[k] for k in (list(value_args) + ['run_generation'])},
#             insert_new=False
#         )
#         run.config["param_config_id"] = pcid
#         run.update()


# Util functions

In [13]:
def compose_instruction(value_args, boolean_args, use_accelerate=False):
    """Build the shell command that launches one training run.

    Parameters
    ----------
    value_args : dict
        Option name -> value; each entry is rendered as ``--name value``.
    boolean_args : dict
        Flag name -> switch state; an enabled flag is rendered as a bare
        ``--name``. The state may be a plain truthy/falsy value or a
        list/tuple of such values (the notebook's config wraps flags in
        lists); a list/tuple counts as enabled if any element is truthy.
    use_accelerate : bool, optional
        If True the command starts with ``accelerate launch``, otherwise
        with ``python``.

    Returns
    -------
    str
        The complete command line (no trailing newline).
    """
    launcher = 'accelerate launch' if use_accelerate else 'python'
    parts = [f'{launcher} run_glue_no_trainer_modded.py']

    for name, value in value_args.items():
        parts.append(f'--{name} {value}')

    for name, state in boolean_args.items():
        # A non-empty list such as [False] is truthy as a whole, so unwrap
        # list-wrapped switches before testing them.
        if isinstance(state, (list, tuple)):
            state = any(state)
        if state:
            parts.append(f'--{name}')

    return ' '.join(parts)

In [14]:
import pandas as pd
INDEX_FILE = 'run_index.csv'
zfill = 4
def get_param_config_id(param_dict, insert_new=True):
    """Return the id of a parameter configuration, registering it if unknown.

    The index of known configurations lives in the CSV file ``INDEX_FILE``
    (one row per configuration, integer index column ``id``). If a stored row
    equals ``param_dict`` its id is reused; otherwise the next free id
    (largest stored id + 1, or 0 for an empty index) is assigned.

    Parameters
    ----------
    param_dict : dict
        Parameter name -> scalar value describing one configuration.
    insert_new : bool, optional
        If True (default) a previously unknown configuration is appended to
        ``INDEX_FILE``. If False the file is never written, so the returned
        id of an unknown configuration is only provisional.

    Returns
    -------
    str
        ``'pcid_'`` followed by the id, zero-padded to ``zfill`` digits.
    """
    try:
        pc_index = pd.read_csv(INDEX_FILE, index_col='id')
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No index yet (or a blank file): start from an empty table.
        pc_index = pd.DataFrame()
        pc_index.index.name = 'id'

    has_rows = len(pc_index) > 0
    next_id = pc_index.index.max() + 1 if has_rows else 0
    new_row = pd.DataFrame(param_dict, index=[next_id])
    new_row.index.name = 'id'

    if has_rows:
        # Look the new row up in a de-duplicated view of the index. Without
        # this, duplicate rows that already sit in the file would be reported
        # as a "match" and the id of an unrelated row would be returned.
        lookup = pd.concat([pc_index.drop_duplicates(), new_row], join='outer')
    else:
        lookup = new_row

    matches = lookup.duplicated(keep=False)
    if matches.any():
        # Stored rows are unique here, so the first flagged row is the stored
        # twin of the new row. The file already contains it: nothing to write.
        pcid = matches.idxmax()
    else:
        pcid = next_id
        if insert_new:
            if has_rows:
                updated_index = pd.concat([pc_index, new_row], join='outer')
            else:
                updated_index = new_row
            updated_index.to_csv(INDEX_FILE)

    return f'pcid_{str(pcid).zfill(zfill)}'

In [15]:
def determine_run_type(list_of_value_args):
    """Classify a sweep by inspecting its first parameter constellation.

    Returns "modded" when that constellation contains a 'catch_dropout'
    entry and "vanilla" otherwise.
    """
    first_constellation = list_of_value_args[0]
    return "modded" if 'catch_dropout' in first_constellation else "vanilla"

# Create combinations

In [16]:
from itertools import product

# Expand the grid: one dict per parameter constellation, i.e. the Cartesian
# product over every option list in value_args, re-keyed by parameter name.
keys, values = zip(*value_args.items())
list_of_value_args = list(
    map(lambda combo: dict(zip(keys, combo)), product(*values))
)
print(len(list_of_value_args))
list_of_value_args[0]

32


{'task_name': 'cola',
 'model_name_or_path': 'bert-base-uncased',
 'per_device_train_batch_size': 32,
 'per_device_eval_batch_size': 32,
 'weight_decay': 0.01,
 'lr_scheduler_type': 'linear',
 'warmup_steps_fraction': 0.1,
 'report_to': 'all',
 'insert_dropout': 0.1,
 'training_size': 1,
 'beta1': 0.9,
 'beta2': 0.999,
 'early_stopping_patience': 5,
 'early_stopping_min_delta': 0,
 'original_gradient_fraction': 0,
 'num_train_epochs': 3,
 'learning_rate': 5e-05}

# Generate instructions

In [17]:
from math import log, ceil

sweep_type = determine_run_type(list_of_value_args)
print(f'Detected sweep type: {sweep_type}')

# Width of the zero-padded run ids: enough digits for ids 0 .. total_runs - 1.
total_runs = len(list_of_value_args) * len(seeds)
magnitude = ceil(log(total_runs, 10)) if total_runs else 0  # log(0) would raise
print(f'Number of experiments in this run in the range of 10^{magnitude}')

sweep_identifier = f'{GENERATION}_{str(SWEEP_ID).zfill(2)}'
# The instruction file is named without the sweep type; the run directories
# include it.
instructions_file = f'instructions_{sweep_identifier}.txt'
sweep_identifier += f'_{sweep_type}'

# newline='\n' forces LF line endings regardless of the platform default.
with open(instructions_file, 'w', newline='\n') as f:
    for idx, constellation in enumerate(list_of_value_args): # Iterate over param constellations
        # Work on a copy: the entries of list_of_value_args stay untouched (a
        # re-run must not register seed/output_dir as part of a parameter
        # config) and the global `value_args` grid is not rebound by the loop.
        run_args = dict(constellation)
        run_args["run_generation"] = GENERATION # set generation arg
        param_config_id = get_param_config_id(run_args) # get pcid
        run_args["param_config_id"] = param_config_id # set pcid to args

        for idy, seed in enumerate(seeds):
            run_args["seed"] = seed # update current seed to args

            # Unique consecutive id per (constellation, seed) pair. The former
            # `idx + idy` collided, e.g. (0, 1) and (1, 0) both mapped to 1,
            # so several runs shared one output directory.
            run_id = str(idx * len(seeds) + idy).zfill(magnitude)
            output_dir = f'{base_dir}/sweep_{sweep_identifier}/run_{run_id}'
            run_args["output_dir"] = output_dir

            instruction = compose_instruction(
                value_args = run_args,
                boolean_args = boolean_args,
                use_accelerate = use_accelerate
            )

            f.write(instruction + '\n')


Detected sweep type: vanilla
Number of experiments in this run in the range of 10^3


In [18]:
# run_id = 0
# run_type = 'single'


# output_dir = f'{base_dir}/{run_name}'

# value_args ={
#     'task' : 'rte',
#     'model_name_or_path' : 'bert-base-cased',
#     'per_device_train_batch_size' : 32 // num_devices,
#     'per_device_eval_batch_size' : 16 // num_devices,
#     'learning_rate': 1e-6,
#     'weight_decay' : 0,
#     'num_train_epochs' : 100000,
#     'max_train_steps' : 100000,
#     'lr_scheduler_type' : 'linear',
#     'num_warmup_steps' : 500,
#     'output_dir' : output_dir,
#     'seed' : 0,
#     'report_to' : 'all',
#     'insert_dropout' : 0.1,
#     'catch_dropout' : 0,
#     'training_size' : 32,
#     'beta1' : 0.9,
#     'beta2' : 0.999,
#     'early_stopping_patience' : 5000,
#     'early_stopping_min_delta' : 0,
#     'original_gradient_fraction' : 0
# }
# boolean_args = {
#     'with_tracking' : True,

# }

# # Compose instruction
# instruction = compose_instruction(value_args = value_args, 
#                                   boolean_args = boolean_args,
#                                   use_accelerate = use_accelerate)

# print(instruction)
# # Write instruction
# with open(f'instruction_{GENERATION}_{run_id}.txt', 'w') as f:
#     f.write(instruction)