# Set global parameters

In [11]:
# Launch mode: when True the generated command uses `accelerate launch`,
# otherwise a plain `python` call pinned to one GPU via CUDA_VISIBLE_DEVICES.
use_accelerate = False
num_devices = 1
if use_accelerate:
    num_devices = 2

# File mode used when writing the instruction and config files.
WRITE_MODE = 'w'  # 'w' for sweeps, 'a' for tests

# Sweep identity: GENERATION and SWEEP_ID form the instruction file names and,
# together with base_dir, the per-run output directories.
GENERATION = "16_256_modded_base"
SWEEP_ID = 0
base_dir = '/home/lange/fantastic-umbrella/runs'

# GPU index exported as CUDA_VISIBLE_DEVICES, and extra tags for every run.
DEVICE = 0
TAGS = []

# Flags appended to the command as bare `--name` switches.
boolean_args = dict(with_tracking=[True])

# Set run(s) parameters

In [12]:
import numpy as np
# Candidate value grids for sweeps. A grid only takes effect when it is
# referenced from `value_args`; the trailing comments keep earlier alternatives.
l_learning_rate = list(np.array([3, 5, 7, 9, 10, 30, 50]) * 1e-6)
numbers = list(np.array([2, 5, 8]))
l_weight_decay = [0.01]  # np.concatenate([0.001 * numbers, 0.01 * numbers, np.array([0.1])]) # 0.01
l_insert_dropout = list(np.arange(0.1, 0.25, 0.02))  # 0.01
l_catch_dropout = [0.0]
l_beta_1 = [0.9]  # np.arange(0.8,1,0.02) # 0.9
l_beta_2 = [0.999, 0.9999]  # [0.9,0.99,0.999,0.9999] # 0.999
l_num_train_epochs = [300, 600]

# Echo every grid, one per line, in the order they are defined above.
print(
    l_learning_rate,
    l_weight_decay,
    l_insert_dropout,
    l_catch_dropout,
    l_beta_1,
    l_beta_2,
    l_num_train_epochs,
    sep='\n',
)

[3e-06, 4.9999999999999996e-06, 7e-06, 9e-06, 9.999999999999999e-06, 2.9999999999999997e-05, 4.9999999999999996e-05]
[0.01]
[0.1, 0.12000000000000001, 0.14, 0.16000000000000003, 0.18000000000000002, 0.2, 0.22000000000000003, 0.24000000000000002]
[0.0]
[0.9]
[0.999, 0.9999]
[300, 600]


In [13]:
# Every entry maps a command-line option to the list of values to sweep over;
# the cartesian product of all lists defines the parameter combinations.
# Commented-out entries and trailing comments keep earlier alternatives.
value_args = dict(
    task_name=['qnli'],  # ["qnli"], #["cola","sst2","mrpc","stsb","qqp","mnli","rte"],
    model_name_or_path=['bert-base-uncased'],
    per_device_train_batch_size=[32],
    per_device_eval_batch_size=[128],
    weight_decay=[0.01],  # l_weight_decay,#[0.01],
    # max_train_steps=[15000],
    lr_scheduler_type=['linear'],
    # num_warmup_steps=[4000],
    warmup_steps_fraction=[0.1],
    report_to=['all'],
    insert_dropout=[0.1],  # l_insert_dropout,#[0.1],
    # catch_dropout=l_catch_dropout,#[0,0.02],
    training_size=[256],
    beta1=[0.9],  # l_beta_1,#[0.9],
    beta2=[0.999],  # l_beta_2,#[0.999],
    early_stopping_patience=[1000],
    # early_stopping_min_delta=[0],
    # original_gradient_fraction=[0],
    num_train_epochs=[500],  # l_num_train_epochs,
    learning_rate=[2e-5],  # [5e-5,4e-5,3e-5,2e-5]l_learning_rate,
    # evaluation_steps=[128]
)

# Each parameter combination is launched once per seed.
# seeds = [0,1,2,3,4]
seeds = [42]

In [14]:
# import wandb
# from pprint import pprint
# api = wandb.Api()

# pcids = []
# run_names = []
# for run in api.runs("ricu/fantastic-umbrella"):
#     if ('sweep_12a_00' in run.name) | ('sweep_12a_01' in run.name):
#         run.config["run_generation"] = GENERATION
#         run.config["learning_rate"] /= 2
#         pcid = get_param_config_id(
#             {k: run.config[k] for k in (list(value_args) + ['run_generation'])},
#             insert_new=False
#         )
#         run.config["param_config_id"] = pcid
#         run.update()


# Util functions

In [15]:
def compose_instruction(value_args, boolean_args, use_accelerate=False, device=0, tags=None):
    """Build the shell command line that launches one training run.

    Args:
        value_args: Mapping of option name to value; each entry is appended
            as ``--name value`` in mapping order. Values are interpolated
            verbatim (no shell quoting).
        boolean_args: Mapping of flag name to a truth value, or to a
            list/tuple of truth values (the notebook config wraps them in
            lists). The bare switch ``--name`` is appended when the value, or
            any element of the list/tuple, is truthy.
        use_accelerate: If True, launch through ``accelerate launch``; in
            that mode ``device`` and ``tags`` are not used.
        device: Value exported as ``CUDA_VISIBLE_DEVICES`` (plain-python mode).
        tags: Iterable of tag strings rendered as a single-quoted JSON-style
            list in the tags environment variable. ``None`` means no tags.

    Returns:
        The complete command as a single string.
    """
    # Avoid a shared mutable default argument.
    if tags is None:
        tags = []

    if use_accelerate:
        instruction = 'accelerate launch run_glue_no_trainer_modded.py'
    else:
        # instruction = 'python run_glue_no_trainer_modded.py'
        quoted_tags = ','.join(f'"{s}"' for s in tags)
        tag_string = f"'[{quoted_tags}]'"
        # NOTE(review): 'WANBD_TAGS' looks like a misspelling of 'WANDB_TAGS'.
        # Left unchanged because the training script may read exactly this
        # name -- confirm against run_glue_no_trainer_modded.py before renaming.
        env_variables = f'CUDA_VISIBLE_DEVICES={device} WANBD_TAGS={tag_string} '
        instruction = env_variables + 'python run_glue_no_trainer_modded.py'

    for k, v in value_args.items():
        instruction += f' --{k} {v}'

    for k, v in boolean_args.items():
        # A non-empty list such as [False] is truthy as a whole, so look at
        # its elements instead of the container.
        enabled = any(v) if isinstance(v, (list, tuple)) else bool(v)
        if enabled:
            instruction += f' --{k}'

    return instruction

In [16]:
import pandas as pd
INDEX_FILE = 'run_index.csv'  # CSV registry of known parameter configurations, indexed by 'id'
zfill = 4  # zero-padding width of the numeric part of a returned id
def get_param_config_id(param_dict, insert_new=True):
    """Return the id string of a parameter configuration in the index file.

    The configuration is compared with every row already stored in
    ``INDEX_FILE``. If an identical row exists (missing values on both sides
    count as equal), the id of the first such row is returned and the file is
    left untouched. Otherwise the configuration receives the next free id
    (largest existing id + 1, or 0 for an empty index).

    Args:
        param_dict: Mapping of parameter name to a scalar value.
        insert_new: If True, a configuration that is not yet in the index is
            appended and the file is rewritten. If False the file is never
            modified, so a returned new id is not persisted.

    Returns:
        ``'pcid_'`` followed by the id, zero-padded to ``zfill`` digits.
    """
    try:
        pc_index = pd.read_csv(INDEX_FILE, index_col='id')
    except FileNotFoundError:
        pc_index = pd.DataFrame()
        pc_index.index.name = 'id'

    new_id = 0 if len(pc_index) == 0 else pc_index.index.max() + 1
    new_row = pd.DataFrame(param_dict, index=[new_id])
    new_row.index.name = 'id'
    # new_row.name = 0

    # The outer join aligns columns: parameters unknown to one side become NaN.
    combined = pd.concat([pc_index, new_row], join='outer')
    combined.index.name = 'id'  # to_csv must emit the 'id' header read_csv expects

    # Compare the new row only against the rows that were already stored.
    # Checking for *any* duplicate in the table would return an unrelated id
    # whenever the stored index itself contains a duplicated pair.
    existing = combined.iloc[:-1]
    candidate = combined.iloc[-1]
    same_value = existing.eq(candidate, axis='columns')
    both_missing = existing.isna() & candidate.isna()
    matches = (same_value | both_missing).all(axis=1)

    if matches.any():
        pcid = matches.idxmax()  # label of the first matching stored row
    else:
        pcid = new_id
        if insert_new:
            combined.to_csv(INDEX_FILE)

    return f'pcid_{str(pcid).zfill(zfill)}'

In [17]:
def determine_run_type(list_of_value_args):
    """Classify a sweep by inspecting its first parameter combination.

    Returns "modded" when the first entry of ``list_of_value_args`` contains
    the key 'catch_dropout', and "vanilla" otherwise. Only the first entry is
    examined.
    """
    first_combination = list_of_value_args[0]
    return "modded" if 'catch_dropout' in first_combination else "vanilla"

# Create combinations

In [18]:
from itertools import product
# Split the config into parallel tuples of option names and value lists, then
# expand the cartesian product into one dict per parameter combination.
keys, values = zip(*value_args.items())
list_of_value_args = [
    {name: choice for name, choice in zip(keys, combination)}
    for combination in product(*values)
]
print(len(list_of_value_args))
# Show the first combination as the cell output.
list_of_value_args[0]

1


{'task_name': 'qnli',
 'model_name_or_path': 'bert-base-uncased',
 'per_device_train_batch_size': 32,
 'per_device_eval_batch_size': 128,
 'weight_decay': 0.01,
 'lr_scheduler_type': 'linear',
 'warmup_steps_fraction': 0.1,
 'report_to': 'all',
 'insert_dropout': 0.1,
 'training_size': 256,
 'beta1': 0.9,
 'beta2': 0.999,
 'early_stopping_patience': 1000,
 'early_stopping_min_delta': 0,
 'original_gradient_fraction': 0,
 'num_train_epochs': 500,
 'learning_rate': 2e-05}

In [19]:
# import random
# random.shuffle((list_of_value_args))
# list_of_value_args = list_of_value_args[:200]

# Generate instructions

In [20]:
from math import log, ceil
import json
# Write one shell command per (parameter combination, seed) pair to the
# instruction file, plus a JSON snapshot of the sweep configuration.
sweep_type = determine_run_type(list_of_value_args)
print(f'Detected sweep type: {sweep_type}')
n_runs = len(list_of_value_args)*len(seeds)
magnitude = ceil(log(n_runs,10))  # zero-padding width used for run ids below
print(f'Number of experiments in this run in the range of 10^{magnitude} ({n_runs})')
sweep_identifier = f'{GENERATION}_{str(SWEEP_ID).zfill(2)}'

# NOTE: with WRITE_MODE == 'a' this appends another JSON document to an
# existing file; the result is then no longer a single valid JSON document.
with open(f'instructions_{sweep_identifier}_config.json',WRITE_MODE) as f:
    json.dump(value_args | {"seeds":seeds},f)

with open(f'instructions_{sweep_identifier}.txt',WRITE_MODE, newline='\n') as f:
    # The file names above use the bare identifier; output dirs get the type suffix.
    sweep_identifier += f'_{sweep_type}'
    for idx, combination in enumerate(list_of_value_args): # Iterate over param constellations
        # Work on a copy. Mutating the dicts in `list_of_value_args` (and
        # rebinding the global `value_args` as the loop variable) would make a
        # re-run of this cell feed 'param_config_id', 'seed' and 'output_dir'
        # into get_param_config_id and write a single run's arguments into the
        # config JSON instead of the sweep configuration.
        run_args = dict(combination)
        run_args["run_generation"] = GENERATION # set generation arg
        param_config_id = get_param_config_id(run_args) # get pcid
        run_args["param_config_id"] = param_config_id # set pcid to args
        for idy, seed in enumerate(seeds):
            run_args["seed"] = seed # update current seed to args

            run_id = str(idx*len(seeds)+idy).zfill(magnitude)
            output_dir = f'{base_dir}/{sweep_identifier}/run_{run_id}'
            run_args["output_dir"] = output_dir

            instruction = compose_instruction(
                value_args = run_args,
                boolean_args = boolean_args,
                use_accelerate = use_accelerate,
                device=DEVICE,
                tags=TAGS+[sweep_type]
            )
            f.write(instruction + '\n')
            print(instruction)

Detected sweep type: vanilla
Number of experiments in this run in the range of 10^0 (1)
CUDA_VISIBLE_DEVICES=0 WANBD_TAGS='["vanilla"]' python run_glue_no_trainer_modded.py --task_name qnli --model_name_or_path bert-base-uncased --per_device_train_batch_size 32 --per_device_eval_batch_size 128 --weight_decay 0.01 --lr_scheduler_type linear --warmup_steps_fraction 0.1 --report_to all --insert_dropout 0.1 --training_size 256 --beta1 0.9 --beta2 0.999 --early_stopping_patience 1000 --early_stopping_min_delta 0 --original_gradient_fraction 0 --num_train_epochs 500 --learning_rate 2e-05 --run_generation 16_256_modded_base --param_config_id pcid_0291 --seed 42 --output_dir /home/lange/fantastic-umbrella/runs/16_256_modded_base_00_vanilla/run_0 --with_tracking
