# Random Search of Parameters

Method which takes in a dictionary of parameter values, and produces `n` sets of values through random sampling

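To find, with probability at least $1 - a$, a configuration in the top fraction $a$ of all configurations (e.g. $a = 0.1$ for the top 10%), you need to run $n > \frac{\log(a)}{\log(1-a)}$ trials: the chance that all $n$ random trials miss that top fraction is $(1-a)^n$, which must drop below $a$ [[ref]](https://stats.stackexchange.com/questions/160479/practical-hyperparameter-optimization-random-vs-grid-search).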

To get within 10% of the true maximum ($a = 0.1$) we need to run $n > \frac{\log(0.1)}{\log(0.9)} \approx 21.85$ trials, i.e. at least 22.

In [17]:
import numpy as np
import itertools
import datetime

# Relative path to a DGL-KE scripts directory.
# NOTE(review): not referenced by any cell visible here — confirm it is still needed.
script_folder = "./dgl-ke/scripts"

## 1. Get parameters

In [23]:
# Hyperparameter search space: each key maps to the list of candidate values.
# Key order and value order both matter: get_random_samples enumerates the grid
# in this order, so reordering anything changes which combinations a given seed
# selects.
params = {
    "lr": [0.001, 0.01, 0.1],  # passed to dglke_train as --lr
    "neg_adv": [True, False],  # True adds the -adv flag, False omits it
    "hidden_dim": [50, 100, 250, 400],  # passed as --hidden_dim
    "norm": ['l1', 'l2'],  # selects the model name TransE_l1 / TransE_l2
    "regularization_coef": [2e-6, 2e-8],  # passed as -rc
    "gamma": [1, 5, 10, 20],  # passed as -g
}

In [24]:
def get_random_samples(params, n, replacement=False, seed=42):
    """Randomly sample `n` parameter sets from the grid spanned by `params`.

    The grid is the Cartesian product of the candidate values, taken in
    dictionary order with the last key varying fastest (the same order as
    `itertools.product`). Flat grid positions are drawn with a seeded
    `np.random.RandomState` and decoded into values on demand, so the
    combinations themselves are never materialised in memory.

    Args:
        params: dict mapping a parameter name to an iterable of candidate values.
        n: number of parameter sets to return.
        replacement: if True, the same combination may be drawn more than once.
        seed: seed for the random generator; the same seed and `params` always
            yield the same samples.

    Returns:
        A list of `n` dicts, each mapping every key of `params` to one of its
        candidate values.

    Raises:
        ValueError: if `replacement` is False and `n` is larger than the
            number of distinct combinations.
    """
    names = list(params)
    # Copy each candidate collection so it can be measured and indexed even
    # when the caller passed a generic iterable.
    choices = [list(values) for values in params.values()]

    total = 1
    for values in choices:
        total *= len(values)

    if not replacement and n > total:
        raise ValueError(
            f"Cannot draw {n} distinct samples from only {total} combinations; "
            "lower n or pass replacement=True."
        )

    rnd = np.random.RandomState(seed)
    picked = rnd.choice(total, n, replace=replacement)

    all_res = []
    for flat_index in picked:
        # Mixed-radix decode of the flat index, peeling off the fastest-varying
        # (last) key first, then restoring key order.
        remainder = int(flat_index)
        combo = []
        for values in reversed(choices):
            remainder, position = divmod(remainder, len(values))
            combo.append(values[position])
        combo.reverse()
        all_res.append(dict(zip(names, combo)))

    return all_res

In [25]:
# 22 is the smallest whole number of trials above the 21.85 bound quoted in the
# introduction.
n = 22
# Relies on the function's defaults: no replacement and seed=42.
samples = get_random_samples(params, n)
# Preview the first three sampled configurations.
samples[0:3]

[{'lr': 0.1,
  'neg_adv': True,
  'hidden_dim': 50,
  'norm': 'l2',
  'regularization_coef': 2e-08,
  'gamma': 1},
 {'lr': 0.01,
  'neg_adv': False,
  'hidden_dim': 400,
  'norm': 'l2',
  'regularization_coef': 2e-06,
  'gamma': 10},
 {'lr': 0.1,
  'neg_adv': False,
  'hidden_dim': 250,
  'norm': 'l1',
  'regularization_coef': 2e-06,
  'gamma': 20}]

## 2. Run DGL-KE on each of the parameter sets

In [22]:
# fixed params
# These are held constant for every run and interpolated into the dglke_train
# command line in the training loop.
DATA_PATH="~/data"  # --data_path; inserted unquoted, so the shell expands "~"
SAVE_PATH="~/data/results"  # --save_path
DATASET="hc1708"  # --dataset
FORMAT="raw_udd_htr"  # --format

LOG_INTERVAL=10000  # --log_interval
BATCH_SIZE=1000  # --batch_size
BATCH_SIZE_EVAL=16  # --batch_size_eval
NEG_SAMPLE_SIZE=200  # --neg_sample_size


hc1808


In [None]:
for idx, s in enumerate(samples[0:1]):
    print(f"---TEST {idx+1}---")
    
    filename = f"./run_{idx+1}.txt"
    model_name = f"TransE_{s['norm']}"
    neg_adv_flag = '-adv' if s['neg_adv'] else ''

    ! dglke_train --model_name {model_name} --data_path {DATA_PATH} --save_path {SAVE_PATH}  --dataset {DATASET}  --format {FORMAT} \
    --data_files train.txt valid.txt test.txt \
    --log_interval {LOG_INTERVAL} --batch_size {BATCH_SIZE} --batch_size_eval {BATCH_SIZE_EVAL} --neg_sample_size {NEG_SAMPLE_SIZE} \
    --lr {s['lr']} {neg_adv_flag} --hidden_dim {s['hidden_dim']} -rc {s['regularization_coef']} -g {s['gamma']}
    --gpu 0 --test --mix_cpu_gpu --async_update |& tee {filename}
    