In [1]:
args = {'dataset': 'fb15k',
        'vocab_size' : 16396,
        'num_relations': 1345,
        
        # training hyper-paramters
        'batch_size':512,
        'learning_rate':5e-4,
        'epoch':400,
        'soft_label':0.8,
        'skip_steps':1000,
        'max_seq_len':3,
        'hidden_dropout_prob':0.1,
        'attention_probs_dropout_prob':0.1,
        
        # file paths for training and evaluation 
        'data':"./data",
        # OUTPUT="./output_"+dataset
        'train_file':"./data/train.coke.txt",
        # VALID_FILE="./data/valid.coke.txt"
        # TEST_FILE="./data/test.coke.txt"
        'vocab_path':"./data/vocab.txt",
        'true_triple_path':"./data/all.txt",
        
        # transformer (default) config
        'hidden_size':256,
        'num_hidden_layers':12,
        'num_attention_heads':4,
        'max_position_embeddings':40}

In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

In [18]:
import argparse
from bin.utils.args import ArgumentGroup, print_arguments

In [19]:
parser = argparse.ArgumentParser()

# class ArgumentGroup(parser, title, des, **kwargs)
model_g = ArgumentGroup(parser, "model", "model configuration and paths.")

# ArgumentGroup().add_arg(name, type, default, help)
model_g.add_arg("hidden_size",              int, 256,            "CoKE model config: hidden size, default 256")
model_g.add_arg("num_hidden_layers",        int, 6,              "CoKE model config: num_hidden_layers, default 6")
model_g.add_arg("num_attention_heads",      int, 4,              "CoKE model config: num_attention_heads, default 4")
model_g.add_arg("vocab_size",               int, -1,           "CoKE model config: vocab_size")
model_g.add_arg("num_relations",         int, None,           "CoKE model config: vocab_size")
model_g.add_arg("max_position_embeddings",  int, 10,             "CoKE model config: max_position_embeddings")
model_g.add_arg("hidden_act",               str, "gelu",         "CoKE model config: hidden_ac, default gelu")
model_g.add_arg("hidden_dropout_prob",      float, 0.1,          "CoKE model config: attention_probs_dropout_prob, default 0.1")
model_g.add_arg("attention_probs_dropout_prob", float, 0.1,      "CoKE model config: attention_probs_dropout_prob, default 0.1")
model_g.add_arg("initializer_range",        int, 0.02,           "CoKE model config: initializer_range")
model_g.add_arg("intermediate_size",        int, 512,            "CoKE model config: intermediate_size, default 512")
model_g.add_arg("init_checkpoint",          str,  None,          "Init checkpoint to resume training from, or for prediction only")
model_g.add_arg("init_pretraining_params",  str,  None,          "Init pre-training params which preforms fine-tuning from. If the "
                 "arg 'init_checkpoint' has been set, this argument wouldn't be valid.")
model_g.add_arg("checkpoints",              str,  "checkpoints", "Path to save checkpoints.")
model_g.add_arg("weight_sharing",           bool, True,          "If set, share weights between word embedding and masked lm.")


In [20]:
train_g = ArgumentGroup(parser, "training", "training options.")

train_g.add_arg("epoch",             int,    100,                "Number of epoches for training.")
train_g.add_arg("learning_rate",     float,  5e-5,               "Learning rate used to train with warmup.")
train_g.add_arg("lr_scheduler",     str, "linear_warmup_decay",  "scheduler of learning rate.", 
                choices=['linear_warmup_decay', 'noam_decay'])
train_g.add_arg("soft_label",               float, 0.9,          "Value of soft labels for loss computation")
train_g.add_arg("weight_decay",      float,  0.01,               "Weight decay rate for L2 regularizer.")
train_g.add_arg("warmup_proportion", float,  0.1,                "Proportion of training steps to perform linear learning rate warmup for.")
train_g.add_arg("use_ema",           bool,   True,               "Whether to use ema.")
train_g.add_arg("ema_decay",         float,  0.9999,             "Decay rate for expoential moving average.")
train_g.add_arg("use_fp16",          bool,   False,              "Whether to use fp16 mixed precision training.")
train_g.add_arg("loss_scaling",      float,  1.0,                "Loss scaling factor for mixed precision training, only valid when use_fp16 is enabled.")

In [13]:
# log_g = ArgumentGroup(parser, "logging", "logging related.")

# log_g.add_arg("skip_steps",          int,    1000,               "The steps interval to print loss.")
# log_g.add_arg("verbose",             bool,   False,              "Whether to output verbose log.")

In [21]:
data_g = ArgumentGroup(parser, "data", "Data paths, vocab paths and data processing options")
data_g.add_arg("dataset",                   str,   "",    "dataset name")
data_g.add_arg("train_file",                str,   None,  "Data for training.")
data_g.add_arg("sen_candli_file",           str,   None,  "sentence_candicate_list file for path query evaluation. Only used for path query datasets")
data_g.add_arg("sen_trivial_file",           str,   None,  "trivial sentence file for pathquery evaluation. Only used for path query datasets")
data_g.add_arg("predict_file",              str,   None,  "Data for predictions.")
data_g.add_arg("vocab_path",                str,   None,  "Path to vocabulary.")
data_g.add_arg("true_triple_path",          str,   None,  "Path to all true triples. Only used for KBC evaluation.")
data_g.add_arg("max_seq_len",               int,   3,     "Number of tokens of the longest sequence.")
data_g.add_arg("batch_size",                int,   12,    "Total examples' number in batch for training. see also --in_tokens.")
data_g.add_arg("in_tokens",                 bool,  False,
               "If set, the batch size will be the maximum number of tokens in one batch. "
               "Otherwise, it will be the maximum number of examples in one batch.")

In [16]:
# run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
# run_type_g.add_arg("do_train",                     bool,   False,  "Whether to perform training.")
# run_type_g.add_arg("do_predict",                   bool,   False,  "Whether to perform prediction.")
# run_type_g.add_arg("use_cuda",                     bool,   True,   "If set, use GPU for training, default is True.")
# run_type_g.add_arg("use_fast_executor",            bool,   False,  "If set, use fast parallel executor (in experiment).")
# run_type_g.add_arg("num_iteration_per_drop_scope", int,    1,      "Ihe iteration intervals to clean up temporary variables.")

ArgumentError: argument --do_train: conflicting option string: --do_train

In [22]:
# args = parser.parse_args()

usage: ipykernel_launcher.py [-h] [--hidden_size HIDDEN_SIZE] [--num_hidden_layers NUM_HIDDEN_LAYERS]
                             [--num_attention_heads NUM_ATTENTION_HEADS] [--vocab_size VOCAB_SIZE]
                             [--num_relations NUM_RELATIONS] [--max_position_embeddings MAX_POSITION_EMBEDDINGS]
                             [--hidden_act HIDDEN_ACT] [--hidden_dropout_prob HIDDEN_DROPOUT_PROB]
                             [--attention_probs_dropout_prob ATTENTION_PROBS_DROPOUT_PROB]
                             [--initializer_range INITIALIZER_RANGE] [--intermediate_size INTERMEDIATE_SIZE]
                             [--init_checkpoint INIT_CHECKPOINT] [--init_pretraining_params INIT_PRETRAINING_PARAMS]
                             [--checkpoints CHECKPOINTS] [--weight_sharing WEIGHT_SHARING] [--epoch EPOCH]
                             [--learning_rate LEARNING_RATE] [--lr_scheduler {linear_warmup_decay,noam_decay}]
                             [--soft_label SOFT_LABE

SystemExit: 2

In [24]:
model_g['hidden_size']

TypeError: 'ArgumentGroup' object is not subscriptable