# 自动配置参数，帮助自动生成运行文件

In [73]:
import tensorflow as tf

def load_hparams(model_dir):
  """Read the saved hyper-parameters stored in a model directory.

  Looks for a file named ``hparams`` directly inside ``model_dir`` and
  parses it as UTF-8 encoded JSON.

  Args:
    model_dir: Directory expected to contain the ``hparams`` file.

  Returns:
    A ``tf.contrib.training.HParams`` built from the JSON values, or
    ``None`` when the file does not exist or a ``ValueError`` is raised
    while decoding it / constructing the ``HParams`` object.
  """
  path = os.path.join(model_dir, "hparams")
  if not tf.gfile.Exists(path):
    return None

  print_out("# Loading hparams from %s" % path)
  utf8_reader = codecs.getreader("utf-8")
  with utf8_reader(tf.gfile.GFile(path, "rb")) as stream:
    try:
      # Both the JSON decoding and the HParams construction are guarded:
      # either one may raise ValueError on malformed content.
      loaded = tf.contrib.training.HParams(**json.load(stream))
    except ValueError:
      print_out("  can't load hparams file")
      return None
  return loaded

def load_or_update_configs(config_path, default_dict=None):
    """
    Load ``key=value`` settings from a config file on top of existing values.

    Every line is stripped of surrounding whitespace. Blank lines and lines
    starting with ``#`` are skipped. A line that does not split into exactly
    two parts on ``=`` is reported as a "Bad line" and ignored. Keys and
    values are stored as stripped strings; no type conversion is done.

    Args:
        config_path: Path of the UTF-8 text file to read.
        default_dict: Dict to update in place with the loaded entries. When
            omitted (or ``None``) a fresh empty dict is used, so entries
            never leak from one call to the next.

    Returns:
        The updated dict (the very same object as ``default_dict`` when one
        was passed), or ``None`` when the file cannot be opened or decoded.
    """
    configs = {} if default_dict is None else default_dict
    try:
        # Read-only mode: loading a config must not require write permission.
        with open(config_path, 'r', encoding='utf-8') as fin:
            print("Load config file: %s" % config_path)
            lines = fin.readlines()
    except (OSError, UnicodeError) as err:
        print("Can't load config file %s: %s" % (config_path, err))
        return None

    for raw_line in lines:
        line = raw_line.strip()
        # Guard against empty lines before looking at the first character;
        # indexing an empty string would raise IndexError.
        if not line or line.startswith("#"):
            continue
        items = line.split('=')
        if len(items) != 2:
            print("Bad line: %s" % line)
            continue
        key = items[0].strip()
        value = items[1].strip()
        configs[key] = value
        print("%s=%s" % (key, value))
    return configs

# Word-Level or single_level

In [74]:
def generate_word_level_model(
    data_space,
    model_id,
    gpu='0',
    language='chinese',
    vocab_prefix='vocab',
    train_prefix='train',
    test_prefix='test',
    dev_prefix='dev',
    preset_configs=None,
):
    """
    Write a shell script ``../<model_id>.sh`` that launches ``nrm.nrm``.

    The command-line flags are assembled in this order of precedence (later
    entries override earlier ones):

    1. data/output paths derived from the arguments,
    2. ``configs/basic.config``,
    3. a language-specific offset config for ``'english'``,
       ``'chinese_char'`` or ``'english_char'`` (any other ``language``
       value, including the default ``'chinese'``, adds none),
    4. ``preset_configs``.

    Args:
        data_space: Prefix prepended verbatim to the data file prefixes; it
            is concatenated as-is, so it must end with a path separator when
            it names a directory.
        model_id: Name used for the script file, the ``models/`` output
            directory and the ``logs/`` file.
        gpu: Value exported as ``CUDA_VISIBLE_DEVICES``.
        language: Selects the optional offset config file.
        vocab_prefix: Vocabulary file prefix, appended to ``data_space``.
        train_prefix: Training file prefix, appended to ``data_space``.
        test_prefix: Test file prefix, appended to ``data_space``.
        dev_prefix: Dev file prefix, appended to ``data_space``.
        preset_configs: Optional dict of overrides applied last.

    Raises:
        RuntimeError: If a required config file cannot be loaded. This is
            raised before the output script is opened, so an existing script
            is never truncated by a failed run.
    """
    config = {
        'vocab_prefix': data_space + vocab_prefix,
        'train_prefix': data_space + train_prefix,
        'test_prefix': data_space + test_prefix,
        'dev_prefix': data_space + dev_prefix,
        'out_dir': "models/" + model_id,
    }

    offset_configs = {
        'english': 'configs/en_wl_offset.config',
        'chinese_char': 'configs/cn_charlevel_offset.config',
        'english_char': 'configs/en_charlevel_offset.config',
    }
    config_files = ['configs/basic.config']
    if language in offset_configs:
        config_files.append(offset_configs[language])

    for config_file in config_files:
        config = load_or_update_configs(config_file, config)
        # The loader signals failure by returning None; stop here with a
        # clear message instead of failing later with an opaque TypeError.
        if config is None:
            raise RuntimeError("Can't load config file: %s" % config_file)

    # preset
    for key, value in (preset_configs or {}).items():
        config[key] = value
        print("preset: %s=%s" % (key, value))

    # Build the full script text before opening the output file.
    parts = [
        "export CUDA_VISIBLE_DEVICES=%s \n\n" % gpu,
        "python3 -m nrm.nrm  ",
    ]
    parts.extend("    --%s=%s  " % (key, value) for key, value in config.items())
    parts.append("   >> logs/%s.txt \n" % model_id)

    with open("../%s.sh" % model_id, 'w', encoding='utf-8') as fout:
        fout.write("".join(parts))

In [75]:
# Generate one launch script per dataset / model variant.

# English, word level.
generate_word_level_model(
    data_space='/ldev/tensorflow/nmt2/nmt/data/enwordlevel/',
    model_id='enword_gru',
    language='english',
    vocab_prefix="vocab.40000.separate",
)

# Chinese, word level (relies on the function's default language).
generate_word_level_model(
    data_space='/ldev/tensorflow/nmt2/nmt/data/wordlevel/',
    model_id='chinese_gru',
    vocab_prefix="vocab.40000.separate",
)

# Chinese, character level.
generate_word_level_model(
    data_space='/ldev/tensorflow/nmt2/nmt/data/charspace/',
    model_id='chinese_char_gru',
    language='chinese_char',
    vocab_prefix="vocab.40000.separate",
)

# English, character level.
generate_word_level_model(
    data_space='/ldev/tensorflow/nmt2/nmt/data/encharspace/',
    model_id='english_char_gru',
    language='english_char',
    vocab_prefix="vocab.40000.separate",
)

Load config file: configs/basic.config
num_units=512
embed_dim=340
num_layers=4
unit_type=gru
share_vocab=False
src_max_len=20
tgt_max_len=20
batch_size=256
dropout=0.3
encoder_type=bi
infer_batch_size=10
attention=luong
src=message
tgt=response
num_train_steps=1000000
steps_per_stats=100
metrics=rouge,bleu-1,bleu-2,bleu-3,bleu-4,distinct-1,distinct-2
Load config file: configs/en_wl_offset.config
src_max_len=30
tgt_max_len=30
Load config file: configs/basic.config
num_units=512
embed_dim=340
num_layers=4
unit_type=gru
share_vocab=False
src_max_len=20
tgt_max_len=20
batch_size=256
dropout=0.3
encoder_type=bi
infer_batch_size=10
attention=luong
src=message
tgt=response
num_train_steps=1000000
steps_per_stats=100
metrics=rouge,bleu-1,bleu-2,bleu-3,bleu-4,distinct-1,distinct-2
Load config file: configs/basic.config
num_units=512
embed_dim=340
num_layers=4
unit_type=gru
share_vocab=False
src_max_len=20
tgt_max_len=20
batch_size=256
dropout=0.3
encoder_type=bi
infer_batch_size=10
attention=l