We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
# Author      : yuanxiangxie
# Email       :
# Create Time : 2018-07-06 17:29
# Description : CTR model (content/word matching) trained with PaddlePaddle v2.
#               Python 2 only (reload/setdefaultencoding, print >>, paddle.v2).
# Copyright (c) 2017 . All Rights Reserved
"""
import os
import sys
reload(sys)  # Python 2 idiom: re-expose setdefaultencoding after interpreter start-up.
sys.setdefaultencoding('utf-8')

import getopt
import gzip
import pickle
import shutil

import paddle.v2 as paddle

# ---- data locations ---------------------------------------------------------
is_online = False

offline_data_dir = "."
train_data_dir = os.path.join(offline_data_dir, "train_data_dir")
test_data_dir = os.path.join(offline_data_dir, "test_data_dir")
word_dict_file = os.path.join("thirdparty", "word_dict.pkl")

if is_online:
    online_data_dir = "."
    online_train_data_dir = os.path.join(online_data_dir, "train_data_dir")
    online_test_data_dir = os.path.join(online_data_dir, "test_data_dir")
    train_data_dir = os.path.join(online_train_data_dir, "train_data")
    test_data_dir = os.path.join(online_test_data_dir, "test_data")
    # NOTE(review): this expands to "thirdparty/thirdparty/word_dict.pkl",
    # unlike the offline path above ("thirdparty/word_dict.pkl") — confirm the
    # duplicated directory component is intentional.
    word_dict_file = os.path.join("thirdparty", "thirdparty/word_dict.pkl")

# ---- hyper-parameters -------------------------------------------------------
hidden_size = 128
train_buf_size = 128      # shuffle buffer size for the training reader
train_batch_size = 64
test_batch_size = 64
word_len_size = 106       # cardinality of the word-length feature
num_passes = 1

# Fix: open the pickle in binary mode and close the handle deterministically.
# The original `pickle.load(open(word_dict_file, "r"))` used text mode and
# leaked the open file object.
with open(word_dict_file, "rb") as _dict_file:
    word_dict = pickle.load(_dict_file)
word_dict_unk = word_dict["<UNK>"]
word_dict_size = len(word_dict) + 16  # headroom beyond the dictionary size

model_save_dir = os.path.join(".", "model_dir")

# Shared embedding parameter: content tokens and the target word are embedded
# with the same weight matrix (see get_content_feature / get_word_feature).
emb_share_attr = paddle.attr.Param(name="emb_share_attr")


class ctr_model(object):
    """PaddlePaddle v2 CTR model: cosine similarity between a text-content
    feature tower and a word feature tower, regressed against a 0/1 label."""

    def __init__(self):
        """Initialize the paddle runtime; cluster topology comes from
        the PADDLE_* environment variables (single local trainer by default)."""
        paddle.init(
            use_gpu=False,
            trainer_count=int(os.getenv("PADDLE_TRAINER_COUNT", "1")),
            port=int(os.getenv("PADDLE_PORT", "22535")),
            ports_num=int(os.getenv("PADDLE_PORTS_NUM", "1")),
            num_gradient_servers=int(os.getenv("PADDLE_NUM_GRADIENT_SERVERS", "1")),
            trainer_id=int(os.getenv("PADDLE_TRAINER_ID", "0")),
            pservers=os.getenv("PADDLE_PSERVERS", "127.0.0.1"))

    def __parse_data_reader(self, line_list, is_infer=False):
        """Parse one tab-split input record into model features.

        line_list layout: [_, content-token-ids (space separated),
        word-id, word-length, label].

        Returns (content, word, word_len) when is_infer is true, otherwise
        (content, word, word_len, [label]).

        Fix: the label column is only parsed when it is actually needed —
        the original unconditionally read line_list[4], which would raise
        IndexError on a 4-field inference record.
        """
        content = [int(i_item) for i_item in line_list[1].split(' ')]
        word = int(line_list[2])
        word_len = int(line_list[3])
        if is_infer:
            return content, word, word_len
        label = int(line_list[4])
        return content, word, word_len, [label]

    def data_reader(self, data_dir):
        """Build a reader over every file in data_dir.

        Yields (content, word, word_len, [label]) tuples; malformed lines
        (field count != 5) are silently skipped.
        """
        def _data_reader():
            """Generator that walks all files under data_dir line by line."""
            for file_name in os.listdir(data_dir):
                with open(os.path.join(data_dir, file_name), "r") as in_file:
                    for line in in_file:
                        line = line.strip('\n')
                        line_list = line.split('\t')
                        if len(line_list) != 5:
                            continue
                        (content, word, word_len, label) = \
                            self.__parse_data_reader(line_list, False)
                        yield (content, word, word_len, label)
        return _data_reader

    def get_content_feature(self, share_attr):
        """Content tower: shared embedding -> three conv-pool branches
        (context 3/5/7) -> fc(256) -> dropout(0.5) -> fc(128)."""
        content_digit = paddle.layer.data(
            name="content",
            type=paddle.data_type.integer_value_sequence(word_dict_size))
        content_emb = paddle.layer.embedding(
            input=content_digit, size=128, param_attr=share_attr)
        content_conv_3 = paddle.networks.sequence_conv_pool(
            input=content_emb, hidden_size=128, context_len=3)
        content_conv_5 = paddle.networks.sequence_conv_pool(
            input=content_emb, hidden_size=128, context_len=5)
        content_conv_7 = paddle.networks.sequence_conv_pool(
            input=content_emb, hidden_size=128, context_len=7)
        content_fc_1 = paddle.layer.fc(
            input=[content_conv_3, content_conv_5, content_conv_7], size=256)
        content_dropout_1 = paddle.layer.dropout(
            input=content_fc_1, dropout_rate=0.5)
        content_fc_2 = paddle.layer.fc(input=content_dropout_1, size=128)
        return content_fc_2

    def get_word_feature(self, share_attr):
        """Word tower: shared word embedding concatenated with a word-length
        embedding -> fc(128) -> dropout(0.5) -> fc(128)."""
        word_digit = paddle.layer.data(
            name="word",
            type=paddle.data_type.integer_value(word_dict_size))
        word_emb = paddle.layer.embedding(
            input=word_digit, size=128, param_attr=share_attr)
        word_len_digit = paddle.layer.data(
            name="word_len",
            type=paddle.data_type.integer_value(word_len_size))
        word_len_emb = paddle.layer.embedding(input=word_len_digit, size=128)
        word_concat_feature = paddle.layer.concat(
            input=[word_emb, word_len_emb])
        word_concat_feature_fc_1 = paddle.layer.fc(
            input=word_concat_feature, size=128)
        word_concat_feature_dropout_1 = paddle.layer.dropout(
            input=word_concat_feature_fc_1, dropout_rate=0.5)
        word_concat_feature_fc_2 = paddle.layer.fc(
            input=word_concat_feature_dropout_1, size=128)
        return word_concat_feature_fc_2

    def build_model_feature(self):
        """Build both feature towers with the shared embedding parameter."""
        content_feature = self.get_content_feature(emb_share_attr)
        word_feature = self.get_word_feature(emb_share_attr)
        return content_feature, word_feature

    def train_model(self):
        """Train the model: cos_sim(content, word) rescaled from [-1, 1] to
        [0, 1] and fit to the label with squared error; saves parameters
        after each pass and moves them to output/model_params."""
        train_data_reader = paddle.batch(
            paddle.reader.shuffle(
                self.data_reader(train_data_dir), buf_size=train_buf_size),
            batch_size=train_batch_size)
        test_data_reader = paddle.batch(
            self.data_reader(test_data_dir), batch_size=test_batch_size)

        (content_feature, word_feature) = self.build_model_feature()
        label = paddle.layer.data(
            name="label", type=paddle.data_type.dense_vector(1))
        inference = paddle.layer.cos_sim(
            a=content_feature, b=word_feature, size=1)
        # cos_sim is in [-1, 1]; (x + 1) * 0.5 maps it onto [0, 1] to match
        # the 0/1 label before the squared-error cost.
        cost = paddle.layer.square_error_cost(
            input=(inference + 1.0) * 0.5, label=label)

        parameters = paddle.parameters.create(cost)
        adam_optimizer = paddle.optimizer.Adam(
            learning_rate=1e-3,
            regularization=paddle.optimizer.L2Regularization(rate=1e-3),
            model_average=paddle.optimizer.ModelAverage(
                average_window=0.5, max_average_window=10000))
        trainer = paddle.trainer.SGD(
            cost=cost,
            extra_layers=paddle.evaluator.classification_error(
                input=(inference + 1.0) * 0.5, label=label),
            parameters=parameters,
            update_equation=adam_optimizer)

        # Maps data-layer names to positions in the reader's yielded tuples.
        feeding = {
            "content": 0,
            "word": 1,
            "word_len": 2,
            "label": 3
        }

        def event_handler(event):
            """Log training progress; on pass end, evaluate on the test set,
            snapshot parameters, and move them to the output directory."""
            if isinstance(event, paddle.event.EndIteration):
                if event.batch_id % 10 == 0:
                    print >> sys.stderr, \
                        "[NOTICE] Pass:{} Batch:{} Cost:{:.2f} {}".format(
                            event.pass_id, event.batch_id,
                            event.cost, event.metrics)
            if isinstance(event, paddle.event.EndPass):
                if test_data_reader is not None:
                    result = trainer.test(
                        reader=test_data_reader, feeding=feeding)
                    print >> sys.stderr, "[NOTICE] Test at Pass:{} {}".format(
                        event.pass_id, result.metrics)
                if not os.path.exists(model_save_dir):
                    os.mkdir(model_save_dir)
                with gzip.open(
                        os.path.join(
                            model_save_dir,
                            "ctr_model_{}.tar.gz".format(event.pass_id)),
                        "w") as out_file:
                    trainer.save_parameter_to_tar(out_file)
                print >> sys.stderr, "[NOTICE] move model to output dir ..."
                # NOTE(review): moving the whole dir each pass only works for
                # num_passes == 1; a second pass would recreate model_save_dir
                # but "output/model_params" would already exist.
                shutil.move(model_save_dir, "output/model_params")
                print >> sys.stderr, \
                    "[NOTICE] embedding feature building finished ..."

        print >> sys.stderr, "[NOTICE] train ctr model start ..."
        trainer.train(
            reader=train_data_reader,
            event_handler=event_handler,
            feeding=feeding,
            num_passes=num_passes)
        print >> sys.stderr, "[NOTICE] train ctr model finished ..."


if __name__ == '__main__':
    ctr_model_function = ctr_model()
    ctr_model_function.train_model()
The text was updated successfully, but these errors were encountered:
请参考这里:http://www.paddlepaddle.org/documentation/api/en/0.12.0/config/evaluators.html#print
Sorry, something went wrong.
嗯,好的。我试试看~
您好,此issue若已经解决,我们将于三天内关闭。若在关闭后您仍需跟进提问,可重新开启此问题,我们将在24小时内回复您。因关闭带来的不便我们深表歉意,请您谅解~感谢您对PaddlePaddle的支持!
wanghaoshuang
No branches or pull requests
代码如下所示,如果想要输出slope_interception layer的output和gradient该在哪里添加信息(想要追梯度计算问题)?因为在训练模型的时候遇到图片所示梯度计算问题
The text was updated successfully, but these errors were encountered: