Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

训练序列标注模型用于预测新数据,发现预测结果都是空 #2441

Closed
April1010 opened this issue Jun 12, 2017 · 3 comments
Closed
Assignees

Comments

@April1010
Copy link

April1010 commented Jun 12, 2017

模型配置如下,基本和book中的07是一样的。不一样的只有1.数据特征(增加了一些其他特征),2.word_embedding没有加载预先的配置。

 def train(data_reader, args):
     paddle.init(use_gpu=False, trainer_count=args.trainer_count)                          
     
     # define network topology
     feature_out = db_lstm(
             stag_dict_size=data_reader.get_dict_size('stag_dict'),                        
             predicate_dict_size=data_reader.get_dict_size('predicate_dict'),              
             word_dict_size=data_reader.get_dict_size('word_dict'),                        
             label_dict_size=data_reader.get_dict_size('label_dict'),
             mix_hidden_lr=args.mix_hidden_lr,
             default_std=args.default_std,
             hidden_dim=args.hidden_dim,                                                   
             word_dim=args.word_dim,                                                       
             mark_dim=args.mark_dim,
             stag_dim=args.stag_dim,                                                       
             num_lstm_layers=args.num_lstm_layers)                                         
             
     target = paddle.layer.data(name='target', type=d_type(data_reader.get_dict_size('label_dict')))
     crf_cost = paddle.layer.crf(
         size=data_reader.get_dict_size('label_dict'),                                     
         input=feature_out,
         label=target,                                                                     
         param_attr=paddle.attr.Param(
             name='crfw', initial_std=args.default_std, learning_rate=args.mix_hidden_lr)) 
     
     crf_dec = paddle.layer.crf_decoding(                                                  
         size=data_reader.get_dict_size('label_dict'),                                     
         input=feature_out,                                                                
         label=target,
         param_attr=paddle.attr.Param(name='crfw'))
     evaluator.sum(input=crf_dec)
                 
     # create parameters                                                                   
     parameters = paddle.parameters.create(crf_cost)                                       
     # create optimizer
     optimizer = paddle.optimizer.Momentum(
         momentum=0,
         learning_rate=2e-2,
         regularization=paddle.optimizer.L2Regularization(rate=8e-4),
         model_average=paddle.optimizer.ModelAverage(
             average_window=0.5, max_average_window=10000), )
 
     is_local_flag=True if args.is_local > 0 else False
     train_batch_reader = paddle.batch(\
             paddle.reader.shuffle(data_reader.get_train_reader(is_local_flag), buf_size=8192), \
                 batch_size=args.batch_size)
     test_batch_reader=paddle.batch(data_reader.get_test_reader(is_local_flag), batch_size=args.batch_size)
     feeding=data_reader.data_name_feeding()
 
     trainer = paddle.trainer.SGD(
         cost=crf_cost,
         parameters=parameters,
         update_equation=optimizer,
         extra_layers=crf_dec)
 
     def event_handler(event):
         global start_time, cost_sum, cost_counter
         if isinstance(event, paddle.event.EndIteration):
             cost_sum += event.cost
             cost_counter += 1
             if event.batch_id % 500 == 0:
                 print "Pass %d, Batch %d, Cost %f" % (
                     event.pass_id, event.batch_id, cost_sum / cost_counter)
             else:
                 sys.stdout.write('.')
                 sys.stdout.flush()
 
         if isinstance(event, paddle.event.BeginPass):
             start_time = time.time()
             cost_sum, cost_counter = 0.0, 0
 
         if isinstance(event, paddle.event.EndPass):
             # save parameters
             with gzip.open(args.output_path + '/params_pass_%d.tar.gz' % event.pass_id, 'w') as f:
                 parameters.to_tar(f)
             result = trainer.test(reader=test_batch_reader, feeding=feeding)
             print "\n------- PASS END ------- \n Time: %d sec,  Pass: %d, ValidationCost: %s" % (
                 time.time() - start_time, event.pass_id, result.cost)
 
     trainer.train(
         reader=train_batch_reader,
         event_handler=event_handler,
         num_passes=args.num_passes,
         feeding=feeding)
def db_lstm(stag_dict_size, predicate_dict_size, word_dict_size,
            label_dict_size,
            mix_hidden_lr, default_std, hidden_dim, word_dim, mark_dim,
            stag_dim, num_lstm_layers, mark_dict_size=2):
    """Deep bidirectional LSTM tower for SRL-style sequence labeling.

    Embeds 7 input features (word, +/-1 context words, predicate, word
    POS tag, predicate POS tag, predicate-position mark), mixes them into
    a hidden layer, stacks ``num_lstm_layers`` LSTMs with alternating
    direction and direct edges, and projects to per-token label scores.

    Args:
        stag_dict_size: vocabulary size of the POS-tag features.
        predicate_dict_size: vocabulary size of the predicate feature.
        word_dict_size: vocabulary size of the word features.
        label_dict_size: number of output labels (output layer width).
        mix_hidden_lr: learning-rate multiplier for the mixed projections.
        default_std: initial std-dev for non-zero-initialized parameters.
        hidden_dim: width of the mixed hidden layers.
        word_dim: embedding size for word and predicate features.
        mark_dim: embedding size for the mark feature.
        stag_dim: embedding size for the POS-tag features.
        num_lstm_layers: number of stacked LSTM layers (>= 1).
        mark_dict_size: vocabulary size of the binary predicate-position
            mark (default 2). Fixes a NameError in the original, which
            referenced an undefined ``mark_dict_size``.

    Returns:
        The ``feature_out`` mixed layer of size ``label_dict_size``.
    """
    # 7 input features.
    word = paddle.layer.data(name='word_data', type=d_type(word_dict_size))
    ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_size))
    ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_size))
    predicate = paddle.layer.data(
        name='predicate_data', type=d_type(predicate_dict_size))
    word_stag = paddle.layer.data(
        name='word_stag_data', type=d_type(stag_dict_size))
    predicate_stag = paddle.layer.data(
        name='predicate_stag_data', type=d_type(stag_dict_size))
    # BUG FIX: the original used an undefined name ``mark_dict_size``;
    # it is now a parameter (binary mark => default size 2).
    mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_size))

    # Shared parameter attributes: zero-init word embedding, zero-init
    # and default-std variants for the remaining parameters.
    emb_para = paddle.attr.Param(name='emb', initial_std=0.)
    std_0 = paddle.attr.Param(initial_std=0.)
    std_default = paddle.attr.Param(initial_std=default_std)

    predicate_embedding = paddle.layer.embedding(
        size=word_dim,
        input=predicate,
        param_attr=paddle.attr.Param(name='vemb', initial_std=default_std))
    mark_embedding = paddle.layer.embedding(
        size=mark_dim, input=mark, param_attr=std_0)
    word_stag_embedding = paddle.layer.embedding(
        size=stag_dim, input=word_stag, param_attr=std_0)
    predicate_stag_embedding = paddle.layer.embedding(
        size=stag_dim, input=predicate_stag, param_attr=std_0)

    # The three word-typed features share one embedding table ('emb').
    word_input = [word, ctx_n1, ctx_p1]
    emb_layers = [
        paddle.layer.embedding(size=word_dim, input=x, param_attr=emb_para)
        for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(word_stag_embedding)
    emb_layers.append(predicate_stag_embedding)
    emb_layers.append(mark_embedding)

    # Mix all embeddings into the first hidden layer.
    hidden_0 = paddle.layer.mixed(
        size=hidden_dim,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=emb, param_attr=std_default) for emb in emb_layers
        ])

    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
    hidden_para_attr = paddle.attr.Param(
        initial_std=default_std, learning_rate=mix_hidden_lr)

    lstm_0 = paddle.layer.lstmemory(
        input=hidden_0,
        act=paddle.activation.Relu(),
        gate_act=paddle.activation.Sigmoid(),
        state_act=paddle.activation.Sigmoid(),
        bias_attr=std_0,
        param_attr=lstm_para_attr)

    # Stack L-LSTM and R-LSTM with direct edges: each layer sees both the
    # previous mixed hidden layer and the previous LSTM output.
    input_tmp = [hidden_0, lstm_0]

    for i in range(1, num_lstm_layers):
        mix_hidden = paddle.layer.mixed(
            size=hidden_dim,
            bias_attr=std_default,
            input=[
                paddle.layer.full_matrix_projection(
                    input=input_tmp[0], param_attr=hidden_para_attr),
                paddle.layer.full_matrix_projection(
                    input=input_tmp[1], param_attr=lstm_para_attr)
            ])

        lstm = paddle.layer.lstmemory(
            input=mix_hidden,
            act=paddle.activation.Relu(),
            gate_act=paddle.activation.Sigmoid(),
            state_act=paddle.activation.Sigmoid(),
            # Alternate direction every layer to get bidirectionality.
            reverse=((i % 2) == 1),
            bias_attr=std_0,
            param_attr=lstm_para_attr)

        input_tmp = [mix_hidden, lstm]

    # Final projection to per-token label scores (CRF input).
    feature_out = paddle.layer.mixed(
        size=label_dict_size,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=input_tmp[0], param_attr=hidden_para_attr),
            paddle.layer.full_matrix_projection(
                input=input_tmp[1], param_attr=lstm_para_attr)
        ], )

    return feature_out
@qingqing01
Copy link
Contributor

发现预测结果都是空

有预测的log信息吗? 空的指的是啥呢?

@qingqing01 qingqing01 self-assigned this Jun 12, 2017
@April1010
Copy link
Author

  1. 任务的目的是标注句子中给定一个谓语的主语和宾语都是什么,无关的词就是空。在现在的数据中,一个句子中主谓宾数目很少,大部分都是空
  2. feature主要有词序列,词性序列,谓语词,谓语词性
  3. 使用同样的数据(小数据集,只有1000+)去训练和预测,结果预测出来的所有结果都是空
  4. 我猜测是由于训练数据量太少,embedding的时候训练的不充分,后来把feature中的词换成了词向量(nlp产出的那个),但是发现还是一样的问题

@lcy-seso
Copy link
Contributor

lcy-seso commented Jun 12, 2017

请问这里的“空”是指什么?是标注结果全部是“O”,标注不出来主语和宾语吗?
如果是:

  1. 学习曲线如何?拟合情况是否健康?
  2. 用 paddle 的 chunk evaluator 输出 chunk 标注准确率如何?
  3. 输入序列平均长度多少?大部分 label 都是 “O”?
  4. 看一下训练集上的 chunk 识别准确率在多少,如果chunk准确率低,但是cost
    在下降,有可能并没有学到有效的 pattern,下降准确率都在拟合“O” 这个label。
    • 训练数据太少了,从配置看网络参数很多,这么少的参数训练不好一个这样多参数的网络。
    • 我看到你加入了词性标注序列,试试多任务,加强约束。

heavengate pushed a commit to heavengate/Paddle that referenced this issue Aug 16, 2021
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants