
Seq2Seq network (some layers of the example network were modified) fails with "Check failed: size != 0 allocate 0 bytes"; the input data and batch count are normal and non-empty. Please help look into the cause, thanks #10187

Closed
wangshuohuan opened this issue Apr 25, 2018 · 11 comments
Labels
User (label for user questions)

Comments

@wangshuohuan

wangshuohuan commented Apr 25, 2018

Network:

# edit-mode: -*- python -*-

import sys
import os
from paddle.trainer_config_helpers import *

def seq_to_seq_data(data_dir,
                    is_generating,
                    dict_size=30000,
                    train_list='social_network.train.list',
                    test_list='social_network.test.list',
                    gen_list='social_network.gen.list',
                    src_dict_file='social_network.src.dict',
                    trg_dict_file='social_network.trg.dict',
                    gen_result='gen_result'):
    """
    Predefined seqToseq train data provider for application
    is_generating: whether this config is used for generating
    dict_size: word count of dictionary
    train_list: a text file containing a list of training data
    test_list: a text file containing a list of testing data
    gen_list: a text file containing a list of generating data
    gen_result: a text file containing generating result
    """
    src_lang_dict = os.path.join(data_dir, src_dict_file)
    trg_lang_dict = os.path.join(data_dir, trg_dict_file)

    if is_generating:
        train_list = None
        test_list = os.path.join(data_dir, gen_list)
    else:
        train_list = os.path.join(data_dir, train_list)
        test_list = os.path.join(data_dir, test_list)

    define_py_data_sources2(
        train_list,
        test_list,
        module="dataprovider",
        obj="process",
        args={
            "src_dict_path": src_lang_dict,
            "trg_dict_path": trg_lang_dict,
            "is_generating": is_generating
        })

    return {
        "src_dict_path": src_lang_dict,
        "trg_dict_path": trg_lang_dict,
        "gen_result": gen_result
    }


def lstm_encoder_decoder(data_conf,
                        is_generating,
                        word_vector_dim=100,
                        encoder_size=200,
                        decoder_size=200,
                        beam_size=3,
                        max_length=250):
    """
    A wrapper for an attention version of GRU Encoder-Decoder network
    is_generating: whether this config is used for generating
    encoder_size: dimension of hidden unit in GRU Encoder network
    decoder_size: dimension of hidden unit in GRU Decoder network
    word_vector_dim: dimension of word vector
    beam_size: expand width in beam search
    max_length: a stop condition of sequence generation
    """
    for k, v in data_conf.iteritems():
        globals()[k] = v
    source_dict_dim = len(open(src_dict_path, "r").readlines())
    target_dict_dim = len(open(trg_dict_path, "r").readlines())
    gen_trans_file = gen_result

    src_word_id = data_layer(name='source_language_word', size=source_dict_dim)
    src_embedding = embedding_layer(
        input=src_word_id,
        size=word_vector_dim,
        param_attr=ParamAttr(name='_source_language_embedding'))
    src_forward = simple_lstm(input=src_embedding, size=encoder_size)
    src_backward = simple_lstm(
        input=src_embedding, size=encoder_size, reverse=True)
    encoded_vector = concat_layer(input=[src_forward, src_backward])

    with mixed_layer(size=decoder_size) as encoded_proj:
        encoded_proj += full_matrix_projection(input=encoded_vector)

    backward_first = first_seq(input=src_backward)
    forward_last = last_seq(input=src_forward)

    join_first_and_last = concat_layer(input = [backward_first, forward_last])

    with mixed_layer(
            size=decoder_size,
            act=TanhActivation()) as decoder_boot:
        decoder_boot += full_matrix_projection(input = join_first_and_last)

    def lstm_decoder_with_attention(enc_vec, enc_proj, current_word):
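        # Modified attention: project the decoder state to enc_vec.size,
        # expand it over the source sequence, squash each position to a
        # scalar score, normalize the scores to sum to one, and use them
        # to weight enc_vec (scaling_layer + sum pooling = context vector).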
        decoder_mem = memory(
            name='lstm_decoder', size=decoder_size, boot_layer=decoder_boot)

        with mixed_layer(size=enc_vec.size, name="attention_transform_1") as m:
            m += full_matrix_projection(
                 decoder_mem, param_attr=None)

        expanded = expand_layer(
            input=m, expand_as=enc_vec, name='attention_expand')

        with mixed_layer(size=1, name="attention_transform_2") as attention_before_normalize: 
            attention_before_normalize += full_matrix_projection(expanded, param_attr=None)

        attention_after_normalize = sum_to_one_norm_layer(attention_before_normalize)

        weighted_part_encoded_proj = scaling_layer(
            weight = attention_after_normalize,
            input = enc_vec,
            name = "scaling")
 
        weighted_encoded_proj = pooling_layer(input=weighted_part_encoded_proj, pooling_type=SumPooling(), name="vsum")
        
        concat_state_and_weighted = concat_layer([decoder_mem, weighted_encoded_proj])     
        concat_current_state_weighted = concat_layer([concat_layer([current_word, weighted_encoded_proj]), decoder_mem])

        with mixed_layer(size=decoder_size * 4, name="lstm_mix") as decoder_inputs:
            decoder_inputs += full_matrix_projection(input=concat_state_and_weighted)

        lstm_step = lstm_step_layer(
            name='lstm_decoder',
            input=decoder_inputs,
            state=decoder_mem,
            size=decoder_size)

        with mixed_layer(
                size=target_dict_dim, bias_attr=True, name="final",
                act=SoftmaxActivation()) as out:
            out += full_matrix_projection(input=concat_state_and_weighted)

        return out

    decoder_group_name = "decoder_group"
    group_inputs = [
        StaticInput(
            input=encoded_vector, is_seq=True), StaticInput(
                input=encoded_proj, is_seq=True)
    ]
    if not is_generating:
        trg_embedding = embedding_layer(
            input=data_layer(
                name='target_language_word', size=target_dict_dim),
            size=word_vector_dim,
            param_attr=ParamAttr(name='_target_language_embedding'))
        group_inputs.append(trg_embedding)

        # For a decoder equipped with an attention mechanism, in training,
        # the target embedding (the ground truth) is the data input,
        # while the encoded source sequence is accessed as an unbounded memory.
        # Here, the StaticInput defines a read-only memory
        # for the recurrent_group.
        decoder = recurrent_group(
            name=decoder_group_name,
            step=lstm_decoder_with_attention,
            input=group_inputs)

        lbl = data_layer(name='target_language_next_word', size=target_dict_dim)
        cost = classification_cost(input=decoder, label=lbl)
        outputs(cost)
    else:
        # In generation, the decoder predicts the next target word based on
        # the encoded source sequence and the last generated target word.

        # The encoded source sequence (the encoder's output) must be specified
        # by StaticInput, which is a read-only memory.
        # The embedding of the last generated word is automatically retrieved
        # by GeneratedInput, which is initialized with a start mark such as <s>
        # and must be included in generation.

        trg_embedding = GeneratedInput(
            size=target_dict_dim,
            embedding_name='_target_language_embedding',
            embedding_size=word_vector_dim)
        group_inputs.append(trg_embedding)

        beam_gen = beam_search(
            name=decoder_group_name,
            step=lstm_decoder_with_attention,
            input=group_inputs,
            bos_id=0,
            eos_id=1,
            beam_size=beam_size,
            max_length=max_length)

        seqtext_printer_evaluator(
            input=beam_gen,
            id_input=data_layer(
                name="sent_id", size=1),
            dict_file=trg_dict_path,
            result_file=gen_trans_file)

        outputs(beam_gen)

Error:

F0425 13:04:17.944658 11056 MemoryHandle.cpp:49] Check failed: size != 0  allocate 0 bytes
*** Check failure stack trace: ***
    @           0x62620d  google::LogMessage::Fail()
    @           0x629cbc  google::LogMessage::SendToLog()
    @           0x625d33  google::LogMessage::Flush()
    @           0x62b1ce  google::LogMessageFatal::~LogMessageFatal()
    @           0x908699  paddle::CpuMemoryHandle::CpuMemoryHandle()
    @           0x9301f7  paddle::CpuMatrix::CpuMatrix()
    @           0x9304a6  paddle::Matrix::create()
    @           0x940121  paddle::Matrix::resizeOrCreate()
    @           0x6e3bc7  paddle::Layer::resetSpecifyOutput()
    @           0x6e3e44  paddle::Layer::resetOutput()
    @           0x6c84f1  paddle::MixedLayer::forward()
    @           0x773a1d  paddle::NeuralNetwork::forward()
    @           0x795344  paddle::TrainerThread::forward()
    @           0x7965a5  paddle::TrainerThread::computeThread()
    @       0x318eab6470  (unknown)
    @       0x318b207851  (unknown)
    @       0x318aee767d  (unknown)
    @              (nil)  (unknown)

gdb stack trace:


#0  0x000000318ae328a5 in raise () from /lib64/libc.so.6
#1  0x000000318ae34085 in abort () from /lib64/libc.so.6
#2  0x000000000062f645 in google::DumpStackTraceAndExit() ()
#3  0x000000000062620d in google::LogMessage::Fail() ()
#4  0x0000000000629cbc in google::LogMessage::SendToLog() ()
#5  0x0000000000625d33 in google::LogMessage::Flush() ()
#6  0x000000000062b1ce in google::LogMessageFatal::~LogMessageFatal() ()
#7  0x0000000000908699 in paddle::CpuMemoryHandle::CpuMemoryHandle(unsigned long) ()
#8  0x00000000009301f7 in paddle::CpuMatrix::CpuMatrix(unsigned long, unsigned long, bool) ()
#9  0x00000000009304a6 in paddle::Matrix::create(unsigned long, unsigned long, bool, bool) ()
#10 0x0000000000940121 in paddle::Matrix::resizeOrCreate(std::shared_ptr<paddle::Matrix>&, unsigned long, unsigned long, bool, bool) ()
#11 0x00000000006e3bc7 in paddle::Layer::resetSpecifyOutput(paddle::Argument&, unsigned long, unsigned long, bool, bool) ()
#12 0x00000000006e3e44 in paddle::Layer::resetOutput(unsigned long, unsigned long) ()
#13 0x00000000006c84f1 in paddle::MixedLayer::forward(paddle::enumeration_wrapper::PassType) ()
#14 0x0000000000773a1d in paddle::NeuralNetwork::forward(std::vector<paddle::Argument, std::allocator<paddle::Argument> > const&, std::vector<paddle::Argument, std::allocator<paddle::Argument> >*, paddle::enumeration_wrapper::PassType) ()
#15 0x0000000000795344 in paddle::TrainerThread::forward() ()
#16 0x00000000007965a5 in paddle::TrainerThread::computeThread() ()
#17 0x000000318eab6470 in ?? () from /usr/lib64/libstdc++.so.6
#18 0x000000318b207851 in start_thread () from /lib64/libpthread.so.0
#19 0x000000318aee767d in clone () from /lib64/libc.so.6
@typhoonzero typhoonzero added the User (label for user questions) label Apr 25, 2018
@typhoonzero
Contributor

  1. Please post a link to the original code plus the modified snippets, rather than pasting the entire config.
  2. Check whether the sizes related to the data layers are set correctly (see the sketch after this list).
  3. You are using the oldest Paddle configuration style; we recommend upgrading to Fluid.
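
For item 2, a minimal sanity check could look like the following sketch. It only reuses the dictionary files from the config above; the data layer sizes are derived from them, so an empty dictionary would propagate a size of 0 into the network.

# Sketch: the data layer sizes come from the dictionary files, so verify
# that neither dictionary is empty before the network is built.
source_dict_dim = len(open(src_dict_path, "r").readlines())
target_dict_dim = len(open(trg_dict_path, "r").readlines())
assert source_dict_dim > 0, "empty source dict: %s" % src_dict_path
assert target_dict_dim > 0, "empty target dict: %s" % trg_dict_path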

@wangshuohuan
Author

wangshuohuan commented Apr 25, 2018

OK, here is the modified part of the code; the main change is the attention mechanism:
def lstm_decoder_with_attention(enc_vec, enc_proj, current_word):
    decoder_mem = memory(
        name='lstm_decoder', size=decoder_size, boot_layer=decoder_boot)

    with mixed_layer(size=enc_vec.size, name="attention_transform_1") as m:
        m += full_matrix_projection(
             decoder_mem, param_attr=None)

    expanded = expand_layer(
        input=m, expand_as=enc_vec, name='attention_expand')

    with mixed_layer(size=1, name="attention_transform_2") as attention_before_normalize: 
        attention_before_normalize += full_matrix_projection(expanded, param_attr=None)

    attention_after_normalize = sum_to_one_norm_layer(attention_before_normalize)

    weighted_part_encoded_proj = scaling_layer(
        weight = attention_after_normalize,
        input = enc_vec,
        name = "scaling")

    weighted_encoded_proj = pooling_layer(input=weighted_part_encoded_proj, pooling_type=SumPooling(), name="vsum")
    
    concat_state_and_weighted = concat_layer([decoder_mem, weighted_encoded_proj])     
    concat_current_state_weighted = concat_layer([concat_layer([current_word, weighted_encoded_proj]), decoder_mem])

    with mixed_layer(size=decoder_size * 4, name="lstm_mix") as decoder_inputs:
        decoder_inputs += full_matrix_projection(input=concat_current_state_weighted)

    lstm_step = lstm_step_layer(
        name='lstm_decoder',
        input=decoder_inputs,
        state=decoder_mem,
        size=decoder_size)

    with mixed_layer(
            size=target_dict_dim, bias_attr=True, name="final",
            act=SoftmaxActivation()) as out:
        out += full_matrix_projection(input=concat_state_and_weighted)

    return out

@typhoonzero
Contributor

You could first print the sizes of all the modified layers and check whether any of them is 0, for example with the sketch below.
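
A minimal sketch, assuming the v1 trainer_config_helpers API where each layer helper returns a LayerOutput exposing .name and .size; place it inside lstm_decoder_with_attention after the layers are defined:

# Debugging sketch: dump the declared size of each modified layer while the
# config is parsed; any 0 here would match the "allocate 0 bytes" failure.
for out in [m, expanded, attention_before_normalize,
            weighted_encoded_proj, decoder_inputs]:
    print("%s: size=%s" % (out.name, out.size))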

@wangshuohuan
Author

I printed the sizes of the mixed_layers; none of them is 0.

@typhoonzero
Contributor

The size Paddle allocates for a Matrix is height * width, so this failure occurs whenever either of the two is 0. Suggested ways to track it down:

  • Check whether the training data contains problems such as empty input sequences (see the sketch after this list).
  • Simplify the model configuration step by step and locate the offending layers by elimination.
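
A hypothetical scan for the first point, assuming each training line is "source_words<TAB>target_words" as in the seqToseq demo (adjust to the actual dataprovider format):

# Sketch: flag empty or malformed samples; the file name is a placeholder
# for one of the files listed in social_network.train.list.
with open("train_part_000") as f:
    for i, line in enumerate(f, 1):
        fields = line.rstrip("\n").split("\t")
        if len(fields) != 2 or not fields[0].strip() or not fields[1].strip():
            print("line %d: empty or malformed sample" % i)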

@wangshuohuan
Author

OK, thanks. Where can I find a Fluid version of the seq2seq network?

@typhoonzero
Contributor

@wangshuohuan
Author

wangshuohuan commented Apr 25, 2018

The language model is quite different from this. For seq2seq, is Transformer the only option?

@typhoonzero
Contributor

There is no ready-made code at the moment.

@wangshuohuan
Author

In the old version, which layer should be used to multiply two sequences of vectors of the same length?
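
For illustration, a sketch of one way this could be expressed in the v1 trainer_config_helpers API, assuming two placeholder layers layer_a and layer_b of equal size (dotmul_operator is a v1 construct for elementwise products inside a mixed_layer; treat this as a hedged suggestion, not a confirmed answer):

# Sketch: elementwise product of two equal-size layers; applied to two
# sequences of the same length it multiplies them time step by time step.
with mixed_layer(size=layer_a.size, name="elementwise_product") as prod:
    prod += dotmul_operator(a=layer_a, b=layer_b)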

@typhoonzero
Contributor

Please check out the latest code under models repo to get newly released models.

Closing due to low activity, feel free to reopen.
