In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

from tensorflow.keras import layers,optimizers
from tensorflow import keras
import tensorflow as tf
import pickle


class textCNN(keras.Model):
    """Convolutional text classifier.

    Token ids are embedded, passed through one Conv1D layer per kernel width,
    max-pooled along axis 1, concatenated and projected to class logits.

    Args:
        hidden_size: Output dimension of the embedding layer.
        vocab_size: Input dimension (number of rows) of the embedding layer.
        filter_size: Number of filters produced by every Conv1D layer.
        N_gram_list: Kernel widths; one Conv1D layer is created per entry.
        n_class: Number of units of the final dense layer (output logits).
    """

    def __init__(self, hidden_size, vocab_size, filter_size, N_gram_list, n_class):
        super(textCNN, self).__init__()
        self.embedder = layers.Embedding(input_dim=vocab_size, output_dim=hidden_size)
        # Width of the concatenated feature vector: `filter_size` features per
        # kernel width. Multiply the two counts instead of repeating the
        # sequence and measuring it, which allocates a throw-away list and
        # yields the wrong number for array-like inputs where `*` is element-wise.
        self.concat_hidden = len(N_gram_list) * filter_size

        self.fc = layers.Dense(n_class, input_shape=[self.concat_hidden, ])

        self.convlayers = [
            layers.Conv1D(activation='relu', filters=filter_size, kernel_size=kernel, strides=1)
            for kernel in N_gram_list
        ]

    def call(self, x):
        """Return the class logits for a batch of token-id sequences.

        Args:
            x: Integer token ids fed to the embedding layer.

        Returns:
            The output of the final dense layer (unnormalised logits).
        """
        x_embed = self.embedder(x)
        # Max-over-time pooling: keep the largest activation of every filter
        # along axis 1 of each convolution output.
        pooled = [tf.reduce_max(conv(x_embed), axis=1) for conv in self.convlayers]
        # One concat over all kernel widths instead of growing a tensor pairwise.
        x_concatenated = tf.concat(pooled, axis=1)

        return self.fc(x_concatenated)

@tf.function
def compute_loss(predict, true):
    """Return the mean sparse softmax cross-entropy of a batch.

    Args:
        predict: Unnormalised class logits.
        true: Integer class labels.

    Returns:
        Scalar tensor holding the mean per-example loss.
    """
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true, logits=predict)
    return tf.reduce_mean(per_example)

@tf.function
def compute_accuracy(predict, true):
    """Return the fraction of examples whose arg-max class equals the label.

    Args:
        predict: Class logits; the arg-max is taken over the last axis.
        true: Integer labels compared element-wise with the arg-max.

    Returns:
        Scalar float32 tensor with the mean number of matches.
    """
    predicted_class = tf.argmax(predict, axis=-1)
    is_correct = tf.equal(predicted_class, true)
    return tf.reduce_mean(tf.cast(is_correct, tf.float32))


@tf.function
def train_one_step(model, optimizer, x, y):
    """Run one optimisation step on a single batch.

    Args:
        model: Callable model; its `trainable_variables` are updated.
        optimizer: Optimizer whose `apply_gradients` performs the update.
        x: Batch of model inputs.
        y: Batch of labels passed to `compute_loss` / `compute_accuracy`.

    Returns:
        `(loss, accuracy)` computed from the forward pass made before the update.
    """
    with tf.GradientTape() as tape:
        logits = model(x)
        batch_loss = compute_loss(logits, y)

    variables = model.trainable_variables
    gradients = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    batch_accuracy = compute_accuracy(logits, y)
    return batch_loss, batch_accuracy


def train(epoch, model, optimizer, train_data):
    """Train `model` for `epoch` passes over `train_data`, logging every 10 steps.

    Args:
        epoch: Number of passes over `train_data`.
        model: Model forwarded to `train_one_step`.
        optimizer: Optimizer forwarded to `train_one_step`.
        train_data: Iterable yielding `(x, y)` batches.

    Returns:
        `(loss, accuracy)` of the last executed step, or `(0.0, 0.0)` when no
        step was executed (zero epochs or empty `train_data`).
    """
    print("train!!")
    loss = 0.0
    accuracy = 0.0
    for e in range(epoch):
        for step, (x, y) in enumerate(train_data):
            loss, accuracy = train_one_step(model, optimizer, x, y)
            if (step + 1) % 10 == 0:
                print("epoch:", e + 1, " step:", step + 1, " loss: {:0.5}".format(float(loss)),
                      " accuracy:  {:0.4}".format(float(accuracy)))
        print()
        # float() accepts both the scalar tensors returned by train_one_step and
        # the plain 0.0 defaults, so an empty `train_data` no longer crashes on
        # a missing `.numpy()` attribute. The values reported here are those of
        # the last step of the epoch, not an epoch average.
        print("epoch:", e + 1, " loss: {:0.5}".format(float(loss)), " accuracy:  {:0.4}".format(float(accuracy)))
        print()
    return loss, accuracy




# For debugging: main() below trains on randomly generated labels as a smoke test.

def main():
    """Smoke-test textCNN on the tokenised questions with random binary labels.

    Loads `./data/token_data.pickle`, pads the question sequences to length 20,
    pairs them with uniformly random 0/1 labels and trains for 30 epochs.
    """
    # NOTE: pickle.load can execute arbitrary code while deserialising; only
    # open token files that come from a trusted source.
    with open("./data/token_data.pickle", 'rb') as f:
        data = pickle.load(f)

    question = data["question"]
    word2idx = data["word2idx"]
    # Drop the last token of every sequence before padding.
    X_train = [a[0:-1] for a in question]

    X_train = tf.cast(keras.preprocessing.sequence.pad_sequences(X_train, maxlen=20), tf.int64)
    # One random label per example. The count is taken from the data instead of
    # a hard-coded 60407 so that from_tensor_slices gets matching first
    # dimensions whatever the size of the pickle.
    y_train = tf.random.uniform(dtype=tf.int64, maxval=2, minval=0, shape=[X_train.shape[0]])
    train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))

    train_ds = train_ds.shuffle(3000).batch(300)

    model = textCNN(N_gram_list=[2, 3, 4], filter_size=30, hidden_size=300, vocab_size=len(word2idx), n_class=2)

    optimizer = optimizers.Adam()

    train(epoch=30, model=model, optimizer=optimizer, train_data=train_ds)


# Run the training entry point only when this code is executed as the main
# module (not when it is imported).
if __name__=="__main__":
    main()


train!!
epoch: 1  step: 10  loss: 0.69463  accuracy:  0.4967
epoch: 1  step: 20  loss: 0.69164  accuracy:  0.56
epoch: 1  step: 30  loss: 0.69749  accuracy:  0.47
epoch: 1  step: 40  loss: 0.70364  accuracy:  0.4667
epoch: 1  step: 50  loss: 0.69433  accuracy:  0.4667
epoch: 1  step: 60  loss: 0.69338  accuracy:  0.4867
epoch: 1  step: 70  loss: 0.6911  accuracy:  0.53
epoch: 1  step: 80  loss: 0.69924  accuracy:  0.4767
epoch: 1  step: 90  loss: 0.69774  accuracy:  0.4833
epoch: 1  step: 100  loss: 0.69215  accuracy:  0.5267
epoch: 1  step: 110  loss: 0.69069  accuracy:  0.5433
epoch: 1  step: 120  loss: 0.68876  accuracy:  0.5567
epoch: 1  step: 130  loss: 0.69362  accuracy:  0.5233
epoch: 1  step: 140  loss: 0.70431  accuracy:  0.4367
epoch: 1  step: 150  loss: 0.69441  accuracy:  0.5167
epoch: 1  step: 160  loss: 0.69516  accuracy:  0.52
epoch: 1  step: 170  loss: 0.69209  accuracy:  0.4967
epoch: 1  step: 180  loss: 0.69781  accuracy:  0.52
epoch: 1  step: 190  loss: 0.69139  accu

KeyboardInterrupt: 

In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

from tensorflow import keras
import tensorflow as tf
from preprocessing import preprocessing

# Reset the global Keras state before the layers below are defined, so that
# re-running this cell does not build on top of previously created models.
tf.keras.backend.clear_session()


class Encoder(keras.layers.Layer):
    """Embeds source token ids and encodes them with a bidirectional LSTM.

    Args:
        vocab_size: Input dimension of the embedding layer.
        hidden_size: Embedding dimension and number of LSTM units per direction.
    """

    def __init__(self, vocab_size, hidden_size):
        super(Encoder, self).__init__()

        self.embedder = keras.layers.Embedding(input_dim=vocab_size, output_dim=hidden_size)
        recurrent = keras.layers.LSTM(units=hidden_size, return_sequences=True, return_state=True)
        self.lstm = keras.layers.Bidirectional(recurrent)

    def call(self, src):
        """Encode a batch of source sequences.

        Args:
            src: Integer token ids fed to the embedding layer.

        Returns:
            A pair `(outputs, states)`: the first element returned by the
            bidirectional LSTM and all remaining elements (the final states).
        """
        encoded = self.lstm(self.embedder(src))
        outputs, states = encoded[0], encoded[1:]

        return outputs, states

def dot_product_attention(query,value,mask=None):
    """Return soft-max attention weights from dot-product scores.

    Args:
        query: Tensor multiplied with the transpose of `value`.
        value: Tensor whose last two axes are transposed for the product.
        mask: Optional tensor; after inserting an axis at position 1 it is cast
            to float32 and every non-zero entry adds -1e9 to the matching
            score, which drives that weight towards zero.

    Returns:
        The scores normalised with a soft-max over the last axis.
    """
    logits = tf.matmul(a=query, b=value, transpose_b=True)

    if mask is None:
        return tf.nn.softmax(logits, axis=-1)

    penalty = tf.cast(tf.expand_dims(mask, axis=1), tf.float32) * -1e9
    return tf.nn.softmax(logits + penalty, axis=-1)



class Decoder(keras.layers.Layer):
    """Single-step LSTM decoder with dot-product attention over the source.

    Args:
        vocab_size: Input dimension of the target embedding layer.
        hidden_size: Embedding dimension and number of LSTM units.
    """

    def __init__(self, vocab_size, hidden_size):
        super(Decoder, self).__init__()

        self.embedder = keras.layers.Embedding(input_dim=vocab_size, output_dim=hidden_size)
        self.lstm = keras.layers.LSTM(units=hidden_size, return_sequences=True, return_state=True)

        self.attention = dot_product_attention

    def call(self, src_hidden, trg, mask,previous_hidden):
        """Decode one target step.

        Args:
            src_hidden: Encoded source states attended over.
            trg: Target token ids for the current step; a length-1 axis is
                inserted at position 1 after embedding.
            mask: Mask forwarded to the attention function.
            previous_hidden: Initial LSTM state for this step.

        Returns:
            A pair `(features, (hidden, cell))` where `features` is the
            attention context concatenated with the LSTM output on the last
            axis, and `(hidden, cell)` is the new LSTM state.
        """
        step_input = tf.expand_dims(self.embedder(trg), axis=1)

        trg_hidden, state_h, state_c = self.lstm(step_input, previous_hidden)

        weights = self.attention(query=trg_hidden, value=src_hidden, mask=mask)
        # Attention-weighted sum of the source states.
        context = tf.matmul(weights, src_hidden)

        features = tf.concat([context, trg_hidden], axis=-1)
        return features, (state_h, state_c)


class Seq2Seq(tf.keras.Model):
    """Attention-based encoder/decoder sequence-to-sequence model.

    Args:
        src_size: Vocabulary size of the source side (encoder embedding).
        trg_size: Vocabulary size of the target side (decoder embedding and
            number of output logits).
        hidden_size: Hidden dimension shared by encoder, decoder and projection.
    """

    def __init__(self, src_size, trg_size, hidden_size):
        super(Seq2Seq, self).__init__()
        self.encoder = Encoder(vocab_size=src_size, hidden_size=hidden_size)
        self.decoder = Decoder(vocab_size=trg_size, hidden_size=hidden_size)

        self.fc = keras.layers.Dense(units=hidden_size)
        self.classifier = keras.layers.Dense(units=trg_size)
        self.trg_size = trg_size

    def call(self, src, trg, training=False):
        """Decode `trg.shape[1] - 1` steps with teacher forcing or greedy search.

        Args:
            src: Source token ids; positions equal to 0 are masked out of the
                attention.
            trg: Target token ids. With `training` true every step is fed the
                gold token `trg[:, t]`; otherwise only `trg[:, 0]` is used as
                the first decoder input.
            training: Python truth value selecting teacher forcing (true) or
                greedy decoding (false).

        Returns:
            With `training` true: the logits of all steps concatenated on axis 1.
            Otherwise: a pair `(logits, seq_generated)` where `seq_generated`
            holds the start tokens followed by the arg-max token of every step.
            `logits` is None when there is no step to decode.
        """
        trg_seq_lens = trg.shape[1]

        src_hidden, hc = self.encoder(src)

        # NOTE: the same dense layer projects the encoder outputs and both
        # initial decoder states (all three inputs share their last dimension).
        src_hidden_ = self.fc(src_hidden)

        mask = tf.equal(0, src)
        # Assumes `hc` is ordered (forward_h, forward_c, backward_h, backward_c),
        # the order in which Keras' Bidirectional returns LSTM states.
        previous_hidden = self.fc(tf.concat([hc[0], hc[2]], axis=-1))
        previous_cell = self.fc(tf.concat([hc[1], hc[3]], axis=-1))

        previous_ = (previous_hidden, previous_cell)

        step_logits = []
        seq_generated = tf.expand_dims(trg[:, 0], axis=1)

        for t in range(trg_seq_lens - 1):
            # Teacher forcing reads the gold token, greedy decoding the token
            # produced at the previous step.
            step_input = trg[:, t] if training else seq_generated[:, t]
            dec_state, previous_ = self.decoder.call(trg=step_input, src_hidden=src_hidden_, mask=mask,
                                                     previous_hidden=previous_)

            current = self.classifier(dec_state)
            step_logits.append(current)

            if not training:
                # tf.argmax yields int64; cast to the dtype of the running
                # sequence so the concat does not fail when `trg` is int32.
                next_token = tf.cast(tf.argmax(current[:, 0, :], axis=-1), seq_generated.dtype)
                seq_generated = tf.concat([seq_generated, tf.expand_dims(next_token, axis=-1)], axis=-1)

        # Single concat over all steps instead of re-concatenating every step.
        logits = tf.concat(step_logits, axis=1) if step_logits else None

        if training:
            return logits

        return logits, seq_generated


@tf.function
def compute_loss(predict,true):
    """Return the mean cross-entropy over non-padding target positions.

    Args:
        predict: Logits for every target position.
        true: Integer target ids; positions equal to 0 are treated as padding
            and contribute nothing to the loss.

    Returns:
        Scalar tensor: summed loss of the non-padding positions divided by
        their count (or by 1 when every position is padding).
    """
    token_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=predict, labels=true)
    keep = tf.cast(tf.not_equal(true, 0), tf.float32)

    # Normalise by the number of real tokens. Averaging the masked loss over
    # all positions would make the value shrink as the padding ratio grows.
    return tf.reduce_sum(token_loss * keep) / tf.maximum(tf.reduce_sum(keep), 1.0)

@tf.function
def compute_accuracy(predict,true):
    """Return the token accuracy over non-padding target positions.

    Args:
        predict: Logits; the arg-max over the last axis is the predicted id.
        true: Integer target ids compared with the int32-cast predictions;
            positions equal to 0 are treated as padding and ignored, matching
            the masking applied by the loss.

    Returns:
        Scalar float32 tensor: correct non-padding predictions divided by the
        number of non-padding positions (or by 1 when all are padding).
    """
    predictions = tf.cast(tf.argmax(predict, axis=-1), tf.int32)
    keep = tf.cast(tf.not_equal(true, 0), tf.float32)
    hits = tf.cast(tf.equal(predictions, true), tf.float32) * keep

    return tf.reduce_sum(hits) / tf.maximum(tf.reduce_sum(keep), 1.0)


@tf.function
def train_one_step(model, optimizer, x, y):
    """Run one teacher-forced optimisation step on a single batch.

    Args:
        model: Model whose `call(x, y, True)` returns the training logits.
        optimizer: Optimizer whose `apply_gradients` performs the update.
        x: Batch of source sequences.
        y: Batch of target sequences; `y[:, 1:]` is used as the labels.

    Returns:
        `(loss, accuracy)` computed from the forward pass made before the update.
    """
    # Labels are the target sequence shifted left by one position.
    targets = y[:, 1:]

    with tf.GradientTape() as tape:
        logits = model.call(x, y, True)
        step_loss = compute_loss(predict=logits, true=targets)

    variables = model.trainable_variables
    gradients = tape.gradient(step_loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    step_accuracy = compute_accuracy(predict=logits, true=targets)
    return step_loss, step_accuracy


def train(epoch,model,optimizer,train_data):
    """Train `model` for `epoch` passes over `train_data`, logging every step.

    Args:
        epoch: Number of passes over `train_data`.
        model: Model forwarded to `train_one_step`.
        optimizer: Optimizer forwarded to `train_one_step`.
        train_data: Iterable yielding `(x, y)` batches.

    Returns:
        `(loss, accuracy)` of the last executed step, or `(0.0, 0.0)` when no
        step was executed (zero epochs or empty `train_data`).
    """
    print("start train!!")
    loss=0.0
    accuracy=0.0
    for e in range(epoch):
        for step,(x,y) in enumerate(train_data):
            loss,accuracy=train_one_step(model,optimizer,x,y)
            print("epoch:",e+1," step:",step+1," loss: {:0.5}".format(float(loss))," accuracy:  {:0.4}".format(float(accuracy)))
        print()
        # float() accepts both the scalar tensors returned by train_one_step and
        # the plain 0.0 defaults, so an empty `train_data` no longer crashes on
        # a missing `.numpy()` attribute. The values reported here are those of
        # the last step of the epoch, not an epoch average.
        print("epoch:",e+1," loss: {:0.5}".format(float(loss))," accuracy:  {:0.4}".format(float(accuracy)))
        print()
    return loss,accuracy


def main():
    """Build the Seq2Seq model from the preprocessed token data and train it.

    The dataset and vocabulary mappings come from `preprocessing`; the same
    vocabulary size is used for the source and the target side.
    """
    train_ds, word2idx, idx2word = preprocessing(f_name="./data/token_data.pickle")
    vocab_size = len(word2idx)
    seq2seq = Seq2Seq(src_size=vocab_size, trg_size=vocab_size, hidden_size=300)
    train(epoch=30, model=seq2seq, optimizer=keras.optimizers.Adam(), train_data=train_ds)

# Run the training entry point only when this code is executed as the main
# module (not when it is imported).
if __name__ =="__main__":
    main()

start train!!


UnknownError:  [_Derived_]  Fail to find the dnn implementation.
	 [[{{node CudnnRNN}}]]
	 [[encoder/bidirectional/forward_lstm/StatefulPartitionedCall]]
	 [[concat_242/_350]]
	Encountered when executing an operation using EagerExecutor. This error cancels all future operations and poisons their output tensors. [Op:__inference_train_one_step_173720]

Function call stack:
train_one_step -> train_one_step -> train_one_step
