In [1]:
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.losses import Reduction

class RL_GRU2:
    """TensorFlow 1.x graph that builds three heads in a single constructor call.

    * "df model": each sequence element of ``input_x`` is projected and max-pooled over
      its words (``shared_pooling_layer``), the pooled sequence is run through a
      dropout-wrapped GRU, and the last GRU state is classified into ``class_num``
      classes (``pre_scores``, ``predictions``, ``loss``, ``accuracy``).
    * "rl model": one step of the *same* GRU cell on ``rl_input`` starting from
      ``rl_state`` (``rl_output``, ``rl_new_state``), plus a two-layer MLP on
      ``rl_state`` that yields ``stopScore`` / ``isStop`` and the squared-error
      ``rl_cost`` against ``reward``.
    * Sentiment task: ``SentCNN`` features of ``sent_x`` are classified into
      ``sent_num`` classes (``sent_scores``, ``sent_pred``, ``sent_loss``, ``sent_acc``).

    All tensors are created in the current default graph.
    """

    def __init__(self, input_dim, hidden_dim, max_seq_len, max_word_len, class_num, action_num, sent_num):
        """Create placeholders, variables and all output tensors.

        Args:
            input_dim: size of the last axis of every input placeholder.
            hidden_dim: GRU state size; also used as the pooled feature size.
            max_seq_len: number of elements per sequence in ``input_x``.
            max_word_len: number of words per sequence element.
            class_num: number of classes of the df model.
            action_num: number of actions scored by the rl model.
            sent_num: number of classes of the sentiment task.
        """
        self.input_x = tf.placeholder(tf.float32, [None, max_seq_len, max_word_len, input_dim], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, class_num], name="input_y")
        self.x_len = tf.placeholder(tf.int32, [None], name="x_len")

        self.sent_x = tf.placeholder(tf.float32, [None, max_word_len, input_dim], name="sent_x")
        self.sent_y = tf.placeholder(tf.float32, [None, sent_num], name="sent_y")

        self.init_states = tf.placeholder(tf.float32, [None, hidden_dim], name="topics")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        self.rl_state = tf.placeholder(tf.float32, [None, hidden_dim], name="rl_states")
        self.rl_input = tf.placeholder(tf.float32, [None, max_word_len, input_dim], name="rl_input")
        self.action = tf.placeholder(tf.float32, [None, action_num], name="action")
        self.reward = tf.placeholder(tf.float32, [None], name="reward")

        output_dim = hidden_dim

        # Shared pooling layer parameters.
        # NOTE(review): b_t is created but never used; shared_pooling_layer applies w_t only.
        self.w_t = tf.Variable(tf.random_uniform([input_dim, output_dim], -1.0, 1.0), name="w_t")
        self.b_t = tf.Variable(tf.constant(0.01, shape=[output_dim]), name="b_t")
        # [batchsize, max_seq_len, max_word_len, input_dim] --> [batchsize, max_seq_len, output_dim]
        # TODO: replace the shared_pooling_layer with a sentiment analysis model.
        pooled_input_x = self.shared_pooling_layer(self.input_x, input_dim, max_seq_len, max_word_len, output_dim)
        # rl_input holds a single sequence element per row, hence max_seq_len=1 here.
        pooled_rl_input = self.shared_pooling_layer(self.rl_input, input_dim, 1, max_word_len, output_dim)
        pooled_rl_input = tf.reshape(pooled_rl_input, [-1, output_dim])

        # Dropout on the pooled df-model inputs.
        pooled_input_x_dp = tf.nn.dropout(pooled_input_x, self.dropout_keep_prob)

        # ---- df model ----
        df_cell = rnn.GRUCell(output_dim)
        df_cell = rnn.DropoutWrapper(df_cell, output_keep_prob=self.dropout_keep_prob)

        # NOTE(review): w_tp is a constant zero matrix, not a trainable Variable, so
        # df_state is all zeros whatever is fed into init_states. Confirm this is intended.
        w_tp = tf.constant(0.0, shape=[hidden_dim, output_dim], name="w_tp")
        self.df_state = tf.matmul(self.init_states, w_tp, name="df_state")

        df_outputs, df_last_state = tf.nn.dynamic_rnn(
            df_cell, pooled_input_x_dp, self.x_len, initial_state=self.df_state, dtype=tf.float32
        )

        l2_loss = tf.constant(0.0)

        w_ps = tf.Variable(tf.truncated_normal([output_dim, class_num], stddev=0.1))
        b_ps = tf.Variable(tf.constant(0.01, shape=[class_num]))
        l2_loss += tf.nn.l2_loss(w_ps)
        l2_loss += tf.nn.l2_loss(b_ps)

        self.pre_scores = tf.nn.xw_plus_b(df_last_state, w_ps, b_ps, name="p_scores")
        self.predictions = tf.argmax(self.pre_scores, 1, name="predictions")

        # Per-step class probabilities, computed with the same output projection.
        r_outputs = tf.reshape(df_outputs, [-1, output_dim])  # [batchsize * max_seq_len, output_dim]
        scores_seq = tf.nn.softmax(tf.nn.xw_plus_b(r_outputs, w_ps, b_ps))  # [batchsize * max_seq_len, class_num]
        self.out_seq = tf.reshape(scores_seq, [-1, max_seq_len, class_num], name="out_seq")  # [batchsize, max_seq_len, class_num]

        df_losses = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.pre_scores, labels=self.input_y)
        self.loss = tf.reduce_mean(df_losses) + 0.1 * l2_loss

        correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

        # ---- rl model ----
        # A single step of the df GRU cell (same cell object, so the same weights).
        self.rl_output, self.rl_new_state = df_cell(pooled_rl_input, self.rl_state)

        w_ss1 = tf.Variable(tf.truncated_normal([output_dim, 64], stddev=0.01))
        b_ss1 = tf.Variable(tf.constant(0.01, shape=[64]))
        # TODO: replace the process here.
        rl_h1 = tf.nn.relu(tf.nn.xw_plus_b(self.rl_state, w_ss1, b_ss1))

        w_ss2 = tf.Variable(tf.truncated_normal([64, action_num], stddev=0.01))
        b_ss2 = tf.Variable(tf.constant(0.01, shape=[action_num]))

        self.stopScore = tf.nn.xw_plus_b(rl_h1, w_ss2, b_ss2, name="stopScore")

        self.isStop = tf.argmax(self.stopScore, 1, name="isStop")

        # `action` weights the per-action scores; the weighted sum is regressed onto `reward`.
        out_action = tf.reduce_sum(tf.multiply(self.stopScore, self.action), axis=1)
        self.rl_cost = tf.reduce_mean(tf.square(self.reward - out_action), name="rl_cost")

        # ---- Sentiment analysis task ----
        self.pooled_feat = self.SentCNN(self.sent_x)
        # NOTE(review): the ReLU activation makes the logits non-negative; confirm intended.
        classifier = tf.layers.Dense(sent_num, activation=tf.nn.relu, trainable=True)
        self.sent_logits = classifier(self.pooled_feat)
        self.sent_scores = tf.nn.softmax(self.sent_logits, axis=1)
        self.sent_pred = tf.argmax(self.sent_scores, 1, name="predictions")
        # softmax_cross_entropy applies softmax itself, so it must receive the raw logits.
        # Feeding sent_scores (already a softmax output) would apply softmax twice.
        self.sent_loss = tf.losses.softmax_cross_entropy(
            self.sent_y,
            self.sent_logits,
            weights=1.0,
            label_smoothing=0,
            scope=None,
            loss_collection=tf.GraphKeys.LOSSES,
            reduction=Reduction.SUM_BY_NONZERO_WEIGHTS
        )
        sent_correct_predictions = tf.equal(self.sent_pred, tf.argmax(self.sent_y, 1))
        self.sent_acc = tf.reduce_mean(tf.cast(sent_correct_predictions, "float"), name="accuracy")

    @staticmethod
    def _max_pool_over_words(projected, max_seq_len, max_word_len, output_dim):
        """Max-pool rows of shape [*, output_dim] over groups of ``max_word_len`` words.

        Returns a tensor reshaped to [-1, max_seq_len, output_dim].
        """
        per_element = tf.reshape(projected, [-1, max_word_len, output_dim])
        expanded = tf.expand_dims(per_element, -1)
        # [n, words, out] --> [n, words, out, 1] --> [n, 1, out, 1]
        pooled = tf.nn.max_pool(
            expanded,
            ksize=[1, max_word_len, 1, 1],
            strides=[1, 1, 1, 1],
            padding="VALID",
            name="max_pool"
        )
        return tf.reshape(pooled, [-1, max_seq_len, output_dim])

    def shared_pooling_layer(self, inputs, input_dim, max_seq_len, max_word_len, output_dim):
        """Project ``inputs`` with the shared ``self.w_t`` (no bias) and max-pool over words.

        Returns a tensor of shape [-1, max_seq_len, output_dim].
        """
        t_inputs = tf.reshape(inputs, [-1, input_dim])
        # The bias self.b_t is intentionally not applied here (it was commented out upstream).
        t_h = tf.matmul(t_inputs, self.w_t)
        return self._max_pool_over_words(t_h, max_seq_len, max_word_len, output_dim)

    def pooling_layer(self, inputs, input_dim, max_seq_len, max_word_len, output_dim):
        """Like ``shared_pooling_layer`` but with its own freshly created weight and bias.

        Every call creates two new variables. Returns [-1, max_seq_len, output_dim].
        """
        t_inputs = tf.reshape(inputs, [-1, input_dim])
        w = tf.Variable(tf.truncated_normal([input_dim, output_dim], stddev=0.1))
        b = tf.Variable(tf.constant(0.01, shape=[output_dim]))

        h = tf.nn.xw_plus_b(t_inputs, w, b)
        return self._max_pool_over_words(h, max_seq_len, max_word_len, output_dim)

    def SentCNN(self, input_x):
        """1-D convolution (256 filters, width 5, 'valid') + ReLU + max over positions.

        Args:
            input_x: rank-3 tensor; the constructor passes ``sent_x`` of shape
                [batchsize, max_word_len, input_dim].

        Returns:
            Tensor of shape [batchsize, 256].
        """
        num_filters = 256
        kernel_size = 5
        conv_input = tf.layers.conv1d(input_x, num_filters, kernel_size, strides=1, padding='valid', name='conv2', trainable=True)
        feature_map = tf.nn.relu(conv_input)  # [batchsize, conv_positions, filters]
        pooled_feat = tf.reduce_max(feature_map, 1)  # [batchsize, filters]

        return pooled_feat

In [1]:
import tensorflow as tf
import random
import keras
from tensorflow.contrib import rnn
import numpy as np

class Chars2Vec:
    """Character-level word embedding model trained on word-pair similarity targets.

    A word is encoded as a sequence of one-hot character vectors, passed through two
    stacked GRU layers, and the final state of the second layer is the word embedding.
    ``PredSimilar`` maps the squared difference of two embeddings through a sigmoid unit.
    """

    def __init__(self, emb_dim, char_to_ix):
        """
        Args:
            emb_dim: positive int, size of the GRU state / word embedding.
            char_to_ix: dict mapping a character to its one-hot index.

        Raises:
            TypeError: if ``emb_dim`` is not a positive int or ``char_to_ix`` is not a dict.
        """
        if not isinstance(emb_dim, int) or emb_dim < 1:
            raise TypeError("parameter 'emb_dim' must be a positive integer")

        if not isinstance(char_to_ix, dict):
            raise TypeError("parameter 'char_to_ix' must be a dictionary")

        self.char_to_ix = char_to_ix
        self.ix_to_char = {char_to_ix[ch]: ch for ch in char_to_ix}
        self.vocab_size = len(self.char_to_ix)
        self.emb_dim = emb_dim
        self.cache = {}
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_rnn")
        self.init_state = tf.placeholder(tf.float32, [None, self.emb_dim], name='init_state')
        # GRU cells are created on the first embedding() call and reused afterwards.
        self._shared_cells = None

    def embedding(self, input_x, x_len):
        """Return the final state of the second GRU layer for each input sequence.

        The two GRU cells are created once per instance and reused on every call, so
        both words of a pair are encoded with the same weights. Creating new cells on
        each call would give each word of the pair its own, unrelated encoder.

        Args:
            input_x: float tensor [batch, time, features].
            x_len: int tensor [batch] with the number of valid timesteps per row.
        """
        cells = getattr(self, "_shared_cells", None)
        if cells is None:
            layer_1 = rnn.GRUCell(self.emb_dim, name='layer1')
            layer_1 = rnn.DropoutWrapper(layer_1, output_keep_prob=self.dropout_keep_prob)
            layer_2 = rnn.GRUCell(self.emb_dim, name='layer2')
            layer_2 = rnn.DropoutWrapper(layer_2, output_keep_prob=self.dropout_keep_prob)
            cells = self._shared_cells = (layer_1, layer_2)
        layer_1, layer_2 = cells

        hiddens_1, hiddens_1_final = tf.nn.dynamic_rnn(layer_1, input_x, x_len, initial_state=self.init_state, dtype=tf.float32)
        # The second layer starts from the first layer's final state.
        hiddens_2, hiddens_2_final = tf.nn.dynamic_rnn(layer_2, hiddens_1, x_len, initial_state=hiddens_1_final, dtype=tf.float32)
        return hiddens_2_final

    def PredSimilar(self, input1, input2, x1_len, x2_len):
        """Predict a score in (0, 1) for each pair; returns a tensor of shape [batch, 1]."""
        embed_1 = self.embedding(input1, x1_len)
        embed_2 = self.embedding(input2, x2_len)
        squared_diff = tf.square(embed_1 - embed_2)
        RegLayer = tf.layers.Dense(1, activation=tf.nn.sigmoid, trainable=True)
        return RegLayer(squared_diff)

    def _one_hot_word(self, word):
        """Encode ``word`` as a [len(word), vocab_size] int32 one-hot matrix.

        Characters missing from ``char_to_ix`` become all-zero rows.
        """
        encoded = np.zeros((len(word), self.vocab_size), dtype='int32')
        for pos, ch in enumerate(word):
            ix = self.char_to_ix.get(ch)
            if ix is not None:
                encoded[pos, ix] = 1
        return encoded

    @staticmethod
    def _train_batch(step, batch_size, train_size):
        """Row positions of training batch ``step``; wraps around so every batch is full."""
        return [(step * batch_size + k) % train_size for k in range(batch_size)]

    @staticmethod
    def _eval_batches(start, stop, batch_size):
        """Split ``range(start, stop)`` into consecutive batches; the last one may be short."""
        return [list(range(lo, min(lo + batch_size, stop))) for lo in range(start, stop, batch_size)]

    def TrainModel(self, word_pairs, targets, max_epochs, patience, validation_split, batch_size, train_keep_prob=1.0):
        """Train on word pairs, checkpoint on validation improvement, then print the test loss.

        The data is shuffled and split train:validation:test = 5:1:2. A checkpoint is
        written to ``char2vec_saved/model<epoch>`` whenever the validation loss improves.

        Args:
            word_pairs: list or numpy.ndarray of (str, str) pairs.
            targets: list or numpy.ndarray with one target per pair.
            max_epochs: number of passes over the training split.
            patience: accepted for interface compatibility; currently unused.
            validation_split: accepted for interface compatibility; currently unused
                (the split is fixed at 5:1:2).
            batch_size: positive int, rows per batch.
            train_keep_prob: output keep probability of the GRU dropout wrappers during
                training. Evaluation always uses 1.0.

        Raises:
            TypeError: on wrong container or word types.
            ValueError: on empty data, too few samples for a training split, only empty
                words, or a non-positive ``batch_size``.
        """
        import os

        if not isinstance(word_pairs, (list, np.ndarray)):
            raise TypeError("parameters 'word_pairs' must be a list or numpy.ndarray")

        if not isinstance(targets, (list, np.ndarray)):
            raise TypeError("parameters 'targets' must be a list or numpy.ndarray")

        assert len(word_pairs) == len(targets)
        targets = np.asarray(targets)

        if not isinstance(batch_size, int) or batch_size < 1:
            raise ValueError("parameter 'batch_size' must be a positive integer")

        data_size = len(targets)
        if data_size == 0:
            raise ValueError("'word_pairs' must not be empty")

        x_1, x_2 = [], []
        for pair_words in word_pairs:
            if not isinstance(pair_words[0], str) or not isinstance(pair_words[1], str):
                raise TypeError("word must be a string")
            x_1.append(self._one_hot_word(pair_words[0].lower()))
            x_2.append(self._one_hot_word(pair_words[1].lower()))
        x1_len = np.array([len(word) for word in x_1])
        x2_len = np.array([len(word) for word in x_2])
        max_word_len = int(max(x1_len.max(), x2_len.max()))
        if max_word_len < 1:
            raise ValueError("at least one word must be non-empty")
        # Pad at the end: dynamic_rnn with sequence lengths reads the first len(word) steps.
        x_1 = keras.preprocessing.sequence.pad_sequences(x_1, maxlen=max_word_len, dtype='int32', padding='post', truncating='pre', value=0.0)
        x_2 = keras.preprocessing.sequence.pad_sequences(x_2, maxlen=max_word_len, dtype='int32', padding='post', truncating='pre', value=0.0)

        # Shuffle all arrays with one permutation so rows stay aligned.
        idxs = random.sample(range(data_size), data_size)
        x_1 = x_1[idxs]
        x_2 = x_2[idxs]
        x1_len = x1_len[idxs]
        x2_len = x2_len[idxs]
        targets = targets[idxs]

        # train:validation:test = 5:1:2, as contiguous ranges of the shuffled arrays.
        # The arrays are already shuffled, so the splits must be positional; indexing
        # them with the permutation again would mix validation/test rows into training.
        split_1 = (5 * data_size) // 8
        split_2 = (6 * data_size) // 8
        if split_1 < 1:
            raise ValueError("not enough samples to form a training split")
        max_iter = split_1 // batch_size + 1

        # Tensor graph
        batch_X1 = tf.placeholder(tf.float32, [None, max_word_len, self.vocab_size], name="X_1")
        batch_X2 = tf.placeholder(tf.float32, [None, max_word_len, self.vocab_size], name="X_2")
        X1_len = tf.placeholder(tf.int32, [None], name='x1_len')
        X2_len = tf.placeholder(tf.int32, [None], name='x2_len')
        batch_Y = tf.placeholder(tf.float32, [None])
        preds = self.PredSimilar(batch_X1, batch_X2, X1_len, X2_len)
        # preds is [batch, 1]; flatten it so the subtraction does not broadcast to
        # [batch, batch]. The loss is the scalar sum of squared errors over the batch.
        loss = tf.reduce_sum(tf.square(batch_Y - tf.reshape(preds, [-1])))
        train_op = tf.train.AdamOptimizer(0.001).minimize(loss)

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=4)

        def make_feed(positions, keep_prob):
            # Every placeholder the graph depends on has to be fed, including the
            # sequence lengths, the zero initial state and the dropout keep probability.
            return {
                batch_X1: x_1[positions],
                batch_X2: x_2[positions],
                X1_len: x1_len[positions],
                X2_len: x2_len[positions],
                batch_Y: targets[positions],
                self.init_state: np.zeros([len(positions), self.emb_dim], dtype='float32'),
                self.dropout_keep_prob: keep_prob,
            }

        def summed_loss(sess, start, stop):
            total = 0.0
            for positions in self._eval_batches(start, stop, batch_size):
                total += sess.run(loss, make_feed(positions, 1.0))
            return total

        # Saver.save does not create the parent directory.
        os.makedirs("char2vec_saved", exist_ok=True)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            min_val_loss = float("inf")
            for i in range(max_epochs):
                for j in range(max_iter):
                    positions = self._train_batch(j, batch_size, split_1)
                    _, batch_loss = sess.run([train_op, loss], make_feed(positions, train_keep_prob))
                    print(" Step: " + str(j) + " Training loss: " + str(batch_loss))

                val_loss = summed_loss(sess, split_1, split_2)
                print("Epochs: " + str(i) + " Validation loss: " + str(val_loss))

                # Checkpoint only when the validation loss improves on the best so far.
                if val_loss < min_val_loss:
                    min_val_loss = val_loss
                    saver.save(sess, "char2vec_saved/model"+str(i))
                    print("char2vec_saved/model "+str(i)+" saved")

            # Test loss, kept separate from the validation loss.
            test_loss = summed_loss(sess, split_2, data_size)
            print("Test loss: " + str(test_loss))

Using TensorFlow backend.


In [2]:
# Character inventory of the model; the list position of a character is its one-hot index.
chars = [
    'f', 'k', 'n', '7', '’', '8', 'c', '9', 'b', ')', '(', 's', 'm', 'e', 'g', '4',
    ',', 'j', '”', '1', 'z', 't', '2', ' ', 'i', '–', 'o', 'l', '.', '!', 'd', 'u',
    'a', '0', 'y', '-', 'x', 'w', '“', 'v', 'q', '&', ':', '6', 'r', 'h', 'p', '3', '5',
]
char_2_ix = dict(zip(chars, range(len(chars))))

model = Chars2Vec(300, char_2_ix)

# Toy batch: 3 sequences of 9 one-hot rows, each row hot at a random index in 0..9.
zs = np.zeros((3, 9, len(chars)), dtype=np.float32)
for i, j in np.ndindex(zs.shape[0], zs.shape[1]):
    zs[i, j, random.randint(0, 9)] = 1

# Both tensors wrap the same array.
X_S1 = tf.convert_to_tensor(zs, name='X_S1')
X_S2 = tf.convert_to_tensor(zs, name='X_S2')
x_len = tf.placeholder(tf.int32, [None], name='x_len')
# bs = tf.placeholder(tf.int32, name = 'batchsize')