In [1]:
import tensorflow as tf
import tflearn
import numpy as np
import gensim
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
from gensim import corpora
from pprint import pprint
import pickle
WORD_SIZE = 5
MAX_FILTER_SIZE = 6
START_WORD_VEC = np.asarray([1,0,0,0,0], dtype=np.float32).reshape([1,5])
END_WORD_VEC = np.asarray([0,0,0,0,1], dtype=np.float32).reshape([1,5])
END_WORD_ID = 4
START_WORD_ID = 0

embedding_matrix, id2word, word2id = pickle.load(open("embedding_matrix_2D",'rb'))

In [2]:
tmp_embedding_matrix = tf.constant(np.asarray([[1,0,0,0,0],
                                   [0,1,0,0,0],
                                   [0,0,1,0,0],
                                  [0,0,0,1,0],
                                  [0,0,0,0,1]], dtype=np.float32), name="tmp_embedding_matrix")

In [3]:
def id2onehot(word_id):
    vec = np.zeros([WORD_SIZE], dtype=np.float32)
    vec[word_id] = 1.0
    return vec
def onehot2id(vec):
    index = vec.argsort()[-1]
    return index

def embedding_look_up(word_id):
    word = WORD_DIC.get(word_id)
    return word_model[word]

In [4]:
class LSTM:
    def __init__(self, vocabulary_size=200, num_nodes=128, 
                 batch_size=20, num_unrollings=2, name=""):
        '''
        the LSTM process
        '''
        self.num_nodes = num_nodes
        self.reuse = False
        self.vocabulary_size = vocabulary_size
        self.num_nodes = num_nodes
        self.num_unrollings = num_unrollings
        self.batch_size = batch_size
        self.name = name
    
    def model(self, input_sentences, vocabulary_size, num_nodes,
             batch_size):
        with tf.variable_scope("{}_LSTM_var".format(self.name) ,reuse=self.reuse):
            with tf.variable_scope("input_gate"):
                self.ix = tf.get_variable("ix", [vocabulary_size, num_nodes], 
                                      tf.float32, tf.truncated_normal_initializer(stddev=0.1))
                self.im = tf.get_variable("im", [num_nodes, num_nodes], 
                                      tf.float32, tf.truncated_normal_initializer(stddev=0.1))
                self.ib = tf.get_variable("ib", [1, num_nodes], 
                                      tf.float32, tf.constant_initializer(0.1))
            with tf.variable_scope("forget_gate"):
                self.fx = tf.get_variable("fx", [vocabulary_size, num_nodes], tf.float32, 
                                     tf.truncated_normal_initializer(stddev=0.1))
                self.fm = tf.get_variable("fm", [num_nodes, num_nodes], 
                                      tf.float32, tf.truncated_normal_initializer(stddev=0.1))
                self.fb = tf.get_variable("fb", [1, num_nodes], 
                                     tf.float32, tf.constant_initializer(0.1))
            
            with tf.variable_scope("output_gate"):
                self.ox = tf.get_variable("ox", [vocabulary_size, num_nodes], tf.float32, 
                                     tf.truncated_normal_initializer(stddev=0.1))
                self.om = tf.get_variable("om", [num_nodes, num_nodes], 
                                      tf.float32, tf.truncated_normal_initializer(stddev=0.1))
                self.ob = tf.get_variable("ob", [1, num_nodes], 
                                     tf.float32, tf.constant_initializer(0.1))
            
            with tf.variable_scope("memory_cell"):
                self.cx = tf.get_variable("cx", [vocabulary_size, num_nodes], tf.float32, 
                                     tf.truncated_normal_initializer(stddev=0.1))
                self.cm = tf.get_variable("cm", [num_nodes, num_nodes], 
                                      tf.float32, tf.truncated_normal_initializer(stddev=0.1))
                self.cb = tf.get_variable("cb", [1, num_nodes], 
                                     tf.float32, tf.constant_initializer(0.1))
            
            self.saved_output = tf.get_variable("saved_output", [batch_size, num_nodes], 
                                                tf.float32, tf.constant_initializer(0), 
                                                trainable=False)
            self.saved_state = tf.get_variable("saved_state", [batch_size, num_nodes], 
                                                       tf.float32, tf.constant_initializer(0), 
                                                       trainable=False)
            '''self.saved_state_backward = tf.get_variable("saved_state_backward", [batch_size, num_nodes], 
                                                       tf.float32, tf.truncated_normal_initializer(stddev=0.1), 
                                                       trainable=False, name="backward_state")'''
  
        self.train_data = input_sentences
        #Since now input_sentence is a list of word vectors, 
        #we do not need following 3 lines anymore.
        '''for _ in range(self.num_unrollings + 1):
            self.train_data.append(
                tf.placeholder(tf.float32, shape=[self.batch_size, self.vocabulary_size]))'''
        self.train_inputs = self.train_data[:-1]
        self.train_labels = self.train_data[1:]
        
        self.outputs = list()
        self.output = self.saved_output
        self.state = self.saved_state
        
        self.state_list = list()
        self.state_list.append(self.saved_state)
        
        for i in self.train_inputs:
            self.output, self.state = self.lstm_cell(i, self.output, self.state)
            self.outputs.append(self.output)
            self.state_list.append(self.state)
            
        self.saved_state = self.saved_state.assign(self.state) 
        self.saved_output = self.saved_output.assign(self.output)
        
        self.variables = tf.get_collection(tf.GraphKeys.VARIABLES)
        
        self.reuse = True
        
        return self.outputs
    def lstm_cell(self, inputs, hidden_layer, state): 
        
        with tf.variable_scope("{}_LSTM_cell".format(self.name), reuse=self.reuse):
            input_gate = tf.sigmoid(tf.matmul(inputs, self.ix) + tf.matmul(hidden_layer, self.im) + self.ib,
                                    name="input_gate")
            forget_gate = tf.sigmoid(tf.matmul(inputs, self.fx) + tf.matmul(hidden_layer, self.fm) + self.fb,
                                    name="forget_gate")
            update_gate = tf.tanh(tf.matmul(inputs, self.cx) + tf.matmul(hidden_layer, self.cm) + self.cb,
                            name="update_gate")
            state = tf.add(tf.mul(forget_gate, state), tf.mul(input_gate, update_gate),
                           name="state")
            output_gate = tf.sigmoid(tf.matmul(inputs, self.ox) + tf.matmul(hidden_layer, self.om) + self.ob,
                                     name="output_gate") 
            output = tf.mul(output_gate, tf.tanh(state), name="output")
        
        #Note that, the "output" is equal to the hidden state
        return output, state   
    
    '''def lstm_process(self, inputs, hidden_layer, state):
        for i in train_inputs:
            self.output, self.state = self.lstm_cell(i, self.output, self.state)
            self.outputs.append(output)

        return outputs'''
    
    '''with tf.control_dependencies([self.saved_output.assign(output),
                                     self.saved_state.assign(state)]):
            
            logits = tf.matmul(tf.concat(0, outputs), self.w) + self.b
            loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(tf.concat(0, logits), 
                                                        tf.concat(0, train_labels), name="loss"))
            
            global_step = tf.Variable(0)
            learning_rate = tf.train.exponential_decay(
                10.0, global_step, 5000, 0.1, staircase=True)
            optimizer = tf.train.RMSPropOptimizer(learning_rate)
            gradient, v = zip(*optimizer.compute_gradients(loss))
            
            
            tf.add_to_collection("loss", loss)
            tf.add_to_collection("logits", logits)
            tf.add_to_collection("train_labels", train_labels)
            
        '''
        #It seems we do not need those lines above
        
    def __call__(self, input_sentences):
        return self.model(input_sentences, self.vocabulary_size, self.num_nodes,
                  self.batch_size)
        

In [5]:
L = LSTM(vocabulary_size=2, batch_size=1, num_unrollings=2, num_nodes=4, name="TEST4")
word_model = gensim.models.Word2Vec.load("models/word2vec_2D.model")

test_word1 = np.reshape(word_model["china"], (1,2))
test_word2 = np.reshape(word_model["uk"], (1, 2))
test_word3 = np.reshape(word_model["america"], (1, 2))

test_sentence = [test_word1, test_word2, test_word3]

2016-10-11 22:09:49,628 : INFO : loading Word2Vec object from models/word2vec_2D.model
2016-10-11 22:09:49,857 : INFO : setting ignored attribute syn0norm to None
2016-10-11 22:09:49,859 : INFO : setting ignored attribute cum_table to None


In [6]:
'''sess = tf.Session()
sess.run(tf.initialize_all_variables())
sess.run(L(test_sentence))

sess.run(L.state)

sess.run(L.saved_state)

sess.run(L.state_list)'''

'sess = tf.Session()\nsess.run(tf.initialize_all_variables())\nsess.run(L(test_sentence))\n\nsess.run(L.state)\n\nsess.run(L.saved_state)\n\nsess.run(L.state_list)'

In [7]:
class gen_LSTM():
    def __init__(self, bi_hidden, batch_size, vocabulary_size, num_nodes, max_len=32, name=""):
        self.bi_hidden = bi_hidden
        self.name = name
        self.reuse = False
        self.max_len = max_len
        self.batch_size = batch_size
        self.vocabulary_size = vocabulary_size
        self.num_nodes = num_nodes
        
    def model(self, bi_hidden, batch_size, vocabulary_size, num_nodes, max_len):
        with tf.variable_scope("{}_gen_LSTM".format(self.name),
                              reuse=self.reuse):
            with tf.variable_scope("input_gate"):
                self.ix = tf.get_variable("ix", [vocabulary_size, num_nodes], 
                                      tf.float32, tf.truncated_normal_initializer(stddev=0.1))
                self.im = tf.get_variable("im", [num_nodes, num_nodes], 
                                      tf.float32, tf.truncated_normal_initializer(stddev=0.1))
                self.ib = tf.get_variable("ib", [1, num_nodes], 
                                      tf.float32, tf.constant_initializer(0.1))
                self.ic = tf.get_variable("ic", [num_nodes*2, num_nodes],
                                         tf.float32, tf.truncated_normal_initializer(stddev=0.1))
            with tf.variable_scope("forget_gate"):
                self.fx = tf.get_variable("fx", [vocabulary_size, num_nodes], tf.float32, 
                                     tf.truncated_normal_initializer(stddev=0.1))
                self.fm = tf.get_variable("fm", [num_nodes, num_nodes], 
                                      tf.float32, tf.truncated_normal_initializer(stddev=0.1))
                self.fb = tf.get_variable("fb", [1, num_nodes], 
                                     tf.float32, tf.constant_initializer(0.1))
                self.fc = tf.get_variable("fc", [num_nodes*2, num_nodes],
                                         tf.float32, tf.truncated_normal_initializer(stddev=0.1))
            
            with tf.variable_scope("output_gate"):
                self.ox = tf.get_variable("ox", [vocabulary_size, num_nodes], tf.float32, 
                                     tf.truncated_normal_initializer(stddev=0.1))
                self.om = tf.get_variable("om", [num_nodes, num_nodes], 
                                      tf.float32, tf.truncated_normal_initializer(stddev=0.1))
                self.ob = tf.get_variable("ob", [1, num_nodes], 
                                     tf.float32, tf.constant_initializer(0.1))
                self.oc = tf.get_variable("oc", [num_nodes*2, num_nodes],
                                         tf.float32, tf.truncated_normal_initializer(stddev=0.1))
            
            with tf.variable_scope("memory_cell"):
                self.cx = tf.get_variable("cx", [vocabulary_size, num_nodes], tf.float32, 
                                     tf.truncated_normal_initializer(stddev=0.1))
                self.cm = tf.get_variable("cm", [num_nodes, num_nodes], 
                                      tf.float32, tf.truncated_normal_initializer(stddev=0.1))
                self.cb = tf.get_variable("cb", [1, num_nodes], 
                                     tf.float32, tf.constant_initializer(0.1))
                self.cc = tf.get_variable("cc", [num_nodes*2, num_nodes],
                                         tf.float32, tf.truncated_normal_initializer(stddev=0.1))
                
            with tf.variable_scope("output_layer"):
                self.w = tf.get_variable("w", [num_nodes, WORD_SIZE], tf.float32,
                                        tf.truncated_normal_initializer(stddev=0.1))
                self.b = tf.get_variable("b", [WORD_SIZE], tf.float32,
                                        tf.constant_initializer(0))
                
            self.saved_output = tf.get_variable("saved_output", [batch_size, num_nodes], 
                                                tf.float32, tf.constant_initializer(0), 
                                                trainable=False)
            self.saved_state = tf.get_variable("saved_state", [batch_size, num_nodes], 
                                                       tf.float32, tf.constant_initializer(0), 
                                                       trainable=False)
        
            self.c = self.attention(self.bi_hidden, self.saved_state)
        
        self.variables = tf.get_collection(tf.GraphKeys.VARIABLES)
        
        self.init_state = self.saved_state
        self.state = self.saved_state
        self.output = self.saved_output
        self.states = list()
        self.states.append(self.saved_state)
        self.outputs = list()
        self.predicted_sentence = list()
        self.logits = list()
        self.reuse = True
        
        
        #print(self.predicted_sentence)
        for i in range(10):
            print("i=>{}".format(i))
            
            if(i == 0):
                input_word = id2onehot(START_WORD_ID).reshape([batch_size, WORD_SIZE])
            else:
                #print("at step predicted word:{}".format(self.predicted_sentence[-1]))
                input_word = tf.nn.embedding_lookup(tmp_embedding_matrix, self.predicted_sentence[-1])
                input_word = tf.reshape(input_word, [batch_size, WORD_SIZE])
                print("at step predicted word:{}".format(input_word))
            self.output, self.state = self.gen_lstm_cell(input_word, self.output, self.state, self.c)
            self.outputs.append(self.output)
            #return self.output,self.state
            self.states.append(self.state)
            logits = tf.matmul(self.output, self.w) + self.b
            self.logits.append(logits)
            prediction = tf.argmax(tf.nn.softmax(logits), 1)
            self.predicted_sentence.append(prediction[0])
            #return tf.equal(prediction, END_WORD_ID)
            print("check point for tf.equal")
            #return tf.cast(tf.equal(prediction, END_WORD_ID)[0], tf.int32)
            if tf.cast(tf.equal(prediction, END_WORD_ID), tf.int32) == 1:
                print("detected the END_WORD and break loop")
                break
            if i == max_len - 1:
                print("check point for max_len - 1")
                self.predicted_sentence.append(END_WORD_ID)
            
        
        self.saved_state = self.saved_state.assign(self.state)
        self.saved_output = self.saved_output.assign(self.output)
        '''with tf.control_dependencies([self.saved_state.assign(self.state), 
                                      self.saved_output.assign(self.output)]):'''
        
        #self.logits = tf.matmul(tf.concat(0, self.outputs), self.w) + self.b
        self.reuse = True
        print("the last check point")
        return self.logits, self.predicted_sentence, self.states, self.saved_state
    
    def gen_lstm_cell(self, inputs, hidden_layer, state, c): 
        #single step LSTM with context vector
        with tf.variable_scope("{}_LSTM_cell".format(self.name), reuse=self.reuse):
            input_gate = tf.sigmoid(tf.matmul(inputs, self.ix) + tf.matmul(hidden_layer, self.im)\
                                    + tf.matmul(c, self.ic) + self.ib,
                                    name="input_gate")
            forget_gate = tf.sigmoid(tf.matmul(inputs, self.fx) + tf.matmul(hidden_layer, self.fm)\
                                    + tf.matmul(c, self.fc) + self.fb,
                                    name="forget_gate")
            update_gate = tf.tanh(tf.matmul(inputs, self.cx) + tf.matmul(hidden_layer, self.cm)\
                                  + tf.matmul(c, self.cc) + self.cb,
                            name="update_gate")
            state = tf.add(tf.mul(forget_gate, state), tf.mul(input_gate, update_gate),
                           name="state")
            output_gate = tf.sigmoid(tf.matmul(inputs, self.ox) + tf.matmul(hidden_layer, self.om)\
                                     + self.ob,
                                     name="output_gate") 
            output = tf.mul(output_gate, tf.tanh(state), name="output")
            
        #Note that, the "output" is equal to the hidden state
        return output, state
        
    def attention(self, h, s):
        with tf.variable_scope("attention_module", reuse=self.reuse):
            self.Ua = tf.get_variable("Ua", [int(h[0].get_shape()[1])/2, int(h[0].get_shape()[1])],
                                     tf.float32, tf.truncated_normal_initializer(stddev=0.1))
            self.Wa = tf.get_variable("Wa", [int(s.get_shape()[1]), int(s.get_shape()[1])],
                                     tf.float32, tf.truncated_normal_initializer(stddev=0.1))
            #the following bias does not showed in original paper
            self.Ba = tf.get_variable("Ba", [1, int(s.get_shape()[1])], 
                                      tf.float32,
                                      tf.constant_initializer(0.1))
            self.Va = tf.get_variable("Va", [1, int(s.get_shape()[1])], 
                                      tf.float32,
                                      tf.constant_initializer(0.1))
            attn_logits = list()
            for h_j in h:
                attn_mlp = tf.tanh(
                    tf.matmul(self.Wa, tf.transpose(s)) + tf.matmul(self.Ua, tf.transpose(h_j)))
                attn_logits.append(tf.reshape(
                        tf.matmul(self.Va, attn_mlp), shape=[1])[0])
                
            a = tf.reshape(tf.nn.softmax(attn_logits),
                           shape=[len(h)], name="attention_value")
            c = 0
            for j in range(int(a.get_shape()[0])):
                c += a[j] * h[j]
                
            return c
    
    def __call__(self):
        return self.model(self.bi_hidden, self.batch_size, self.vocabulary_size, self.num_nodes, self.max_len)

In [8]:
bi_h = list()
for i in range(10):
    bi_h.append(tf.Variable(tf.truncated_normal(stddev=0.5, shape=(1,10))))

In [9]:
gen_L = gen_LSTM(bi_h, vocabulary_size=5, batch_size=1, num_nodes=5, max_len=32, name="test_gen_lstm0")

In [10]:
a, b, c, d = gen_L()

i=>0
check point for tf.equal
i=>1
at step predicted word:Tensor("Reshape:0", shape=(1, 5), dtype=float32)
check point for tf.equal
i=>2
at step predicted word:Tensor("Reshape_1:0", shape=(1, 5), dtype=float32)
check point for tf.equal
i=>3
at step predicted word:Tensor("Reshape_2:0", shape=(1, 5), dtype=float32)
check point for tf.equal
i=>4
at step predicted word:Tensor("Reshape_3:0", shape=(1, 5), dtype=float32)
check point for tf.equal
i=>5
at step predicted word:Tensor("Reshape_4:0", shape=(1, 5), dtype=float32)
check point for tf.equal
i=>6
at step predicted word:Tensor("Reshape_5:0", shape=(1, 5), dtype=float32)
check point for tf.equal
i=>7
at step predicted word:Tensor("Reshape_6:0", shape=(1, 5), dtype=float32)
check point for tf.equal
i=>8
at step predicted word:Tensor("Reshape_7:0", shape=(1, 5), dtype=float32)
check point for tf.equal
i=>9
at step predicted word:Tensor("Reshape_8:0", shape=(1, 5), dtype=float32)
check point for tf.equal
the last check point


In [11]:
sess = tf.Session()
sess.run(tf.initialize_all_variables())
feed_dict = dict()

In [12]:
x,y = sess.run([c, d], feed_dict=feed_dict)

In [13]:
sess.run(gen_L.init_state)


array([[ 0.21270046,  0.06521814,  0.20149739,  0.05270585,  0.28610146]], dtype=float32)

In [14]:
x

[array([[ 0.,  0.,  0.,  0.,  0.]], dtype=float32),
 array([[ 0.0630172 , -0.00347754,  0.07997488, -0.03464898,  0.07956603]], dtype=float32),
 array([[ 0.13928902,  0.03132836,  0.1446476 ,  0.01087406,  0.17840704]], dtype=float32),
 array([[ 0.1767053 ,  0.04861825,  0.17514524,  0.0325804 ,  0.23023163]], dtype=float32),
 array([[ 0.19509576,  0.05713952,  0.18939164,  0.04299659,  0.25726599]], dtype=float32),
 array([[ 0.20415184,  0.06132318,  0.19600181,  0.0480244 ,  0.27135199]], dtype=float32),
 array([[ 0.20861542,  0.06337228,  0.1990501 ,  0.05046887,  0.27869168]], dtype=float32),
 array([[ 0.21081622,  0.06437417,  0.20044692,  0.05166727,  0.28251815]], dtype=float32),
 array([[ 0.21190146,  0.06486335,  0.20108247,  0.05226019,  0.28451449]], dtype=float32),
 array([[ 0.21243659,  0.06510192,  0.20136926,  0.0525564 ,  0.28555682]], dtype=float32),
 array([[ 0.21270046,  0.06521814,  0.20149739,  0.05270585,  0.28610146]], dtype=float32)]

In [None]:
class attention:
    def __init__(self, h, s, name=""):
        '''
        Caculate the attention weight.
        TODO:
            I think it seems appropriate to transform the class 
            into a function...
        arguments:
            h: all the encoder's hidden states, 
            each of them has a size of [2*Hidden_size, Batch_size]
            s: [2*Hidden_size, Batch_size], decoder's hidden state
        '''
        self.name = name
        self.attn_logits = list()
        self.a = list()
        self.reuse = False
        
        #some variables
        with tf.variable_scope("{}_attention".format(self.name),
                              reuse=self.reuse):
            self.Ua = tf.get_variable("Ua", [int(h[0].get_shape()[1])/2, int(h[0].get_shape()[1])],
                                     tf.float32, tf.truncated_normal_initializer(stddev=0.1))
            self.Wa = tf.get_variable("Wa", [int(s.get_shape()[1]), int(s.get_shape()[1])],
                                     tf.float32, tf.truncated_normal_initializer(stddev=0.1))
            #the following bias does not showed in original paper
            self.Ba = tf.get_variable("Ba", [1, int(s.get_shape()[1])], 
                                      tf.float32,
                                      tf.constant_initializer(0.1))
            self.Va = tf.get_variable("Va", [1, int(s.get_shape()[1])], 
                                      tf.float32,
                                      tf.constant_initializer(0.1))
            
            #caculate the attention weight
            for h_j in h:
                self.attn_mlp = tf.tanh(
                    tf.matmul(self.Wa, tf.transpose(s)) + tf.matmul(self.Ua, tf.transpose(h_j)))
                self.attn_logits.append(tf.reshape(
                        tf.matmul(self.Va, self.attn_mlp), shape=[1])[0])
                
            self.a = tf.reshape(tf.nn.softmax(self.attn_logits),
                                shape=[len(h)], name="attention_value")
            self.c = 0
            for j in range(self.a.get_shape()[0]):
                self.c += self.a[j] * h[j]
        self.reuse = True
    def get_attention():
        return self.a

In [None]:
class Discriminator():
    def __init__(self, s, conv_sizes, num_filters, dropout_keep_prob,
                 reuse=False, name=""):
        '''
        The Discriminator, uses Convolutional Neural Network to 
        classify the input sentence. See 
        http://www.people.fas.harvard.edu/~yoonkim/data/emnlp_2014.pdf
        for detail. Those code below with some inspiration from
        [https://github.com/dennybritz/cnn-text-classification-tf/blob/master/text_cnn.py]
        
        Arguments:
        s: the sentence representation caculated by LSTM process, 
        which has a size of [batch_size, hidden_layer_size]
        '''
        self.sequence_length = int(s.get_shape()[1])
        self.reuse = reuse
        self.name = name
        self.dropout_keep_prob = dropout_keep_prob
        self.num_filters = num_filters
        self.conv_sizes = conv_sizes
        self.hidden_size = int(s.get_shape()[2])
        #print(self.hidden_size)
        self.pooled_outputs = list()
        for i, conv_size in enumerate(conv_sizes):
            with tf.variable_scope("D_conv_maxpooling_{}"
                                  .format(conv_size), reuse=self.reuse):
                #filter_shape = [conv_size, self.hidden_size, 1, num_filters]
                W = tf.get_variable("W", [conv_size, self.hidden_size, 1, num_filters],
                                   tf.float32,
                                   tf.truncated_normal_initializer(stddev=0.1))
                print("the size of {} => {}"
                      .format(W.name, W.get_shape()))
                b = tf.get_variable("b", [num_filters], tf.float32,
                                   tf.constant_initializer(0.1))
                
                conv = tf.nn.conv2d(s, W, [1,1,1,1], 'VALID', name="conv")
                print("the size of {} => {}"
                      .format(conv.name, conv.get_shape()))
                h = tf.nn.relu(conv + b)
                
                pooled = tf.nn.max_pool(h,
                                       ksize=[1, self.sequence_length-conv_size+1, 1, 1],
                                       strides=[1, 1, 1, 1],
                                       padding='VALID',
                                       name="max_pooling")
                print("the size of pooling layer {}"
                      .format(pooled.get_shape()))
            self.pooled_outputs.append(pooled)
            
        self.total_filter_num = num_filters * len(conv_sizes)
        self.h_pool = tf.concat(3, self.pooled_outputs,
                                name="{}_concat_pooling_layer".format(self.name))
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, self.total_filter_num])
        print("h_pool size => {}".format(self.h_pool_flat.get_shape()))
     
        #after convolutional layer, we can define the fully connectedmlayer
        
        with tf.variable_scope("D_fully_connected_layer", reuse=self.reuse):
            w_shape = [self.h_pool_flat.get_shape()[1], 1]
            W = tf.get_variable("W", w_shape, tf.float32,
                               tf.truncated_normal_initializer(stddev=0.1))
            b = tf.get_variable("b", [1], tf.float32,
                               tf.constant_initializer(0.1))
            logits = tf.nn.relu(tf.matmul(self.h_pool_flat, W) + b)
            print("logits size => {}".format(logits.get_shape()))
            
        self.reuse = True

In [None]:
class Generator:
    def __init__(self, input_sentence, max_len=32, vocabulary_size=200, 
                 num_nodes=128, batch_size=16, name=""):
        self.input_sentence = input_sentence
        self.reverse_sentence = self.reverse_sentence(self.input_sentence)
        self.forward_LSTM_outputs = LSTM(self.input_sentence, 
                                        name="forward")
        self.backward_LSTM_outputs = LSTM(self.reverse_sentence,
                                         name="backward")     
        #concat the two outputs 
        
        #using attention
        self.gen_LSTM = gen_LSTM()
        self.attention = attention()
        
    def reverse_sentence(self, input_sentence):
        return input_sentence.reverse()
    
    def pad_sentence(self, input_sentence):
        if len(input_sentence) >= MAX_FILTER_SIZE:
            return input_sentence
        else:
            for i in range(MAX_FILTER_SIZE - len(input_sentence)):
                input_sentence.append(np.zeros(WORD_SIZE))
            return input_sentence

In [None]:
g = Generator([1.,1.])

In [None]:
'''class LSTMGAN:
    def __init__(self):
        self.lstm = LSTM()
        self.d = Discriminator()'''

In [None]:
h = tf.Variable(tf.truncated_normal((1,128)))
state = tf.Variable(tf.truncated_normal((1,128)))
test_inputs = tf.placeholder(tf.float32, shape=[1, 200])

In [None]:
h_j = list()
for _ in range(10):
    h_j.append(tf.Variable(tf.truncated_normal(shape=(1, 10), stddev=0.3)))
s_j = tf.Variable(tf.truncated_normal(shape=(1, 5), stddev=0.3))

In [None]:
s_d = tf.Variable(tf.truncated_normal(shape=(10, 200)))
s_d = tf.reshape(s_d, [1, 10, 200, 1])

In [None]:
D = Discriminator(s_d, [3], 64, 0.5, False,"ddd1d")

In [None]:
#L = LSTM(batch_size=1, num_unrollings=2, name="TEST")
A = attention(h_j, s_j, "143")
word_model = gensim.models.Word2Vec.load("models/word2vec.model")

test_word1 = word_model["china"]
test_word2 = word_model["uk"]
test_word3 = word_model["america"]

test_sentence = [test_word1, test_word2, test_word3]

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    feed_dict = dict()
    #for i in range(3):
        #feed_dict[L.train_data[i]] = np.reshape(test_sentence[i],(1,200))
    outc, outa = sess.run([A.c, A.a], feed_dict=feed_dict)

In [None]:
x = tf.Variable([1,2])
y = [2,2]
z = tf.equal(x, y)
c = tf.argmax(x,0)
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    a = sess.run(c)
    print(a)

In [None]:
x.get_shape()