In [1]:
"""
    This notebook was inspired by {'EFFICIENT VECTOR REPRESENTATION FOR DOCUMENTS THROUGH CORRUPTION' 2017. (Minmin Chen)}
    A model is trained to predict the missing word with the help of a document vector.
    The Document Vector is optimized by the model during training.
    The Document Vector is then used by a neural net to predict sentiment.
    Primary Differences from Minmin Chen 2017:
        BoW not used. 
        Context words are fed to LSTM instead of word averagings. 
        Word Vectors are taken from GLOVE/Spacy.
        Corruption is through dropout instead of removing words from BoW model.
    Results: Unable to accurately build doc-vector to be sufficient to inform sentiment.
    
    On a personal note: Brighid will come back to this once finished with other research. It was a fun model to build...
        If unesseccarily filled with LSTMS...
"""
import tensorflow as tf
import numpy as np
import en_core_web_md
import pickle
import random
import glob
nlp = en_core_web_md.load()
standard_elements = ['<UNK>','<PAD>','<SOS>','<EOS>']

In [2]:
# Vocab
try:
    vocab = pickle.load(open('imdb_vocab.pkl','rb'))
except:
    if 'y' in input('create new vocab & potentially overwrite? (y/n)').lower():
        # Step 1: Get Vocab (there will be some modifications to original, as the original is large)
        path_to_vocab = 'aclImdb/imdb.vocab'
        vocab_bytes = open(path_to_vocab, 'rb').readlines()
        vocab_string = [b.decode("utf-8", "ignore").replace('\r','').replace('\n','') for b in vocab_bytes]
        vocab_string[0:5]  # ['the', 'and', 'a', 'of', 'to']

        # Step 2: Check each word against spacy to determine if the word is value-added
        vocab = {e:i for i,e in enumerate(standard_elements)}
        for v in vocab_string:
            tokens = nlp(v)
            for t in tokens:
                if t.has_vector and not t.lemma_ in vocab:
                    vocab[t.lemma_] = len(vocab)

        pickle.dump(vocab,open('imdb_vocab.pkl','wb'))

In [3]:
# Create embedding vocab for model to use
#...Not necessary, as model could train an embedding model. However the dataset may be limited.
# Look up a vector for a given word
def get_vec(word):
    if word not in standard_elements:
        token = [t for t in nlp(word)][0]
        return token.vector
    else:
        return np.zeros([300],dtype=np.float32)

try:
    embedding_matrix = pickle.load(open('imdb_embedding_matrx.pkl','rb'))
except:
    if 'y' in input('create new embedding & potentially overwrite? (y/n)').lower():    
        embedding_matrix = np.zeros([len(vocab),300], dtype=np.float32)
        for k,v in vocab.items():
            vec = get_vec(k)
            embedding_matrix[v,:] = vec

        pickle.dump(embedding_matrix, open('imdb_embedding_matrx.pkl','wb'))


In [4]:
# Step 3: Data loader
# Requirements: Return batches of data, 
#  where each batch is from a separate review
#  where batches do not switch reviews until commanded
#  where a cursor keeps track of the batch position within each review
# Reasoning: doc vectors are trained per document, 
#  but optimizing a single model/doc vector on a single doc would be equivalent to a batchsize of 1

class data_loader:
    
    """
        filenames = list of filenames for each document
        sentiments = list of integer labels of sentiments (must match filenames)
        batch_size = number of documents per batch
        prior_words = number of words considered prior to the target word
        posterior_words = number of words considered after the target word
    """
    def __init__(self, filenames, sentiments, batch_size = 100, prior_words = 3, posterior_words = 3):
        
        # sizes
        self.batch_size = batch_size
        self.prior_words = prior_words
        self.posterior_words = posterior_words

        assert type(self.batch_size) == int and type(self.prior_words) == int and type(self.posterior_words) == int
        
        # total available documents
        self.filenames = filenames
        self.sentiments = sentiments
        
        # documents currently used for batch
        self.batch_files = []
        self.batch_sentiments = []
        self.batch_positions = []  # tuples (line, line_pos)
        self.batch_documents = []
        

    # Return list of sentences, converted to integer position
    def read_file(self, filename):
        txt = open(filename,'rb').read().decode("utf-8", "ignore").replace('\r','').replace('\n','')
        doc = nlp(txt)
        sents = []
        for s in doc.sents:
            sent = []
            for token in s:
                if token.lemma_ in vocab:
                    sent.append(vocab[token.lemma_])
                else:
                    sent.append(vocab['<UNK>'])
            sents.append(sent)
        return sents
    
    # Create new set of batches
    def create_batches(self):
        
        # Initialize/wipe batch files
        self.batch_files = []
        self.batch_sentiments = []
        self.batch_positions = []  # tuples (line, line_pos)
        self.batch_documents = []        
        
        # Select files at random
        for i in range(self.batch_size):
            f_idx = random.randint(0, len(self.filenames)-1)
            self.batch_files.append(self.filenames[f_idx])
            self.batch_sentiments.append(self.sentiments[f_idx])
            self.batch_positions.append((0,0))
            self.batch_documents.append(self.read_file(self.filenames[f_idx]))
            
            
    # Create batch and new position info
    def get_batch_for_doc(self, batch_document, batch_position):
        
        prior_words = [vocab['<PAD>'] for _ in range(self.prior_words)]
        posterior_words = [vocab['<PAD>'] for _ in range(self.posterior_words)]
        
        next_line = batch_position[0]  # Assumed sentence is off sufficient length. Else bump by 1
        next_column = batch_position[1] + 1  # The next word in sentence by default
        
        # Test that next position will not throw index error
        if not len(batch_document[next_line]) > next_column: 
            if not len(batch_document) > next_line + 1:
                # Reset position to start of document
                next_line = 0
                next_column = 0
            else:
                # Go to next line
                next_line += 1
                next_column = 0
        
        # Populate target word
        target_word = batch_document[next_line][next_column]
        
        # Populate prior words
        j = self.prior_words
        while next_column - j - 1 >= 0 and j > 0:
            
            # (Note: -1 because the target word should not be included in prior words)
            prior_word = batch_document[next_line][next_column - self.prior_words + j - 1]
            prior_words[j-1] = prior_word  # -1 to translate to zero index 
            j -= 1
            
        # Populate posterior words
        # Note: posterior words is reversed, so that the first word shows up last in the index
        j = 0
        while next_column + j + 1 < len(batch_document[next_line]) and j < self.posterior_words:
            
            # +1 to avoid using target word in posterior word
            posterior_word = batch_document[next_line][next_column + 1 + j]
            posterior_words[self.posterior_words - j - 1] = posterior_word  # -1 to convert to posterior_words to 0 index
            j+= 1
        
        return {'posterior_words':posterior_words, 'prior_words':prior_words, 'curr_pos':(next_line, next_column), 'target_word':target_word}
    
    # Get batch for training
    def aggregate_batch(self):
        
        # Define deliverables for each batch
        batch_prior_words = np.zeros([self.batch_size, self.prior_words])
        batch_posterior_words = np.zeros([self.batch_size, self.posterior_words])
        batch_sentiments = np.zeros([self.batch_size,])
        batch_target_words = np.zeros([self.batch_size,])
        
        for i in range(self.batch_size):
            
            # get the batch information for each document
            doc_part = self.get_batch_for_doc(self.batch_documents[i], self.batch_positions[i])
            self.batch_positions[i] = doc_part['curr_pos']  # Update the position for this doc
            
            # put doc batch info into aggregate
            batch_prior_words[i:] = np.array(doc_part['prior_words'])
            batch_posterior_words[i:] = np.array(doc_part['posterior_words'])
            batch_sentiments[i] = self.batch_sentiments[i]
            batch_target_words[i] = doc_part['target_word'] 

        return {'prior':batch_prior_words, 'posterior':batch_posterior_words, 'sentiment':batch_sentiments, 'target_word':batch_target_words}


    
    

In [5]:
# Step 3: Initialize Data Loaders for unsupervised, supervised, and test
neg_files = glob.glob('aclImdb\\train\\neg\\*.txt')
pos_files = glob.glob('aclImdb\\train\\pos\\*.txt')
unsup_files = glob.glob('aclImdb\\train\\unsup\\*.txt')

train_unsup_files = neg_files + pos_files + unsup_files
train_unsup_sentiments = [0 for _ in neg_files] + [1 for _ in pos_files] + [-1 for _ in unsup_files]
assert len(train_unsup_files) == len(train_unsup_sentiments)
assert len(train_unsup_files) == 75000

train_sup_files = neg_files + pos_files
train_sup_sentiments = [0 for _ in neg_files] + [1 for _ in pos_files]
assert len(train_sup_files) == 25000
assert len(train_sup_files) == len(train_sup_sentiments)

test_neg_files = glob.glob('aclImdb\\test\\neg\\*.txt')
test_pos_files = glob.glob('aclImdb\\test\\pos\\*.txt')
test_files = test_neg_files + test_pos_files
test_sentiments = [0 for _ in test_neg_files] + [1 for _ in test_pos_files]
assert len(test_files) == 25000
assert len(test_files) == len(test_sentiments)

prior_words = 5
posterior_words = 5

dl_train_unsup = data_loader(train_unsup_files, train_unsup_sentiments, prior_words = prior_words, posterior_words = posterior_words)
dl_train_sup = data_loader(train_sup_files, train_sup_sentiments, prior_words = prior_words, posterior_words = posterior_words)
dl_test = data_loader(test_files, test_sentiments, prior_words = prior_words, posterior_words = posterior_words)
# call methods below to move batches and to switch documents
# dl.create_batches()
# dl.aggregate_batch()

In [6]:
# Step 4: Build Model
# Provide embedding matrix as input (alternatively could train one, if dataset is sufficiently large)
def build_lstm_doc_vec_model(batch_size, vocab_size = len(vocab), embedding_size = 300, clip_norm = 1, learning_rate = .001, 
                             num_sentiments = 2, doc_vec_steps = 3, prior_steps = 3, posterior_steps = 3, dropout=.5):
    
    # sub-routine to generate lstm model
    def lstm_sub( emb_inp, dropout, n_hidden, n_layers = 1):
        cells = []
        for _ in range(n_layers):
            cell = tf.contrib.rnn.LSTMCell(n_hidden)
            cell = tf.contrib.rnn.DropoutWrapper(
                    cell, output_keep_prob=1.0 - dropout)
            cells.append(cell)
        rnn_cell = tf.contrib.rnn.MultiRNNCell(cells)


        outputs, state = tf.nn.dynamic_rnn(rnn_cell, 
                                            emb_inp, 
                                            time_major=False,
                                            dtype=tf.float32)
        c = tf.identity(state[0].c,'c')
        h = tf.identity(state[0].h,'h')

        return c, h

    
    with tf.variable_scope('inputs'):
        batch_var = tf.placeholder(shape=[], name='batch_size', dtype=tf.int32)
        prior_words = tf.placeholder(shape=[None, prior_steps], name='prior_words', dtype=tf.int32,)
        posterior_words = tf.placeholder(shape=[None, posterior_steps], name='posterior_words', dtype=tf.int32)
        target_word = tf.placeholder(shape=[None], name='target_word', dtype=tf.int32)  # Only used when training lstms & doc vec
        target_sentiment = tf.placeholder(shape=[None], name='target_sentiment', dtype=tf.int32)  # Only used when training sentiment engine
        embedding_matrix = tf.placeholder(shape=[vocab_size, embedding_size], name='embedding_matrix', dtype=tf.float32)
        
        feed_dict = {'batch_size':batch_var, 'prior_words':prior_words, 'posterior_words':posterior_words, 'target_word':target_word,
                    'target_sentiment':target_sentiment, 'embedding_matrix':embedding_matrix}
    
    with tf.variable_scope('doc_vector'):
        doc_vector = tf.get_variable('batch_doc_vectors',[batch_size, doc_vec_steps, embedding_size],dtype=tf.float32)
        doc_vector = tf.nn.dropout(doc_vector, keep_prob=1-dropout, name='batch_doc_vectors_w_dropout')
        dv_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        
        with tf.variable_scope('lstm'):
            doc_c, doc_h = lstm_sub( emb_inp=doc_vector, dropout=.01, n_hidden=1000, n_layers = 1)
    
    with tf.variable_scope('prior_words'):
        prior_words_embedded = tf.nn.embedding_lookup(embedding_matrix, prior_words)
        
        with tf.variable_scope('lstm'):
            prior_c, prior_h = lstm_sub( emb_inp=prior_words_embedded, dropout=.01, n_hidden=1000, n_layers = 1)

    with tf.variable_scope('posterior_words'):
        posterior_words_embedded = tf.nn.embedding_lookup(embedding_matrix, posterior_words)
        
        with tf.variable_scope('lstm'):
            posterior_c, posterior_h = lstm_sub( emb_inp=posterior_words_embedded, dropout=.01, n_hidden=1000, n_layers = 1)
    
    with tf.variable_scope('combiner'):
        doc_weights = tf.get_variable('doc_weights_lstm',[1000,1000],dtype=tf.float32)
        prior_weights = tf.get_variable('prior_weights_lstm',[1000,1000],dtype=tf.float32)
        posterior_weights = tf.get_variable('posterior_weights_lstm',[1000,1000],dtype=tf.float32)
        
        combined = tf.matmul(doc_h,doc_weights) + tf.matmul(prior_h,prior_weights) + tf.matmul(posterior_h, posterior_weights)
        combined_drpout = tf.nn.dropout(combined, keep_prob = .95, name='combined_with_dropout')
        
        with tf.variable_scope('additional_modeling'):
            comb_weights = tf.get_variable('combine_weights',[1000,1000],dtype=tf.float32)
            comb_bias = tf.get_variable('combine_bias',[1000],dtype=tf.float32)
            pred_h = tf.nn.relu(tf.matmul(combined_drpout, comb_weights) + comb_bias)
        
        #combined = tf.concat([doc_h, 
        #                      prior_h, 
        #                      posterior_h], 
        #                     axis=1, name='combined')
        #combined_expanded = tf.reshape(combined,[-1,3,1000])
        
        #with tf.variable_scope('lstm'):
        #    pred_c, pred_h = lstm_sub( emb_inp=combined_expanded, dropout=.01, n_hidden=1000, n_layers = 1)
        #
        with tf.variable_scope('convert_size'):
            combiner_weight = tf.get_variable('combiner_weight',[1000, vocab_size],dtype=tf.float32)
            pred_h_converted = tf.matmul( tf.cast(pred_h, dtype=tf.float32), combiner_weight )
            
    with tf.variable_scope('doc-vec-loss'):
        dvm_loss = tf.cast(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target_word, logits=pred_h_converted),dtype=tf.float32)
        train_dvm_loss = tf.divide(tf.reduce_sum(dvm_loss) , tf.cast(batch_var, tf.float32),
                                                            name='train_loss')
    with tf.variable_scope('doc-vec-model-optimizer'):
        dvm_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        dvm_gradients = tf.gradients(train_dvm_loss, dvm_params)
        dvm_clipped_gradients, _ = tf.clip_by_global_norm(dvm_gradients, clip_norm)    
        optimizer = tf.train.AdamOptimizer(learning_rate)
        dvm = optimizer.apply_gradients(zip(dvm_clipped_gradients, dvm_params))
        
    with tf.variable_scope('doc_vec_opt'):
        dv_gradients = tf.gradients(train_dvm_loss, dv_params)
        dv_clipped_gradients, _ = tf.clip_by_global_norm(dv_gradients, clip_norm)    
        optimizer2 = tf.train.AdamOptimizer(learning_rate)
        dv = optimizer2.apply_gradients(zip(dv_clipped_gradients, dv_params))
        
    doc_vector_model = {'loss':train_dvm_loss, 'dv_opt':dv, 'dvm_opt':dvm}
        
    with tf.variable_scope('sentiment_analysis'):
        
        with tf.variable_scope('simple-weight'):

            with tf.variable_scope('read-doc-vec', reuse = False):
                flat_doc_vector = tf.reshape(doc_vector, shape=[-1, doc_vec_steps * embedding_size])
                weights = tf.get_variable('weights',shape=[doc_vec_steps * embedding_size, num_sentiments])
                sentiment_predictions = tf.matmul(flat_doc_vector, weights)

            with tf.variable_scope('loss-opt'):
                sentiment_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target_sentiment, logits=sentiment_predictions)
                train_sentiment_loss = tf.divide(tf.reduce_sum(sentiment_loss) , tf.cast(batch_var, tf.float32),
                                                                name='sentiment_loss')
                sentiment_params = [weights]
                sentiment_gradients = tf.gradients(train_sentiment_loss, sentiment_params)
                sentiment_clipped_gradients = tf.clip_by_global_norm(sentiment_gradients, clip_norm)

                with tf.variable_scope('SGD', reuse = False):
                    optimizer3 = tf.train.GradientDescentOptimizer(learning_rate)
                    sentiment_update_step = optimizer3.minimize(train_sentiment_loss, var_list = sentiment_params)
                
        with tf.variable_scope('lstm-small'):
            
            pre_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            
            sentiment_c, sentiment_h = lstm_sub( emb_inp=doc_vector, dropout=0.01, n_hidden=1000, n_layers = 1)
            lstm_conversion_weights = tf.get_variable('weights',shape=[1000,2],dtype=tf.float32)
            lstm_pred = tf.matmul(sentiment_h, lstm_conversion_weights)
            
            with tf.variable_scope('loss-opt'):
                
                sentiment_lstm_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target_sentiment, logits=lstm_pred)
                train_sentiment_lstm_loss = tf.divide(tf.reduce_sum(sentiment_lstm_loss) , tf.cast(batch_var, tf.float32),
                                                                name='sentiment_loss')
                
                post_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
                sentiment_lstm_params = [pp for pp in post_params if pp not in pre_params]
                
                with tf.variable_scope('SGD', reuse = False):
                    
                    optimizer4 = tf.train.GradientDescentOptimizer(learning_rate)
                    sentiment_lstm_update_step = optimizer3.minimize(train_sentiment_lstm_loss, var_list = sentiment_lstm_params)

        with tf.variable_scope('relu'):
            
            relu_nhidden = 1000#50
            relu_weights_in = tf.get_variable('weights_in',shape=[doc_vec_steps * embedding_size, relu_nhidden])
            b_in = tf.get_variable('bias_in',shape=[relu_nhidden])
            relu_weights_out = tf.get_variable('weights_out',shape=[relu_nhidden, num_sentiments])
            b_out = tf.get_variable('bias_out',shape=[num_sentiments])
            
            sentiment_relu_prediction = tf.matmul( tf.nn.relu( tf.matmul(flat_doc_vector, relu_weights_in) + b_in ) , relu_weights_out) + b_out
            
            with tf.variable_scope('loss'):
                sentiment_relu_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target_sentiment, logits=sentiment_relu_prediction)
                train_sentiment_relu_loss = tf.divide(tf.reduce_sum(sentiment_relu_loss) , tf.cast(batch_var, tf.float32),
                                                                    name='sentiment_loss')
                
                sentiment_relu_params = [relu_weights_in, b_in, relu_weights_out, b_out]
                
                with tf.variable_scope('SGD', reuse = False):
                    
                    optimizer5 = tf.train.GradientDescentOptimizer(learning_rate)
                    sentiment_relu_update_step = optimizer5.minimize(train_sentiment_relu_loss, var_list = sentiment_relu_params)

    
    sentiment_model = {'simple_loss': train_sentiment_loss, 'simple_opt':sentiment_update_step, 'simple_pred':sentiment_predictions,
                      'lstm_loss':train_sentiment_lstm_loss, 'lstm_opt':sentiment_lstm_update_step, 'lstm_pred':lstm_pred,
                      'relu_loss':train_sentiment_relu_loss, 'relu_opt':sentiment_relu_update_step, 'relu_pred':sentiment_relu_prediction}
    
    return {'sentiment_model':sentiment_model, 'doc_vector_model':doc_vector_model, 'feed_dict':feed_dict}
            
    

    

In [7]:
# Step 4 contd: Build Model (Moved to below cell as part of generating new saver)
with tf.variable_scope('doc-vecs_lstm-madness2'):
    docs_per_batch = batch_size = 100  # required to set document-vector size within model
    model = build_lstm_doc_vec_model(docs_per_batch, prior_steps = prior_words, posterior_steps = posterior_words)


In [8]:
# Step 5: Train & Test Model
# ... First train unsupervised part (dvm)
# ... Then train supervised sentiment part (simple, lstm, relu)
# ... Then test entire model to check performance
sess = tf.Session()

model_directory = 'doc_vec_lstm_model_t9/model'
try:
    saver = tf.train.Saver()#tf.train.import_meta_graph(model_directory+'.meta')
    saver.restore(sess, model_directory)#, tf.train.latest_checkpoint(model_directory.replace('/model','')))
except:
    if 'y' in input('run new model? (y/n)').lower():
        # Step 4 contd: Build Model
        #with tf.variable_scope('doc-vecs_lstm-madness2'):
        #    docs_per_batch = batch_size = 100  # required to set document-vector size within model
        #    model = build_lstm_doc_vec_model(docs_per_batch)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=4)
    else:
        assert 1 == 0

INFO:tensorflow:Restoring parameters from doc_vec_lstm_model_t9/model


In [10]:
# training info
num_unsup_epochs = 1000#100000
num_sup_epochs = 1000#10000
num_test_epochs = 100
runs_till_new_docs = 10#0
num_to_opt_doc_vec = 30


def get_feed_dict(batch):
    feed_dict = {model['feed_dict']['batch_size']:batch_size, 
                 model['feed_dict']['prior_words']:batch['prior'], 
                 model['feed_dict']['posterior_words']:batch['posterior'], 
                 model['feed_dict']['target_word']:batch['target_word'],
                model['feed_dict']['target_sentiment']:batch['sentiment'], 
                 model['feed_dict']['embedding_matrix']:embedding_matrix}
    return feed_dict

# Unsupervised Epochs
dl_train_unsup.create_batches()
for i in range(num_unsup_epochs):
    for j in range(runs_till_new_docs):
        b = dl_train_unsup.aggregate_batch()
        feed_dict = get_feed_dict(b)
        loss, _ = sess.run( [model['doc_vector_model']['loss'],
                   model['doc_vector_model']['dvm_opt']],
                feed_dict = feed_dict)
    if i%20 == 0:
        print('unsupervised {}'.format(loss))
        saver.save(sess, model_directory)
        dl_train_unsup.create_batches()

# Supervised Epochs
dl_train_sup = data_loader(train_sup_files, train_sup_sentiments)
dl_train_sup.create_batches()
for i in range(num_sup_epochs):
    #for j in range(runs_till_new_docs):
        
        # Optimize Doc Vector for batch
        for k in range(num_to_opt_doc_vec):
            
            b = dl_train_sup.aggregate_batch()
            feed_dict = get_feed_dict(b)
            loss, _ = sess.run( [model['doc_vector_model']['loss'],
                   model['doc_vector_model']['dv_opt']],
                feed_dict = feed_dict)
        #print('doc_vector {}'.format(loss))
        saver.save(sess, model_directory)
            
        # Optimize Sentiment models for doc_vector
        for k in range(runs_till_new_docs * 4):
            b = dl_train_sup.aggregate_batch()
            feed_dict = get_feed_dict(b)
            simple_loss, _ = sess.run( [model['sentiment_model']['simple_loss'],
                   model['sentiment_model']['simple_opt']],
                feed_dict = feed_dict)
            lstm_loss, _ = sess.run( [model['sentiment_model']['lstm_loss'],
                   model['sentiment_model']['lstm_opt']],
                feed_dict = feed_dict)
            relu_loss, _ = sess.run( [model['sentiment_model']['relu_loss'],
                   model['sentiment_model']['relu_opt']],
                feed_dict = feed_dict)
        print('simple {}\tlstm {}\trelu {}'.format(simple_loss, lstm_loss, relu_loss))
        saver.save(sess, model_directory)
        dl_train_sup.create_batches()
    

# Test
losses = {'simple':0, 'relu':0, 'lstm':0, 'counts':0}
dl_test.create_batches()
for i in range(num_test_epochs):

        # Optimize Doc Vector for batch
        for k in range(num_to_opt_doc_vec):
            
            b = dl_test.aggregate_batch()
            feed_dict = get_feed_dict(b)
            loss, _ = sess.run( [model['doc_vector_model']['loss'],
                   model['doc_vector_model']['dv_opt']],
                feed_dict = feed_dict)
        print('doc_vector {}'.format(loss))
        saver.save(sess, model_directory)
            
        # Calculate Loss and return predictions
        for k in range(runs_till_new_docs):
            b = dl_test.aggregate_batch()
            feed_dict = get_feed_dict(b)
            simple_loss = sess.run( [model['sentiment_model']['simple_loss']],
                feed_dict = feed_dict)[0]
            lstm_loss = sess.run( [model['sentiment_model']['simple_loss']],
                feed_dict = feed_dict)[0]
            relu_loss = sess.run( [model['sentiment_model']['relu_loss']],
                feed_dict = feed_dict)[0]
            losses['simple'] += simple_loss
            losses['relu'] += relu_loss
            losses['lstm'] += lstm_loss
            losses['counts'] += 1
        print('simple {}\tlstm {}\trelu {}'.format(simple_loss, lstm_loss, relu_loss))
        saver.save(sess, model_directory)
        dl_test.create_batches()


unsupervised 5.228206634521484
unsupervised 4.949830532073975
unsupervised 5.045355319976807
unsupervised 4.598198413848877
unsupervised 4.399217128753662
unsupervised 4.756768703460693
unsupervised 5.021799087524414
unsupervised 4.81366491317749
unsupervised 4.413426399230957
unsupervised 4.345880508422852
unsupervised 4.425302028656006
unsupervised 5.352427959442139
unsupervised 4.593008518218994
unsupervised 4.650392055511475
unsupervised 4.724662780761719
unsupervised 4.469225883483887
unsupervised 4.9506049156188965
unsupervised 4.677937984466553
unsupervised 4.311498641967773
unsupervised 4.880538463592529
unsupervised 4.6603617668151855
unsupervised 4.173721790313721
unsupervised 4.723748207092285
unsupervised 4.347041130065918
unsupervised 4.7895307540893555
unsupervised 5.589871883392334
unsupervised 4.916631698608398
unsupervised 4.511536121368408
unsupervised 4.441699981689453
unsupervised 4.7048163414001465
unsupervised 5.0121049880981445
unsupervised 4.698641300201416
unsu

ValueError: Cannot feed value of shape (100, 3) for Tensor 'doc-vecs_lstm-madness2/inputs/prior_words:0', which has shape '(?, 5)'

In [None]:
#4.6