In [34]:
import tensorflow as tf
import numpy as np
from string import punctuation
from collections import Counter

In [35]:
with open('reviews.txt', 'r') as f:
    reviews=f.read()
    
with open('labels.txt', 'r') as f:
    labels=f.read()
    


In [36]:
reviews=''.join([c for c in reviews if c not in punctuation])
split_reviews=reviews.split('\n')
full_text=' '.join(split_reviews)
words=full_text.split()
word_count=Counter(words)
vocab=sorted(word_count, key=word_count.get, reverse=True)
vocab2int={w:i for i, w in enumerate(vocab, 1)}
review_ints=[]
for review in split_reviews:
    review_ints.append([vocab2int[word] for word in review.split()])



In [37]:
labels_=labels.split('\n')
labels_int=np.array([1 if label=='positive' else 0 for label in labels_])


In [38]:
review_lens=Counter([len(x) for x in review_ints])
print("Zero-length reviews: {}".format(review_lens[0]))
print("Maximum review length: {}".format(max(review_lens)))
#reviews of  length 200 is fed into network zero length review is padded with zeros and others are truncated to 200 
#characters

Zero-length reviews: 1
Maximum review length: 2514


In [39]:
seq_len=200
non_zero_idx=[i for i, r in enumerate(review_ints) if len(r)!=0]
review_ints=[review_ints[i] for i in non_zero_idx]
labels_int=[labels_int[i] for i in non_zero_idx]
features=np.zeros((len(review_ints), seq_len), dtype=int)

for idx, row in enumerate(review_ints):
    features[idx, -len(row):]=row[:seq_len]
    


In [40]:
split_fract=0.8
split_idx=int(len(features)*0.8)
train_x, val_x=features[:split_idx], features[split_idx:]
train_y, val_y=labels_int[:split_idx], labels_int[split_idx:]

test_idx=int(len(val_x)*0.5)
val_x, test_x=val_x[:test_idx], val_x[test_idx:]
val_y, test_y=val_y[:test_idx], val_y[test_idx:]

def get_batches(x, y, batch_size):
    n_batches=len(x)//batch_size
    x_, y_ = x[:n_batches*batch_size], y[:n_batches*batch_size]
    for i in range(0, len(x), batch_size):
        yield x_[i:i+batch_size], y_[i:i+batch_size]

In [41]:
n_units=128
lstm_layer=1
batch_size=250
embed_size=300
n_words=len(vocab2int)+1 
graph=tf.Graph()

In [42]:
with graph.as_default():
    inputs=tf.placeholder(tf.int32, [None, None])
    _labels=tf.placeholder(tf.int32, [None, None])
    keep_prob=tf.placeholder(tf.float32)
    
    embed_weight=tf.Variable(tf.random_normal((n_words, embed_size), -1, 1))
    embedding=tf.nn.embedding_lookup(embed_weight, inputs)
    
    def build_cell(n_units, keep_prob):
        lstm=tf.contrib.rnn.BasicLSTMCell(n_units)
        drop=tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        return drop
     
    cells=tf.contrib.rnn.MultiRNNCell([build_cell(n_units, keep_prob) for _ in range(lstm_layer)])
    initial_state=cells.zero_state(batch_size, tf.float32)
    
    output, final_state=tf.nn.dynamic_rnn(cells, embedding, initial_state=initial_state)
    
    predictions=tf.contrib.layers.fully_connected(output[:, -1],1, activation_fn=tf.sigmoid)
    cost=tf.losses.mean_squared_error(_labels, predictions)
    optm=tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cost)
    
    correct_pred=tf.equal(tf.cast(tf.round(predictions), tf.int32), _labels)
    accuracy=tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    saver=tf.train.Saver()

In [None]:
epochs=5

with tf.Session(graph=graph) as sess:
    
    sess.run(tf.global_variables_initializer())
    iteration=1
    for e in range(epochs):
        #batches=get_batches(train_x, train_y, batch_size)
        state=sess.run(initial_state)
        
        for ii, (x, y) in enumerate(get_batches(train_x, train_y, batch_size), 1):
        
            feed={inputs:x,
                 _labels:np.array(y)[:, None],
                 keep_prob:0.75,
                 initial_state:state}
            
            loss, state, _ = sess.run([cost, final_state, optm], feed_dict=feed)
            
            if iteration%5==0:
                print("Epoch: {}/{}".format(e, epochs),
                      "Iteration: {}".format(iteration),
                      "Train loss: {:.3f}".format(loss))
                
                
            
            if iteration%25==0:
                val_acc=[]
                valid_batches=get_batches(val_x, val_y, batch_size)
                valid_state=sess.run(initial_state)
                for x1, y1 in valid_batches:
                    feed={inputs:x1,
                         _labels:np.array(y1)[:, None],
                         keep_prob:0.6,
                         initial_state:valid_state}
                    batch_acc, valid_state = sess.run([accuracy, final_state], feed_dict=feed)
                    val_acc.append(batch_acc)
                print("Val acc: {:.3f}".format(np.mean(val_acc)))
            
            iteration+=1
    
    saver.save(sess, 'checkpoints/sentiments.ckpt')
    
    
    
    

In [32]:
with open('custom test dataset', 'r') as custom:
    file=custom.read()
    
def preprocess(file):
    file=''.join([c for c in file if c not in punctuation])
    split_reviews=file.split('\n')
    full_text=' '.join(split_reviews)
    words=full_text.split()
    review_ints=[]
    for review in split_reviews:
        review_ints.append([vocab2int[word] for word in review.split()])
    
    non_zero_idx=[i for i, r in enumerate(review_ints) if len(r)!=0]
    review_ints=[review_ints[i] for i in non_zero_idx]
    features=np.zeros((len(review_ints), seq_len), dtype=int)
    
    for idx, row in enumerate(review_ints):
        features[idx, -len(row):]=row[:seq_len]
    
    return features


user_test_reviews=preprocess(file)
   

(20000, 200)

In [33]:
with tf.Session(graph=graph) as sess:
    saver.restore(sess, tf.train.latest_checkpoint('checkpoints'))
    test_state=sess.run(cells.zero_state(batch_size, tf.float32))
    test_batches=get_batches(test_x, test_y, batch_size)
    test_acc=[]
    for x, y in test_batches:
        feed={inputs:x,
             _labels:np.array(y)[:, None],
             keep_prob:1,
             initial_state:test_state}
        batch_acc, test_state = sess.run([accuracy, final_state], feed_dict=feed)
        test_acc.append(batch_acc)
        
    print('test accuracy : {:.3f}'.format(np.mean(test_acc)))
    
    print("***************\n")
    print("Checking model on user reviews...\n")
    
    predict=[]
    #batch_size=4
    state_=sess.run(initial_state)
    values, state_=sess.run([predictions, final_state], feed_dict={inputs:user_test_reviews, initial_state:state_, keep_prob:1})
    
    for value in values:
        predict.append([tf.cast(tf.round(value), tf.int32)])
    
    print(predict)

INFO:tensorflow:Restoring parameters from checkpoints/sentiments.ckpt


InvalidArgumentError: Restoring from checkpoint failed. This is most likely due to a mismatch between the current graph and the graph from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Assign requires shapes of both tensors to match. lhs shape= [384,512] rhs shape= [428,512]
	 [[Node: save/Assign_16 = Assign[T=DT_FLOAT, _class=["loc:@rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel/Adam_1, save/RestoreV2/_33)]]

Caused by op 'save/Assign_16', defined at:
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 497, in start
    self.io_loop.start()
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/asyncio/base_events.py", line 1434, in _run_once
    handle._run()
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 536, in <lambda>
    self.io_loop.add_callback(lambda : self._handle_events(self.socket, 0))
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2901, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-31-00d3d1f2e385>", line 25, in <module>
    saver=tf.train.Saver()
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1281, in __init__
    self.build()
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1293, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1330, in _build
    build_save=build_save, build_restore=build_restore)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 778, in _build_internal
    restore_sequentially, reshape)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 419, in _AddRestoreOps
    assign_ops.append(saveable.restore(saveable_tensors, shapes))
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 112, in restore
    self.op.get_shape().is_fully_defined())
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/state_ops.py", line 216, in assign
    validate_shape=validate_shape)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/gen_state_ops.py", line 60, in assign
    use_locking=use_locking, name=name)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 454, in new_func
    return func(*args, **kwargs)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3155, in create_op
    op_def=op_def)
  File "/home/prayag/miniconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1717, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): Restoring from checkpoint failed. This is most likely due to a mismatch between the current graph and the graph from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Assign requires shapes of both tensors to match. lhs shape= [384,512] rhs shape= [428,512]
	 [[Node: save/Assign_16 = Assign[T=DT_FLOAT, _class=["loc:@rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel/Adam_1, save/RestoreV2/_33)]]


array([[   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0, 

In [37]:
import tensorflow as tf
graph=tf.Graph()
with graph.as_default():
    weight=tf.Variable(tf.random_normal((4, 5), -1, 1))
    inputs=tf.placeholder(tf.int32, [None, None])


embed=tf.nn.embedding_lookup(weight, inputs)
val=np.array([[1,2,4,3],[0,2,3,1],[3,2,0,4]])
with tf.Session(graph=graph) as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(weight))
    print("\n")
    print(sess.run(embed, feed_dict={inputs:val}))


[[-2.5841322  -1.8683769  -0.3828568  -0.8481432  -0.6080656 ]
 [-2.1439722   0.6790556  -2.8610876  -1.3981377  -1.4000592 ]
 [-1.1342047  -1.142002   -3.4780161   0.07325149 -1.1102241 ]
 [-0.58060104 -0.8401326  -1.4733518   0.26703286 -0.97050846]]


[[[-2.1439722   0.6790556  -2.8610876  -1.3981377  -1.4000592 ]
  [-1.1342047  -1.142002   -3.4780161   0.07325149 -1.1102241 ]
  [ 0.          0.          0.          0.          0.        ]
  [-0.58060104 -0.8401326  -1.4733518   0.26703286 -0.97050846]]

 [[-2.5841322  -1.8683769  -0.3828568  -0.8481432  -0.6080656 ]
  [-1.1342047  -1.142002   -3.4780161   0.07325149 -1.1102241 ]
  [-0.58060104 -0.8401326  -1.4733518   0.26703286 -0.97050846]
  [-2.1439722   0.6790556  -2.8610876  -1.3981377  -1.4000592 ]]

 [[-0.58060104 -0.8401326  -1.4733518   0.26703286 -0.97050846]
  [-1.1342047  -1.142002   -3.4780161   0.07325149 -1.1102241 ]
  [-2.5841322  -1.8683769  -0.3828568  -0.8481432  -0.6080656 ]
  [ 0.          0.          0.       

In [26]:
import numpy as np
a=np.array([[1,2,3],[4,5,6],[7,8,9]])


(2, 3)