In [1]:
import math
import numpy as np
import tensorflow as tf

from tensorflow.python.ops.rnn_cell import GRUCell
from tensorflow.python.ops.rnn_cell import LSTMCell
from tensorflow.python.ops.rnn_cell import MultiRNNCell
from tensorflow.python.ops.rnn_cell import DropoutWrapper, ResidualWrapper

from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.layers.core import Dense
from tensorflow.python.util import nest

from tensorflow.contrib.seq2seq.python.ops import attention_wrapper
from tensorflow.contrib.seq2seq.python.ops import beam_search_decoder
from datetime import datetime

from preprocess import *
from loading_util import *
from ques_dictionary import ques_dict

  from ._conv import register_converters as _register_converters


In [2]:
#Resetter
def reset_graph(seed=42):
    """Start over with an empty default graph and reproducible randomness.

    Seeds NumPy's global RNG, discards the current default TensorFlow graph,
    and sets the graph-level random seed on the fresh default graph.

    Args:
        seed: integer used for both the NumPy and the TensorFlow seed.
    """
    np.random.seed(seed)
    # tf.set_random_seed applies to the current default graph, so it must run
    # after the old graph has been replaced.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

In [3]:
# --- Embedding parameters ---
embedding_size = 50
# NOTE(review): presumably the 400000 GloVe entries plus the 3 extra tokens
# added to the dictionary later in the notebook -- confirm against the data.
vocab_size = 400003

# --- Data parameters ---
eMax_allowed_length = 64
#dMax_allowed_length = 15

# --- Network parameters ---
hidden_units = 90
n_outputs = 5
depth = 1
n_epochs = 10
learning_rate = 0.001

# --- Saving parameters ---
# TensorBoard logs: every execution of this cell gets its own run directory,
# named after the current UTC timestamp.
root_logdir = "tmp/Classification/tf_logs"
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
log_dir = root_logdir + "/run-" + now + "/"

# Checkpoint path passed to saver.save / saver.restore further down.
save_path = 'tmp/Classification/model/mymodel.ckpt'

In [4]:
# Load the training and test CSV files.
# default directory: 'data/data_10.csv'
X, Y = read_csv('data/ques20_classification.csv')
X_test, Y_test = read_csv('data/ques20_classification_test.csv')

# Display one entry of X as a sanity check (last expression of the cell).
X[2]

'Whenever something makes me angry, I take that moment very positively because when we are working in such an organization usually decisions taken in anger are mostly futile. So I think, rather of getting anger and spoiling yourself, one should be silent in during anger at workplace.'

In [5]:
# Load the GloVe vectors.
# default directory: "./glove.6B.50d.txt"
embedding_size = 50  # same value as in the parameters cell
wi, iw, wv = read_glove_vecs()

# These bare expressions are not the cell's last statement, so the notebook
# does not display their values.
len(wi)
len(iw)

# Clear the default graph and re-seed before any graph ops are created.
reset_graph()

In [6]:
# Add the extra tokens to the GloVe dictionaries and keep their indices.
# IMPORTANT: Don't run more than once
go_index, eos_index, unk_index = add_extra_to_dict(wi, iw, wv, embedding_size)

# List form of the vectors; this is what gets fed as the embedding table later.
emb = map_dict_to_list(iw, wv)

In [7]:
# Preprocessing: map each word of every sentence to its GloVe index.
# The first entry of X / X_test is skipped via the [1:] slice.
eInput, eLengths = fit_encoder_text(
    data=X[1:],
    word_to_index=wi,
    max_allowed_seq_length=eMax_allowed_length,
)
#dInput,dOutput,dLengths = fit_decoder_text(data= Y[1:],word_to_index = wi,max_allowed_seq_length = dMax_allowed_length)

eInput_test, eLengths_test = fit_encoder_text(
    data=X_test[1:],
    word_to_index=wi,
    max_allowed_seq_length=eMax_allowed_length,
)

# Only the training outputs are converted to NumPy arrays; the test outputs
# stay in whatever form fit_encoder_text returned.
eInput = np.array(eInput)
eLengths = np.array(eLengths)
#dInput = np.array(dInput)
#dOutput = np.array(dOutput)
#dLengths = np.array(dLengths)

# Note: the two prints inspect different samples (index 2 vs index 1).
print(eInput[2])
print(eLengths[1])

[ 55227 192972  43009 356156 257314  61721 388710 141942 185456 168565
 185456  52942 264549 209674 360914 160417  55410 264179  87774 357639
  58996 190735 386423 185456 123516  71584  55410 278019 188480  93017
 268045 389882 188480 354482 186644 185456 148426 357211 146626 240687
  58996 352024 155016 111502 151348  43009 389835 188480 386473 357809
 123516 108279 400001 400001 400001 400001 400001 400001 400001 400001
 400001 400001 400001 400001]
48


In [8]:


# Token ids fed to the encoder: [batch_size, max_time_steps]
encoder_inputs = tf.placeholder(
    dtype=tf.int32,
    shape=(None, None),
    name='encoder_inputs',
)

# Length of each input sequence: [batch_size]
encoder_inputs_length = tf.placeholder(
    dtype=tf.int32,
    shape=(None,),
    name='encoder_inputs_length',
)

# Integer class labels, one per example: [batch_size]
# (used as `labels` of the sparse softmax cross-entropy loss)
question = tf.placeholder(dtype=tf.int32, shape=[None])

# Batch size is only known at run time, so read it from the fed tensor.
batch_size = tf.shape(encoder_inputs)[0]

In [9]:
# Sanity check: the dynamic batch size should equal the number of rows fed.
with tf.Session() as sess:
    masize = sess.run(
        batch_size,
        feed_dict={encoder_inputs: eInput},
    )
    print(masize)

19


In [10]:
# Embedding table: a frozen (non-trainable) variable that starts as zeros and
# is filled from a placeholder.
embedding_variable = tf.Variable(
    tf.constant(0.0, shape=[vocab_size, embedding_size]),
    trainable=False,
    name='embedding',
)
embedding_placeholder = tf.placeholder(
    tf.float32,
    shape=[vocab_size, embedding_size],
    name='embedding_placeholder',
)

# The lookup below reads from the output of this assign op, so every run that
# touches the embeddings must feed embedding_placeholder and re-copies the
# whole table into the variable.
encoder_embeddings = embedding_variable.assign(embedding_placeholder)

encoder_inputs_embedded = tf.nn.embedding_lookup(
    encoder_embeddings,
    encoder_inputs,
)

In [11]:
# Sanity check of the embedding lookup: print the result's shape and the
# vector of the first token of the first sequence.
with tf.Session() as sess:
    embed = sess.run(
        encoder_inputs_embedded,
        feed_dict={
            embedding_placeholder: emb,
            encoder_inputs: eInput,
        },
    )
    print(embed.shape)
    print(embed[0][0])

(19, 64, 50)
[ 1.1891e-01  1.5255e-01 -8.2073e-02 -7.4144e-01  7.5917e-01 -4.8328e-01
 -3.1009e-01  5.1476e-01 -9.8708e-01  6.1757e-04 -1.5043e-01  8.3770e-01
 -1.0797e+00 -5.1460e-01  1.3188e+00  6.2007e-01  1.3779e-01  4.7108e-01
 -7.2874e-02 -7.2675e-01 -7.4116e-01  7.5263e-01  8.8180e-01  2.9561e-01
  1.3548e+00 -2.5701e+00 -1.3523e+00  4.5880e-01  1.0068e+00 -1.1856e+00
  3.4737e+00  7.7898e-01 -7.2929e-01  2.5102e-01 -2.6156e-01 -3.4684e-01
  5.5841e-01  7.5098e-01  4.9830e-01 -2.6823e-01 -2.7443e-03 -1.8298e-02
 -2.8096e-01  5.5318e-01  3.7706e-02  1.8555e-01 -1.5025e-01 -5.7512e-01
 -2.6671e-01  9.2121e-01]


In [12]:
# Single basic RNN cell unrolled over the embedded inputs. sequence_length
# gives dynamic_rnn each example's length; `states` is the final state that
# the classification layer consumes.
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_units)
outputs, states = tf.nn.dynamic_rnn(
    basic_cell,
    encoder_inputs_embedded,
    dtype=tf.float32,
    sequence_length=encoder_inputs_length,
)

In [13]:
# Project the final RNN state onto n_outputs class scores.
logits = tf.layers.dense(inputs=states, units=n_outputs)

# Per-example cross-entropy against the integer labels, averaged over the batch.
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=question,
    logits=logits,
)
loss = tf.reduce_mean(xentropy)

In [14]:
# Inference ops: class probabilities and the index of the most likely class.
# Both calls operate along axis 1 (the class axis of the logits).
probs = tf.nn.softmax(logits, 1)
test_ques = tf.argmax(probs, 1, output_type=tf.int64)

In [15]:
# Adam update on the mean cross-entropy loss.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

# Accuracy: fraction of examples whose label is the top-1 logit.
correct = tf.nn.in_top_k(predictions=logits, targets=question, k=1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [16]:
# Scalar summary of the accuracy, plus one event-file writer each for train
# and test; both writers are given the current default graph.
acc_summary = tf.summary.scalar('Accuracy', accuracy)
file_writer = tf.summary.FileWriter(
    log_dir + '_train',
    graph=tf.get_default_graph(),
)
file_writer_test = tf.summary.FileWriter(
    log_dir + '_test',
    graph=tf.get_default_graph(),
)

In [17]:
# Op that initialises every global variable created so far.
init = tf.global_variables_initializer()
# Saver used to write and restore the model checkpoint.
saver = tf.train.Saver()

In [18]:
# Train on the whole training set as one batch per epoch, printing the train
# accuracy and the predicted next question for one test answer each epoch.
with tf.Session() as sess:
    sess.run(init)
    print('Ans:', X_test[2])

    # The feeds are identical for every epoch, so build them once.
    train_feed = {
        encoder_inputs: eInput,
        encoder_inputs_length: eLengths,
        question: Y[1:],
        embedding_placeholder: emb,
    }
    test_feed = {
        encoder_inputs: eInput_test,
        encoder_inputs_length: eLengths_test,
        embedding_placeholder: emb,
    }

    for epoch in range(n_epochs):
        # One optimisation step, then accuracy measured after the update.
        sess.run(training_op, feed_dict=train_feed)
        acc_train = sess.run(accuracy, feed_dict=train_feed)
        print(epoch, "Train accuracy:", acc_train)

        # Index 1 of the test batch is X_test[2], the answer printed above
        # (the test inputs were built from X_test[1:]).
        testing = sess.run(test_ques, feed_dict=test_feed)
        print('Next Question:', ques_dict[testing[1]])

    saver.save(sess, save_path)

Ans: I am a hard working person, and I am ambitious about my goals. I would love to fulfill organization objectives so that I am recognised as someone significant to the company. I can work under pressure when things are not my side and I never give up.
0 Train accuracy: 0.31578946
Next Question: Why should I hire you?
1 Train accuracy: 0.5263158
Next Question: Why should I hire you?
2 Train accuracy: 0.47368422
Next Question: Why should I hire you?
3 Train accuracy: 0.5263158
Next Question: Why should I hire you?
4 Train accuracy: 0.5263158
Next Question: Tell me more about your goals.
5 Train accuracy: 0.5263158
Next Question: Tell me more about your goals.
6 Train accuracy: 0.6315789
Next Question: Tell me more about your goals.
7 Train accuracy: 0.7894737
Next Question: Tell me more about your goals.
8 Train accuracy: 0.84210527
Next Question: Tell me more about your goals.
9 Train accuracy: 0.84210527
Next Question: Tell me more about your goals.


In [19]:
# Restore the saved checkpoint in a fresh session and predict the next
# question for the test answer at index 1.
with tf.Session() as sess:
    saver.restore(sess, save_path)
    testing = sess.run(
        test_ques,
        feed_dict={
            encoder_inputs: eInput_test,
            encoder_inputs_length: eLengths_test,
            embedding_placeholder: emb,
        },
    )
    print(ques_dict[testing[1]])

INFO:tensorflow:Restoring parameters from tmp/Classification/model/mymodel.ckpt
Tell me more about your goals.


In [26]:
def network_pass(candidate_ans, sess=None):
    """Predict the next interview question for a single candidate answer.

    The answer is encoded with fit_encoder_text and run through the
    classifier; the predicted class index is looked up in ques_dict.

    Args:
        candidate_ans: the candidate's answer as one string.
        sess: optional tf.Session to evaluate in. When None (the default) the
            current default session is used, so existing callers that invoke
            this inside a ``with tf.Session()`` block keep working unchanged.

    Returns:
        The ques_dict entry for the predicted class of this answer.
    """
    # fit_encoder_text is called with a list of texts elsewhere in the
    # notebook, so wrap the single answer in a one-element batch.
    candidate_input, input_length = fit_encoder_text(
        data=[candidate_ans],
        word_to_index=wi,
        max_allowed_seq_length=eMax_allowed_length,
    )
    new_Q = test_ques.eval(
        feed_dict={
            encoder_inputs: candidate_input,
            encoder_inputs_length: input_length,
            embedding_placeholder: emb,
        },
        session=sess,
    )
    # Batch of one: the only prediction is at index 0.
    return ques_dict[new_Q[0]]
    

In [27]:
def activate_bot(n_turns=4, first_question='Tell me about yourself.'):
    """Run an interactive interview loop driven by the saved model.

    Restores the checkpoint at save_path into a fresh session, then repeatedly
    prompts with the current question, reads the candidate's answer from
    standard input and lets network_pass pick the next question.

    Args:
        n_turns: number of answers to collect. The default of 4 matches the
            previous hard-coded loop (a counter running from 1 up to 5).
        first_question: prompt shown for the first turn.
    """
    with tf.Session() as sess:
        saver.restore(sess, save_path)

        Q = first_question
        for _ in range(n_turns):
            candidate_ans = input(Q)
            # Evaluated in the default session, which is `sess` inside this
            # `with` block. The question predicted after the last answer is
            # computed but never shown, as before.
            Q = network_pass(candidate_ans)


activate_bot()

INFO:tensorflow:Restoring parameters from tmp/Classification/model/mymodel.ckpt
Tell me about yourself.I acquired my degree in finance at a reputable university. My courses in finance and business have given me a solid base upon which I plan to build my career. During my college internship at a bank, I dealt numerous administrative duties. The experience allowed me to learn important skills and to develop the confidence needed to succeed in a competitive environment.
Can you work under pressure?Yes sure
Tell me more about your goals.nah
Tell me more about your goals.why
