# Demonstrate Seq2Seq Wrapper with Cornell Movie Dialog Corpus

In [1]:
import tensorflow as tf
import numpy as np

# preprocessed data
from datasets.cornell_corpus import data
import data_utils

In [2]:
# Re-import the dataset helper module so that edits to its source file take effect
# without restarting the kernel. `importlib` is also used further down to reload
# seq2seq_wrapper.
import importlib
importlib.reload(data)

<module 'datasets.cornell_corpus.data' from 'C:\\Users\\ricsi\\Documents\\GitHub\\Python-projects\\seq2seq_chatbot_projects\\practical_seq2seq_cornell\\datasets\\cornell_corpus\\data.py'>

In [3]:
# Read the preprocessed corpus (per the original note, stored as pickle and npy files),
# then split the idx_q / idx_a arrays into train, test and validation pairs.
corpus_dir = 'datasets/cornell_corpus/'
metadata, idx_q, idx_a = data.load_data(PATH=corpus_dir)

train_pair, test_pair, valid_pair = data_utils.split_dataset(idx_q, idx_a)
trainX, trainY = train_pair
testX, testY = test_pair
validX, validY = valid_pair

In [4]:
# Hyper-parameters handed to the Seq2Seq wrapper below.

# Sequence lengths come from the last axis of the training arrays.
xseq_len = trainX.shape[-1]
yseq_len = trainY.shape[-1]

# One vocabulary (the size of metadata['idx2w']) is used for both input and output.
xvocab_size = len(metadata['idx2w'])
yvocab_size = xvocab_size

# Model / training sizes.
batch_size = 16
emb_dim = 128
num_units = 512
num_heads = 16
epochs = 100000

# Learning-rate settings; how they are scheduled is defined inside seq2seq_wrapper.
min_lr = 1e-6
max_lr = 1e-3
lr_step = 5000

In [5]:
import seq2seq_wrapper

In [6]:
# Pick up any edits made to seq2seq_wrapper, then build the model from the settings
# defined in the parameters cell.
importlib.reload(seq2seq_wrapper)

model_kwargs = dict(
    xseq_len=xseq_len,
    yseq_len=yseq_len,
    xvocab_size=xvocab_size,
    yvocab_size=yvocab_size,
    ckpt_path='ckpt/cornell_corpus/',
    emb_dim=emb_dim,
    num_units=num_units,
    num_heads=num_heads,
    batch_size=batch_size,
    epochs=epochs,
    max_lr=max_lr,
    min_lr=min_lr,
    lr_step=lr_step,
    num_layers=3,
)
model = seq2seq_wrapper.Seq2Seq(**model_kwargs)

<log> Building Graph </log>

In [7]:
# One batch generator (data_utils.rand_batch_gen) per split, all using the same
# batch size. The calls happen in the order validation, test, train.
val_batch_gen, test_batch_gen, train_batch_gen = (
    data_utils.rand_batch_gen(inputs, targets, batch_size)
    for inputs, targets in ((validX, validY), (testX, testY), (trainX, trainY))
)

In [None]:
# Train with the training generator, passing the validation generator as the second
# argument. The return value is kept as `sess` and is what the prediction cells pass
# to model.predict().
# NOTE(review): the execution marker for this cell is empty ("In [None]"), so it was
# not run in the saved session; `sess` was obtained from restore_last_session() instead.
sess = model.train(train_batch_gen, val_batch_gen)

In [8]:
# Obtain `sess` from the wrapper instead of training from scratch.
# NOTE(review): presumably this loads the newest checkpoint under the model's
# ckpt_path — confirm in seq2seq_wrapper.
sess = model.restore_last_session()

In [66]:
# Use the first element of the next test batch as model input, run prediction,
# and report the shape of the result.
batch = next(test_batch_gen)
input_ = batch[0]
output = model.predict(sess, input_)
print(output.shape)

(128, 25, 8002)


In [12]:
# Decode each input sequence and its predicted reply back to text, printing only
# the pairs whose reply contains no 'unk' token.
#
# The prediction cell reports a 3-D result from model.predict(), and the sampling cell
# further down takes an argmax over axis 2 to obtain token ids. Do the same here so
# this cell also works when the notebook is run top to bottom; a 2-D array of token
# ids is used unchanged.
output_ids = np.argmax(output, axis=2) if np.ndim(output) == 3 else output

replies = []
# Iterating input_.T pairs each input with one prediction row.
# NOTE(review): assumes input_ is laid out as (steps, examples) — confirm in data_utils.
for question_ids, reply_ids in zip(input_.T, output_ids):
    q = data_utils.decode(sequence=question_ids, lookup=metadata['idx2w'], separator=' ')
    decoded = data_utils.decode(sequence=reply_ids, lookup=metadata['idx2w'], separator=' ').split(' ')
    if 'unk' in decoded:
        continue
    # Duplicate replies are kept; the original de-duplication check was disabled.
    print('q : [{0}]; a : [{1}]'.format(q, ' '.join(decoded)))
    replies.append(decoded)

q : [frightening thought]; a : [i dont know]
q : [whys that]; a : [you know what you want]
q : [im sorry]; a : [i dont know]
q : [open that door damn you]; a : [i dont know]
q : [what do you mean]; a : [i dont know]
q : [andy before we begin id just like to say everyone in this room is your friend]; a : [what are you talking about]
q : [yeah i understand hes been up here the last six weeks]; a : [i dont know what you mean]
q : [they had me from the gate]; a : [yes sir]
q : [yes hes fine too you saw them at easter]; a : [yes sir]
q : [i need to i need to believe something this is the first time ive ever come up against anything i couldnt understand]; a : [i dont know]
q : [dont get mental man]; a : [i dont know]
q : [you havent said anything]; a : [i dont know]
q : [oh good]; a : [i dont know what you want]
q : [things change]; a : [you know what you want]
q : [yeah well miss breakfast]; a : [you know what you want]
q : [my father had farm land it was the only thing we owned of any valu

In [10]:
import tensorflow as tf
import numpy as np
import random

# preprocessed data
from datasets.cornell_corpus import data
import data_utils

# Load the corpus again and redo the split (the same two calls as the loading cell
# near the top of the notebook).
# NOTE(review): generators created earlier keep the arrays they were built from;
# rebinding these names does not change what they yield.
metadata, idx_q, idx_a = data.load_data(PATH='datasets/cornell_corpus/')
splits = data_utils.split_dataset(idx_q, idx_a)
(trainX, trainY), (testX, testY), (validX, validY) = splits


# Fetch a fresh test batch, run prediction, and take the highest-scoring index along
# axis 2 as the greedy ("max") reply; print the first row as a quick check.
batch = next(test_batch_gen)
input_ = batch[0]
output = model.predict(sess, input_)
output_max = np.argmax(output, axis=2)
print(output_max[0, :])

def _sample_token_ids(logits):
    """Sample one index along the last axis from softmax(logits).

    Args:
        logits: array whose last axis holds one score per candidate index
            (here: the array returned by model.predict()).

    Returns:
        int32 array shaped like `logits` without its last axis, holding the
        sampled index for every position.
    """
    logits = np.asarray(logits)
    last_index = logits.shape[-1] - 1

    # Shift by the per-position maximum before exponentiating: the softmax value is
    # unchanged, but large scores can no longer overflow np.exp into inf/NaN.
    probs = np.exp(logits - logits.max(axis=-1, keepdims=True))
    probs /= probs.sum(axis=-1, keepdims=True)

    # Inverse-CDF sampling: for each position pick the first index whose cumulative
    # probability reaches a uniform draw.
    cdf = np.cumsum(probs, axis=-1)
    draws = np.random.uniform(0.0, 1.0, size=cdf.shape[:-1] + (1,))
    reached = cdf >= draws

    # Rounding can leave the final cumulative sum just below the draw; fall back to
    # the last index in that case (the original loop did the same).
    ids = np.where(reached.any(axis=-1), reached.argmax(axis=-1), last_index)
    return ids.astype(np.int32)


# Dimensions are taken from `output` itself rather than from batch_size and
# hard-coded sequence/vocabulary sizes, so any prediction shape is handled.
pred = _sample_token_ids(output)

# Kept from the original cell: `output` ends up bound to the sampled ids.
output = pred
print(pred[0, :])

# For every input in the batch, show the greedy ("max") reply next to the sampled
# ("pred") reply. Only the greedy token list is collected in `replies`.
replies = []
vocab = metadata['idx2w']
for question_ids, greedy_ids, sampled_ids in zip(input_.T, output_max, pred):
    q = data_utils.decode(sequence=question_ids, lookup=vocab, separator=' ')
    decoded_max = data_utils.decode(sequence=greedy_ids, lookup=vocab, separator=' ').split(' ')
    decoded_pred = data_utils.decode(sequence=sampled_ids, lookup=vocab, separator=' ').split(' ')
    for line_fmt, tokens in (('q : [{0}]; max a : [{1}]', decoded_max),
                             ('q : [{0}]; pred a : [{1}]', decoded_pred)):
        print(line_fmt.format(q, ' '.join(tokens)))
    replies.append(decoded_max)

[ 3 15 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
[  3 101  50  10   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0]
q : [joe zack is a good prospect exciting boy]; max a : [i dont know]
q : [joe zack is a good prospect exciting boy]; pred a : [i mean think of]
q : [ah no its fine]; max a : [i dont know]
q : [ah no its fine]; pred a : [and sing know well]
q : [you okay huh jimmy]; max a : [i dont know]
q : [you okay huh jimmy]; pred a : [i dont put so]
q : [who did you kill]; max a : [i dont know]
q : [who did you kill]; pred a : [she cant know everything]
q : [christ they even got my first grade report card here]; max a : [i dont know]
q : [christ they even got my first grade report card here]; pred a : [no guess want]
q : [mr carter the head butler sits there]; max a : [i dont know]
q : [mr carter the head butler sits there]; pred a : [why didnt know who]
q : [i hated to go to that place i almost went to the police statio