In [22]:
from tensorflow import keras
import os, pickle, numpy
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
import pandas as pd



# Load the pickled train/test splits. Each sample unpacks into a
# (story, question, answer) triple further below.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so these
# datasets must come from a trusted source.
with open('train_qa.txt', 'rb') as file:
    train_data = pickle.load(file)
print('----------------------------------------------------------------------')
print(f'Train data len: {len(train_data)}')

with open('test_qa.txt', 'rb') as file:
    test_data = pickle.load(file)
print('----------------------------------------------------------------------')
print(f'Test data len: {len(test_data)}')
print('----------------------------------------------------------------------')

# Both splits are combined so lengths and vocabulary cover every sample.
all_data = test_data + train_data

# Token counts per story / per question, used to find the padding lengths.
stories = [len(sample[0]) for sample in all_data]
questions = [len(sample[1]) for sample in all_data]

max_story_len = max(stories)
max_question_len = max(questions)

print('\nMax story length is:', max_story_len, 'words')
print('Max question length is:', max_question_len, 'words')

# Collect every distinct token that appears in any story or question.
vocabulary = set()
for story, question, answer in all_data:
    vocabulary.update(story, question)

# The two possible answers must be part of the vocabulary as well.
vocabulary.update(('no', 'yes'))

# One extra slot on top of the real words: a placeholder entry required by
# the Keras padding function.
vocab_size = len(vocabulary) + 1

print('\nTotal number of unique words in questions and stories:', vocab_size - 1)

from tensorflow.keras.preprocessing.sequence import pad_sequences

def vectorize_data(data,
                    word_index,
                    max_story_len,
                    max_question_len):
    """Convert (story, question, answer) samples into padded model inputs.

    Args:
        data: Iterable of ``(story, question, answer)`` triples. ``story`` and
            ``question`` are sequences of word strings; ``answer`` is a single
            word that is looked up in ``word_index`` as-is (not lower-cased).
        word_index: Mapping from lower-cased word to its integer index.
        max_story_len: ``maxlen`` passed to ``pad_sequences`` for stories.
        max_question_len: ``maxlen`` passed to ``pad_sequences`` for questions.

    Returns:
        A 3-tuple ``(stories, questions, answers)``: the padded story index
        sequences, the padded question index sequences, and an array holding
        one one-hot answer vector of length ``len(word_index) + 1`` per sample.

    Raises:
        KeyError: If a word (or the answer) is missing from ``word_index``.
    """
    story_ids = []
    question_ids = []
    answer_vectors = []

    for story, question, answer in data:
        # Story and question words are lower-cased before the lookup.
        story_row = [word_index[token.lower()] for token in story]
        question_row = [word_index[token.lower()] for token in question]

        # One-hot target: all zeros except a 1 at the answer word's index.
        target = np.zeros(len(word_index) + 1)
        target[word_index[answer]] = 1

        story_ids.append(story_row)
        question_ids.append(question_row)
        answer_vectors.append(target)

    padded_stories = pad_sequences(story_ids, maxlen=max_story_len)
    padded_questions = pad_sequences(question_ids, maxlen=max_question_len)
    return (padded_stories, padded_questions, np.array(answer_vectors))


# Rebuild the word -> index lookup from the CSV stored next to the model.
# The 'word' column is forced to str so every key is a string.
tokenizer_frame = pd.read_csv('model_checkpoints/tokenizer.csv', dtype={'word': str})
qa_tokenizer = dict(zip(tokenizer_frame['word'], tokenizer_frame['word_index']))

# Restore the saved Keras model from its checkpoint file.
model = keras.models.load_model('model_checkpoints/memory_network.h5')



def generate_question(data=None):
    """Pick a random sample, print it, and return its parts.

    Args:
        data: Optional sequence of ``(story, question, answer)`` samples to
            draw from, where ``story`` and ``question`` are sequences of word
            strings. When omitted, the module-level ``test_data`` is used,
            which matches the original zero-argument behaviour.

    Returns:
        A 4-tuple ``(story, question, answer, [sample])``. The last element
        wraps the chosen sample in a one-item list so it can be handed
        directly to ``vectorize_data``.

    Raises:
        ValueError: If there are no samples to choose from.
    """
    import random

    if data is None:
        data = test_data
    if len(data) == 0:
        raise ValueError('cannot generate a question from an empty dataset')

    # Draw a 0-based index and look the sample up once, instead of drawing a
    # 1-based number and repeating the ``[x-1]`` lookup for every field.
    sample = data[random.randrange(len(data))]
    story, question, answer = sample[0], sample[1], sample[2]

    print('\nStory:', ' '.join(story))
    print('Question:', ' '.join(question))
    print('Answer:', answer)

    return story, question, answer, [sample]


# Only the last element of the result (the sample wrapped in a one-item list)
# is needed here; it is the form vectorize_data expects.
question = generate_question()[-1]

my_story, my_ques, my_ans = vectorize_data(
    question, qa_tokenizer, max_story_len, max_question_len
)

pred_results = model.predict(([my_story, my_ques]))

# Read the scores the model assigned to the two candidate answer words for
# the single sample in the batch.
yes_prob = pred_results[0][qa_tokenizer['yes']]
no_prob = pred_results[0][qa_tokenizer['no']]

# 'yes' wins only when its score is strictly higher; ties fall to 'no'.
k, prob = ('yes', yes_prob) if yes_prob > no_prob else ('no', no_prob)

print("Predicted answer is: ", k)
print("Probability of certainty was: ", round(prob*100,2),"%")


----------------------------------------------------------------------
Train data len: 10000
----------------------------------------------------------------------
Test data len: 1000
----------------------------------------------------------------------

Max story length is: 156 words
Max question length is: 6 words

Total number of unique words in questions and stories: 37

Story: Mary moved to the bedroom . Daniel picked up the milk there . John went to the hallway . Mary travelled to the bathroom . Daniel put down the milk . Sandra travelled to the bathroom . John travelled to the office . Sandra went to the bedroom .
Question: Is Mary in the garden ?
Answer: no
Predicted answer is:  no
Probability of certainty was:  99.31 %
