# Building the Question Answering System

In [1]:
from collections import defaultdict
import re
import numpy as np
import pandas as pd
import sklearn
from nltk.tokenize import word_tokenize
from keras.models import Model
from keras.layers import Input, Dense, GRU, Masking, Lambda, TimeDistributed
from keras.preprocessing.sequence import pad_sequences
import keras.backend as K
import tensorflow as tf

from attention_gru import SoftAttnGRU

Using TensorFlow backend.


# Preprocessing

#### Hyperparameter/TO DO Notes
- Word vector size (50,100,200,300)
- Still need to find a way feed in all of the sentences into input module

In [2]:
train_df=pd.read_json("C:/Users/Lukas Buteliauskas/Desktop/training_data.json").reset_index(drop=True)
dev_df=pd.read_json("C:/Users/Lukas Buteliauskas/Desktop/validation_data.json").reset_index(drop=True)

## Word Vectorization
To be able to use words, phrases, questions or other natural language constructs in our model we require a to provide our neural network a numerical representation of our words (as these are the elemental NLP 'particles'). The simplest implementation would be to use 'one hot encoding' and define each word as a vector the size of our dictionary (the number of unique words found in our collection of documents, our corpus). However, this approach will most likely be insufficient for the purposes of a question answering system. word2vec and GloVe are 2 popular choices sophisticated options for word embeddings that also capture word similarities. I will not go into the details of either architecture other than to say that we will not be re-training the word vectors due to the insufficient size of the dataset, and we will begin with the GloVe word embeddings due to it's superior performance in most 'downstream' modelling tasks. Having said that, given the simplicity of swapping word vector representations we will also test out performance with word2vec (providing we can do so in a time-efficient manner).

Info and download links for GloVe can be found at: https://nlp.stanford.edu/projects/glove/

### 'Word Vector' Custom Functions

In [3]:
def get_word_vector_dict(url_or_path):
    """Takes a URL or a local path and returns a dictionary of GloVe word vectors where the key is the word and the value is the 
    word vector with the dimension specified in the input file."""
    with open(url_or_path, encoding="utf8") as glove_text:
        word_embeddings=[line.split(" ") for line in glove_text.readlines()]
    word_vector_dict={element[0]:list(map(float, element[1:])) for element in word_embeddings}
    
    return word_vector_dict


def get_word_vector_df(url_path_or_dict):
    """Takes a URL or path like the previous function, or can take a word vector dictionary and returns a word vector dataframe.
    Rows of the dataframe are the word vectors, columns are the dimensions of the word vector, indices are the words."""
    
    if type(url_path_or_dict) is str:
        with open(url_path_or_dict, encoding="utf8") as glove_text:
            word_lines=glove_text.readlines()
        word_embeddings=[line.split(" ") for line in word_lines]
        word_vector_dict={element[0]:list(map(float, element[1:])) for element in word_embeddings}
        word_vector_df=pd.DataFrame(word_vector_dict).transpose()
    
    else:
        word_vector_df=pd.DataFrame(url_path_or_dict).transpose()
    
    return word_vector_df

### Setting up the Word Vectors
As mentioned above with regards to what model we use for the word vectors, it's important to note that the dimention of the word vectors is a hyperparameter of the Neural Networks to come, so to keep our options open we imported a few different word vectors representations and the custom functions defined above make this a 'one line of code' affair (dictionary or dataframe).


In [4]:
word_vector_50_dict=get_word_vector_dict("C:/Users/Lukas Buteliauskas/Desktop/glove.6B.50d.txt")
word_vector_50_df=get_word_vector_df(word_vector_50_dict)
vocab=np.array(word_vector_50_dict.keys()) #400k words as per the documentation.

word_vector_100_dict=get_word_vector_dict("C:/Users/Lukas Buteliauskas/Desktop/glove.6B.100d.txt")
word_vector_100_df=get_word_vector_df(word_vector_100_dict)

In [5]:
"""word_vector_200_dict=get_word_vector_dict("C:/Users/Lukas Buteliauskas/Desktop/glove.6B.200d.txt")
word_vector_200_df=get_word_vector_df(word_vector_200_dict)

word_vector_300_dict=get_word_vector_dict("C:/Users/Lukas Buteliauskas/Desktop/glove.6B.300d.txt")
word_vector_300_df=get_word_vector_df(word_vector_300_dict)"""

'word_vector_200_dict=get_word_vector_dict("C:/Users/Lukas Buteliauskas/Desktop/glove.6B.200d.txt")\nword_vector_200_df=get_word_vector_df(word_vector_200_dict)\n\nword_vector_300_dict=get_word_vector_dict("C:/Users/Lukas Buteliauskas/Desktop/glove.6B.300d.txt")\nword_vector_300_df=get_word_vector_df(word_vector_300_dict)'

## Pre-Processing

### Tokenization, Embedding Custom Functions

In [6]:
def tokenize(string):
    """Takes a string (either context, question or answer) and returns a list of tokens."""
    tokens=[token.replace("``", '"').replace("''", '"').lower() for token in word_tokenize(string)]
    
    split_tokens=[]
    for token in tokens:
        split_tokens.extend(re.split('(\W+)', token))
    return [token for token in split_tokens if token!=" " and token!=""]


def string_to_embedding(string, word_vector_dict=word_vector_50_dict):
    """Takes a context, question or answer and returns a list of word tokens in their vectorized form.
    Updated to accept tokenizations for faster training."""
    tokens=[]
    embedding=[]
    
    if isinstance(string, str) is True:
        tokens=np.array(tokenize(string))
    else:
        tokens=np.array(string)
    
    for token in tokens:
        if token in word_vector_dict.keys():
            embedding.extend(word_vector_dict[token])
        else:
            # We are vectorizing words with no embedding with the 'unk' token vectorization (already in GloVe)
            embedding.extend(word_vector_dict["unk"])
    
    return np.array(embedding)


def get_sent_end_idx(context_tokenizations):
    """ Get indices of tokens that are '.' (sentence end tokens). For one or many contexts."""
    return np.array([np.where(np.array(context)==".") for context in context_tokenizations])


def get_padded_contexts(dataframe_indices_or_contexts):
    """Take a list of indices or a list of contexts and return a list of padded context embeddings ready for the Input Module."""
    context_index_pairs=list(zip(train_df.context.values, train_df.index.values))
    padded_contexts_fresh=[]
    
    if isinstance(dataframe_indices_or_contexts, int): # if a single index is provided
        context=context_index_pairs[dataframe_indices_or_contexts][0]
        padding_index=int(np.where(unique_contexts==context)[0])
        padded_contexts_fresh.append(unique_padded_contexts[padding_index])
        
    elif isinstance(dataframe_indices_or_contexts[0], int): # if we are given a list of indices.
          for index in dataframe_indices_or_contexts:
                context=context_index_pairs[index][0]
                padding_index=int(np.where(unique_contexts==context)[0])
                padded_contexts_fresh.append(unique_padded_contexts[padding_index])
    
    elif isinstance(dataframe_indices_or_contexts[0], str): # if a list of contexts or a single context is provided
        if len(dataframe_indices_or_contexts[0])!=1: # if a list of contexts
            for context in dataframe_indices_or_contexts:
                padding_index=int(np.where(unique_contexts==context)[0])
                padded_contexts_fresh.append(unique_padded_contexts[padding_index])   
        else: # if a single context
            padding_index=int(np.where(unique_contexts==dataframe_indices_or_contexts)[0])
            padded_contexts_fresh.append(unique_padded_contexts[padding_index])
    
    else:
        print("NO EMBEDDING PROVIDED, INCORRECT PARAMETER DATA TYPE")
      
    return np.array(padded_contexts_fresh).reshape(len(padded_contexts_fresh),-1,50)

The nltk tokenizer generated around 110,00 unique tokens from our contexts, questions and answers in our dataset. ~31,000 of those tokens did not have pre-trained word vectorizations in the GloVe model. Some of these tokens were numbers, expressed as strings in an unfamiliar format, some of these tokens were misspelled words, some of these tokens were works in other languages, or symbols from other alphibets and so on. 
The ideal case would be to assign to each token in our corpus a 'meaningful' word vectorization, however given the time constraint on this project, we will try to provide meaningful word vectorizations to as many of these tokens as possible through pre-processing steps following the initial word tokenization (using the nltk tokenizer).

### Tokenizing, Embedding and Padding Contexts, Questions and Answers

In [7]:
"""In this section we seperate out the contexts, questions and answers, we embed finally embed all our words into 
vector representations and pad the sequences to fulfil Keras' input requirements."""

# Context Stuff (we embed and pad unique contexts and not all, due to memory limits)
contexts, questions, answers=(train_df.context.values, train_df.question.values, train_df.answer_text.values)
contexts_tokenized=np.array([tokenize(context) for context in contexts])
#embedded_contexts=[string_to_embedding(tokenized_context) for tokenized_context in contexts_tokenized]
unique_contexts=train_df.context.unique()
unique_contexts_tokenized=np.array([tokenize(context) for context in unique_contexts])
max_context_len=np.max([len(context) for context in unique_contexts_tokenized])
unique_embedded_contexts=[string_to_embedding(tokenized_context) for tokenized_context in unique_contexts_tokenized]  
unique_padded_contexts=pad_sequences(unique_embedded_contexts, max_context_len*50, padding="post",
                              dtype=np.dtype('f4')).reshape(len(unique_contexts),-1,50)


# Questions Stuff
questions_tokenized=np.array([tokenize(question) for question in questions])
max_question_len=np.max([len(question) for question in questions_tokenized])
embedded_questions=np.array([string_to_embedding(tokenized_question) for tokenized_question in questions_tokenized])
padded_questions=pad_sequences(embedded_questions, max_question_len*50, padding="post",
                               dtype="float32").reshape(len(questions),-1,50)


# Answers Stuff
answers_tokenized=np.array([tokenize(answer) for answer in answers])
embedded_answers=np.array([string_to_embedding(tokenized_answer) for tokenized_answer in answers_tokenized])
max_answer_len=np.max([len(answer) for answer in answers_tokenized])

# Other useful variables
sent_end_indeces=get_sent_end_idx(contexts_tokenized)
sentence_lengths=sentence_lengths=[len(indices[0]) for indices in sent_end_indeces]
max_num_sentences=np.max(sentence_lengths)
"""#padded_contexts_full=get_padded_contexts(train_df.index) Still can't fix the memory issue"""

"#padded_contexts_full=get_padded_contexts(train_df.index) Still can't fix the memory issue"

## Building the Dynamic Memory Network

In [8]:
                                                """ Hyperparameters"""
word_vector_size=50
hidden_units=10
num_memory_passes=3
num_of_samples=10

In [120]:
params = {'indices': sent_end_indeces[0:num_of_samples]} # WILL NEED TO CHANGE THIS LATER
setattr(K, 'params', params)
sent_end_indeces = K.params['indices']

def get_fact_representations(input_module_output, sent_end_indeces):
    """ We take in an N-Dim Array of shape (num samples, number of timesteps/words, size of fact embedding) and return the 
    facts of each sample (context) at the timesteps corresponding to 'ends of sentences' (at '.')."""
    """ Do not mistake the method parameter for the array of sent_end_indeces. The sent_end_indeces here is a subset of the
    original array."""
    assert input_module_output.shape[0]==sent_end_indeces.shape[0] #number of samples is equal to the given number of indices.
    complete_fact_representations=[]
    
    if input_module_output.shape[0]!=1: # for a list of samples
        for idx, timesteps in enumerate(input_module_output):
            sample_fact_representations=timesteps[sent_end_indeces[idx][0]] # get the timesteps at the end of sentences
            complete_fact_representations.append(sample_fact_representations)
    else: # for a single sample
         complete_fact_representations.append(input_module_output[0][sent_end_indeces[0]])
    
    return np.array(complete_fact_representations)


def get_facts(facts_output):
    """Input: facts_output, is a 3D tensor of all the timesteps/samples. Shape=(num samples, max # words, 50)
       Output: facts_tensor_list, a tensor list of the facts for each sample. Each 2D tensor has different shape. """
    facts_tensor_list=[]
    
    for sample_index in range(num_of_samples): #iterate over each sample
        facts=tf.nn.embedding_lookup(facts_output[sample_index], ids=sent_end_indeces[sample_index][0])
        facts_tensor_list.append(facts)
    
    return facts_tensor_list


def compute_attention(facts, question, memory):
    """Input: facts, 2D Tensor of the facts for each sample. question, 1D tensor of the question. memory, 1D tensor of memory.
       Output: attentions, 1D tensor of attention scores (scalars)"""
    def get_single_attention(fact):
        z=[tf.multiply(fact, question), tf.multiply(fact, memory), K.abs(fact-question), K.abs(fact-memory)]
        z_new=Lambda((lambda x: K.stack(x)))([K.concatenate(z, axis=0)])
        g_t_i = Dense(units=word_vector_size, activation='tanh')(K.concatenate(z, axis=0)) # LOOK HERE BRANKO. shape=(40,)
        #g_t_i = Dense(units=word_vector_size, activation='tanh')(z_new) has shape (1,40)
        g_t_i = Dense(units=1, activation="sigmoid")(g_t_i)
        return g_t_i
    
    attentions=tf.map_fn(fn=get_single_attention, elems=facts)
    return attentions

### Input Module

In [49]:
context_input=Input(shape=(max_context_len, word_vector_size))
context_mask=Masking(mask_value=0.0)(context_input)
facts_output=GRU(units=hidden_units, return_sequences=True)(context_mask)
facts_tensor_list=get_facts(facts_output)

input_model=Model(inputs=context_input, outputs=facts_output)
input_module_outputs=input_model.predict(get_padded_contexts([x for x in range(num_of_samples)]))
#facts_collection_numpy=get_fact_representations(input_module_outputs, sent_end_indeces[0:num_of_samples])

### Question Module

In [11]:
# question module input shape=[rows, timesteps, columns] or [num of samples, Max # of words in q, word vector size]

question_input=Input(shape=(max_question_len, word_vector_size))
question_mask=Masking(mask_value=0.0)(question_input)
question_output=GRU(units=hidden_units)(question_mask)

question_model=Model(inputs=question_input, outputs=question_output)
question_model_outputs=question_model.predict(padded_questions[0:num_of_samples].reshape(num_of_samples,-1,word_vector_size))

#EVERYTHING HERE IS FINE, USE QUESTION_OUTPUT

In [114]:
                                                    """ TESTING """
"""This is a simulation of the error happening in the 'compute_attention' method. The dense layer expects a 2D tensor. Is """
facts=facts_tensor_list[0]
print(K.concatenate([facts[0], facts[1], facts[2], facts[3]], axis=0).shape)
print(K.stack([K.concatenate([facts[0], facts[1], facts[2], facts[3]])], axis=0).shape)

(40,)
(1, 40)


In [121]:
                                        """ Semantic Memory Module """
for sample_index in range(num_of_samples): # Iterating over each sample index (we can't iterate over context-question)
    memory=question_output[sample_index]
    
    for memory_iteration in range(num_memory_passes):
        facts=facts_tensor_list[sample_index]
        question=question_output[sample_index]
        print("Sample iter %d Memory iter %d" %(sample_index, memory_iteration))
        
        """Error is in the line below"""
        attentions=compute_attention(facts, question , memory) # Get attention scores for each fact in a sample.
    
    
        # new_memory=GRU(units=hidden_units)(e^i)
        #memory=new_memory


Sample iter 0 Memory iter 0


ValueError: Input 0 is incompatible with layer dense_19: expected min_ndim=2, found ndim=1

In [None]:
"""
def get_unique_tokens(df):
    #Given a dataframe containing contexts, questions and answers, the function returns a list of unique tokens.
    pieces_of_text=list(df["context"].unique()) + list(df["title"].unique()) + list(df["question"].unique()) 
    pieces_of_text+=list(df["answer_text"].unique())

    non_unique_tokens=[]
    for text in pieces_of_text:
        temp_tokens=word_tokenize(text)
        non_unique_tokens.append(temp_tokens)

    unique_tokens=set()
    for token in non_unique_tokens:
        unique_tokens.update(set(token))
    
    return [token.replace("``", '"').replace("''", '"').lower() for token in list(unique_tokens)]

def split_keep_sep(tokens, sep, only_unique=True):
    Takes a string or a list of tokens, and splits on 'sep' while keeping sep. Returns a set of unique tokens, or a list
    of tokens after splitting.
    DO NOT PASS IN A STRING OR IT WILL RETURN A BUNCH OF CHARACTERS
    split_tokens=[]
    for token in tokens:
        if only_unique==True:
            for sub_token in re.split("("+ sep + ")", token):
                if sub_token not in split_tokens:
                    split_tokens.append(sub_token)
        else: # if we want just a list of all the tokens after seperation
            split_tokens.extend(re.split("("+ sep + ")", token))
                
    return [token for token in split_tokens if token!=""]"""

"""def tokenize_further(tokens, seperator_list=["-","–","—","'","£","/",":"]):
    Further splits the tokens on the seperators, while keeping the seperators as tokens.
    Returns a further tokenized list of tokens.
    split_tokens=split_keep_sep(tokens, seperator_list[0], only_unique=False)
    for sep in seperator_list[1:]:
        split_tokens=split_keep_sep(split_tokens, sep, only_unique=False)
    
    return [token for token in split_tokens if token!=""] # filters out "" (empty space tokens)"""

"""
unique_tokens=get_unique_tokens(train_df)
no_embeddings=[token for token in unique_tokens if token not in word_vector_50_dict.keys()]
split_unique_tokens=split_keep_sep(unique_tokens, "-")
split_unique_tokens2=split_keep_sep(split_unique_tokens, "–")
split_unique_tokens3=split_keep_sep(split_unique_tokens2, "—")
split_unique_tokens4=split_keep_sep(split_unique_tokens3, "'")
split_unique_tokens5=split_keep_sep(split_unique_tokens4, "£")
split_unique_tokens6=split_keep_sep(split_unique_tokens5, "/")
split_unique_tokens7=split_keep_sep(split_unique_tokens6, ":")

print("Num of initial tokens:", len(unique_tokens), "\nNum of tokens after '-' split:", len(split_unique_tokens))
print("Num of tokens after 2nd split", len(split_unique_tokens2),"\nNum of tokens after 3rd split", len(split_unique_tokens3))
print("Num of tokens after 4th split", len(split_unique_tokens4), "\nNum of tokens after 5th split", len(split_unique_tokens5))
print("Num of tokens after 6th split", len(split_unique_tokens6), "\nNum of tokens after 7th split", len(split_unique_tokens7),"\n")

no_embeddings_2=[token for token in split_unique_tokens if token not in word_vector_50_dict.keys()]
no_embeddings_3=[token for token in split_unique_tokens2 if token not in word_vector_50_dict.keys()]
no_embeddings_4=[token for token in split_unique_tokens3 if token not in word_vector_50_dict.keys()]
no_embeddings_5=[token for token in split_unique_tokens4 if token not in word_vector_50_dict.keys()]
no_embeddings_6=[token for token in split_unique_tokens5 if token not in word_vector_50_dict.keys()]
no_embeddings_7=[token for token in split_unique_tokens6 if token not in word_vector_50_dict.keys()]
no_embeddings_8=[token for token in split_unique_tokens7 if token not in word_vector_50_dict.keys()]

print("Number of tokens with no embedding in GloVe (with the current tokenization):")
print("with '-' with words:",len(no_embeddings),"\nwith '-' seperate:", len(no_embeddings_2),)
print("with 2 - with words:", len(no_embeddings_3),"\nwith all - with words:", len(no_embeddings_4))
print("with all - and ' with words:", len(no_embeddings_5) ,"\nwith all -, ', £ with words:", len(no_embeddings_6))
print("with all -, ', £, / with words:", len(no_embeddings_7),"\nwith all -, ', £, /, : with words:", len(no_embeddings_8),"\n")

print(no_embeddings_8[0:100])


def string_to_embedding(string, word_vector_dict=word_vector_50_dict, context=False):
    Takes a context, question or answer and returns a list of word tokens in their vectorized form.
    If context, return a list of sequences. Updated to accept tokenizations for faster training.
    tokens=[]
    embedding=[]
    if isinstance(string, str) is True:
        tokens=np.array(tokenize(string))
    else:
        tokens=np.array(string)
    
    if context is False:
        for token in tokens:
            if token in word_vector_dict.keys():
                embedding.extend(word_vector_dict[token])
            else:
                # We are vectorizing words with no embedding with the 'unk' token vectorization (already in GloVe)
                embedding.extend(word_vector_dict["unk"])
    else:
        #Seperate out the sentences (on the '.')
        dot_indeces=np.nonzero(tokens==".")[0]
        prev_dot=0
        sentences=[]
        for dot in dot_indeces:
            sentences.append(tokens[prev_dot:dot+1])
            prev_dot=dot+1
    
        #Embed the sentences     
        for sentence in sentences:
            sentence_embedding=[]
            
            for token in sentence:
                if token in word_vector_dict.keys():
                    sentence_embedding.extend(word_vector_dict[token])
                else:
                    sentence_embedding.extend(word_vector_dict["unk"])
        
            embedding.append(sentence_embedding)
            sentence_embedding=[]
    
    return np.array(embedding)
    
def get_triplet(df, row_idx, cols=["context", "question", "answer_text"]):
    #Takes a dataframe and row index, and returns a (context, question, answer) triplet as a tuple.
    triplet_list=train_df.loc[row_idx, cols].values
    return (triplet_list[0], triplet_list[1], triplet_list[2])
    
    
    
    
context_index_dict=defaultdict(list)
context_index_pairs=list(zip(train_df.context.values, train_df.index.values))
for context, index in context_index_pairs:
    context_index_dict[context].append(index)
    
    
    
    
def get_z(c_t,m,q):
     Takes a facts array (c_t) for a single sample, a memory (m^i-1) and the question (q) and computes a 'similarity array'.
    Implimentation as in https://arxiv.org/pdf/1603.01417.pdf 
    z_s=[]
    for c in c_t: #length of each z is 4*len(c,m,q)
        z=[]
        z.extend(np.multiply(c,q))
        z.extend(np.multiply(c,m))
        z.extend(np.absolute(c-q))
        z.extend(np.absolute(c-m))
        z_s.append(z)
    return np.array(z_s)
    
    
    
    

def get_fact_representations_new(input_module_output):
     We take in an N-Dim Array of shape (num samples, number of timesteps/words, size of fact embedding) and return the 
    facts of each sample (context) at the timesteps corresponding to 'ends of sentences' (at '.')."""
    """ Do not mistake the method parameter for the array of sent_end_indeces. The sent_end_indeces here is a subset of the
    original array.
    sent_end_indeces = K.params['indices']
    facts_tensor=input_module_output
    #assert input_module_output.shape[0]==sent_end_indeces.shape[0] #number of samples is equal to the given number of indices.
    
    if facts_tensor.shape[0]!=1: # for a list of samples
        for idx, timesteps in enumerate(facts_tensor):
            timesteps=timesteps[sent_end_indeces[idx][0]] # get the timesteps at the end of sentences
    else: # for a single sample
        print("Reached else")
        print("facts_tensor[0]", facts_tensor[0])
        print("facts_tensor[0] shape:", facts_tensor[0].shape)
        print("sent_end_indeces[0]:", sent_end_indeces[0])
        facts_tensor=facts_tensor[0][sent_end_indeces[0]]
    

    return facts_tensor


def get_facts(facts_output):
    Input: inputs is a 3D tensor of all the timesteps/samples. Shape=(num samples, max # words, 50)
       Output: 3D tensor of the facts for each sample (context). Each 2D tensor has different shape. 
    sent_end_indeces = K.params['indices']
    #assert input_module_output.shape[0]==sent_end_indeces.shape[0] #number of samples is equal to the given number of indices.
    sample_idx=0
    def extract_facts(sample_timesteps):
        #facts=sample_timesteps[sent_end_indeces[sample_idx][0]] #extract timesteps corresponding to <EOS> token, ".". 
        facts=sample_timesteps[sent_end_indeces[0][0]] #extract timesteps corresponding to <EOS> token, ".". 
        #print("Sample idx:", sample_idx)
        print("Inside")
        print(sample_timesteps.shape)
        #sample_idx+=1
        return sample_timesteps
    
    print("Sample idx:", sample_idx)
    facts_tensor=K.map_fn(fn=extract_facts, elems=facts_output) 
    
    return facts_tensor
    
    
    
def get_z_tensors(c_t,m,q):
     Input: facts array (c_t) for a SINGLE SAMPLE, Memory (m^i-1), Question (q) and computes a 'similarity tensors'.
        Output: z(c_t,m,q) evaluated for each fact, so z_s len is number of facts c_t for that context.
        Implimentation as in https://arxiv.org/pdf/1603.01417.pdf 
    
    def compute_z(c_t): # 'tensor function' to compute z array for each fact.
        return K.concatenate([tf.multiply(c_t,q), tf.multiply(c_t,m), K.abs(c_t-q), K.abs(c_t-m)], axis=0)
    
    z_s=K.map_fn(fn=compute_z, elems=c_t) #for each fact, compute z(c_t, m, q). This is basically the tensor 'for loop'.
    print("z_s of a single sample:",z_s)
    
    return z_s
    
    
    
def get_facts_(facts_output):
    MAYBE WORK ON THIS TO GET A 3D TENSOR OUTPUT
    sent_end_indeces = K.params['indices']
    sample_index=0
        
    def extract_facts(sample_timesteps): 
        facts=tf.nn.embedding_lookup(facts_output[sample_index], ids=indices)
        sample_idx+=1
        return facts
    
    print("Sample idx:", sample_idx)
    facts_tensor=K.map_fn(fn=extract_facts, elems=facts_output) 
       
    return facts_tensor


"""