In [1]:
import os
import sys

# Put the parent directory on the import path (once) so that modules living
# one level above this notebook can be imported.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import tensorflow as tf
from tensorflow.keras import backend as K
from interactions import ExactInteractions

import numpy as np


## Exact layer 

Creates an interaction matrix $S$ where each entry $s_{ij}$ is defined by:

$s_{ij}=\begin{cases}1 & q_i = d_j\\0 & \text{otherwise}\end{cases}$

In [2]:
   
# Reset Keras' global state before building a new model.
K.clear_session()

# Two int32 inputs: one of length 10 (query) and one of length 12 (sentence).
input_query = tf.keras.layers.Input(shape=(10,), dtype="int32")
input_sentence = tf.keras.layers.Input(shape=(12,), dtype="int32")

# The layer is called on the [query, sentence] pair.
exact_interaction = ExactInteractions()
_out = exact_interaction([input_query, input_sentence])

model = tf.keras.Model(
    inputs=[input_query, input_sentence],
    outputs=_out,
)
model.summary()
    

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 10)]         0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 12)]         0                                            
__________________________________________________________________________________________________
exact_interactions (ExactIntera (None, 10, 12)       0           input_1[0][0]                    
                                                                 input_2[0][0]                    
Total params: 0
Trainable params: 0
Non-trainable params: 0
__________________________________________________________________________________________________


In [3]:
# Draw the example inputs from a fixed-seed generator so that the query and
# document (values in [0, 4)) are the same on every run of the notebook,
# instead of changing with the global NumPy random state.
rng = np.random.RandomState(0)
query = rng.randint(0, 4, (1, 10))
document = rng.randint(0, 4, (1, 12))

In [4]:
# Run the model on the query/document pair. The bare `y` on the last line
# makes the notebook display the returned array.
y = model.predict([query, document])
y

array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 1., 0., 1., 0., 0., 1., 1., 1.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 1., 0., 1., 0., 0., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 1., 0., 1., 0., 0., 1., 1., 1.],
        [1., 0., 0., 0., 1., 0., 1., 0., 0., 1., 1., 1.]]], dtype=float32)

In [5]:
# add padding: overwrite, in place, the query from position 5 onwards and the
# document from position 6 onwards with zeros, then run the model again
query[:, 5:] = 0
document[:, 6:] = 0
y = model.predict([query, document])
y

array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]], dtype=float32)

## Semantic layer

In [2]:
from interactions import SemanticInteractions

In [3]:
from nir.embeddings import FastText
from nir.tokenizers import Regex

# Locations of the cache folder and of the pre-trained FastText binary.
# The defaults are the original machine-specific paths; set the
# NIR_CACHE_FOLDER / NIR_FASTTEXT_PATH environment variables to run this
# notebook on a machine with a different layout.
cache_folder = os.environ.get("NIR_CACHE_FOLDER", "/backup/IR")
fasttext_path = os.environ.get(
    "NIR_FASTTEXT_PATH",
    "/backup/pre-trained_embeddings/fasttext/wiki.en.bin",
)
prefix_name = "disk4_5"

# load tokenizer
tk = Regex.load_from_json(cache_folder=cache_folder, prefix_name=prefix_name)

# load embedding matrix
ft = FastText.maybe_load(cache_folder = cache_folder,
                         prefix_name = prefix_name,
                         path = fasttext_path,
                         tokenizer = tk)

emb_matrix = ft.embedding_matrix()

DEBUG created tokenizer disk4_5_RegexTokenizer
False False
[LOAD FROM CACHE] Load embedding matrix from /backup/IR/embedding_wiki_disk4_5_RegexTokenizer


In [4]:

# Reset Keras' global state before building a new model.
K.clear_session()

# Two int32 inputs: one of length 8 (query) and one of length 4 (sentence).
input_query = tf.keras.layers.Input(shape=(8,), dtype="int32")
input_sentence = tf.keras.layers.Input(shape=(4,), dtype="int32")

# The layer is constructed from the embedding matrix and called on the
# [query, sentence] pair.
semantic_interaction = SemanticInteractions(emb_matrix)
_out = semantic_interaction([input_query, input_sentence])

model = tf.keras.Model(
    inputs=[input_query, input_sentence],
    outputs=_out,
)
model.summary()
    

[EMBEDDING MATRIX SHAPE] (228107, 300)
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 8)]          0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 4)]          0                                            
__________________________________________________________________________________________________
semantic_interactions (Semantic (None, 8, 4, 3)      600         input_1[0][0]                    
                                                                 input_2[0][0]                    
Total params: 600
Trainable params: 600
Non-trainable params: 0
__________________________________________________________________________________________

In [5]:
# Draw the example inputs from a fixed-seed generator so that the query and
# document (values in [0, 100000)) are the same on every run of the notebook,
# instead of changing with the global NumPy random state.
rng = np.random.RandomState(0)
query = rng.randint(0, 100000, (1, 8))
document = rng.randint(0, 100000, (1, 4))

In [6]:
y = model.predict([query, document])

# Print the three slices along the last axis of the output, in order.
for channel in range(3):
    print(y[0, :, :, channel])

[[0.16927525 0.09146953 0.23411486 0.07597737]
 [0.15624669 0.22478929 0.32109836 0.17741339]
 [0.1435098  0.16997162 0.12787491 0.21293262]
 [0.18161502 0.22154315 0.16198552 0.12080882]
 [0.1844356  0.13915682 0.35902923 0.30264208]
 [0.16886978 0.1289281  0.15510595 0.2033602 ]
 [0.3272297  0.09994481 0.34669426 0.2559813 ]
 [0.11897507 0.18751553 0.14743946 0.14826363]]
[[-0.04177718 -0.04177718 -0.04177718 -0.04177718]
 [ 0.01132592  0.01132592  0.01132592  0.01132592]
 [-0.02593002 -0.02593002 -0.02593002 -0.02593002]
 [ 0.05383598  0.05383598  0.05383598  0.05383598]
 [-0.0068503  -0.0068503  -0.0068503  -0.0068503 ]
 [-0.00918168 -0.00918168 -0.00918168 -0.00918168]
 [-0.09145492 -0.09145492 -0.09145492 -0.09145492]
 [-0.02940275 -0.02940275 -0.02940275 -0.02940275]]
[[-0.00983386 -0.00423064 -0.05770767 -0.06851834]
 [-0.00983386 -0.00423064 -0.05770767 -0.06851834]
 [-0.00983386 -0.00423064 -0.05770767 -0.06851834]
 [-0.00983386 -0.00423064 -0.05770767 -0.06851834]
 [-0.00983

In [8]:
# add padding: overwrite, in place, the query from position 4 onwards and the
# document from position 2 onwards with zeros, then run the model again
query[:, 4:] = 0
document[:, 2:] = 0
y = model.predict([query, document])

# Print the three slices along the last axis of the output, in order.
for channel in range(3):
    print(y[0, :, :, channel])

[[ 0.16927525  0.09146953 -0.         -0.        ]
 [ 0.15624669  0.22478929 -0.         -0.        ]
 [ 0.1435098   0.16997162 -0.         -0.        ]
 [ 0.18161502  0.22154315 -0.         -0.        ]
 [-0.          0.          0.          0.        ]
 [-0.          0.          0.          0.        ]
 [-0.          0.          0.          0.        ]
 [-0.          0.          0.          0.        ]]
[[-0.04177718 -0.04177718 -0.         -0.        ]
 [ 0.01132592  0.01132592  0.          0.        ]
 [-0.02593002 -0.02593002 -0.         -0.        ]
 [ 0.05383598  0.05383598  0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.        ]]
[[-0.00983386 -0.00423064  0.          0.        ]
 [-0.00983386 -0.00423064  0.          0.        ]
 [-0.00983386 -0.00423064  0.          0.        ]
 [-0.00983386 -0.00423064  0.