In [1]:
import json
import time
import traceback
import numpy as np
from tqdm import tqdm
from utils import tensor_utils as tu
from utils import natural_language_utilities as nlutils

#Torch related functionalities
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Loading the dataset
dataset = json.load(open('resources/dataset_with_paths.json'))

# Model

# Data Preprocessing

### 1. Convert URIs to their surface forms

In [3]:
# Snippet for finding the surface forms of the hops in the dataset and adding it to the dataset.
for index,d in tqdm(enumerate(dataset)):
    hop1_sf = []
    hop2_sf = []
    
    for hop1 in d['hop1']:
        # ['+', 'http://dbpedia.org/ontology/office']
        sf1 = nlutils.get_label_via_parsing(hop1[1],lower=True)
        hop1_sf.append([hop1[0],sf1])
    
    for hop2 in d['hop2']:
        # ['+', 'http://dbpedia.org/ontology/office', '-', 'http://dbpedia.org/property/isCitedBy']
        sf1 = nlutils.get_label_via_parsing(hop2[1],lower=True)
        sf2 = nlutils.get_label_via_parsing(hop2[3],lower=True)
        hop2_sf.append([hop2[0],sf1,hop2[2],sf2])
    
    dataset[index]['hop1_sf'] = hop1_sf
    dataset[index]['hop2_sf'] = hop2_sf
    
    # Convert Positive paths as well
    dataset[index]['path_sf'] = [d['path'][0]]
    dataset[index]['path_sf'].append(nlutils.get_label_via_parsing(d['path'][1],lower=True))
    if len(d['path']) > 2:
        dataset[index]['path_sf'].append(d['path'][2])
        dataset[index]['path_sf'].append(nlutils.get_label_via_parsing(d['path'][3],lower=True))

2000it [00:38, 51.34it/s]


### 2. Create a vocabulary 

In [4]:
vocab = {'_pad_':0, '_unk_':1, '+':2, '-':3, '/':4, 'uri':5, 'x':6}

In [5]:
for d in tqdm(dataset):
    
    # Get words from the question
    for token in d['node']['corrected_question'].split():
        vocab.setdefault(token.lower(), len(vocab))
        
    # Get words from the positive path
    for token in d['path_sf']:
        for word in token.split(" "):
            vocab.setdefault(word.lower(), len(vocab))
        
    # Get words from the negative paths
    for path in d['hop1_sf'] + d['hop2_sf']:
        for token in path:
            for word in token.split(" "):
                vocab.setdefault(word.lower(), len(vocab))

itos = {value:key for key,value in vocab.items()}

100%|██████████| 2000/2000 [00:03<00:00, 500.14it/s]


In [6]:
len(itos)

10279

### 3. Create Trainable Data
**Q, P, N** pairs. E.g.

`Who is the president of USA`; `+ president`; `+ capital`

#### Step 3.1 idfy everything. 

In [7]:
for index,d in tqdm(enumerate(dataset)):
    
    # Idfyng question
    question_id = [vocab[word.lower()] for word in d['node']['corrected_question'].split()]
    
    # Idfying the positive path
    positive_path_id = []
    for token in d['path_sf']:
        for word in token.split(" "):
            positive_path_id.append(vocab[word.lower()])
    
    #idfying the generated negative path
    hop1_id = []
    
    for path in d['hop1_sf']:
        path_id = []
        for tokens in path:
            for word in tokens.split(" "):
                path_id.append(vocab[word.lower()])
        hop1_id.append(path_id)
    
    #idfying the generated negative path
    hop2_id = []
    
    for path in d['hop2_sf']:
        path_id = []
        for tokens in path:
            for word in tokens.split(" "):
                path_id.append(vocab[word.lower()])
        hop2_id.append(path_id)
    
    dataset[index]['question_id'] = question_id
    dataset[index]['hop1_id'] = hop1_id
    dataset[index]['hop2_id'] = hop2_id
    dataset[index]['positive_path_id'] = positive_path_id

2000it [00:04, 476.62it/s]


#### Step 3.2 split the data in train and test split

In [8]:
train_data,test_data = dataset[:int(len(dataset)*.80)],dataset[int(len(dataset)*.80):] 

#### Step 3.3 create triples Question,correct_path,incorrect_path 

In [9]:
train_question = []
train_positive_path = []
train_negative_path = []

for d in train_data:
    for path in d['hop1_id'] + d['hop2_id']:
        train_question.append(d['question_id'])
        train_positive_path.append(d['positive_path_id'])
        train_negative_path.append(path)

print(f"length of question, positive path and negative paths respecitvely are "
      f"{len(train_question)},{len(train_positive_path)},{len(train_negative_path)}")

length of question, positive path and negative paths respecitvely are 1147053,1147053,1147053


# Neural Network Model

In [10]:
class Model(object):
    """
        Boilerplate class which helps others have some common functionality.
        These are made with some debugging/loading and with corechains in mind

    """

    def prepare_save(self):
        pass

    def load_from(self, location):
        # Pull the data from disk
        model_dump = torch.load(location)

        # Load parameters
        for key in self.prepare_save():
            key[1].load_state_dict(model_dump[key[0]])

    def get_parameter_sum(self):

        sum = 0
        for model in self.prepare_save():

            model_sum = 0
            for x in list(model[1].parameters()):

                model_sum += np.sum(x.data.cpu().numpy().flatten())

            sum += model_sum

        return sum

    def freeze_layer(self,layer):
        for params in layer.parameters():
            params.requires_grad = False

    def unfreeze_layer(self,layer):
        for params in layer.parameters():
            params.requires_grad = True

class BiLstmDot(Model):

    def __init__(self, _parameter_dict, _word_to_id, _device, _pointwise=False, _debug=False):

        self.debug = _debug
        self.parameter_dict = _parameter_dict
        self.device = _device
        self.pointwise = _pointwise
        self.word_to_id = _word_to_id

        if self.debug:
            print("Init Models")

        self.encoder = NotSuchABetterEncoder(
            number_of_layer=1,
            bidirectional=self.parameter_dict['bidirectional'],
            embedding_dim=self.parameter_dict['embedding_dim'],
            max_length = self.parameter_dict['max_length'],
            hidden_dim=self.parameter_dict['hidden_size'],
            vocab_size=self.parameter_dict['vocab_size'],
            dropout=self.parameter_dict['dropout'],
            vectors=self.parameter_dict['vectors'],
            enable_layer_norm=False,
            mode = 'LSTM',
            debug = self.debug).to(self.device)


    def train(self, data, optimizer, loss_fn, device):
        '''
            Given data, passes it through model, inited in constructor, returns loss and updates the weight
            :params data: {batch of question, pos paths, neg paths and dummy y labels}
            :params optimizer: torch.optim object
            :params loss fn: torch.nn loss object
            :params device: torch.device object

            returns loss
        '''

        # Unpacking the data and model from args
        ques_batch, pos_batch, neg_batch, y_label = data['ques_batch'], data['pos_batch'], data['neg_batch'], data['y_label']
        neg_batch = tu.no_one_left_behind(neg_batch)
        
        optimizer.zero_grad()

        # Encoding all the data
        hidden = self.encoder.init_hidden(ques_batch.shape[0],self.device)
        _, ques_batch_encoded, _, _ = self.encoder(tu.trim(ques_batch), hidden)
        _, pos_batch_encoded, _, _ = self.encoder(tu.trim(pos_batch), hidden)
        _, neg_batch_encoded, _, _  = self.encoder(tu.trim(neg_batch), hidden)

        # Calculating dot score
        pos_scores = torch.sum(ques_batch_encoded * pos_batch_encoded, -1)
        neg_scores = torch.sum(ques_batch_encoded * neg_batch_encoded, -1)

        '''
            If `y == 1` then it assumed the first input should be ranked higher
            (have a larger value) than the second input, and vice-versa for `y == -1`
        '''
        try:
            loss = loss_fn(pos_scores, neg_scores, y_label)
        except RuntimeError:
            traceback.print_exc()
            print(pos_scores.shape, neg_scores.shape, y_label.shape,  ques_batch.shape, pos_batch.shape, neg_batch.shape)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.encoder.parameters(), .5)
        optimizer.step()
        return loss

    def predict(self, ques, paths, device):
        """
            prediction function.
        """
        with torch.no_grad():

            self.encoder.eval()
            hidden = self.encoder.init_hidden(ques.shape[0], self.device)

            _, question, _, _ = self.encoder(tu.trim(ques.long()), hidden)
            _, paths, _, _ = self.encoder(tu.trim(paths.long()), hidden)

            if self.pointwise:
                score = torch.sum(question * paths, -1)
            else:
                score = torch.sum(question * paths, -1)

            self.encoder.train()
            return score

    def prepare_save(self):
        """

            This function is called when someone wants to save the underlying models.
            Returns a tuple of key:model pairs which is to be interpreted within save model.

        :return: [(key, model)]
        """
        return [('encoder', self.encoder)]

    def load_from(self, location):
        # Pull the data from disk
        if self.debug: print("loading Bilstmdot model from", location)
        self.encoder.load_state_dict(torch.load(location)['encoder'])
        if self.debug: print("model loaded with weights ,", self.get_parameter_sum())

In [11]:
class NotSuchABetterEncoder(nn.Module):
    def __init__(self, max_length, hidden_dim, number_of_layer,
                 embedding_dim, vocab_size, bidirectional,
                 dropout=0.0, mode='LSTM', enable_layer_norm=False,
                 vectors=None, debug=False, residual=False):
        '''
            :param max_length: Max length of the sequence.
            :param hidden_dim: dimension of the output of the LSTM.
            :param number_of_layer: Number of LSTM to be stacked.
            :param embedding_dim: The output dimension of the embedding layer/ important only if vectors=none
            :param vocab_size: Size of vocab / number of rows in embedding matrix
            :param bidirectional: boolean - if true creates BIdir LStm
            :param vectors: embedding matrix
            :param debug: Bool/ prints shapes and some other meta data.
            :param enable_layer_norm: Bool/ layer normalization.
            :param mode: LSTM/GRU.
            :param residual: Bool/ return embedded state of the input.

        TODO: Implement multilayered shit someday.
        '''
        super(NotSuchABetterEncoder, self).__init__()

        self.max_length, self.hidden_dim, self.embedding_dim, self.vocab_size = int(max_length), int(hidden_dim), int(embedding_dim), int(vocab_size)
        self.enable_layer_norm = enable_layer_norm
        self.number_of_layer = number_of_layer
        self.bidirectional = bidirectional
        self.dropout = dropout
        self.debug = debug
        self.mode = mode
        self.residual = residual


        assert self.mode in ['LSTM', 'GRU']

        if vectors is not None:
            self.embedding_layer = nn.Embedding.from_pretrained(torch.FloatTensor(vectors))
            self.embedding_layer.weight.requires_grad = True
        else:
            # Embedding layer
            self.embedding_layer = nn.Embedding(self.vocab_size, self.embedding_dim)

        # Mode
        if self.mode == 'LSTM':
            self.rnn = torch.nn.LSTM(input_size=self.embedding_dim,
                                     hidden_size=self.hidden_dim,
                                     num_layers=1,
                                     bidirectional=self.bidirectional)
        elif self.mode == 'GRU':
            self.rnn = torch.nn.GRU(input_size=self.embedding_dim,
                                    hidden_size=self.hidden_dim,
                                    num_layers=1,
                                    bidirectional=self.bidirectional)
        self.dropout = torch.nn.Dropout(p=self.dropout)
        self.reset_parameters()

    def init_hidden(self, batch_size, device):
        """
            Hidden states to be put in the model as needed.
        :param batch_size: desired batchsize for the hidden
        :param device: torch device
        :return:
        """
        if self.mode == 'LSTM':
            return (torch.ones((1+self.bidirectional , batch_size, self.hidden_dim), device=device),
                    torch.ones((1+self.bidirectional, batch_size, self.hidden_dim), device=device))
        else:
            return torch.ones((1+self.bidirectional, batch_size, self.hidden_dim), device=device)

    def reset_parameters(self):
        """
        Here we reproduce Keras default initialization weights to initialize Embeddings/LSTM weights
        """
        ih = (param for name, param in self.named_parameters() if 'weight_ih' in name)
        hh = (param for name, param in self.named_parameters() if 'weight_hh' in name)
        b = (param for name, param in self.named_parameters() if 'bias' in name)
        for t in ih:
            torch.nn.init.xavier_uniform_(t)
        for t in hh:
            torch.nn.init.orthogonal_(t)
        for t in b:
            torch.nn.init.constant_(t, 0)

    def forward(self, x, h):
        """

        :param x: input (batch, seq)
        :param h: hiddenstate (depends on mode. see init hidden)
        :param device: torch device
        :return: depends on booleans passed @ init.
        """

        if self.debug:
            print ("\tx:\t", x.shape)
            if self.mode is "LSTM":
                print ("\th[0]:\t", h[0].shape)
            else:
                print ("\th:\t", h.shape)

        mask = tu.compute_mask(x)

        x = self.embedding_layer(x).transpose(0, 1)

        if self.debug: print ("x_emb:\t\t", x.shape)

        if self.enable_layer_norm:
            seq_len, batch, input_size = x.shape
            x = x.view(-1, input_size)
            x = self.layer_norm(x)
            x = x.view(seq_len, batch, input_size)

        if self.debug: print("x_emb bn:\t", x.shape)

        # get sorted v
        lengths = mask.eq(1).long().sum(1)
        lengths_sort, idx_sort = torch.sort(lengths, dim=0, descending=True)
        _, idx_unsort = torch.sort(idx_sort, dim=0)

        x_sort = x.index_select(1, idx_sort)
        h_sort = (h[0].index_select(1, idx_sort), h[1].index_select(1, idx_sort)) \
            if self.mode is "LSTM" else h.index_select(1, idx_sort)

        x_pack = torch.nn.utils.rnn.pack_padded_sequence(x_sort, lengths_sort)
        x_dropout = self.dropout.forward(x_pack.data)
        x_pack_dropout = torch.nn.utils.rnn.PackedSequence(x_dropout, x_pack.batch_sizes)

        if self.debug:
            print("\nidx_sort:", idx_sort.shape)
            print("idx_unsort:", idx_unsort.shape)
            print("x_sort:", x_sort.shape)
            if self.mode is "LSTM":
                print ("h_sort[0]:\t\t", h_sort[0].shape)
            else:
                print ("h_sort:\t\t", h_sort.shape)


        o_pack_dropout, h_sort = self.rnn.forward(x_pack_dropout, h_sort)
        o, _ = torch.nn.utils.rnn.pad_packed_sequence(o_pack_dropout)

        # Unsort o based ont the unsort index we made
        o_unsort = o.index_select(1, idx_unsort)  # Note that here first dim is seq_len
        h_unsort = (h_sort[0].index_select(1, idx_unsort), h_sort[1].index_select(1, idx_unsort)) \
            if self.mode is "LSTM" else h_sort.index_select(1, idx_unsort)


        # @TODO: Do we also unsort h? Does h not change based on the sort?

        if self.debug:
            if self.mode is "LSTM":
                print("h_sort\t\t", h_sort[0].shape)
            else:
                print("h_sort\t\t", h_sort.shape)
            print("o_unsort\t\t", o_unsort.shape)
            if self.mode is "LSTM":
                print("h_unsort\t\t", h_unsort[0].shape)
            else:
                print("h_unsort\t\t", h_unsort.shape)

        len_idx = (lengths - 1).view(-1, 1).expand(-1, o_unsort.size(2)).unsqueeze(0)

        if self.debug:
            print("len_idx:\t", len_idx.shape)

        # Need to also return the last embedded state. Wtf. How?

        if self.residual:
            len_idx = (lengths - 1).view(-1, 1).expand(-1, x.size(2)).unsqueeze(0)
            x_last = x.gather(0, len_idx)
            x_last = x_last.squeeze(0)
            return o_unsort, h_unsort[0].transpose(1,0).contiguous().view(h_unsort[0].shape[1], -1) , h_unsort, mask, x, x_last
        else:
            return o_unsort, h_unsort[0].transpose(1,0).contiguous().view(h_unsort[0].shape[1], -1) , h_unsort, mask

    @property
    def layers(self):
        return torch.nn.ModuleList([
            torch.nn.ModuleList([self.embedding_layer, self.rnn, self.dropout]),
        ])

In [12]:
if True:
    parameter_dict = {}
    parameter_dict['bidirectional'] = True
    parameter_dict['embedding_dim'] = 300
    parameter_dict['max_length'] = 25
    parameter_dict['hidden_size'] = 256
    parameter_dict['vocab_size'] = 1000
    parameter_dict['dropout'] = 0.3
    parameter_dict['vectors'] = None
    model = BiLstmDot(parameter_dict, None, torch.device('cpu'), _pointwise=False, _debug=False)


    # Data making 
    BS = 64 # Batch Size
    SL = 25 # Maximum Sequence Length and also the sequence length of all tensor.
    data = {}
    data['ques_batch'] = torch.randint(0,999, (BS,SL), dtype=torch.long)
    data['pos_batch'] = torch.randint(0,999, (BS,SL), dtype=torch.long)
    data['neg_batch'] = torch.randint(0,999, (BS,SL), dtype=torch.long)
    data['y_label'] = torch.ones(BS)

    # Setting up optimizer and loss function
    optimizer = torch.optim.Adam(list(filter(lambda p: p.requires_grad, model.encoder.parameters())))
    loss_fn = nn.MarginRankingLoss(margin=1,size_average=False)

    # Passing it through models forward, train in this case.
    output = model.train(data,optimizer,loss_fn,torch.device('cpu'))
    
    # Printing the output 
    print(output)



tensor(74.2771, grad_fn=<SumBackward0>)


#### Sampler

In [13]:
class SimplestSampler:
    """
        Given X and Y matrices (or lists of lists),
            it returns a batch worth of stuff upon __next__
    :return:
    """

    def __init__(self, data, bs: int = 64):
        '''
            data has question, positive_path and negative paths as fields.
        '''
        self.question = data["question"]
        self.posp = data["positive_path"]
        self.negp = data["negative_path"]
        self.n = len(self.question)
        self.bs = bs  # Batch Size

    def __len__(self):
        return self.n // self.bs - (1 if self.n % self.bs else 0)

    def __iter__(self):
        self.i, self.iter = 0, 0
        return self

    def __next__(self):
        if self.i + self.bs >= self.n:
            raise StopIteration

        _q, _p, _n = self.question[self.i:self.i + self.bs], \
        self.posp[self.i:self.i + self.bs], self.negp[self.i:self.i + self.bs]
        self.i += self.bs
        return _q, _p, _n

In [14]:
# testing the sampler
data = {
    "question":train_question,
    "positive_path": train_positive_path,
    "negative_path": train_negative_path
}
ss = SimplestSampler(data,4000)

for i,j,k in ss:
    print(len(i),len(j),len(k))
    break

4000 4000 4000


#### Training loop

In [17]:
def pad(data_list, max_length = 25):
    
    padded_data = np.zeros((len(data_list),max_length))
    
    for ind,d in enumerate(data_list):
        padded_data[ind][:min(len(padded_data[ind]),len(d))] = d[:min(len(d),len(padded_data[ind]))]

    return padded_data

def evaluation(data, modeler, max_length, device):
    # We will follow the same procedure as that of training data preparation
    
    precision = []
    
    for d in data:
        ques, posp, negp = d['question_id'], d['positive_path_id'], d['hop1_id']+d['hop2_id']
        
        # padding the data
        negp_p = pad(negp,max_length)
        posp_p = pad([posp],max_length)
        ques_p = pad([ques],max_length)
        
        # stack negative path and positive path.
        # The reason to stack them is that the model while predicting expects just a set of paths.
        paths = np.vstack((negp_p,posp_p))
        
        # Repeating question "path" number of times.
        ques_p = np.repeat(ques_p, len(paths), axis=0)
        
        # Converting them into pytorch tensor.
        ques_p = torch.tensor(ques_p, dtype=torch.long, device=device)
        paths = torch.tensor(paths, dtype=torch.long, device=device)
        
        # Passing the data through predict function. 
        score = modeler.predict(ques_p, paths, device)
        
        # Find the index of highest scoring question-corechain (path) pair.
        arg_max = torch.argmax(score)
        
        if arg_max.item() == len(paths)-1:
            precision.append(1)
        else:
            precision.append(0)
            
    print(f"the current precision of the system is, {np.average(precision)}")
        
def training_loop(parameter_dict, train_loader, modeler, optimizer, loss_func, test_data, device):
    '''
        parameter_dict['epochs'] = 10
    
    '''
    for epoch in range(parameter_dict['epochs']):
        
        print("Epoch: ", epoch, "/", parameter_dict['epochs'])
        
        epoch_loss = []
        epoch_time = time.time()
        
        accuracy = evaluation(test_data, modeler, parameter_dict['max_length'], device)
        print(accuracy)
        
        i_batch = 0
        for q_b, pp_b, np_b in train_loader:
            
            
            batch_time = time.time()
            
            # pad them
            q_b, pp_b, np_b = pad(q_b,parameter_dict['max_length']),\
            pad(pp_b,parameter_dict['max_length']), pad(np_b,parameter_dict['max_length'])
            
            # convert them into torch tensor
            
            ques_batch = torch.tensor(np.reshape(q_b, (-1, parameter_dict['max_length'])),
                                              dtype=torch.long, device=device)
            pos_batch = torch.tensor(np.reshape(pp_b, (-1, parameter_dict['max_length'])),
                                             dtype=torch.long, device=device)
            neg_batch = torch.tensor(np.reshape(np_b, (-1, parameter_dict['max_length'])),
                                             dtype=torch.long, device=device)
            y = torch.ones(q_b.shape[0], device=device) #check if view(-1) is necessary
            
            # pass it through model
            
            data_batch = {
                            'ques_batch': ques_batch,
                            'pos_batch': pos_batch,
                            'neg_batch': neg_batch,
                            'y_label': y
            }
            
            loss = modeler.train(data=data_batch,
                                  optimizer=optimizer,
                                  loss_fn=loss_func,
                                  device=device)
            epoch_loss.append(loss.item())
            
            
            print("Batch:\t%d" % i_batch, "/%d\t: " % (parameter_dict['batch_size']),
                      "%s" % (time.time() - batch_time),
                      "\t%s" % (time.time() - epoch_time),
                      "\t%s" % (str(loss.item())),
                      end=None if i_batch + 1 == int(int(i_batch) / parameter_dict['batch_size']) else "\n")
            
            i_batch = i_batch + 1
        accuracy = evaluation(test_data, modeler, parameter_dict['max_length'], device)
        print(accuracy)
#         

In [18]:

parameter_dict = {}
parameter_dict['bidirectional'] = True
parameter_dict['embedding_dim'] = 300
parameter_dict['max_length'] = 25
parameter_dict['hidden_size'] = 256
parameter_dict['vocab_size'] = len(vocab)
parameter_dict['dropout'] = 0.3
parameter_dict['vectors'] = None
parameter_dict['epochs'] = 5
parameter_dict['batch_size'] = 4000

device = torch.device('cpu')
model = BiLstmDot(parameter_dict, None, device, _pointwise=False, _debug=False)
    
# Setting up optimizer and loss function
optimizer = torch.optim.Adam(list(filter(lambda p: p.requires_grad, model.encoder.parameters())))
loss_fn = nn.MarginRankingLoss(margin=1,size_average=False)

# Sampler
data = {
    "question":train_question,
    "positive_path": train_positive_path,
    "negative_path": train_negative_path
}

ss = SimplestSampler(data,parameter_dict['batch_size'])


# Training
training_loop(parameter_dict=parameter_dict, train_loader=ss, modeler=model, 
              optimizer=optimizer, loss_func=loss_fn, test_data=test_data,device = device)



Epoch:  0 / 5
the current precision of the system is, 0.075
None
Batch:	0 /4000	:  5.197627544403076 	85.30159902572632 	3830.44482421875
Batch:	1 /4000	:  5.387554407119751 	90.68939280509949 	4347.18212890625
Batch:	2 /4000	:  3.4218385219573975 	94.11154127120972 	823.1513061523438
Batch:	3 /4000	:  4.2060205936431885 	98.31787896156311 	1646.2049560546875
Batch:	4 /4000	:  4.0779993534088135 	102.3963212966919 	314.9248352050781
Batch:	5 /4000	:  4.269907236099243 	106.66671586036682 	1758.103515625
Batch:	6 /4000	:  4.062440872192383 	110.72955870628357 	420.0933532714844
Batch:	7 /4000	:  4.854046583175659 	115.58400654792786 	3784.54931640625
Batch:	8 /4000	:  3.9621925354003906 	119.54663109779358 	339.4937744140625
Batch:	9 /4000	:  3.4537179470062256 	123.00073504447937 	353.0498962402344
Batch:	10 /4000	:  5.024760484695435 	128.0258984565735 	1223.7586669921875
Batch:	11 /4000	:  4.64821195602417 	132.67453527450562 	1450.707763671875
Batch:	12 /4000	:  4.490438222885132 	1

KeyboardInterrupt: 

In [None]:
d = test_data[1]
max_length = 25
ques, posp, negp = d['question_id'], d['positive_path_id'], d['hop1_id']+d['hop2_id']

# padding the data
negp_p = pad(negp,max_length)
posp_p = pad([posp],max_length)
ques_p = pad([ques],max_length)

# stack negative path and positive path.
# The reason to stack them is that the model while predicting expects just a set of paths.
paths = np.vstack((negp_p,posp_p))

# Repeating question "path" number of times.
ques_p = np.repeat(ques_p, len(paths), axis=0)

# Converting them into pytorch tensor.
ques_p = torch.tensor(ques_p, dtype=torch.long, device=device)
paths = torch.tensor(paths, dtype=torch.long, device=device)

# Passing the data through predict function. 
score = model.predict(ques_p, paths, device)

# Find the index of highest scoring question-corechain (path) pair.
arg_max = torch.argmax(score)