In [1]:
import tempfile
import sys
import subprocess
import shutil
from elasticsearch import Elasticsearch, helpers
from nir.utils import create_filter_query_function, change_bm25_parameters

import os
import sys
from collections import defaultdict
import pickle
import math
import pandas as pd
from os.path import join

import tensorflow as tf
from tensorflow.keras import backend as K

from mmnrm.evaluation import TREC_Evaluator
from mmnrm.utils import set_random_seed, load_model_weights, load_model
from mmnrm.dataset import TestCollectionV2, sentence_splitter_builderV2
from mmnrm.evaluation import BioASQ_Evaluator
from mmnrm.modelsv2 import deep_rank
from mmnrm.text import TREC_goldstandard_transform, TREC_queries_transform, TREC_results_transform


import numpy as np
import time

import json

# --- Elasticsearch configuration ---------------------------------------------
# Index queried by the retrieval cells of this notebook.
index_name = "trec-pm-2020-synonym"

# Client bound to the Elasticsearch node at this address.
es = Elasticsearch(["http://193.136.175.98:8125"])

# Apply custom BM25 settings to the index.
# NOTE(review): 0.9 / 0.7 are presumably the BM25 k1 and b values (in that
# order) -- confirm against nir.utils.change_bm25_parameters.
change_bm25_parameters(0.9, 0.7, index_name, es)

In [2]:
import xmltodict
import ctypes
from collections import defaultdict

def load_TREC_topics(topics_file, query_build):
    """Read a TREC topics XML file and convert its topics into queries.

    Args:
        topics_file: path to an XML file whose root element is ``topics`` and
            which holds one or more ``topic`` elements, each with a ``number``
            attribute and ``disease``, ``gene`` and ``treatment`` children.
        query_build: callable that receives one topic dict (keys ``id``,
            ``disease``, ``gene``, ``treatment``) and builds the query text;
            it is forwarded to ``TREC_queries_transform`` as ``fn``.

    Returns:
        Whatever ``TREC_queries_transform`` returns for the parsed topics.
    """
    # Explicit encoding so parsing does not depend on the platform locale.
    with open(topics_file, encoding="utf-8") as f:
        parsed_topics = xmltodict.parse(f.read())["topics"]["topic"]

    # xmltodict returns a single mapping (not a list) when the file contains
    # exactly one <topic>; normalise so we always iterate over topics and
    # never over the keys of one topic.
    if not isinstance(parsed_topics, list):
        parsed_topics = [parsed_topics]

    topics_json = [
        {
            "id": topic["@number"],
            "disease": topic["disease"],
            "gene": topic["gene"],
            "treatment": topic["treatment"],
        }
        for topic in parsed_topics
    ]

    return TREC_queries_transform(topics_json, number_parameter="id", fn=query_build)

def save_answers_to_file(answers, name):
    """Write each entry of `answers` on its own line of the text file `name`.

    The file is opened in write mode with UTF-8 encoding, so any previous
    content is replaced. A newline is appended after every entry.

    Args:
        answers: iterable of strings, one per output line.
        name: path of the file to write.

    Returns:
        `name`, unchanged.
    """
    with open(name, "w", encoding="utf-8") as out_file:
        out_file.writelines(entry + "\n" for entry in answers)

    return name

In [3]:
def execute_queries(queries, top_k=1000):
    """Run every query against the Elasticsearch index and collect the hits.

    Uses the module-level ``es`` client and ``index_name``.

    Args:
        queries: iterable of dicts providing an ``"id"`` and a ``"query"`` entry.
        top_k: number of hits requested per query (passed as ``size``).

    Returns:
        dict mapping each query id to a list, in the order returned by
        Elasticsearch, of dicts with the keys ``id``, ``text``, ``title``
        and ``score``.

    Raises:
        AssertionError: if the returned scores are not in non-increasing order.
    """
    filter_query_string = create_filter_query_function()

    def is_non_increasing(values):
        # True when no value is smaller than the one that follows it.
        return all(values[pos] >= values[pos + 1] for pos in range(len(values) - 1))

    documents = {}

    for query_number, query_data in enumerate(queries):

        # lightweight progress indicator, refreshed every 10 queries
        if query_number % 10 == 0:
            print(query_number, end="\r")

        cleaned_query = filter_query_string(query_data["query"])

        # full-text match on the "text" field with the english analyzer
        query_string_clause = {
            "query_string": {
                "query": cleaned_query,
                "analyzer": "english",
                "fields": ["text"],
            }
        }
        query_es = {
            "query": {
                "bool": {
                    "must": [query_string_clause],
                    "filter": [],
                    "should": [],
                    "must_not": [],
                }
            }
        }

        response = es.search(index=index_name, body=query_es, size=top_k, request_timeout=200)
        hits = response['hits']['hits']

        documents[query_data["id"]] = [
            {
                "id": hit["_source"]["id"],
                "text": hit["_source"]["text"],
                "title": hit["_source"]["title"],
                "score": hit['_score'],
            }
            for hit in hits
        ]

        # just to ensure the Elasticsearch order is maintained
        assert is_non_increasing([hit['_score'] for hit in hits])

    return documents


In [4]:
# Topics: keyword-style query built from the treatment, disease and gene fields
topics = load_TREC_topics(
    "topics2020.xml",
    lambda topic: " ".join((topic["treatment"], topic["disease"], topic["gene"])),
)
# Alternative (question-style) query:
#topics = load_TREC_topics("topics2020.xml", lambda x:"Is {} effective for treatment of {} {}".format(x["treatment"],x["disease"],x["gene"]))


In [5]:
# First-stage retrieval for every topic.
retrieved = execute_queries(topics)

tag = "BIT.UA-baseline"

# One run line per (topic, retrieved document):
# "<topic id> Q0 <doc id> <1-based rank> <score> <run tag>"
answers = [
    "{} Q0 {} {} {} {}".format(q["id"], doc_info["id"], position, doc_info["score"], tag)
    for q in topics
    for position, doc_info in enumerate(retrieved[q["id"]], start=1)
]

save_answers_to_file(answers, "BIT.UA-baseline.txt")

30

In [6]:
#trec_evaluator = TREC_Evaluator("qrels-treceval-abstracts.2019.txt", '/backup/TREC/TestSet/trec_eval-9.0.7/trec_eval')
#test_collection = TestCollectionV2(topics, retrieved, trec_evaluator).batch_size(100)

#output_metrics=["recall_100","recall_500","recall_1000", "map_cut_1000", "ndcg_cut_10", "P_5"]
#metrics = test_collection.evaluate_pre_rerank(output_metris=output_metrics)

#[ (m, metrics[m]) for m in output_metrics]

In [6]:

# Change query: rebuild the topics as natural-language questions.
# `retrieved` (computed from the earlier topics) is reused as-is below.
# The template has three placeholders, so exactly three fields are passed.
topics = load_TREC_topics(
    "topics2020.xml",
    lambda x: "Is {} effective for treatment of {} {}".format(x["treatment"], x["disease"], x["gene"]),
)

# NOTE(review): the first argument is None here, whereas the commented-out
# evaluation cell passes a qrels file -- presumably no gold standard is
# available for these topics; confirm before calling evaluate().
trec_evaluator = TREC_Evaluator(None, '/backup/TREC/TestSet/trec_eval-9.0.7/trec_eval')
test_collection = TestCollectionV2(topics, retrieved, trec_evaluator).batch_size(100)

In [7]:
# Display the current topics (id + query text) for a visual check.
topics

[{'id': '1',
  'query': 'Is Regorafenib effective for treatment of colorectal cancer ABL1'},
 {'id': '2',
  'query': 'Is Alectinib effective for treatment of non-small cell carcinoma ALK'},
 {'id': '3',
  'query': 'Is Gilteritinib effective for treatment of acute myeloid leukemia ALK'},
 {'id': '4',
  'query': 'Is Binimetinib effective for treatment of melanoma BRAF'},
 {'id': '5',
  'query': 'Is Sorafenib effective for treatment of hepatocellular carcinoma BRAF'},
 {'id': '6',
  'query': 'Is Carboplatin effective for treatment of ovarian carcinoma BRCA1'},
 {'id': '7',
  'query': 'Is Olaparib effective for treatment of ovarian cancer BRCA1'},
 {'id': '8',
  'query': 'Is Rucaparib effective for treatment of ovarian cancer BRCA1'},
 {'id': '9',
  'query': 'Is Carboplatin effective for treatment of ovarian carcinoma BRCA2'},
 {'id': '10',
  'query': 'Is Olaparib effective for treatment of ovarian cancer BRCA2'},
 {'id': '11',
  'query': 'Is Abemaciclib effective for treatment of breast c

In [8]:
# Auxiliary functions

def load_neural_model(path_to_weights):
    """Load a ranking model and build the generator that prepares its inputs.

    The model is loaded with ``return_snippets_score`` switched on, and the
    padding sizes are read from the ``"model"`` section of its saved config.

    Args:
        path_to_weights: location handed to ``load_model``.

    Returns:
        ``(rank_model, test_input_generator)``. ``test_input_generator`` takes
        a data generator and yields, for each item,
        ``(_id, [query, docs, query_idf], ids_docs, offsets_docs)`` where the
        three model inputs are numpy arrays and the query / query-idf rows are
        repeated once per document.
    """
    rank_model = load_model(path_to_weights, change_config={"return_snippets_score":True})
    tokenizer = rank_model.tokenizer

    model_cfg = rank_model.savable_config["model"]
    query_len = model_cfg["max_q_length"]
    sentence_len = model_cfg["max_s_length"]
    sentences_per_term = model_cfg["max_s_per_q_term"]

    def _pad_tokens(sequences, maxlen, dtype):
        # zero-pad / cut at the end so every sequence has exactly `maxlen` items
        return tf.keras.preprocessing.sequence.pad_sequences(sequences,
                                                             maxlen=maxlen,
                                                             dtype=dtype,
                                                             padding='post',
                                                             truncating='post',
                                                             value=0)

    def pad_query(sequences, dtype='int32'):
        return _pad_tokens(sequences, query_len, dtype)

    def pad_sentences(sequences, dtype='int32'):
        return _pad_tokens(sequences, sentence_len, dtype)

    def pad_docs(items, max_lim):
        # cut to `max_lim` entries, or fill the tail with empty lists
        return items[:max_lim] + [[]]*(max_lim-len(items))

    def idf_from_id_token(token_id):
        # log(number of documents / number of documents containing the token)
        return math.log(tokenizer.document_count/tokenizer.word_docs[tokenizer.index_word[token_id]])

    # only the test-time splitter is needed here
    _, test_sentence_generator = sentence_splitter_builderV2(tokenizer,
                                                             max_sentence_size=sentence_len,
                                                             mode=4)

    def test_input_generator(data_generator):

        for _id, query, docs in test_sentence_generator(data_generator):

            # idf weight of every query token
            query_idf = [idf_from_id_token(token_id) for token_id in query]

            tokenized_docs, ids_docs, offsets_docs = [], [], []
            for doc in docs:
                # NOTE(review): doc["text"] is assumed to hold, per query term,
                # a list of tokenized sentences -- confirm against
                # sentence_splitter_builderV2.
                per_term_sentences = pad_docs(doc["text"], max_lim=query_len)
                tokenized_docs.append([
                    pad_sentences(pad_docs(sentences, max_lim=sentences_per_term))
                    for sentences in per_term_sentences
                ])
                ids_docs.append(doc["id"])
                offsets_docs.append(doc["offset"])

            # pad once, then repeat the same row for every document
            n_docs = len(tokenized_docs)
            query_rows = [pad_query([query])[0]] * n_docs
            query_idf_rows = [pad_query([query_idf], dtype="float32")[0]] * n_docs

            yield _id, [np.array(query_rows), np.array(tokenized_docs), np.array(query_idf_rows)], ids_docs, offsets_docs

    return rank_model, test_input_generator

def rank(model, t_collection):
    """Score every batch produced by `t_collection` and rank documents per query.

    Args:
        model: object whose ``predict(inputs)`` returns a pair
            ``(scores, q_sentence_attention)``; ``scores`` must support
            ``scores[:, 0].tolist()`` and both must be indexable per document.
        t_collection: object whose ``generator()`` yields
            ``(query_id, inputs, docs_info, offsets_docs)`` tuples. The same
            ``query_id`` may appear in several batches.

    Returns:
        ``defaultdict(list)`` mapping each query id to a list of
        ``(doc_info, score, sentence_attention, offsets)`` tuples sorted by
        descending score.
    """
    q_scores = defaultdict(list)

    for batch_idx, (query_id, Y, docs_info, offsets_docs) in enumerate(t_collection.generator()):
        s_time = time.time()

        scores, q_sentence_attention = model.predict(Y)
        # keep only the first column: one score per document
        scores = scores[:,0].tolist()

        print("\rEvaluation {} | time {}".format(batch_idx, time.time()-s_time), end="\r")

        # dedicated index name so the batch counter is never shadowed
        for doc_idx, doc_info in enumerate(docs_info):
            q_scores[query_id].append((doc_info,
                                       scores[doc_idx],
                                       q_sentence_attention[doc_idx],
                                       offsets_docs[doc_idx]))

    # sort each ranking in place, highest score first
    for ranking in q_scores.values():
        ranking.sort(key=lambda entry: -entry[1])

    return q_scores

In [16]:
# Checkpoint to load; the commented-out paths are alternative checkpoints.
#path = "/backup/NIR_BioASQ/best_validation_models/still-butterfly-1_batch0_map"
#path = "/backup/NIR_BioASQ/best_validation_models/electric-resonance-5_batch0_map"
path = "trained_models/exalted-resonance-2_val_collection0_P_5"

# Load the ranking model together with the generator that prepares its inputs.
ranking_model, test_input_generator = load_neural_model(path)

# Register that input preparation on the test collection before ranking.
test_collection.set_transform_inputs_fn(test_input_generator)
    
# Score the collection with the neural model; `result` maps query id -> sorted ranking.
result = rank(ranking_model, test_collection)

DEBUG created tokenizer bioasq_2020_RegexTokenizer
False False
[LOAD FROM CACHE] Load embedding matrix from /backup/NIR_BioASQ/embeddings/WORD2VEC_embedding_bioasq_gensim_iter_15_freq0_200_Regex_word2vec_bioasq_2020_RegexTokenizer
[EMBEDDING MATRIX SHAPE] (4978472, 200)
Evaluation 399 | time 0.7395062446594238

In [17]:
tag = "BIT.UA-nn-run3"

# One run line per (topic, re-ranked document):
# "<topic id> Q0 <doc id> <1-based rank> <score> <run tag>"
# Each ranking entry is a tuple whose first two items are the doc id and its score.
answers = [
    "{} Q0 {} {} {} {}".format(q["id"], doc_info[0], position, doc_info[1], tag)
    for q in topics
    for position, doc_info in enumerate(result[q["id"]], start=1)
]

save_answers_to_file(answers, "BIT.UA-nn-run3.txt")

'BIT.UA-nn-run3.txt'

In [15]:
#output_metrics=["recall_100","recall_500","recall_1000", "map_cut_1000", "ndcg_cut_10", "P_5"]
#metrics = test_collection.evaluate(result)
#[ (m, metrics[m]) for m in output_metrics]

Remove /tmp/tmp5zoadnnf


[('recall_100', 0.3264),
 ('recall_500', 0.58),
 ('recall_1000', 0.6523),
 ('map_cut_1000', 0.2314),
 ('ndcg_cut_10', 0.5105),
 ('P_5', 0.56)]

In [21]:
# Metrics to report, in display order.
output_metrics = [
    "recall_100",
    "recall_500",
    "recall_1000",
    "map_cut_1000",
    "ndcg_cut_10",
    "P_5",
]
metrics = test_collection.evaluate(result)
[(metric_name, metrics[metric_name]) for metric_name in output_metrics]

Remove /tmp/tmp_t12qp0c


[('recall_100', 0.3461),
 ('recall_500', 0.5907),
 ('recall_1000', 0.6523),
 ('map_cut_1000', 0.2547),
 ('ndcg_cut_10', 0.5321),
 ('P_5', 0.59)]

In [27]:
# Evaluate the current `result` and show the selected metrics as (name, value) pairs.
output_metrics = [
    "recall_100", "recall_500", "recall_1000",
    "map_cut_1000", "ndcg_cut_10", "P_5",
]
metrics = test_collection.evaluate(result)
[(name, metrics[name]) for name in output_metrics]

Remove /tmp/tmpwxqlnfq7


[('recall_100', 0.1529),
 ('recall_500', 0.5085),
 ('recall_1000', 0.7916),
 ('map_cut_1000', 0.1349),
 ('ndcg_cut_10', 0.2653),
 ('P_5', 0.33)]