In [2]:
import argparse
import os
import sys
import pickle
from pytorch_transformers import BertTokenizer
from collections import Counter, defaultdict
from tqdm.auto import tqdm
from itertools import product
from gensim.models import KeyedVectors
from gensim.similarities import SoftCosineSimilarity
import numpy as np
from scipy.spatial.distance import cosine
import random

parser = argparse.ArgumentParser()
parser.add_argument("--data_home", type=str, default="/ssd2/arthur/TREC2019/data")
parser.add_argument("--docs_file", type=str, default="docs/tokenized-msmarco-docs.tsv")
parser.add_argument("--queries_file", type=str, default="queries/test_queries.tsv")
parser.add_argument("--axioms", type=str, default="TFC1,TFC2,MTDC,LNC1,LNC2,LB1,LB2,STMC1,STMC2,STMC3,TP")
parser.add_argument("--top100", type=str, default="runs/indri_test_10_10.run")
parser.add_argument("--cpus", type=int, default=1)
parser.add_argument("--total_docs", type=int, default=3213835)
parser.add_argument("--delta", type=int, default=10)
parser.add_argument("--idf_file", type=str, default="docs/IDFS/IDFS-FULL")
parser.add_argument("--embeddings_path", type=str, default="GloVe/w2v.txt")
parser.add_argument("--stmc_sim", type=float, default=0.2)
parser.add_argument("--delta_lb", type=float, default=0.2)
parser.add_argument("--min_df_lb", type=int, default=100000)

argv = ["--cpus", "1",
        "--axioms", "TFC1"]
args = parser.parse_args(argv)
sys.path.insert(0, "/ssd2/arthur/TREC2019/scripts/")

In [3]:
def getcontent(doc_id, docs_file, offset_dict):
    offset = offset_dict[doc_id]
    with open(docs_file) as f:
        f.seek(offset)
        doc = f.readline()
    return doc

docs_path = os.path.join(args.data_home, args.docs_file)
offset_dict = pickle.load(open(docs_path+".offset", 'rb'))
top_100_path = os.path.join(args.data_home, args.top100)
assert os.path.isfile(top_100_path)
assert os.path.isfile(docs_path)
#set of valid documents
tuples = defaultdict(lambda: set())
all_docs = {}
docs_lens = {}
scores = {}
for i in tqdm(open(top_100_path), total=155800):
    topic_id, _ , doc_id, _, score, _ = i.split()
    tuples[topic_id].add(doc_id)
    scores[f"{topic_id}-{doc_id}"] = score
    if doc_id in all_docs:
        continue  
    all_docs[doc_id] = getcontent(doc_id, docs_path, offset_dict).split()
    docs_lens[doc_id] = len(all_docs[doc_id])
args.scores = scores

HBox(children=(IntProgress(value=0, max=155800), HTML(value='')))




In [15]:
# create a new trec_parameter file.https://33d36b12.ngrok.io/notebooks/LNC_ALL%20dataset.ipynb
# 
args.delta_lb = 0.01
import re
import random
def LB1(topic_id, tokenized_query, all_docs, tuples, docs_lens, args):
    query = [x.replace("##", "") for x in tokenized_query]
    query_terms = set(query)
    instances = []
    _docs = list(tuples[topic_id])
    formatter = "<query>\n<number>{}</number>\n<text>#combine({})</text>\n{}\n</query>\n"
    for i, di_id in enumerate(_docs):
        di_terms = set(all_docs[di_id])
        di_terms_not_q = di_terms.difference(query_terms) 
        score_di = float(args.scores[f"{topic_id}-{di_id}"])
        if not os.path.isfile(os.path.join(args.data_home, "queries/LB1.indriparam")):
            with open(os.path.join(args.data_home, "queries/LB1.indriparam"), 'w') as outf:
                outf.write("<parameters>\n<threads> 48 </threads>\n")
        with open(os.path.join(args.data_home, "queries/LB1.indriparam"), 'a') as outf:
            for dj_id in _docs[i:]:
                score_dj = float(args.scores[f"{topic_id}-{dj_id}"])
                score_diff = abs(float(args.scores[f"{topic_id}-{di_id}"])-float(args.scores[f"{topic_id}-{dj_id}"]))
                try:
                    score_diff /= max(float(args.scores[f"{topic_id}-{di_id}"]), float(args.scores[f"{topic_id}-{dj_id}"]))
                except:
                # normalization broke this example. both are actually the exact same score.
                    if max(float(args.scores[f"{topic_id}-{di_id}"]), float(args.scores[f"{topic_id}-{dj_id}"])) == 0:
                        score_diff = 0
                if score_diff >= args.delta_lb:
                    continue
                dj_terms = set(all_docs[dj_id])
                dj_terms_not_q = dj_terms.difference(query_terms)
                # pick a term from Dj, that is not in Di, and add to query.
                dj_unique_terms = dj_terms_not_q.difference(di_terms_not_q)
                if len(dj_unique_terms) !=0:
                    random_dj_term = random.sample(dj_unique_terms, 1)
                    #avoid non-alpha (indri is not happy with it)
                    while re.match('^[\w-]+$', random_dj_term[0]) is None:
                        random_dj_term = random.sample(dj_unique_terms, 1)
                    q_dj = " ".join(query + random_dj_term)
                    q_dj = re.sub(r'([^\s\w]|_)+', '', q_dj)

                    q_dj_id = topic_id+"-"+dj_id+"-"+di_id # in this case, the query was modified to help dj
                    instances.append((q_dj_id, dj_id, di_id)) 
                    working_set = "<workingSetDocno>{}</workingSetDocno>\n<workingSetDocno>{}</workingSetDocno>".format(di_id, dj_id)
                    query_format_j = formatter.format(q_dj_id, q_dj, working_set)
                    outf.write(query_format_j)

                di_unique_terms = di_terms_not_q.difference(dj_terms_not_q)
                if len(di_unique_terms) != 0:
                    random_di_term = random.sample(di_unique_terms, 1)
                    # Avoid non-alphanumeric
                    while re.match('^[\w-]+$', random_di_term[0]) is None:
                        random_di_term = random.sample(di_unique_terms, 1)
                    q_di = " ".join(query + random_di_term)
                    q_di = re.sub(r'([^\s\w]|_)+', '', q_di)
                    if len(q_di) == 0:
                        print("FIND")
                    q_di_id = topic_id+"-"+di_id+"-"+dj_id # in this case, the query was modified to help di
                    instances.append((q_di_id, di_id, dj_id)) 
                    query_format_i = formatter.format(q_di_id, q_di, working_set)
                    outf.write(query_format_i)
    return instances

queries_file = os.path.join(args.data_home, args.queries_file)
assert os.path.isfile(queries_file)
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

instances = []
try:
    os.remove("/ssd2/arthur/TREC2019/data/queries/LB1.indriparam")
except:
    pass
for counter, line in tqdm(enumerate(open(queries_file)), total=1558):
    q_id, query_text = line.strip().split("\t")
    tokenized_query = tokenizer.tokenize(query_text)
    instances += LB1(q_id, tokenized_query, all_docs, tuples, docs_lens, args)
    if counter > 1:
        break
with open(os.path.join(args.data_home, "queries/LB1.indriparam"), 'a') as outf:
    outf.write("</parameters>")
pickle.dump(instances, open("/ssd2/arthur/TREC2019/data/diagnostics/LB1-instances", 'wb'))

indri_path = "/ssd2/arthur/indri/bin/IndriRunQuery"
param_file = "/ssd2/arthur/TREC2019/data/queries/LB1.indriparam"
indri_cmd = "{} -index=/ssd2/arthur/TREC2019/data/index/indri-tokenized-bert-index/ {} -trecFormat=true > /ssd2/arthur/TREC2019/data/runs/LB1.run".format(indri_path, param_file)
subprocess.check_output(indri_cmd.split())

HBox(children=(IntProgress(value=0, max=1558), HTML(value='')))

CalledProcessError: Command '['/ssd2/arthur/indri/bin/IndriRunQuery', '-index=/ssd2/arthur/TREC2019/data/index/indri-tokenized-bert-index/', '/ssd2/arthur/TREC2019/data/queries/LB1.indriparam', '-trecFormat=true', '>', '/ssd2/arthur/TREC2019/data/runs/LB1.run']' returned non-zero exit status 255.

In [12]:
indri_cmd

'/ssd2/arthur/indri/bin/IndriRunQuery -index=/ssd2/arthur/TREC2019/data/index/indri-tokenized-bert-index/ /ssd2/arthur/TREC2019/data/queries/LB1.indriparam -trecFormat=true > /ssd2/arthur/TREC2019/data/runs/LB1.run'

In [5]:
pickle.dump(instances, open("/ssd2/arthur/TREC2019/data/diagnostics/LB1-instances", 'wb'))

In [4]:
indri_cmd

'/ssd2/arthur/indri/bin/IndriRunQuery -index=/ssd2/arthur/TREC2019/data/index/indri-tokenized-bert-index/ /ssd2/arthur/TREC2019/data/queries/LB1.indriparam -trecFormat=true > /ssd2/arthur/TREC2019/data/runs/LB1.run'

In [6]:
# Run indri with the new indriparam file
import subprocess
indri_path = "/ssd2/arthur/indri/bin/IndriRunQuery"
param_file = "/ssd2/arthur/TREC2019/data/queries/LB1.indriparam"
indri_cmd = "{} -index=/ssd2/arthur/TREC2019/data/index/indri-tokenized-bert-index/ {} -trecFormat=true > /ssd2/arthur/TREC2019/data/runs/LB1.run".format(indri_path, param_file)
subprocess.run(indri_cmd.split())

CompletedProcess(args=['/ssd2/arthur/indri/bin/IndriRunQuery', '-index=/ssd2/arthur/TREC2019/data/index/indri-tokenized-bert-index/', '/ssd2/arthur/TREC2019/data/queries/LB1.indriparam', '-trecFormat=true', '>', '/ssd2/arthur/TREC2019/data/runs/LB1.run'], returncode=255)

In [5]:
instances = pickle.load(open("/ssd2/arthur/TREC2019/data/diagnostics/LB1-instances", 'rb'))

In [8]:
from pytorch_transformers import DistilBertForSequenceClassification
model = DistilBertForSequenceClassification.from_pretrained("/ssd2/arthur/TREC2019/data/models/distilbert-cut/")

In [10]:
def text_to_tokens(query, document):
    tokens_a = query
    tokens_b = document
    _truncate_seq_pair(tokens_a, tokens_b)
    tokens = ["[CLS]"] + tokens_a + ["[SEP]"] + tokens_b + ["[SEP]"]
    return tokens
def _truncate_seq_pair(tokens_a, tokens_b, max_length=509):
    """Truncates a sequence pair in place to the maximum length."""
    while True:
        total_length = len(tokens_a) + len(tokens_b)
        if total_length <= max_length:
            break
        if len(tokens_a) > len(tokens_b):
            tokens_a.pop()
        else:
            tokens_b.pop()

In [11]:
buffer = ""
with open("/ssd2/arthur/TREC2019/data/triples-tokenized/LB1", 'w') as outf:
    for counter, line in tqdm(enumerate(open("/ssd2/arthur/TREC2019/data/queries/LB1.indriparam")), total=92545202):
        if "</query>" in line:
            topic_id, dj, di = buffer.split("<number>")[1].split("</")[0].split("-")
            query = tokenizer.tokenize(buffer.split("#combine(")[1].split(")<")[0])
            pair_id = buffer.split("<number>")[1].split("</")[0]
            di_doc = all_docs[di]
            di_triple = text_to_tokens(query, di_doc)
            outf.write("{}\t{}\n".format(pair_id, di_triple))
            
            dj = buffer.split("Docno>")[3].split("</")[0]
            dj_doc = all_docs[di]
            dj_triple = text_to_tokens(query, dj_doc)
            outf.write("{}\t{}\n".format(pair_id, dj_triple))
            buffer = ""
        else:
            buffer += line

HBox(children=(IntProgress(value=0, max=10730450), HTML(value='')))




In [11]:
from pytorch_transformers import DistilBertForSequenceClassification
model = DistilBertForSequenceClassification.from_pretrained("/ssd2/arthur/TREC2019/data/models/distilbert-cut/")
batch_size = 1024
from msmarco_dataset import MsMarcoDataset
from torch.utils.data import DataLoader
dataset = MsMarcoDataset("/ssd2/arthur/TREC2019/data/triples-tokenized/LB1", args.data_home, distil=True, invert_label=True, labeled=False)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)§b

HBox(children=(IntProgress(value=1, bar_style='info', description='Counting lines on file...', max=1, style=Pr…




In [None]:
import torch
softmax = torch.nn.Softmax(dim=1)

alphas = np.arange(0.0, 1.0)
runs_format = "{} Q0 {} {} {} DISTILBERT_QL\n" #topic_id, doc_id, ranking, score
ql_run_file = os.path.join("/ssd2/arthur/TREC2019/data/runs/LB1.run")
preds = softmax(torch.as_tensor(_preds))[:,0].cpu().numpy()
for alpha in alphas:
    print(alpha)
    beta = 1-alpha
    out_run_file = os.path.join("/ssd2/arthur/TREC2019/data/runs/LB1_distilBert-{}.run".format(alpha))
    topic_results = [] 
    last_topic = -1
    with open(ql_run_file, 'r') as inf, open(out_run_file, 'w') as outf:
        for counter, (example, score) in enumerate(zip(inf, preds)):
            topic_id, _, doc_id, _, _, _ = example.split()
            guid = "{}-{}".format(topic_id, doc_id)
            if topic_id != last_topic and len(topic_results) > 0:
                topic_results.sort(key=lambda x:x['score'], reverse=True)
                for rank, topic in enumerate(topic_results):
                    outf.write(runs_format.format(topic['topic_id'], topic['doc_id'], rank, topic['score']))
                topic_results = []
            topic_results.append({'topic_id':topic_id, 'doc_id':doc_id, 'score':alpha*score + beta*ql_scores[guid]})
            last_topic = topic_id
        topic_results.sort(key=lambda x:x['score'], reverse=True)
        for rank, topic in enumerate(topic_results):
            outf.write(runs_format.format(topic['topic_id'], topic['doc_id'], rank, topic['score']))

In [11]:
import re

instances = []
def LB2(topic_id, tokenized_query, all_docs, tuples, docs_lens, args):
    working_set_f = "<workingSetDocno>{}</workingSetDocno>\n<workingSetDocno>{}</workingSetDocno>"
    formatter = "<query>\n<number>{}</number>\n<text>#combine({})</text>\n{}\n</query>\n"
    DFs = pickle.load(open(os.path.join(args.data_home, args.idf_file),'rb'))
    query = [x.replace("##", "") for x in tokenized_query]
    query_terms = set(query)
    query_terms_with_high_idfs = set([w for w in query_terms if DFs[w]<args.min_df_lb])
    instances = []
    docs_created = dict()
    _docs = list(tuples[topic_id])
    #consider only terms with DF < 100k
    for i, di_id in enumerate(_docs):
        di_terms = set(all_docs[di_id])
        di_query_terms = query_terms.intersection(di_terms)
        di_query_terms_idfs = {w:DFs[w] for w in di_query_terms if DFs[w]<args.min_df_lb}
        # This document already has every possible query term. 
        if set(di_query_terms_idfs.keys()) == query_terms_with_high_idfs:
            continue
        missing_query_terms = query_terms_with_high_idfs.difference(di_query_terms_idfs.keys())
        score_di = float(args.scores[f"{topic_id}-{di_id}"])
        for dj_id in _docs[i:]:
            if di_id == dj_id:
                continue
            # Documents score must be similar
            score_diff = abs(float(args.scores[f"{topic_id}-{di_id}"])-float(args.scores[f"{topic_id}-{dj_id}"]))
            try:
                score_diff /= max(float(args.scores[f"{topic_id}-{di_id}"]), float(args.scores[f"{topic_id}-{dj_id}"]))
            except:
                # normalization broke this example. both are actually the exact same score.
                if max(float(args.scores[f"{topic_id}-{di_id}"]), float(args.scores[f"{topic_id}-{dj_id}"])) == 0:
                    score_diff = 0
            if score_diff > args.delta_lb:
                continue
            dj_terms = set(all_docs[dj_id])
            dj_query_terms = query_terms.intersection(dj_terms)
            dj_query_terms_idfs = {w:DFs[w] for w in dj_query_terms if DFs[w]<args.min_df_lb}
            
            # This document already has every possible query term. 
            if set(dj_query_terms_idfs.keys()) == query_terms_with_high_idfs:
                continue

            # Docs must contain the same query terms overall
            if di_query_terms_idfs != dj_query_terms_idfs:
                continue

            #Pick a query term PAIR with similar IDF
            valid_term_pairs = []
            for q1, q2 in product(dj_query_terms_idfs.keys(), missing_query_terms):
                if IDFs[q1] == 0:
                    continue
                difference = abs(IDFs[q1]-IDFs[q2])/IDFs[q1]
                if difference < args.delta_lb:
                    valid_term_pairs.append((q1, q2))
            # Create new documents. Add q2 to D2, repeat q1 in D1. Score for D2 must become larger than D1.
            # Add to start of document to avoid BERT issues.
            for q1, q2 in valid_term_pairs:
                di_prime = [q1] + all_docs[di_id]
                di_prime_id = di_id + "-" + q1 
                dj_prime = [q2] + all_docs[dj_id]
                dj_prime_id = dj_id + "-" + q2
                docs_created[di_prime_id] = di_prime
                docs_created[dj_prime_id] = dj_prime
                instances.append((topic_id, dj_prime_id, di_prime_id))

                # Do the opposite now.
                dj_prime = [q1] + all_docs[dj_id]
                dj_prime_id = dj_id + "-" + q1
                di_prime = [q2] + all_docs[di_id]
                di_prime_id = di_id + "-" + q2
                docs_created[di_prime_id] = di_prime
                docs_created[dj_prime_id] = dj_prime
                instances.append((topic_id, di_prime_id, dj_prime_id))
    # Write documents to file
    if len(docs_created) == 0:
        return []
    trectext_format = "<DOC>\n<DOCNO>{}</DOCNO>\n<TEXT>{}\n</TEXT>\n</DOC>\n"
    with open("/ssd2/arthur/TREC2019/data/docs/LB2/{}.trectext".format(topic_id), 'w') as outf:
        for d_id in docs_created:
            outf.write((trectext_format.format(d_id, docs_created[d_id])))
    # Write queries to file
    if not os.path.isfile("/ssd2/arthur/TREC2019/data/queries/LB2.indriparam"):
        with open("/ssd2/arthur/TREC2019/data/queries/LB2.indriparam", 'w') as outf:
            outf.write("<parameters>\n<threads> 48 </threads>\n")
    with open("/ssd2/arthur/TREC2019/data/queries/LB2.indriparam", 'a') as outf: 
        for _, di, dj in instances:
            working_set = working_set_f.format(di, dj)
            _query = re.sub(r'([^\s\w]|_)+', '', " ".join(query))
            outf.write(formatter.format(topic_id, _query, working_set))
    return instances

os.remove("/ssd2/arthur/TREC2019/data/queries/LB2.indriparam")
for line in tqdm(open(queries_file), total=1558):
    q_id, query_text = line.strip().split("\t")
    tokenized_query = tokenizer.tokenize(query_text)
    instances += LB2(q_id, tokenized_query, all_docs, tuples, docs_lens, args)
with open("/ssd2/arthur/TREC2019/data/queries/LB2.indriparam", 'a') as outf: 
    outf.write("</parameters>")

with open("/ssd2/arthur/TREC2019/data/diagnostics/LB2-instances" , 'wb') as outf:
    pickle.dump(instances, outf)

HBox(children=(IntProgress(value=0, max=1558), HTML(value='')))

In [12]:
instances = pickle.load(open("/ssd2/arthur/TREC2019/data/diagnostics/LB2-instances", 'rb'))

In [13]:
LB2_scores = {}
run_file = "/ssd2/arthur/TREC2019/data/runs/LB2.run"
for line in open(run_file):
#     print(line)
    topic_id, _, doc_id, _, score, _ = line.split()
    pair_id = "{}-{}".format(topic_id, doc_id)
    LB2_scores[pair_id] = float(score)

In [19]:
dataset_path = os.path.join(args.data_home, "diagnostics/LB2-instances")
dataset = pickle.load(open(dataset_path, 'rb'))
agreements = 0
for topic_id, di_id, dj_id in dataset:
    guid1 = f"{topic_id}-{di_id}"
    guid2 = f"{topic_id}-{dj_id}"
    if LB2_scores[guid1] > LB2_scores[guid2]:
        agreements+=1
len(dataset), agreements, agreements / len(dataset)

(89642, 84167, 0.9389237187925303)

In [46]:
buffer.split("Docno>")[3].split("</")[0].split("-")[0]

'D3342241'

In [61]:


text_to_tokens(query, di_doc)

In [68]:
# LB2 on BERT
# create triples file based on trec params file
buffer = ""
with open("/ssd2/arthur/TREC2019/data/triples-tokenized/LB2", 'w') as outf:
    for counter, line in tqdm(enumerate(open("/ssd2/arthur/TREC2019/data/queries/LB2.indriparam"))):
        if "</query>" in line:
            topic_id = buffer.split("<number>")[1].split("</")[0]
            query = tokenized_queries[topic_id]
            
            di, extra_token = buffer.split("Docno>")[1].split("</")[0].split("-")
            pair_id = "{}-{}-{}".format(topic_id, di, extra_token)
            if extra_token not in query:
                extra_token = "##"+extra_token
            if extra_token not in query:
                print("WTF")
            new_di = [extra_token] + all_docs[di]
            di_triple = text_to_tokens(query, new_di)
            outf.write("{}\t{}\n".format(pair_id, di_triple))
            
            dj, extra_token = buffer.split("Docno>")[3].split("</")[0].split("-")
            pair_id = "{}-{}-{}".format(topic_id, dj, extra_token)
            if extra_token not in query:
                extra_token = "##"+extra_token
            if extra_token not in query:
                print("WTF")
            new_dj = [extra_token] + all_docs[dj]
            dj_triple = text_to_tokens(query, new_dj)
            outf.write("{}\t{}\n".format(pair_id, dj_triple))
            buffer = ""
        else:
            buffer += line

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

In [69]:
from pytorch_transformers import DistilBertForSequenceClassification
model = DistilBertForSequenceClassification.from_pretrained("/ssd2/arthur/TREC2019/data/models/distilbert-cut/")

In [None]:
args.data_home

In [80]:
sys.path.insert(0, "/ssd2/arthur/TREC2019/scripts/")
batch_size = 1024
from msmarco_dataset import MsMarcoDataset
from torch.utils.data import DataLoader
dataset = MsMarcoDataset("/ssd2/arthur/TREC2019/data/triples-tokenized/LB2", args.data_home, distil=True, invert_label=True, labeled=False)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

HBox(children=(IntProgress(value=1, bar_style='info', description='Counting lines on file...', max=1, style=Pr…

In [78]:
import torch
seed = 42
gpus_to_use = [0, 1, 3, 6, 7]
torch.cuda.manual_seed_all(seed)
n_gpu = torch.cuda.device_count()
device = torch.device("cuda" if (torch.cuda.is_available() and n_gpu > 0) else "cpu")
model = torch.nn.DataParallel(model, device_ids=gpus_to_use)
model = model.to(device)

In [81]:
import torch
softmax = torch.nn.Softmax(dim=1)

preds = {}
eval_loss = 0.0
nb_eval_steps = 0
preds[dataset] = None
out_label_ids = 0
_preds = None
for index, batch in tqdm(enumerate(dataloader), desc="{} Dataset".format(dataset), total = len(dataloader)):
    model.eval()
    with torch.no_grad():
        inputs = {'input_ids': batch[0].to(device),
                  'attention_mask': batch[1].to(device)}
        outputs = model(**inputs)
        logits = outputs[0]
        if _preds is None:
            _preds = logits.detach().cpu().numpy()
        else:
            batch_predictions = logits.detach().cpu().numpy()
            _preds = np.append(_preds, batch_predictions, axis=0)
torch.save(_preds, os.path.join(args.data_home, 'predictions', 'LB2_.distilBERT.tensor'))

HBox(children=(IntProgress(value=0, description='<msmarco_dataset.MsMarcoDataset object at 0x7f8901022f28> Dat…

NameError: name 'data_dir' is not defined

In [87]:
len(_preds)

179284

In [108]:
# load QL scores and normalize
from collections import defaultdict
ql_scores = defaultdict(lambda:[])
ordered_topics = []
scores_per_topic = defaultdict(lambda:[])
QL_run_file = "/ssd2/arthur/TREC2019/data/runs/LB2.run"
last_topic = None
normalized_scores = []
with open(QL_run_file, 'r') as inf:
    for counter, line in tqdm(enumerate(inf), desc="reading run file", total=len(dataset)):
        [topic_id, _, doc_id, _, score, _] = line.split()
        if topic_id not in ordered_topics:
            ordered_topics.append(topic_id)
        scores_per_topic[topic_id].append((doc_id, score))
assert sum([len(scores_per_topic[x]) for x in scores_per_topic]) == len(_preds)
#normalize
for _id in tqdm(scores_per_topic, desc="normalizing"):
    _scores = np.asarray([float(x[1]) for x in scores_per_topic[_id]])
    normalized_scores = (_scores - np.min(_scores))/np.ptp(_scores)
    for (did, _), score in zip(scores_per_topic[_id], normalized_scores):
        guid = "{}-{}".format(_id, did)
        ql_scores[guid] = score

HBox(children=(IntProgress(value=0, description='normalizing', max=319, style=ProgressStyle(description_width=…

In [122]:
alphas = [0.85, 1.0]
runs_format = "{} Q0 {} {} {} DISTILBERT_QL\n" #topic_id, doc_id, ranking, score
ql_run_file = os.path.join("/ssd2/arthur/TREC2019/data/runs/LB2.run")
preds = softmax(torch.as_tensor(_preds))[:,0].cpu().numpy()
for alpha in alphas:
    beta = 1-alpha
    out_run_file = os.path.join("/ssd2/arthur/TREC2019/data/runs/LB2_distilBert-{}.run".format(alpha))
    topic_results = [] 
    last_topic = -1
    with open(ql_run_file, 'r') as inf, open(out_run_file, 'w') as outf:
        for counter, (example, score) in enumerate(zip(inf, preds)):
            topic_id, _, doc_id, _, _, _ = example.split()
            guid = "{}-{}".format(topic_id, doc_id)
            if topic_id != last_topic and len(topic_results) > 0:
                topic_results.sort(key=lambda x:x['score'], reverse=True)
                for rank, topic in enumerate(topic_results):
                    outf.write(runs_format.format(topic['topic_id'], topic['doc_id'], rank, topic['score']))
                topic_results = []
            topic_results.append({'topic_id':topic_id, 'doc_id':doc_id, 'score':alpha*score + beta*ql_scores[guid]})
            last_topic = topic_id
        topic_results.sort(key=lambda x:x['score'], reverse=True)
        for rank, topic in enumerate(topic_results):
            outf.write(runs_format.format(topic['topic_id'], topic['doc_id'], rank, topic['score']))

In [124]:
alphas = [0.0, 0.85, 1.0]
for alpha in alphas:
    LB2_scores = {}
    run_file = "/ssd2/arthur/TREC2019/data/runs/LB2_distilBert-{}.run".format(alpha)
    for line in open(run_file):
        topic_id, _, doc_id, _, score, _ = line.split()
        pair_id = "{}-{}".format(topic_id, doc_id)
        LB2_scores[pair_id] = float(score)
    dataset_path = os.path.join(args.data_home, "diagnostics/LB2-instances")
    dataset = pickle.load(open(dataset_path, 'rb'))
    agreements = 0
    for topic_id, di_id, dj_id in dataset:
        guid1 = f"{topic_id}-{di_id}"
        guid2 = f"{topic_id}-{dj_id}"
        if LB2_scores[guid1] > LB2_scores[guid2]:
            agreements+=1
    print(alpha, len(dataset), agreements, agreements / len(dataset))

0.0 89642 84167 0.9389237187925303
0.85 89642 78655 0.8774346846344347
1.0 89642 43679 0.48726043595635976


In [42]:
tokenized_queries = {}
for line in tqdm(open(queries_file), total=1558):
    q_id, query_text = line.strip().split("\t")
    tokenized_query = tokenizer.tokenize(query_text)
    tokenized_queries[q_id] = tokenized_query


HBox(children=(IntProgress(value=0, max=1558), HTML(value='')))

In [84]:
#LB1
instances = pickle.load(open("/ssd2/arthur/TREC2019/data/diagnostics/LB1-instances", 'rb'))
buffer = ""
with open("/ssd2/arthur/TREC2019/data/triples-tokenized/LB1", 'w') as outf:
    for counter, line in tqdm(enumerate(open("/ssd2/arthur/TREC2019/data/queries/LB1.indriparam2")), total=10730450):
        if "</query>" in line:
            topic_id, dj, di = buffer.split("<number>")[1].split("</")[0].split("-")
            query = tokenizer.tokenize(buffer.split("#combine(")[1].split(")<")[0])
            pair_id = buffer.split("<number>")[1].split("</")[0]
            di_doc = all_docs[di]
            di_triple = text_to_tokens(query, di_doc)
            outf.write("{}\t{}\n".format(pair_id, di_triple))
            
            dj = buffer.split("Docno>")[3].split("</")[0]
            dj_doc = all_docs[di]
            dj_triple = text_to_tokens(query, dj_doc)
            outf.write("{}\t{}\n".format(pair_id, dj_triple))
            buffer = ""
        else:
            buffer += line

HBox(children=(IntProgress(value=0, max=10730450), HTML(value='')))

In [92]:
from pytorch_transformers import DistilBertForSequenceClassification
model = DistilBertForSequenceClassification.from_pretrained("/ssd2/arthur/TREC2019/data/models/distilbert-cut/")
batch_size = 1024
from msmarco_dataset import MsMarcoDataset
from torch.utils.data import DataLoader
# dataset = MsMarcoDataset("/ssd2/arthur/TREC2019/data/triples-tokenized/LB1", args.data_home, distil=True, invert_label=True, labeled=False)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

HBox(children=(IntProgress(value=1, bar_style='info', description='Counting lines on file...', max=1, style=Pr…

In [93]:
import torch
seed = 42
gpus_to_use = [0, 1, 3, 5, 6]
torch.cuda.manual_seed_all(seed)
n_gpu = torch.cuda.device_count()
device = torch.device("cuda" if (torch.cuda.is_available() and n_gpu > 0) else "cpu")
model = torch.nn.DataParallel(model, device_ids=gpus_to_use)
model = model.to(device)

In [94]:
import torch
softmax = torch.nn.Softmax(dim=1)

preds = {}
eval_loss = 0.0
nb_eval_steps = 0
preds[dataset] = None
out_label_ids = 0
_preds = None
for index, batch in tqdm(enumerate(dataloader), desc="{} Dataset".format(dataset), total = len(dataloader)):
    model.eval()
    with torch.no_grad():
        inputs = {'input_ids': batch[0].to(device),
                  'attention_mask': batch[1].to(device)}
        outputs = model(**inputs)
        logits = outputs[0]
        if _preds is None:
            _preds = logits.detach().cpu().numpy()
        else:
            batch_predictions = logits.detach().cpu().numpy()
            _preds = np.append(_preds, batch_predictions, axis=0)
torch.save(_preds, os.path.join(args.data_home, 'predictions', 'LB1_.distilBERT.tensor'))

HBox(children=(IntProgress(value=0, description='<msmarco_dataset.MsMarcoDataset object at 0x7f67096b6dd8> Dat…

In [13]:
import torch
_preds = torch.load(os.path.join(args.data_home, 'predictions', 'LB1_.distilBERT.tensor'))

In [11]:
batch_size = 1024
from msmarco_dataset import MsMarcoDataset
from torch.utils.data import DataLoader
dataset = MsMarcoDataset("/ssd2/arthur/TREC2019/data/triples-tokenized/LB1", args.data_home, distil=True, invert_label=True, labeled=False)
# dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

HBox(children=(IntProgress(value=1, bar_style='info', description='Counting lines on file...', max=1, style=Pr…




In [24]:
from collections import defaultdict
ql_scores = defaultdict(lambda:[])
ordered_topics = []
scores_per_topic = defaultdict(lambda:[])
QL_run_file = "/ssd2/arthur/TREC2019/data/runs/LB1.run2"
last_topic = None
normalized_scores = []
# pre-load keys. Will make the growth fase of the dictionary faster! (Nice tip!)
# all_keys = list()
# for line in tqdm(open(QL_run_file, 'r'), total=len(dataset)):
#     all_keys.append(line.split()[0])

    
# all_keys = set(all_keys)  

# scores_per_topic = dict.fromkeys(all_keys)
with open(QL_run_file, 'r') as inf:
    for counter, line in tqdm(enumerate(inf), desc="reading run file", total=len(dataset)):
        [topic_id, _, doc_id, _, score, _] = line.split()
        if topic_id not in scores_per_topic:
            ordered_topics.append(topic_id)
        try:
            scores_per_topic[topic_id].append((doc_id, score))
        except:
            scores_per_topic[topic_id] = [] 
            scores_per_topic[topic_id].append((doc_id, score))
assert sum([len(scores_per_topic[x]) for x in scores_per_topic]) == len(_preds)
# normalize
for _id in tqdm(scores_per_topic, desc="normalizing"):
    _scores = np.asarray([float(x[1]) for x in scores_per_topic[_id]])
    normalized_scores = (_scores - np.min(_scores))/np.ptp(_scores)
    for (did, _), score in zip(scores_per_topic[_id], normalized_scores):
        guid = "{}-{}".format(_id, did)
        ql_scores[guid] = score

HBox(children=(IntProgress(value=0, description='reading run file', max=3576816, style=ProgressStyle(descripti…

HBox(children=(IntProgress(value=0, description='normalizing', max=1788408, style=ProgressStyle(description_wi…



In [35]:
import numpy as np
np.arange(0, 1, 0.05)

array([0.  , 0.05, 0.1 , 0.15, 0.2 , 0.25, 0.3 , 0.35, 0.4 , 0.45, 0.5 ,
       0.55, 0.6 , 0.65, 0.7 , 0.75, 0.8 , 0.85, 0.9 , 0.95])

In [39]:
import torch
softmax = torch.nn.Softmax(dim=1)

alphas = np.arange(0.95, 1.05, 0.05)
runs_format = "{} Q0 {} {} {} DISTILBERT_QL\n" #topic_id, doc_id, ranking, score
ql_run_file = os.path.join("/ssd2/arthur/TREC2019/data/runs/LB1.run2")
preds = softmax(torch.as_tensor(_preds))[:,0].cpu().numpy()
for alpha in alphas:
    print(alpha)
    beta = 1-alpha
    out_run_file = os.path.join("/ssd2/arthur/TREC2019/data/runs/LB1_distilBert-{}.run".format(alpha))
    topic_results = [] 
    last_topic = -1
    with open(ql_run_file, 'r') as inf, open(out_run_file, 'w') as outf:
        for counter, (example, score) in enumerate(zip(inf, preds)):
            topic_id, _, doc_id, _, _, _ = example.split()
            guid = "{}-{}".format(topic_id, doc_id)
            if topic_id != last_topic and len(topic_results) > 0:
                topic_results.sort(key=lambda x:x['score'], reverse=True)
                for rank, topic in enumerate(topic_results):
                    outf.write(runs_format.format(topic['topic_id'], topic['doc_id'], rank, topic['score']))
                topic_results = []
            topic_results.append({'topic_id':topic_id, 'doc_id':doc_id, 'score':alpha*score + beta*ql_scores[guid]})
            last_topic = topic_id
        topic_results.sort(key=lambda x:x['score'], reverse=True)
        for rank, topic in enumerate(topic_results):
            outf.write(runs_format.format(topic['topic_id'], topic['doc_id'], rank, topic['score']))

0.95
1.0
1.05


In [54]:
alphas = [1.0]
for alpha in alphas:
    LB1_scores = {}
    run_file = "/ssd2/arthur/TREC2019/data/runs/LB1_distilBert-{}.run".format(alpha)
    for line in open(run_file):
        topic_id, _, doc_id, _, score, _ = line.split()
        pair_id = "{}-{}".format(topic_id, doc_id)
        LB1_scores[pair_id] = float(score)
    dataset_path = os.path.join(args.data_home, "diagnostics/LB1-instances")
    dataset = pickle.load(open(dataset_path, 'rb'))
    agreements = 0
    for topic_id, di_id, dj_id in dataset:
        guid1 = f"{topic_id}-{di_id}"
        guid2 = f"{topic_id}-{dj_id}"
        print(LB1_scores[guid1], LB1_scores[guid2])
        if LB1_scores[guid1] >= LB1_scores[guid2]:
            agreements+=1
    print(alpha, len(dataset), agreements, agreements / len(dataset))

0.0005734225851483643 0.0005734225851483643
0.1915506273508072 0.1915506273508072
0.0006296511855907738 0.0006296511855907738
0.006296343170106411 0.006296343170106411
0.0007116979104466736 0.0007116979104466736
2.4944336473708972e-05 2.4944336473708972e-05
0.0003654638712760061 0.0003654638712760061
0.0022488024551421404 0.0022488024551421404
0.0003014164976775646 0.0003014164976775646
0.00042772843153215945 0.00042772843153215945
0.0004893834120593965 0.0004893834120593965
0.4114294648170471 0.4114294648170471
0.006836877670139074 0.006836877670139074
0.00012900662841275334 0.00012900662841275334
0.003710501827299595 0.003710501827299595
6.533959094667807e-05 6.533959094667807e-05
0.005533524323254824 0.005533524323254824
0.04466388002038002 0.04466388002038002
0.014779866673052311 0.014779866673052311
0.00021240560454316437 0.00021240560454316437
0.004494111053645611 0.004494111053645611
9.989992395276204e-05 9.989992395276204e-05
0.003169981762766838 0.003169981762766838
0.00151615

0.14539475739002228 0.14539475739002228
0.030954310670495033 0.030954310670495033
0.1756039261817932 0.1756039261817932
0.1876656860113144 0.1876656860113144
0.1922445148229599 0.1922445148229599
0.03548819199204445 0.03548819199204445
0.1381203681230545 0.1381203681230545
0.09593021124601364 0.09593021124601364
0.19532907009124756 0.19532907009124756
0.20348988473415375 0.20348988473415375
0.11936572194099426 0.11936572194099426
0.08674150705337524 0.08674150705337524
0.12443513423204422 0.12443513423204422
0.07764073461294174 0.07764073461294174
0.17207340896129608 0.17207340896129608
0.03647048398852348 0.03647048398852348
0.13929219543933868 0.13929219543933868
0.1273449808359146 0.1273449808359146
0.17423732578754425 0.17423732578754425
0.006424020044505596 0.006424020044505596
0.1416301429271698 0.1416301429271698
0.1072847917675972 0.1072847917675972
0.21081259846687317 0.21081259846687317
0.23243048787117004 0.23243048787117004
0.17366336286067963 0.17366336286067963
0.04138884

0.00029521287069655955 0.00029521287069655955
0.0017964928410947323 0.0017964928410947323
0.00019997914205305278 0.00019997914205305278
0.0013807839713990688 0.0013807839713990688
0.00011087559687439352 0.00011087559687439352
0.00035705266054719687 0.00035705266054719687
0.0002388746797805652 0.0002388746797805652
0.004821377340704203 0.004821377340704203
0.00021862174617126584 0.00021862174617126584
0.0021990956738591194 0.0021990956738591194
0.00016579624207224697 0.00016579624207224697
0.0018329123267903924 0.0018329123267903924
0.0011490547331050038 0.0011490547331050038
0.0021907715126872063 0.0021907715126872063
0.0010061076609417796 0.0010061076609417796
0.0007535050390288234 0.0007535050390288234
0.00014779462071601301 0.00014779462071601301
0.003763108979910612 0.003763108979910612
8.772790897637606e-05 8.772790897637606e-05
0.0012801679549738765 0.0012801679549738765
3.798388934228569e-05 3.798388934228569e-05
0.00035961094545200467 0.00035961094545200467
0.000415040907682850

0.0010052722645923495 0.0010052722645923495
0.0006513610132969916 0.0006513610132969916
0.0016779620200395584 0.0016779620200395584
0.0006513610132969916 0.0006513610132969916
0.0003921438183169812 0.0003921438183169812
0.003694778075441718 0.003694778075441718
0.0002948421170003712 0.0002948421170003712
0.0014280001632869244 0.0014280001632869244
0.0002033612399827689 0.0002033612399827689
0.0004314585239626467 0.0004314585239626467
0.0006938741425983608 0.0006938741425983608
0.0015732066240161657 0.0015732066240161657
0.003941099159419537 0.003941099159419537
0.0019900857005268335 0.0019900857005268335
6.981768819969147e-05 6.981768819969147e-05
0.00239932956174016 0.00239932956174016
0.0014587575569748878 0.0014587575569748878
0.0017865365371108055 0.0017865365371108055
0.0017506685107946396 0.0017506685107946396
0.003064302494749427 0.003064302494749427
0.0005876405048184097 0.0005876405048184097
0.000971702509559691 0.000971702509559691
5.986703035887331e-05 5.986703035887331e-05


8.590646757511422e-05 8.590646757511422e-05
0.0008851105812937021 0.0008851105812937021
0.0011094613000750542 0.0011094613000750542
0.0003270354063715786 0.0003270354063715786
0.015566809102892876 0.015566809102892876
0.0001426610251655802 0.0001426610251655802
6.420929275918752e-05 6.420929275918752e-05
0.00072383665246889 0.00072383665246889
5.772812073701061e-05 5.772812073701061e-05
0.0001259857672266662 0.0001259857672266662
6.497467256849632e-05 6.497467256849632e-05
0.00012166143278591335 0.00012166143278591335
0.0006681226659566164 0.0006681226659566164
0.000680830969940871 0.000680830969940871
2.3067472284310497e-05 2.3067472284310497e-05
0.00016206511645577848 0.00016206511645577848
4.172677654423751e-05 4.172677654423751e-05
0.00023693867842666805 0.00023693867842666805
4.4323991460260004e-05 4.4323991460260004e-05
0.00032193431979976594 0.00032193431979976594
0.0012122668558731675 0.0012122668558731675
5.2370749472174793e-05 5.2370749472174793e-05
7.811729301465675e-05 7.81

0.009173016063869 0.009173016063869
0.0998319461941719 0.0998319461941719
0.0046178363263607025 0.0046178363263607025
0.48409053683280945 0.48409053683280945
0.005457581486552954 0.005457581486552954
0.17780545353889465 0.17780545353889465
0.005569399334490299 0.005569399334490299
0.35233819484710693 0.35233819484710693
0.007453475147485733 0.007453475147485733
0.0068301912397146225 0.0068301912397146225
0.006755514536052942 0.006755514536052942
0.5002090334892273 0.5002090334892273
0.011842881329357624 0.011842881329357624
0.010847670957446098 0.010847670957446098
0.009906507097184658 0.009906507097184658
0.0005423987749963999 0.0005423987749963999
0.005703178234398365 0.005703178234398365
0.3933117687702179 0.3933117687702179
0.001449703355319798 0.001449703355319798
0.014520492404699326 0.014520492404699326
0.0020263311453163624 0.0020263311453163624
0.5423069000244141 0.5423069000244141
0.0024941794108599424 0.0024941794108599424
0.24367980659008026 0.24367980659008026
0.0038889683

0.104485884308815 0.104485884308815
0.4863874614238739 0.4863874614238739
0.4451287090778351 0.4451287090778351
0.42819085717201233 0.42819085717201233
0.2310725897550583 0.2310725897550583
0.5039563179016113 0.5039563179016113
0.4759666621685028 0.4759666621685028
0.00969229731708765 0.00969229731708765
0.1656220406293869 0.1656220406293869
0.06208205968141556 0.06208205968141556
0.00398462126031518 0.00398462126031518
0.08701169490814209 0.08701169490814209
0.1466173529624939 0.1466173529624939
0.13790009915828705 0.13790009915828705
0.03843294084072113 0.03843294084072113
0.09343109279870987 0.09343109279870987
0.0011426334967836738 0.0011426334967836738
0.025870565325021744 0.025870565325021744
0.17215904593467712 0.17215904593467712
0.03134133294224739 0.03134133294224739
0.002227925229817629 0.002227925229817629
0.11571202427148819 0.11571202427148819
0.47608911991119385 0.47608911991119385
0.04579399898648262 0.04579399898648262
0.007333722896873951 0.007333722896873951
0.020430

0.03428466245532036 0.03428466245532036
0.010629549622535706 0.010629549622535706
0.006079227663576603 0.006079227663576603
0.006774491630494595 0.006774491630494595
0.037659987807273865 0.037659987807273865
0.029407421126961708 0.029407421126961708
0.006550010293722153 0.006550010293722153
0.001366717740893364 0.001366717740893364
0.012393299490213394 0.012393299490213394
0.10055766999721527 0.10055766999721527
0.01897786185145378 0.01897786185145378
0.0016991395968943834 0.0016991395968943834
0.013631667010486126 0.013631667010486126
0.003684768918901682 0.003684768918901682
0.017390431836247444 0.017390431836247444
0.006024475675076246 0.006024475675076246
0.007065795361995697 0.007065795361995697
0.052322160452604294 0.052322160452604294
0.0066724601201713085 0.0066724601201713085
0.005066835321485996 0.005066835321485996
0.005981827154755592 0.005981827154755592
0.04016762226819992 0.04016762226819992
0.015741653740406036 0.015741653740406036
0.004281992558389902 0.004281992558389

0.002576724160462618 0.002576724160462618
0.012796849012374878 0.012796849012374878
0.01698177680373192 0.01698177680373192
0.03595706447958946 0.03595706447958946
0.0072621069848537445 0.0072621069848537445
0.0019584582187235355 0.0019584582187235355
0.014725424349308014 0.014725424349308014
0.09863515198230743 0.09863515198230743
0.005908525083214045 0.005908525083214045
0.022479349747300148 0.022479349747300148
0.013377810828387737 0.013377810828387737
0.0029313622508198023 0.0029313622508198023
0.0330650694668293 0.0330650694668293
0.02195662260055542 0.02195662260055542
0.049528222531080246 0.049528222531080246
0.002361044054850936 0.002361044054850936
0.01098930649459362 0.01098930649459362
0.012749209068715572 0.012749209068715572
0.0015957761788740754 0.0015957761788740754
0.03513813018798828 0.03513813018798828
0.017534183338284492 0.017534183338284492
0.012837899848818779 0.012837899848818779
0.012785877101123333 0.012785877101123333
0.0011143834562972188 0.001114383456297218

0.0018670274876058102 0.0018670274876058102
0.018644016236066818 0.018644016236066818
0.004577923566102982 0.004577923566102982
0.001806649030186236 0.001806649030186236
0.001012176158837974 0.001012176158837974
0.0006549031822942197 0.0006549031822942197
0.0003214291937183589 0.0003214291937183589
0.002229672856628895 0.002229672856628895
0.0012328751618042588 0.0012328751618042588
1.7486676370026544e-05 1.7486676370026544e-05
0.0004409718094393611 0.0004409718094393611
0.001619679038412869 0.001619679038412869
0.0006865891627967358 0.0006865891627967358
0.006381944287568331 0.006381944287568331
0.0014117277460172772 0.0014117277460172772
0.00011568865011213347 0.00011568865011213347
0.0012297115754336119 0.0012297115754336119
0.00011613443348323926 0.00011613443348323926
0.000413521658629179 0.000413521658629179
0.0012110539246350527 0.0012110539246350527
0.0006812853971496224 0.0006812853971496224
0.0002942827413789928 0.0002942827413789928
0.0007954020402394235 0.000795402040239423

0.026337895542383194 0.026337895542383194
0.006848718971014023 0.006848718971014023
0.019754115492105484 0.019754115492105484
0.050282206386327744 0.050282206386327744
0.005275392439216375 0.005275392439216375
0.0013489807024598122 0.0013489807024598122
0.0022733458317816257 0.0022733458317816257
0.005020749289542437 0.005020749289542437
0.0002137336414307356 0.0002137336414307356
8.141824946505949e-05 8.141824946505949e-05
0.0009524131310172379 0.0009524131310172379
0.007031164597719908 0.007031164597719908
0.0014398344792425632 0.0014398344792425632
0.0014140058774501085 0.0014140058774501085
0.02653517946600914 0.02653517946600914
0.001288295374251902 0.001288295374251902
0.00038829425466246903 0.00038829425466246903
0.002934124320745468 0.002934124320745468
0.0006909133517183363 0.0006909133517183363
0.014485584571957588 0.014485584571957588
0.0019063125364482403 0.0019063125364482403
0.10193639248609543 0.10193639248609543
0.0035232643131166697 0.0035232643131166697
0.011607035994

0.008749788627028465 0.008749788627028465
0.005089601967483759 0.005089601967483759
0.017939290031790733 0.017939290031790733
0.007789228111505508 0.007789228111505508
0.0019342907471582294 0.0019342907471582294
0.008520514704287052 0.008520514704287052
0.0006703262333758175 0.0006703262333758175
0.01071853470057249 0.01071853470057249
0.014660390093922615 0.014660390093922615
0.015036399476230145 0.015036399476230145
0.024467861279845238 0.024467861279845238
0.009496754966676235 0.009496754966676235
0.0007089194259606302 0.0007089194259606302
0.004627440124750137 0.004627440124750137
0.00023672508541494608 0.00023672508541494608
0.0024958003778010607 0.0024958003778010607
0.0030529017094522715 0.0030529017094522715
0.0020956702064722776 0.0020956702064722776
0.0012633234728127718 0.0012633234728127718
0.0014575410168617964 0.0014575410168617964
0.0005296438466757536 0.0005296438466757536
0.000780274101998657 0.000780274101998657
0.005131383892148733 0.005131383892148733
0.001614205073

0.024532049894332886 0.024532049894332886
0.0014466267311945558 0.0014466267311945558
0.015592009760439396 0.015592009760439396
0.0006659034406766295 0.0006659034406766295
0.31783705949783325 0.31783705949783325
0.002737930044531822 0.002737930044531822
0.09408494830131531 0.09408494830131531
0.0016504963859915733 0.0016504963859915733
0.00261864741332829 0.00261864741332829
0.0009849501075223088 0.0009849501075223088
0.011211042292416096 0.011211042292416096
0.0020201902370899916 0.0020201902370899916
0.0030207287054508924 0.0030207287054508924
0.0036278972402215004 0.0036278972402215004
0.022096751257777214 0.022096751257777214
0.0032169397454708815 0.0032169397454708815
0.19692069292068481 0.19692069292068481
0.002172463107854128 0.002172463107854128
0.0009982093470171094 0.0009982093470171094
0.003575181122869253 0.003575181122869253
0.020537136122584343 0.020537136122584343
0.0009569172980263829 0.0009569172980263829
0.222529798746109 0.222529798746109
0.0012500350130721927 0.0012

1.5412702850881033e-05 1.5412702850881033e-05
0.02808145247399807 0.02808145247399807
7.003733480814844e-05 7.003733480814844e-05
0.01847522146999836 0.01847522146999836
0.00575668690726161 0.00575668690726161
0.004197605419903994 0.004197605419903994
8.747671381570399e-05 8.747671381570399e-05
0.02883630059659481 0.02883630059659481
1.0912927791650873e-05 1.0912927791650873e-05
0.01868082396686077 0.01868082396686077
7.247680969157955e-06 7.247680969157955e-06
0.02511150948703289 0.02511150948703289
0.0019240599358454347 0.0019240599358454347
0.0052242944948375225 0.0052242944948375225
6.963271152926609e-05 6.963271152926609e-05
0.02465413510799408 0.02465413510799408
4.4354987039696425e-05 4.4354987039696425e-05
0.02022840641438961 0.02022840641438961
4.472756700124592e-06 4.472756700124592e-06
0.03901178762316704 0.03901178762316704
4.9767048039939255e-05 4.9767048039939255e-05
0.014843380078673363 0.014843380078673363
0.00772456917911768 0.00772456917911768
0.01755484752357006 0.01

0.027328573167324066 0.027328573167324066
3.7900965708104195e-06 3.7900965708104195e-06
0.006873754318803549 0.006873754318803549
3.907003247149987e-06 3.907003247149987e-06
0.01598227769136429 0.01598227769136429
4.006121798738604e-06 4.006121798738604e-06
0.08448879420757294 0.08448879420757294
4.0145087041310035e-06 4.0145087041310035e-06
0.03231436759233475 0.03231436759233475
3.902318894688506e-06 3.902318894688506e-06
0.017487479373812675 0.017487479373812675
3.9418960113835055e-06 3.9418960113835055e-06
0.02696874924004078 0.02696874924004078
4.012216322735185e-06 4.012216322735185e-06
0.058658670634031296 0.058658670634031296
4.009099029644858e-06 4.009099029644858e-06
0.006576729938387871 0.006576729938387871
3.717162371685845e-06 3.717162371685845e-06
0.16979913413524628 0.16979913413524628
3.927434136130614e-06 3.927434136130614e-06
0.01940707489848137 0.01940707489848137
3.966422355006216e-06 3.966422355006216e-06
0.12698641419410706 0.12698641419410706
3.851486326311715e-0

0.029814306646585464 0.029814306646585464
0.0004617229278665036 0.0004617229278665036
0.01572011597454548 0.01572011597454548
0.4477636218070984 0.4477636218070984
0.012420374900102615 0.012420374900102615
0.00020006705017294735 0.00020006705017294735
0.0034794670064002275 0.0034794670064002275
0.00017411430599167943 0.00017411430599167943
0.010024793446063995 0.010024793446063995
0.013443968258798122 0.013443968258798122
0.005910716485232115 0.005910716485232115
0.44985783100128174 0.44985783100128174
0.004550470504909754 0.004550470504909754
0.7745091319084167 0.7745091319084167
0.005573298782110214 0.005573298782110214
0.00016803362814243883 0.00016803362814243883
0.010759791359305382 0.010759791359305382
0.0013486344832926989 0.0013486344832926989
0.00825431477278471 0.00825431477278471
1.9975941540906206e-05 1.9975941540906206e-05
0.006500280927866697 0.006500280927866697
2.140624746971298e-05 2.140624746971298e-05
0.006150476634502411 0.006150476634502411
0.0004677436372730881 0.

0.13749778270721436 0.13749778270721436
0.0005620404263027012 0.0005620404263027012
0.21559196710586548 0.21559196710586548
0.0008873465703800321 0.0008873465703800321
0.10385366529226303 0.10385366529226303
0.07668022811412811 0.07668022811412811
0.07769723236560822 0.07769723236560822
0.019376903772354126 0.019376903772354126
0.00012532198161352426 0.00012532198161352426
0.0001425391819793731 0.0001425391819793731
0.0008997742552310228 0.0008997742552310228
0.0008997605764307082 0.0008997605764307082
0.0026523680426180363 0.0026523680426180363
0.00046929490054026246 0.00046929490054026246
0.0008101364364847541 0.0008101364364847541
2.8469354219851084e-05 2.8469354219851084e-05
0.00015550637908745557 0.00015550637908745557
0.009063324891030788 0.009063324891030788
0.0003233387542422861 0.0003233387542422861
0.0005357386544346809 0.0005357386544346809
0.00028815498808398843 0.00028815498808398843
0.0005763645167462528 0.0005763645167462528
0.00016843965568114072 0.00016843965568114072


0.000328163179801777 0.000328163179801777
0.0008575724787078798 0.0008575724787078798
0.00012921936286147684 0.00012921936286147684
0.00043752286001108587 0.00043752286001108587
0.0007437270251102746 0.0007437270251102746
0.010338247753679752 0.010338247753679752
0.0006392015493474901 0.0006392015493474901
0.012296956963837147 0.012296956963837147
4.382145561976358e-05 4.382145561976358e-05
0.010346083901822567 0.010346083901822567
0.0001013205765048042 0.0001013205765048042
0.00021194537112023681 0.00021194537112023681
0.0005417626816779375 0.0005417626816779375
0.001760728657245636 0.001760728657245636
7.748237840132788e-05 7.748237840132788e-05
0.010950830765068531 0.010950830765068531
0.0005718022002838552 0.0005718022002838552
0.0024679629132151604 0.0024679629132151604
0.00010448297689436004 0.00010448297689436004
0.0010480438359081745 0.0010480438359081745
3.382568684173748e-05 3.382568684173748e-05
0.0006951363757252693 0.0006951363757252693
0.00020776025485247374 0.00020776025

0.00011085086589446291 0.00011085086589446291
0.0005324018420651555 0.0005324018420651555
0.13532133400440216 0.13532133400440216
0.0001222856662934646 0.0001222856662934646
0.008689858950674534 0.008689858950674534
0.004121891222894192 0.004121891222894192
0.1738719344139099 0.1738719344139099
5.724851234845119e-06 5.724851234845119e-06
0.023417484015226364 0.023417484015226364
0.002752647502347827 0.002752647502347827
0.08454424887895584 0.08454424887895584
0.000766125216614455 0.000766125216614455
0.23854787647724152 0.23854787647724152
0.059647344052791595 0.059647344052791595
0.19284455478191376 0.19284455478191376
0.00024082073650788516 0.00024082073650788516
0.04778458923101425 0.04778458923101425
0.027907663956284523 0.027907663956284523
0.11984188109636307 0.11984188109636307
0.2912312150001526 0.2912312150001526
0.03434497490525246 0.03434497490525246
0.002074476797133684 0.002074476797133684
0.1411333680152893 0.1411333680152893
1.6451764167868532e-05 1.6451764167868532e-05


5.3682040743296966e-05 5.3682040743296966e-05
0.005013774614781141 0.005013774614781141
0.00010684139851946384 0.00010684139851946384
0.0016730272909626365 0.0016730272909626365
5.254348070593551e-05 5.254348070593551e-05
0.0036927214823663235 0.0036927214823663235
0.0005762628861702979 0.0005762628861702979
0.004209053702652454 0.004209053702652454
0.054209522902965546 0.054209522902965546
0.010112839750945568 0.010112839750945568
0.0004328171198721975 0.0004328171198721975
0.022779516875743866 0.022779516875743866
0.002642776118591428 0.002642776118591428
0.001691250829026103 0.001691250829026103
0.009363504126667976 0.009363504126667976
0.00433409633114934 0.00433409633114934
0.00018447912589181215 0.00018447912589181215
0.0002701093617361039 0.0002701093617361039
0.00026373370201326907 0.00026373370201326907
0.01850304566323757 0.01850304566323757
0.001456056721508503 0.001456056721508503
0.0013868646929040551 0.0013868646929040551
7.065247336868197e-05 7.065247336868197e-05
0.0034

0.0011441735550761223 0.0011441735550761223
0.0013093699235469103 0.0013093699235469103
0.0013463373761624098 0.0013463373761624098
0.013584340922534466 0.013584340922534466
0.004901487845927477 0.004901487845927477
0.015784619376063347 0.015784619376063347
0.0009462752495892346 0.0009462752495892346
0.009614141657948494 0.009614141657948494
0.021688686683773994 0.021688686683773994
0.0037292055785655975 0.0037292055785655975
0.002559442538768053 0.002559442538768053
0.026012953370809555 0.026012953370809555
0.00020690445671789348 0.00020690445671789348
0.015061680227518082 0.015061680227518082
0.00234389491379261 0.00234389491379261
0.0038298103027045727 0.0038298103027045727
5.107239121571183e-05 5.107239121571183e-05
0.021846352145075798 0.021846352145075798
0.0012366187293082476 0.0012366187293082476
0.0062012686394155025 0.0062012686394155025
0.006779305636882782 0.006779305636882782
0.02701840177178383 0.02701840177178383
0.022255197167396545 0.022255197167396545
0.01869851537048

0.003577758790925145 0.003577758790925145
3.574551737983711e-05 3.574551737983711e-05
0.10695341974496841 0.10695341974496841
7.111135346349329e-05 7.111135346349329e-05
0.06603048741817474 0.06603048741817474
6.487691280199215e-05 6.487691280199215e-05
8.29437849461101e-05 8.29437849461101e-05
5.188389332033694e-05 5.188389332033694e-05
8.71513329911977e-05 8.71513329911977e-05
4.6044991904636845e-05 4.6044991904636845e-05
2.3051881726132706e-05 2.3051881726132706e-05
4.608887684298679e-05 4.608887684298679e-05
0.00011069684842368588 0.00011069684842368588
6.529841630253941e-05 6.529841630253941e-05
0.0006679787184111774 0.0006679787184111774
5.32830017618835e-05 5.32830017618835e-05
6.258772918954492e-05 6.258772918954492e-05
0.0014928954187780619 0.0014928954187780619
3.177232792950235e-05 3.177232792950235e-05
0.0016092124860733747 0.0016092124860733747
7.360923336818814e-05 7.360923336818814e-05
0.0015627667307853699 0.0015627667307853699
0.000554066791664809 0.000554066791664809


0.02200796827673912 0.02200796827673912
0.000486643984913826 0.000486643984913826
4.283365706214681e-05 4.283365706214681e-05
0.0005883763078600168 0.0005883763078600168
0.004802153445780277 0.004802153445780277
0.0005204966873861849 0.0005204966873861849
0.032950740307569504 0.032950740307569504
0.00024283638049382716 0.00024283638049382716
0.0028974474407732487 0.0028974474407732487
0.00014704762725159526 0.00014704762725159526
0.0002818286884576082 0.0002818286884576082
0.000420729978941381 0.000420729978941381
0.004875498358160257 0.004875498358160257
0.00016778417921159416 0.00016778417921159416
0.0007666872115805745 0.0007666872115805745
0.00031559242052026093 0.00031559242052026093
0.01224509160965681 0.01224509160965681
0.0006508065853267908 0.0006508065853267908
0.29405951499938965 0.29405951499938965
0.00021603013738058507 0.00021603013738058507
0.0055907913483679295 0.0055907913483679295
0.00028800719883292913 0.00028800719883292913
0.00011902800906682387 0.00011902800906682

0.0007628269377164543 0.0007628269377164543
1.5785451978445053e-05 1.5785451978445053e-05
0.0007341466844081879 0.0007341466844081879
0.0002127430634573102 0.0002127430634573102
0.0009303134866058826 0.0009303134866058826
4.80125418107491e-05 4.80125418107491e-05
0.0007076052716001868 0.0007076052716001868
6.946146459085867e-05 6.946146459085867e-05
0.0008935537771321833 0.0008935537771321833
4.7013501898618415e-05 4.7013501898618415e-05
0.0004887722316198051 0.0004887722316198051
0.00015769574383739382 0.00015769574383739382
0.0005922065465711057 0.0005922065465711057
2.334933014935814e-05 2.334933014935814e-05
0.0010188266169279814 0.0010188266169279814
5.406168565968983e-05 5.406168565968983e-05
6.457736162701622e-05 6.457736162701622e-05
0.0005723304930143058 0.0005723304930143058
3.99466443923302e-05 3.99466443923302e-05
0.0015353814233094454 0.0015353814233094454
6.079124432289973e-05 6.079124432289973e-05
4.846219235332683e-05 4.846219235332683e-05
8.449218148598447e-05 8.449218

0.013178905472159386 0.013178905472159386
1.4931246369087603e-05 1.4931246369087603e-05
0.01040534395724535 0.01040534395724535
0.0001263498270418495 0.0001263498270418495
0.010179963894188404 0.010179963894188404
0.0008879259112291038 0.0008879259112291038
0.0005082822171971202 0.0005082822171971202
0.0029732058756053448 0.0029732058756053448
0.0036925075110048056 0.0036925075110048056
0.00011690005339914933 0.00011690005339914933
0.0017728405073285103 0.0017728405073285103
3.8227288314374164e-05 3.8227288314374164e-05
0.005932149011641741 0.005932149011641741
7.454756268998608e-05 7.454756268998608e-05
0.005135209299623966 0.005135209299623966
0.00935826450586319 0.00935826450586319
0.0028927673120051622 0.0028927673120051622
1.3193473932915367e-05 1.3193473932915367e-05
0.011565103195607662 0.011565103195607662
0.0007188766612671316 0.0007188766612671316
0.002993049332872033 0.002993049332872033
0.3571453392505646 0.3571453392505646
0.002559700747951865 0.002559700747951865
0.000232

0.14900065958499908 0.14900065958499908
4.512749001150951e-05 4.512749001150951e-05
0.16651804745197296 0.16651804745197296
0.0037932698614895344 0.0037932698614895344
0.14743947982788086 0.14743947982788086
7.443807135132374e-06 7.443807135132374e-06
0.0005053171189501882 0.0005053171189501882
0.0008851492893882096 0.0008851492893882096
0.0004622190026566386 0.0004622190026566386
0.007820745930075645 0.007820745930075645
0.00015828762843739241 0.00015828762843739241
0.004867703653872013 0.004867703653872013
0.0003317270602565259 0.0003317270602565259
0.0005608801729977131 0.0005608801729977131
0.0003650568251032382 0.0003650568251032382
0.0028582620434463024 0.0028582620434463024
0.0009685755940154195 0.0009685755940154195
0.0003849724307656288 0.0003849724307656288
0.0007143522961996496 0.0007143522961996496
0.000758505193516612 0.000758505193516612
0.00011797599290730432 0.00011797599290730432
0.042220260947942734 0.042220260947942734
0.0008577890694141388 0.0008577890694141388
0.00

0.00024444967857562006 0.00024444967857562006
0.00046546835801564157 0.00046546835801564157
0.0015075956471264362 0.0015075956471264362
0.0027794588822871447 0.0027794588822871447
0.0001964707626029849 0.0001964707626029849
0.0026028791908174753 0.0026028791908174753
0.0007838528254069388 0.0007838528254069388
0.0019946452230215073 0.0019946452230215073
0.0012449963251128793 0.0012449963251128793
0.0013412665575742722 0.0013412665575742722
0.00031316111562773585 0.00031316111562773585
0.0008000977686606348 0.0008000977686606348
0.00035907962592318654 0.00035907962592318654
0.00016154826153069735 0.00016154826153069735
0.001938340486958623 0.001938340486958623
0.0013945742975920439 0.0013945742975920439
0.00011566020839381963 0.00011566020839381963
0.0017385927494615316 0.0017385927494615316
0.00016521582438144833 0.00016521582438144833
0.0005533742951229215 0.0005533742951229215
0.00022919051116332412 0.00022919051116332412
0.0011503437999635935 0.0011503437999635935
0.0001517259952379

0.0010840405011549592 0.0010840405011549592
0.001243854290805757 0.001243854290805757
7.628717867191881e-05 7.628717867191881e-05
0.0037937939632683992 0.0037937939632683992
6.698228389723226e-05 6.698228389723226e-05
0.004092562478035688 0.004092562478035688
0.0005746317328885198 0.0005746317328885198
0.0018241240177303553 0.0018241240177303553
0.0013344206381589174 0.0013344206381589174
0.0027972988318651915 0.0027972988318651915
0.19704073667526245 0.19704073667526245
0.0008122575818561018 0.0008122575818561018
9.90550106507726e-05 9.90550106507726e-05
0.0013084809761494398 0.0013084809761494398
0.1188310831785202 0.1188310831785202
0.0022535857278853655 0.0022535857278853655
0.0011147434124723077 0.0011147434124723077
0.0004338017897680402 0.0004338017897680402
0.023902175948023796 0.023902175948023796
0.003004588885232806 0.003004588885232806
0.00029811219428665936 0.00029811219428665936
0.0029657769482582808 0.0029657769482582808
0.0010138709330931306 0.0010138709330931306
0.0016

3.4210042940685526e-05 3.4210042940685526e-05
0.0001627768942853436 0.0001627768942853436
3.430079232202843e-05 3.430079232202843e-05
9.084379416890442e-05 9.084379416890442e-05
0.004148965235799551 0.004148965235799551
4.3139298213645816e-05 4.3139298213645816e-05
0.00014440270024351776 0.00014440270024351776
6.591271085198969e-05 6.591271085198969e-05
8.66886621224694e-05 8.66886621224694e-05
6.318953091977164e-05 6.318953091977164e-05
0.00032172779901884496 0.00032172779901884496
7.985098636709154e-05 7.985098636709154e-05
0.00041702386806719005 0.00041702386806719005
2.850334203685634e-05 2.850334203685634e-05
1.405546299793059e-05 1.405546299793059e-05
2.1614108845824376e-05 2.1614108845824376e-05
5.7008888688869774e-05 5.7008888688869774e-05
4.4253589294385165e-05 4.4253589294385165e-05
0.005848505534231663 0.005848505534231663
4.326264388510026e-05 4.326264388510026e-05
0.03368731215596199 0.03368731215596199
5.856663119629957e-05 5.856663119629957e-05
0.0004108798748347908 0.00

0.5807787775993347 0.5807787775993347
0.037866901606321335 0.037866901606321335
0.20490127801895142 0.20490127801895142
0.07481811940670013 0.07481811940670013
0.38685494661331177 0.38685494661331177
0.04780882969498634 0.04780882969498634
0.18067213892936707 0.18067213892936707
0.1448117196559906 0.1448117196559906
0.023038048297166824 0.023038048297166824
0.09690249711275101 0.09690249711275101
0.13072919845581055 0.13072919845581055
0.1157441958785057 0.1157441958785057
0.2707311511039734 0.2707311511039734
0.0774817019701004 0.0774817019701004
0.5887908339500427 0.5887908339500427
0.2077673226594925 0.2077673226594925
0.4605218172073364 0.4605218172073364
0.10199650377035141 0.10199650377035141
0.2704858183860779 0.2704858183860779
0.023477701470255852 0.023477701470255852
0.336325466632843 0.336325466632843
0.3784654140472412 0.3784654140472412
0.034891072660684586 0.034891072660684586
0.1650690883398056 0.1650690883398056
0.15770681202411652 0.15770681202411652
0.1044241786003112

0.015221922658383846 0.015221922658383846
0.0006432416848838329 0.0006432416848838329
0.00011916951189050451 0.00011916951189050451
0.00038773147389292717 0.00038773147389292717
0.05303335189819336 0.05303335189819336
0.0008674286655150354 0.0008674286655150354
0.00034081056946888566 0.00034081056946888566
0.0005976001848466694 0.0005976001848466694
0.01168323215097189 0.01168323215097189
0.0009250662405975163 0.0009250662405975163
0.08052187412977219 0.08052187412977219
0.009921119548380375 0.009921119548380375
0.24890154600143433 0.24890154600143433
0.026306983083486557 0.026306983083486557
0.0006816019886173308 0.0006816019886173308
0.06110655888915062 0.06110655888915062
0.01572369411587715 0.01572369411587715
0.022677293047308922 0.022677293047308922
0.0014501865953207016 0.0014501865953207016
0.007438991218805313 0.007438991218805313
0.002960620913654566 0.002960620913654566
0.01935473456978798 0.01935473456978798
0.03221660479903221 0.03221660479903221
0.012876014225184917 0.012

0.00012158280878793448 0.00012158280878793448
5.907493323320523e-05 5.907493323320523e-05
0.00018707520212046802 0.00018707520212046802
5.21255424246192e-06 5.21255424246192e-06
0.0008059273823164403 0.0008059273823164403
0.0005657117580994964 0.0005657117580994964
0.0012837982503697276 0.0012837982503697276
1.2026783224428073e-05 1.2026783224428073e-05
0.00032095948699861765 0.00032095948699861765
5.458214218378998e-06 5.458214218378998e-06
4.1036591937881894e-06 4.1036591937881894e-06
0.0001400690816808492 0.0001400690816808492
3.929393642465584e-06 3.929393642465584e-06
1.2781269106199034e-05 1.2781269106199034e-05
4.2633550947357435e-06 4.2633550947357435e-06
5.65549044040381e-06 5.65549044040381e-06
5.226367193245096e-06 5.226367193245096e-06
2.7139478333992884e-05 2.7139478333992884e-05
9.638347364671063e-06 9.638347364671063e-06
0.0006422761944122612 0.0006422761944122612
0.0045026205480098724 0.0045026205480098724
9.393918298883364e-05 9.393918298883364e-05
0.001246601808816194

KeyboardInterrupt: 

In [52]:
guid1 == guid2

False