In [1]:
# install required libraries
!pip3 install sentence-transformers

Collecting sentence-transformers
[?25l  Downloading https://files.pythonhosted.org/packages/6a/e2/84d6acfcee2d83164149778a33b6bdd1a74e1bcb59b2b2cd1b861359b339/sentence-transformers-0.4.1.2.tar.gz (64kB)
[K     |████████████████████████████████| 71kB 3.4MB/s 
[?25hCollecting transformers<5.0.0,>=3.1.0
[?25l  Downloading https://files.pythonhosted.org/packages/f9/54/5ca07ec9569d2f232f3166de5457b63943882f7950ddfcc887732fc7fb23/transformers-4.3.3-py3-none-any.whl (1.9MB)
[K     |████████████████████████████████| 1.9MB 7.2MB/s 
Collecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/f5/99/e0808cb947ba10f575839c43e8fafc9cc44e4a7a2c8f79c60db48220a577/sentencepiece-0.1.95-cp37-cp37m-manylinux2014_x86_64.whl (1.2MB)
[K     |████████████████████████████████| 1.2MB 40.8MB/s 
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/71/23/2ddc317b2121117bf34dd00f5b0de194158f2a44ee2bf5e47c7166878a97/tokenizers-0.10.1-cp37-cp37

In [2]:
!pip3 install elasticsearch

Collecting elasticsearch
[?25l  Downloading https://files.pythonhosted.org/packages/72/68/76c5d46cc6a48fddb759f585bc8728caa11bfc9b812ce6705fc5f99beab2/elasticsearch-7.11.0-py2.py3-none-any.whl (325kB)
[K     |████████████████████████████████| 327kB 5.8MB/s 
Installing collected packages: elasticsearch
Successfully installed elasticsearch-7.11.0


In [3]:
from google.colab import drive

In [4]:
# Mount Google Drive at /content/drive so files stored on Drive are reachable from this runtime.
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Make the BERT-FAQ folder on the mounted Drive the working directory;
# the relative paths used later in the notebook (data/..., output/...) are resolved from here.
%cd /content/drive/MyDrive/BERT-FAQ

/content/drive/MyDrive/BERT-FAQ


In [6]:
# List the contents of the current working directory.
!ls

data			     metric.py	  requirements.txt
evaluation.py		     notebook	  reranker.py
faq_bert_finetuning.py	     output	  searcher.py
faq_bert.py		     parser	  shared
hard_negatives_generator.py  __pycache__  training_data_generator.py
indexer.py		     README.md


In [7]:
# import required dependencies
from evaluation import get_relevance_label_df
from shared.utils import load_from_json
from shared.utils import dump_to_json
from shared.utils import make_dirs
from reranker import ReRanker

In [8]:
# Root folder for all StackFAQ ranking results (read here, written to by later cells).
output_path = "data/StackFAQ/rank_results"

# Load the user_query ES results, one JSON file per result set,
# from the "unsupervised" sub-folder.
es_output_path = f"{output_path}/unsupervised"
es_query_by_question = load_from_json(f"{es_output_path}/es_query_by_question.json")
es_query_by_answer = load_from_json(f"{es_output_path}/es_query_by_answer.json")
es_query_by_question_answer = load_from_json(f"{es_output_path}/es_query_by_question_answer.json")
es_query_by_question_answer_concat = load_from_json(f"{es_output_path}/es_query_by_question_answer_concat.json")

In [9]:
# Build the relevance-label table from the query/answer pairs file, then keep the
# distinct questions whose query_type is 'user_query'; these are the test queries
# passed to the ReRanker.
query_answer_pair_filepath = 'data/StackFAQ/query_answer_pairs.json'
relevance_label_df = get_relevance_label_df(query_answer_pair_filepath)
test_queries = (
    relevance_label_df
    .loc[relevance_label_df['query_type'] == 'user_query', 'question']
    .unique()
)

In [10]:
# Preview the first 10 test queries.
test_queries[:10]

array(['How to make font strikethrough on github.',
       'Is it possible to get  strikethrough letter formatting on github markdown.',
       'Making the text on github crossed out.',
       'Introducing stikethrough formatting on markdown for github.',
       'The <s> tag for font on github markdown doesnt work, is there an alternative?',
       'Making the letters i write on github striked through.',
       'Producing strikethrough text in github.',
       'Does github support strikethrough letters?',
       'How can I cross out my text on git hub?',
       'I want to have strikethrough text on github, is this possible?'],
      dtype=object)

In [11]:
# Total number of test queries (test_queries holds unique questions, so each is counted once).
len(test_queries)

1249

## Triplet Loss

**Re-ranking results using query_type="user_query"; neg_type="hard"**

In [None]:
# Settings that identify the model used in this section.
query_type = "user_query"
neg_type = "hard"
version = "1.1"
loss_type = 'triplet'

# The model folder is named <loss_type>_<neg_type>_<query_type>_<version>.
bert_model_path = 'output/StackFAQ/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [None]:
# Build the ReRanker for the model at bert_model_path, handing it the test
# queries and their relevance labels.
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    loss_type=loss_type,
)

In [None]:
# Results of this run go to <output_path>/supervised/<loss_type>/<query_type>/<neg_type>.
reranked_output_path = "/".join([output_path, "supervised", loss_type, query_type, neg_type])
make_dirs(reranked_output_path)

# For each set of ES results: compute the BERT top-k predictions and save them,
# then re-rank those predictions and save the re-ranked results as well.

# --- query_by_question ---
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{reranked_output_path}/bert_query_by_question.json")
reranked_query_by_question = r.get_reranked_results(bert_query_by_question)
dump_to_json(reranked_query_by_question, f"{reranked_output_path}/reranked_query_by_question.json")

# --- query_by_answer ---
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{reranked_output_path}/bert_query_by_answer.json")
reranked_query_by_answer = r.get_reranked_results(bert_query_by_answer)
dump_to_json(reranked_query_by_answer, f"{reranked_output_path}/reranked_query_by_answer.json")

# --- query_by_question_answer ---
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{reranked_output_path}/bert_query_by_question_answer.json")
reranked_query_by_question_answer = r.get_reranked_results(bert_query_by_question_answer)
dump_to_json(reranked_query_by_question_answer, f"{reranked_output_path}/reranked_query_by_question_answer.json")

# --- query_by_question_answer_concat ---
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{reranked_output_path}/bert_query_by_question_answer_concat.json")
reranked_query_by_question_answer_concat = r.get_reranked_results(bert_query_by_question_answer_concat)
dump_to_json(reranked_query_by_question_answer_concat, f"{reranked_output_path}/reranked_query_by_question_answer_concat.json")

2021-03-07 08:31:31 - Generating BERT top-k results ...
2021-03-07 08:31:31 - Load pretrained SentenceTransformer: output/StackFAQ/models/triplet_hard_user_query_1.1
2021-03-07 08:31:31 - Load SentenceTransformer from folder: output/StackFAQ/models/triplet_hard_user_query_1.1
2021-03-07 08:31:38 - Use pytorch device: cuda
100%|██████████| 1249/1249 [18:33<00:00,  1.12it/s]
2021-03-07 08:50:13 - Re-ranking the top-k results ...
2021-03-07 08:50:15 - Generating BERT top-k results ...
2021-03-07 08:50:15 - Load pretrained SentenceTransformer: output/StackFAQ/models/triplet_hard_user_query_1.1
2021-03-07 08:50:15 - Load SentenceTransformer from folder: output/StackFAQ/models/triplet_hard_user_query_1.1
2021-03-07 08:50:17 - Use pytorch device: cuda
100%|██████████| 1249/1249 [19:32<00:00,  1.07it/s]
2021-03-07 09:09:52 - Re-ranking the top-k results ...
2021-03-07 09:09:54 - Generating BERT top-k results ...
2021-03-07 09:09:54 - Load pretrained SentenceTransformer: output/StackFAQ/models/

**Re-ranking results using query_type="user_query"; neg_type="simple"**

In [None]:
# Settings that identify the model used in this section.
query_type = "user_query"
neg_type = "simple"
version = "1.1"
loss_type = 'triplet'

# The model folder is named <loss_type>_<neg_type>_<query_type>_<version>.
bert_model_path = 'output/StackFAQ/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [None]:
# Build the ReRanker for the model at bert_model_path, handing it the test
# queries and their relevance labels.
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    loss_type=loss_type,
)

In [None]:
# Results of this run go to <output_path>/supervised/<loss_type>/<query_type>/<neg_type>.
reranked_output_path = "/".join([output_path, "supervised", loss_type, query_type, neg_type])
make_dirs(reranked_output_path)

# For each set of ES results: compute the BERT top-k predictions and save them,
# then re-rank those predictions and save the re-ranked results as well.

# --- query_by_question ---
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{reranked_output_path}/bert_query_by_question.json")
reranked_query_by_question = r.get_reranked_results(bert_query_by_question)
dump_to_json(reranked_query_by_question, f"{reranked_output_path}/reranked_query_by_question.json")

# --- query_by_answer ---
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{reranked_output_path}/bert_query_by_answer.json")
reranked_query_by_answer = r.get_reranked_results(bert_query_by_answer)
dump_to_json(reranked_query_by_answer, f"{reranked_output_path}/reranked_query_by_answer.json")

# --- query_by_question_answer ---
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{reranked_output_path}/bert_query_by_question_answer.json")
reranked_query_by_question_answer = r.get_reranked_results(bert_query_by_question_answer)
dump_to_json(reranked_query_by_question_answer, f"{reranked_output_path}/reranked_query_by_question_answer.json")

# --- query_by_question_answer_concat ---
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{reranked_output_path}/bert_query_by_question_answer_concat.json")
reranked_query_by_question_answer_concat = r.get_reranked_results(bert_query_by_question_answer_concat)
dump_to_json(reranked_query_by_question_answer_concat, f"{reranked_output_path}/reranked_query_by_question_answer_concat.json")

2021-03-07 09:48:14 - Generating BERT top-k results ...
2021-03-07 09:48:14 - Load pretrained SentenceTransformer: output/StackFAQ/models/triplet_simple_user_query_1.1
2021-03-07 09:48:14 - Load SentenceTransformer from folder: output/StackFAQ/models/triplet_simple_user_query_1.1
2021-03-07 09:48:24 - Use pytorch device: cuda
100%|██████████| 1249/1249 [18:04<00:00,  1.15it/s]
2021-03-07 10:06:31 - Re-ranking the top-k results ...
2021-03-07 10:06:33 - Generating BERT top-k results ...
2021-03-07 10:06:33 - Load pretrained SentenceTransformer: output/StackFAQ/models/triplet_simple_user_query_1.1
2021-03-07 10:06:33 - Load SentenceTransformer from folder: output/StackFAQ/models/triplet_simple_user_query_1.1
2021-03-07 10:06:34 - Use pytorch device: cuda
100%|██████████| 1249/1249 [19:16<00:00,  1.08it/s]
2021-03-07 10:25:54 - Re-ranking the top-k results ...
2021-03-07 10:25:56 - Generating BERT top-k results ...
2021-03-07 10:25:56 - Load pretrained SentenceTransformer: output/StackFAQ

**Re-ranking results using query_type="faq"; neg_type="hard"**

In [None]:
# Settings that identify the model used in this section.
query_type = "faq"
neg_type = "hard"
version = "1.1"
loss_type = 'triplet'

# The model folder is named <loss_type>_<neg_type>_<query_type>_<version>.
bert_model_path = 'output/StackFAQ/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [None]:
# Build the ReRanker for the model at bert_model_path, handing it the test
# queries and their relevance labels.
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    loss_type=loss_type,
)

In [None]:
# Results of this run go to <output_path>/supervised/<loss_type>/<query_type>/<neg_type>.
reranked_output_path = "/".join([output_path, "supervised", loss_type, query_type, neg_type])
make_dirs(reranked_output_path)

# For each set of ES results: compute the BERT top-k predictions and save them,
# then re-rank those predictions and save the re-ranked results as well.

# --- query_by_question ---
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{reranked_output_path}/bert_query_by_question.json")
reranked_query_by_question = r.get_reranked_results(bert_query_by_question)
dump_to_json(reranked_query_by_question, f"{reranked_output_path}/reranked_query_by_question.json")

# --- query_by_answer ---
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{reranked_output_path}/bert_query_by_answer.json")
reranked_query_by_answer = r.get_reranked_results(bert_query_by_answer)
dump_to_json(reranked_query_by_answer, f"{reranked_output_path}/reranked_query_by_answer.json")

# --- query_by_question_answer ---
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{reranked_output_path}/bert_query_by_question_answer.json")
reranked_query_by_question_answer = r.get_reranked_results(bert_query_by_question_answer)
dump_to_json(reranked_query_by_question_answer, f"{reranked_output_path}/reranked_query_by_question_answer.json")

# --- query_by_question_answer_concat ---
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{reranked_output_path}/bert_query_by_question_answer_concat.json")
reranked_query_by_question_answer_concat = r.get_reranked_results(bert_query_by_question_answer_concat)
dump_to_json(reranked_query_by_question_answer_concat, f"{reranked_output_path}/reranked_query_by_question_answer_concat.json")

2021-03-07 11:03:54 - Generating BERT top-k results ...
2021-03-07 11:03:54 - Load pretrained SentenceTransformer: output/StackFAQ/models/triplet_hard_faq_1.1
2021-03-07 11:03:54 - Load SentenceTransformer from folder: output/StackFAQ/models/triplet_hard_faq_1.1
2021-03-07 11:04:06 - Use pytorch device: cuda
100%|██████████| 1249/1249 [18:06<00:00,  1.15it/s]
2021-03-07 11:22:14 - Re-ranking the top-k results ...
2021-03-07 11:22:17 - Generating BERT top-k results ...
2021-03-07 11:22:17 - Load pretrained SentenceTransformer: output/StackFAQ/models/triplet_hard_faq_1.1
2021-03-07 11:22:17 - Load SentenceTransformer from folder: output/StackFAQ/models/triplet_hard_faq_1.1
2021-03-07 11:22:18 - Use pytorch device: cuda
100%|██████████| 1249/1249 [19:09<00:00,  1.09it/s]
2021-03-07 11:41:30 - Re-ranking the top-k results ...
2021-03-07 11:41:32 - Generating BERT top-k results ...
2021-03-07 11:41:32 - Load pretrained SentenceTransformer: output/StackFAQ/models/triplet_hard_faq_1.1
2021-03

**Re-ranking results using query_type="faq"; neg_type="simple"**

In [None]:
# Settings that identify the model used in this section.
query_type = "faq"
neg_type = "simple"
version = "1.1"
loss_type = 'triplet'

# The model folder is named <loss_type>_<neg_type>_<query_type>_<version>.
bert_model_path = 'output/StackFAQ/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [None]:
# Build the ReRanker for the model at bert_model_path, handing it the test
# queries and their relevance labels.
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    loss_type=loss_type,
)

In [None]:
# Results of this run go to <output_path>/supervised/<loss_type>/<query_type>/<neg_type>.
reranked_output_path = "/".join([output_path, "supervised", loss_type, query_type, neg_type])
make_dirs(reranked_output_path)

# For each set of ES results: compute the BERT top-k predictions and save them,
# then re-rank those predictions and save the re-ranked results as well.

# --- query_by_question ---
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{reranked_output_path}/bert_query_by_question.json")
reranked_query_by_question = r.get_reranked_results(bert_query_by_question)
dump_to_json(reranked_query_by_question, f"{reranked_output_path}/reranked_query_by_question.json")

# --- query_by_answer ---
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{reranked_output_path}/bert_query_by_answer.json")
reranked_query_by_answer = r.get_reranked_results(bert_query_by_answer)
dump_to_json(reranked_query_by_answer, f"{reranked_output_path}/reranked_query_by_answer.json")

# --- query_by_question_answer ---
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{reranked_output_path}/bert_query_by_question_answer.json")
reranked_query_by_question_answer = r.get_reranked_results(bert_query_by_question_answer)
dump_to_json(reranked_query_by_question_answer, f"{reranked_output_path}/reranked_query_by_question_answer.json")

# --- query_by_question_answer_concat ---
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{reranked_output_path}/bert_query_by_question_answer_concat.json")
reranked_query_by_question_answer_concat = r.get_reranked_results(bert_query_by_question_answer_concat)
dump_to_json(reranked_query_by_question_answer_concat, f"{reranked_output_path}/reranked_query_by_question_answer_concat.json")

2021-03-07 12:19:26 - Generating BERT top-k results ...
2021-03-07 12:19:26 - Load pretrained SentenceTransformer: output/StackFAQ/models/triplet_simple_faq_1.1
2021-03-07 12:19:26 - Load SentenceTransformer from folder: output/StackFAQ/models/triplet_simple_faq_1.1
2021-03-07 12:19:35 - Use pytorch device: cuda
100%|██████████| 1249/1249 [18:02<00:00,  1.15it/s]
2021-03-07 12:37:40 - Re-ranking the top-k results ...
2021-03-07 12:37:42 - Generating BERT top-k results ...
2021-03-07 12:37:42 - Load pretrained SentenceTransformer: output/StackFAQ/models/triplet_simple_faq_1.1
2021-03-07 12:37:42 - Load SentenceTransformer from folder: output/StackFAQ/models/triplet_simple_faq_1.1
2021-03-07 12:37:43 - Use pytorch device: cuda
100%|██████████| 1249/1249 [19:17<00:00,  1.08it/s]
2021-03-07 12:57:03 - Re-ranking the top-k results ...
2021-03-07 12:57:06 - Generating BERT top-k results ...
2021-03-07 12:57:06 - Load pretrained SentenceTransformer: output/StackFAQ/models/triplet_simple_faq_1

## Softmax Loss

**Re-ranking results using query_type="user_query"; neg_type="hard"**

In [12]:
# Settings that identify the model used in this section.
query_type = "user_query"
neg_type = "hard"
version = "1.1"
loss_type = 'softmax'

# The model folder is named <loss_type>_<neg_type>_<query_type>_<version>.
bert_model_path = 'output/StackFAQ/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [13]:
# Build the ReRanker for the model at bert_model_path, handing it the test
# queries and their relevance labels.
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    loss_type=loss_type,
)

In [14]:
# Results of this run go to <output_path>/supervised/<loss_type>/<query_type>/<neg_type>.
reranked_output_path = "/".join([output_path, "supervised", loss_type, query_type, neg_type])
make_dirs(reranked_output_path)

# For each set of ES results: compute the BERT top-k predictions and save them,
# then re-rank those predictions and save the re-ranked results as well.

# --- query_by_question ---
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{reranked_output_path}/bert_query_by_question.json")
reranked_query_by_question = r.get_reranked_results(bert_query_by_question)
dump_to_json(reranked_query_by_question, f"{reranked_output_path}/reranked_query_by_question.json")

# --- query_by_answer ---
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{reranked_output_path}/bert_query_by_answer.json")
reranked_query_by_answer = r.get_reranked_results(bert_query_by_answer)
dump_to_json(reranked_query_by_answer, f"{reranked_output_path}/reranked_query_by_answer.json")

# --- query_by_question_answer ---
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{reranked_output_path}/bert_query_by_question_answer.json")
reranked_query_by_question_answer = r.get_reranked_results(bert_query_by_question_answer)
dump_to_json(reranked_query_by_question_answer, f"{reranked_output_path}/reranked_query_by_question_answer.json")

# --- query_by_question_answer_concat ---
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{reranked_output_path}/bert_query_by_question_answer_concat.json")
reranked_query_by_question_answer_concat = r.get_reranked_results(bert_query_by_question_answer_concat)
dump_to_json(reranked_query_by_question_answer_concat, f"{reranked_output_path}/reranked_query_by_question_answer_concat.json")

2021-03-08 17:39:14 - Generating BERT top-k results ...
2021-03-08 17:39:28 - Use pytorch device: cuda
100%|██████████| 1249/1249 [34:48<00:00,  1.67s/it]
2021-03-08 18:14:19 - Re-ranking the top-k results ...
2021-03-08 18:14:21 - Generating BERT top-k results ...
2021-03-08 18:14:24 - Use pytorch device: cuda
100%|██████████| 1249/1249 [39:40<00:00,  1.91s/it]
2021-03-08 18:54:07 - Re-ranking the top-k results ...
2021-03-08 18:54:09 - Generating BERT top-k results ...
2021-03-08 18:54:12 - Use pytorch device: cuda
100%|██████████| 1249/1249 [38:44<00:00,  1.86s/it]
2021-03-08 19:32:59 - Re-ranking the top-k results ...
2021-03-08 19:33:02 - Generating BERT top-k results ...
2021-03-08 19:33:05 - Use pytorch device: cuda
100%|██████████| 1249/1249 [38:02<00:00,  1.83s/it]
2021-03-08 20:11:10 - Re-ranking the top-k results ...


**Re-ranking results using query_type="user_query"; neg_type="simple"**

In [15]:
# Settings that identify the model used in this section.
query_type = "user_query"
neg_type = "simple"
version = "1.1"
loss_type = 'softmax'

# The model folder is named <loss_type>_<neg_type>_<query_type>_<version>.
bert_model_path = 'output/StackFAQ/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [16]:
# Build the ReRanker for the model at bert_model_path, handing it the test
# queries and their relevance labels.
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    loss_type=loss_type,
)

In [17]:
# Results of this run go to <output_path>/supervised/<loss_type>/<query_type>/<neg_type>.
reranked_output_path = "/".join([output_path, "supervised", loss_type, query_type, neg_type])
make_dirs(reranked_output_path)

# For each set of ES results: compute the BERT top-k predictions and save them,
# then re-rank those predictions and save the re-ranked results as well.

# --- query_by_question ---
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{reranked_output_path}/bert_query_by_question.json")
reranked_query_by_question = r.get_reranked_results(bert_query_by_question)
dump_to_json(reranked_query_by_question, f"{reranked_output_path}/reranked_query_by_question.json")

# --- query_by_answer ---
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{reranked_output_path}/bert_query_by_answer.json")
reranked_query_by_answer = r.get_reranked_results(bert_query_by_answer)
dump_to_json(reranked_query_by_answer, f"{reranked_output_path}/reranked_query_by_answer.json")

# --- query_by_question_answer ---
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{reranked_output_path}/bert_query_by_question_answer.json")
reranked_query_by_question_answer = r.get_reranked_results(bert_query_by_question_answer)
dump_to_json(reranked_query_by_question_answer, f"{reranked_output_path}/reranked_query_by_question_answer.json")

# --- query_by_question_answer_concat ---
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{reranked_output_path}/bert_query_by_question_answer_concat.json")
reranked_query_by_question_answer_concat = r.get_reranked_results(bert_query_by_question_answer_concat)
dump_to_json(reranked_query_by_question_answer_concat, f"{reranked_output_path}/reranked_query_by_question_answer_concat.json")

2021-03-08 20:11:14 - Generating BERT top-k results ...
2021-03-08 20:11:25 - Use pytorch device: cuda
100%|██████████| 1249/1249 [34:31<00:00,  1.66s/it]
2021-03-08 20:46:00 - Re-ranking the top-k results ...
2021-03-08 20:46:02 - Generating BERT top-k results ...
2021-03-08 20:46:05 - Use pytorch device: cuda
100%|██████████| 1249/1249 [39:57<00:00,  1.92s/it]
2021-03-08 21:26:05 - Re-ranking the top-k results ...
2021-03-08 21:26:08 - Generating BERT top-k results ...
2021-03-08 21:26:11 - Use pytorch device: cuda
100%|██████████| 1249/1249 [38:37<00:00,  1.86s/it]
2021-03-08 22:04:51 - Re-ranking the top-k results ...
2021-03-08 22:04:54 - Generating BERT top-k results ...
2021-03-08 22:04:57 - Use pytorch device: cuda
100%|██████████| 1249/1249 [37:41<00:00,  1.81s/it]
2021-03-08 22:42:42 - Re-ranking the top-k results ...


**Re-ranking results using query_type="faq"; neg_type="hard"**

In [12]:
# Settings that identify the model used in this section.
query_type = "faq"
neg_type = "hard"
version = "1.1"
loss_type = 'softmax'

# The model folder is named <loss_type>_<neg_type>_<query_type>_<version>.
bert_model_path = 'output/StackFAQ/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [13]:
# Build the ReRanker for the model at bert_model_path, handing it the test
# queries and their relevance labels.
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    loss_type=loss_type,
)

In [15]:
# Results of this run go to <output_path>/supervised/<loss_type>/<query_type>/<neg_type>.
reranked_output_path = "/".join([output_path, "supervised", loss_type, query_type, neg_type])
make_dirs(reranked_output_path)

# For each set of ES results: compute the BERT top-k predictions and save them,
# then re-rank those predictions and save the re-ranked results as well.

# --- query_by_question ---
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{reranked_output_path}/bert_query_by_question.json")
reranked_query_by_question = r.get_reranked_results(bert_query_by_question)
dump_to_json(reranked_query_by_question, f"{reranked_output_path}/reranked_query_by_question.json")

# --- query_by_answer ---
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{reranked_output_path}/bert_query_by_answer.json")
reranked_query_by_answer = r.get_reranked_results(bert_query_by_answer)
dump_to_json(reranked_query_by_answer, f"{reranked_output_path}/reranked_query_by_answer.json")

# --- query_by_question_answer ---
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{reranked_output_path}/bert_query_by_question_answer.json")
reranked_query_by_question_answer = r.get_reranked_results(bert_query_by_question_answer)
dump_to_json(reranked_query_by_question_answer, f"{reranked_output_path}/reranked_query_by_question_answer.json")

# --- query_by_question_answer_concat ---
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{reranked_output_path}/bert_query_by_question_answer_concat.json")
reranked_query_by_question_answer_concat = r.get_reranked_results(bert_query_by_question_answer_concat)
dump_to_json(reranked_query_by_question_answer_concat, f"{reranked_output_path}/reranked_query_by_question_answer_concat.json")

2021-03-09 04:08:06 - Generating BERT top-k results ...
2021-03-09 04:08:21 - Use pytorch device: cuda
100%|██████████| 1249/1249 [41:09<00:00,  1.98s/it]
2021-03-09 04:49:34 - Re-ranking the top-k results ...
2021-03-09 04:49:36 - Generating BERT top-k results ...
2021-03-09 04:49:39 - Use pytorch device: cuda
100%|██████████| 1249/1249 [39:37<00:00,  1.90s/it]
2021-03-09 05:29:19 - Re-ranking the top-k results ...
2021-03-09 05:29:22 - Generating BERT top-k results ...
2021-03-09 05:29:24 - Use pytorch device: cuda
100%|██████████| 1249/1249 [38:35<00:00,  1.85s/it]
2021-03-09 06:08:03 - Re-ranking the top-k results ...


**Re-ranking results using query_type="faq"; neg_type="simple"**

In [16]:
# Settings that identify the model used in this section.
query_type = "faq"
neg_type = "simple"
version = "1.1"
loss_type = 'softmax'

# The model folder is named <loss_type>_<neg_type>_<query_type>_<version>.
bert_model_path = 'output/StackFAQ/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [17]:
# Build the ReRanker for the model at bert_model_path, handing it the test
# queries and their relevance labels.
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    loss_type=loss_type,
)

In [18]:
# Results of this run go to <output_path>/supervised/<loss_type>/<query_type>/<neg_type>.
reranked_output_path = "/".join([output_path, "supervised", loss_type, query_type, neg_type])
make_dirs(reranked_output_path)

# For each set of ES results: compute the BERT top-k predictions and save them,
# then re-rank those predictions and save the re-ranked results as well.

# --- query_by_question ---
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{reranked_output_path}/bert_query_by_question.json")
reranked_query_by_question = r.get_reranked_results(bert_query_by_question)
dump_to_json(reranked_query_by_question, f"{reranked_output_path}/reranked_query_by_question.json")

# --- query_by_answer ---
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{reranked_output_path}/bert_query_by_answer.json")
reranked_query_by_answer = r.get_reranked_results(bert_query_by_answer)
dump_to_json(reranked_query_by_answer, f"{reranked_output_path}/reranked_query_by_answer.json")

# --- query_by_question_answer ---
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{reranked_output_path}/bert_query_by_question_answer.json")
reranked_query_by_question_answer = r.get_reranked_results(bert_query_by_question_answer)
dump_to_json(reranked_query_by_question_answer, f"{reranked_output_path}/reranked_query_by_question_answer.json")

# --- query_by_question_answer_concat ---
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{reranked_output_path}/bert_query_by_question_answer_concat.json")
reranked_query_by_question_answer_concat = r.get_reranked_results(bert_query_by_question_answer_concat)
dump_to_json(reranked_query_by_question_answer_concat, f"{reranked_output_path}/reranked_query_by_question_answer_concat.json")

2021-03-09 06:08:07 - Generating BERT top-k results ...
2021-03-09 06:08:18 - Use pytorch device: cuda
100%|██████████| 1249/1249 [34:30<00:00,  1.66s/it]
2021-03-09 06:42:51 - Re-ranking the top-k results ...
2021-03-09 06:42:54 - Generating BERT top-k results ...
2021-03-09 06:42:56 - Use pytorch device: cuda
100%|██████████| 1249/1249 [41:09<00:00,  1.98s/it]
2021-03-09 07:24:09 - Re-ranking the top-k results ...
2021-03-09 07:24:12 - Generating BERT top-k results ...
2021-03-09 07:24:14 - Use pytorch device: cuda
100%|██████████| 1249/1249 [39:30<00:00,  1.90s/it]
2021-03-09 08:03:48 - Re-ranking the top-k results ...
2021-03-09 08:03:51 - Generating BERT top-k results ...
2021-03-09 08:03:54 - Use pytorch device: cuda
100%|██████████| 1249/1249 [38:24<00:00,  1.84s/it]
2021-03-09 08:42:21 - Re-ranking the top-k results ...
