In [1]:
# install required libraries
!pip3 install sentence-transformers



In [2]:
!pip3 install elasticsearch



In [3]:
from google.colab import drive

In [4]:
# Mount Google Drive at /content/drive so the project folder stored there is reachable
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Work from the project root on Drive: the local imports and the data/ and output/ paths below are relative to it
%cd /content/drive/MyDrive/BERT-FAQ

/content/drive/MyDrive/BERT-FAQ


In [6]:
# List the contents of the current (project) directory
!ls

data			     indexer.py   reranker.py
evaluation.py		     metric.py	  searcher.py
faq_bert_finetuning.py	     notebook	  shared
faq_bert.py		     output	  training_data_generator.py
faq_bert_ranker.py	     parser	  webserver.py
hard_negatives_generator.py  __pycache__


In [7]:
# import required dependencies
from evaluation import get_relevance_label_df
from shared.utils import load_from_json
from shared.utils import dump_to_json
from shared.utils import make_dirs
from reranker import ReRanker

In [8]:
# Root folder for all FAQIR ranking results
output_path = "data/FAQIR/rank_results"

# Read the saved Elasticsearch results for user queries, one JSON file per ES query variant
es_output_path = f"{output_path}/unsupervised"
es_query_by_question = load_from_json(f"{es_output_path}/es_query_by_question.json")
es_query_by_answer = load_from_json(f"{es_output_path}/es_query_by_answer.json")
es_query_by_question_answer = load_from_json(f"{es_output_path}/es_query_by_question_answer.json")
es_query_by_question_answer_concat = load_from_json(f"{es_output_path}/es_query_by_question_answer_concat.json")

In [9]:
# Build the relevance-label table, then keep the distinct questions whose query_type is 'user_query'
query_answer_pair_filepath = 'data/FAQIR/query_answer_pairs.json'
relevance_label_df = get_relevance_label_df(query_answer_pair_filepath)
test_queries = relevance_label_df.loc[
    relevance_label_df['query_type'] == 'user_query', 'question'
].unique()

In [10]:
# Preview the first ten test queries
test_queries[:10]

array(['How to increase the efficiency of my car with respect to fuel expended?',
       'Is there a special way to drive to decreasse my fuel consumption?',
       'How to use less gas in my car?',
       'I want to save some money on gas. What can I do to make my car use less gas?',
       'Are there any simple tweaks one can do to a car to make it more fuel efficient?',
       'How can I make my fuel mileage bigger?',
       'I feel my car is using too much gas. Can I make some cheap modifications that would fix this?',
       'What should I do to reduce the fuel required by my car while not affecting the mileage?',
       'I need to decrease the fuel consumption of my car. What are the best strategies?',
       'I have a sporty style of driving, can I reduce the fuel consumption by changing the driving style?'],
      dtype=object)

In [11]:
# total number of distinct user-query test questions
len(test_queries)

1183

**1. Generating BERT prediction results from Answer (BERT-Q-a)**

In [None]:
# rank_field value handed to every ReRanker in this section
rank_field = "BERT-Q-a"

**query_type="user_query"; neg_type="hard"; loss_type='triplet'**

In [None]:
# Parameters that select which saved model to use
query_type = "user_query"
neg_type = "hard"
version = "1.1"
loss_type = 'triplet'

# Model folder name follows <loss_type>_<neg_type>_<query_type>_<version>
bert_model_path = 'output/FAQIR/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [None]:
# Build the re-ranker for the selected model, test queries and relevance labels
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    rank_field=rank_field,
)

In [None]:
# Output folder for this configuration:
# <output_path>/supervised/<rank_field>/<loss_type>/<query_type>/<neg_type>
pred_output_path = output_path + "/supervised/" + "/".join([rank_field, loss_type, query_type, neg_type])
make_dirs(pred_output_path)

# Run the re-ranker over each set of ES results and write one JSON file per set

# input loaded from es_query_by_question.json
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{pred_output_path}/bert_query_by_question.json")

# input loaded from es_query_by_answer.json
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{pred_output_path}/bert_query_by_answer.json")

# input loaded from es_query_by_question_answer.json
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{pred_output_path}/bert_query_by_question_answer.json")

# input loaded from es_query_by_question_answer_concat.json
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{pred_output_path}/bert_query_by_question_answer_concat.json")

2021-03-29 17:12:54 - Generating BERT top-k results ...
2021-03-29 17:12:54 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_hard_user_query_1.1
2021-03-29 17:12:54 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_hard_user_query_1.1
2021-03-29 17:12:55 - Use pytorch device: cuda
100%|██████████| 1183/1183 [15:56<00:00,  1.24it/s]
2021-03-29 17:28:54 - Generating BERT top-k results ...
2021-03-29 17:28:54 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_hard_user_query_1.1
2021-03-29 17:28:54 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_hard_user_query_1.1
2021-03-29 17:28:56 - Use pytorch device: cuda
100%|██████████| 1183/1183 [17:19<00:00,  1.14it/s]
2021-03-29 17:46:17 - Generating BERT top-k results ...
2021-03-29 17:46:17 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_hard_user_query_1.1
2021-03-29 17:46:17 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_hard_user

**query_type="user_query"; neg_type="simple"; loss_type='triplet'**

In [None]:
# Parameters that select which saved model to use
query_type = "user_query"
neg_type = "simple"
version = "1.1"
loss_type = 'triplet'

# Model folder name follows <loss_type>_<neg_type>_<query_type>_<version>
bert_model_path = 'output/FAQIR/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [None]:
# Build the re-ranker for the selected model, test queries and relevance labels
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    rank_field=rank_field,
)

In [None]:
# Output folder for this configuration:
# <output_path>/supervised/<rank_field>/<loss_type>/<query_type>/<neg_type>
pred_output_path = output_path + "/supervised/" + "/".join([rank_field, loss_type, query_type, neg_type])
make_dirs(pred_output_path)

# Run the re-ranker over each set of ES results and write one JSON file per set

# input loaded from es_query_by_question.json
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{pred_output_path}/bert_query_by_question.json")

# input loaded from es_query_by_answer.json
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{pred_output_path}/bert_query_by_answer.json")

# input loaded from es_query_by_question_answer.json
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{pred_output_path}/bert_query_by_question_answer.json")

# input loaded from es_query_by_question_answer_concat.json
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{pred_output_path}/bert_query_by_question_answer_concat.json")

2021-03-30 08:01:41 - Generating BERT top-k results ...
2021-03-30 08:01:41 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_simple_user_query_1.1
2021-03-30 08:01:41 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_simple_user_query_1.1
2021-03-30 08:01:50 - Use pytorch device: cuda
100%|██████████| 1183/1183 [19:11<00:00,  1.03it/s]
2021-03-30 08:21:04 - Generating BERT top-k results ...
2021-03-30 08:21:04 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_simple_user_query_1.1
2021-03-30 08:21:04 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_simple_user_query_1.1
2021-03-30 08:21:05 - Use pytorch device: cuda
100%|██████████| 1183/1183 [24:20<00:00,  1.23s/it]
2021-03-30 08:45:28 - Generating BERT top-k results ...
2021-03-30 08:45:28 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_simple_user_query_1.1
2021-03-30 08:45:28 - Load SentenceTransformer from folder: output/FAQIR/models/triplet

**query_type="faq"; neg_type="hard"; loss_type='triplet'**

In [None]:
# Parameters that select which saved model to use
query_type = "faq"
neg_type = "hard"
version = "1.1"
loss_type = 'triplet'

# Model folder name follows <loss_type>_<neg_type>_<query_type>_<version>
bert_model_path = 'output/FAQIR/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [None]:
# Build the re-ranker for the selected model, test queries and relevance labels
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    rank_field=rank_field,
)

In [None]:
# Output folder for this configuration:
# <output_path>/supervised/<rank_field>/<loss_type>/<query_type>/<neg_type>
pred_output_path = output_path + "/supervised/" + "/".join([rank_field, loss_type, query_type, neg_type])
make_dirs(pred_output_path)

# Run the re-ranker over each set of ES results and write one JSON file per set

# input loaded from es_query_by_question.json
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{pred_output_path}/bert_query_by_question.json")

# input loaded from es_query_by_answer.json
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{pred_output_path}/bert_query_by_answer.json")

# input loaded from es_query_by_question_answer.json
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{pred_output_path}/bert_query_by_question_answer.json")

# input loaded from es_query_by_question_answer_concat.json
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{pred_output_path}/bert_query_by_question_answer_concat.json")

2021-03-30 09:29:18 - Generating BERT top-k results ...
2021-03-30 09:29:18 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_hard_faq_1.1
2021-03-30 09:29:18 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_hard_faq_1.1
2021-03-30 09:29:28 - Use pytorch device: cuda
100%|██████████| 1183/1183 [15:38<00:00,  1.26it/s]
2021-03-30 09:45:09 - Generating BERT top-k results ...
2021-03-30 09:45:09 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_hard_faq_1.1
2021-03-30 09:45:09 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_hard_faq_1.1
2021-03-30 09:45:10 - Use pytorch device: cuda
100%|██████████| 1183/1183 [17:14<00:00,  1.14it/s]
2021-03-30 10:02:27 - Generating BERT top-k results ...
2021-03-30 10:02:27 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_hard_faq_1.1
2021-03-30 10:02:27 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_hard_faq_1.1
2021-03-30 10:02:29 - Use pytor

**query_type="faq"; neg_type="simple"; loss_type='triplet'**

In [None]:
# Parameters that select which saved model to use
query_type = "faq"
neg_type = "simple"
version = "1.1"
loss_type = 'triplet'

# Model folder name follows <loss_type>_<neg_type>_<query_type>_<version>
bert_model_path = 'output/FAQIR/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [None]:
# Build the re-ranker for the selected model, test queries and relevance labels
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    rank_field=rank_field,
)

In [None]:
# Output folder for this configuration:
# <output_path>/supervised/<rank_field>/<loss_type>/<query_type>/<neg_type>
pred_output_path = output_path + "/supervised/" + "/".join([rank_field, loss_type, query_type, neg_type])
make_dirs(pred_output_path)

# Run the re-ranker over each set of ES results and write one JSON file per set

# input loaded from es_query_by_question.json
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{pred_output_path}/bert_query_by_question.json")

# input loaded from es_query_by_answer.json
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{pred_output_path}/bert_query_by_answer.json")

# input loaded from es_query_by_question_answer.json
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{pred_output_path}/bert_query_by_question_answer.json")

# input loaded from es_query_by_question_answer_concat.json
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{pred_output_path}/bert_query_by_question_answer_concat.json")

2021-03-30 10:35:42 - Generating BERT top-k results ...
2021-03-30 10:35:42 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_simple_faq_1.1
2021-03-30 10:35:42 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_simple_faq_1.1
2021-03-30 10:35:52 - Use pytorch device: cuda
100%|██████████| 1183/1183 [15:37<00:00,  1.26it/s]
2021-03-30 10:51:31 - Generating BERT top-k results ...
2021-03-30 10:51:31 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_simple_faq_1.1
2021-03-30 10:51:31 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_simple_faq_1.1
2021-03-30 10:51:33 - Use pytorch device: cuda
100%|██████████| 1183/1183 [17:10<00:00,  1.15it/s]
2021-03-30 11:08:46 - Generating BERT top-k results ...
2021-03-30 11:08:46 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_simple_faq_1.1
2021-03-30 11:08:46 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_simple_faq_1.1
2021-03-30 11:08:47

**query_type="user_query"; neg_type="hard"; loss_type='softmax'**

In [None]:
# Parameters that select which saved model to use
query_type = "user_query"
neg_type = "hard"
version = "1.1"
loss_type = 'softmax'

# Model folder name follows <loss_type>_<neg_type>_<query_type>_<version>
bert_model_path = 'output/FAQIR/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [None]:
# Build the re-ranker for the selected model, test queries and relevance labels
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    rank_field=rank_field,
)

In [None]:
# Output folder for this configuration:
# <output_path>/supervised/<rank_field>/<loss_type>/<query_type>/<neg_type>
pred_output_path = output_path + "/supervised/" + "/".join([rank_field, loss_type, query_type, neg_type])
make_dirs(pred_output_path)

# Run the re-ranker over each set of ES results and write one JSON file per set

# input loaded from es_query_by_question.json
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{pred_output_path}/bert_query_by_question.json")

# input loaded from es_query_by_answer.json
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{pred_output_path}/bert_query_by_answer.json")

# input loaded from es_query_by_question_answer.json
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{pred_output_path}/bert_query_by_question_answer.json")

# input loaded from es_query_by_question_answer_concat.json
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{pred_output_path}/bert_query_by_question_answer_concat.json")

2021-03-30 11:41:45 - Generating BERT top-k results ...
2021-03-30 11:41:56 - Use pytorch device: cuda
100%|██████████| 1183/1183 [14:40<00:00,  1.34it/s]
2021-03-30 11:56:39 - Generating BERT top-k results ...
2021-03-30 11:56:41 - Use pytorch device: cuda
100%|██████████| 1183/1183 [16:46<00:00,  1.18it/s]
2021-03-30 12:13:29 - Generating BERT top-k results ...
2021-03-30 12:13:31 - Use pytorch device: cuda
100%|██████████| 1183/1183 [15:55<00:00,  1.24it/s]
2021-03-30 12:29:29 - Generating BERT top-k results ...
2021-03-30 12:29:31 - Use pytorch device: cuda
100%|██████████| 1183/1183 [15:39<00:00,  1.26it/s]


**query_type="user_query"; neg_type="simple"; loss_type='softmax'**

In [None]:
# Parameters that select which saved model to use
query_type = "user_query"
neg_type = "simple"
version = "1.1"
loss_type = 'softmax'

# Model folder name follows <loss_type>_<neg_type>_<query_type>_<version>
bert_model_path = 'output/FAQIR/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [None]:
# Build the re-ranker for the selected model, test queries and relevance labels
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    rank_field=rank_field,
)

In [None]:
# Output folder for this configuration:
# <output_path>/supervised/<rank_field>/<loss_type>/<query_type>/<neg_type>
pred_output_path = output_path + "/supervised/" + "/".join([rank_field, loss_type, query_type, neg_type])
make_dirs(pred_output_path)

# Run the re-ranker over each set of ES results and write one JSON file per set

# input loaded from es_query_by_question.json
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{pred_output_path}/bert_query_by_question.json")

# input loaded from es_query_by_answer.json
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{pred_output_path}/bert_query_by_answer.json")

# input loaded from es_query_by_question_answer.json
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{pred_output_path}/bert_query_by_question_answer.json")

# input loaded from es_query_by_question_answer_concat.json
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{pred_output_path}/bert_query_by_question_answer_concat.json")

2021-03-30 12:45:14 - Generating BERT top-k results ...
2021-03-30 12:45:22 - Use pytorch device: cuda
100%|██████████| 1183/1183 [14:57<00:00,  1.32it/s]
2021-03-30 13:00:22 - Generating BERT top-k results ...
2021-03-30 13:00:24 - Use pytorch device: cuda
100%|██████████| 1183/1183 [16:55<00:00,  1.17it/s]
2021-03-30 13:17:22 - Generating BERT top-k results ...
2021-03-30 13:17:24 - Use pytorch device: cuda
100%|██████████| 1183/1183 [15:59<00:00,  1.23it/s]
2021-03-30 13:33:25 - Generating BERT top-k results ...
2021-03-30 13:33:27 - Use pytorch device: cuda
100%|██████████| 1183/1183 [15:39<00:00,  1.26it/s]


**query_type="faq"; neg_type="hard"; loss_type='softmax'**

In [None]:
# Parameters that select which saved model to use
query_type = "faq"
neg_type = "hard"
version = "1.1"
loss_type = 'softmax'

# Model folder name follows <loss_type>_<neg_type>_<query_type>_<version>
bert_model_path = 'output/FAQIR/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [None]:
# Build the re-ranker for the selected model, test queries and relevance labels
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    rank_field=rank_field,
)

In [None]:
# Output folder for this configuration:
# <output_path>/supervised/<rank_field>/<loss_type>/<query_type>/<neg_type>
pred_output_path = output_path + "/supervised/" + "/".join([rank_field, loss_type, query_type, neg_type])
make_dirs(pred_output_path)

# Run the re-ranker over each set of ES results and write one JSON file per set

# input loaded from es_query_by_question.json
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{pred_output_path}/bert_query_by_question.json")

# input loaded from es_query_by_answer.json
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{pred_output_path}/bert_query_by_answer.json")

# input loaded from es_query_by_question_answer.json
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{pred_output_path}/bert_query_by_question_answer.json")

# input loaded from es_query_by_question_answer_concat.json
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{pred_output_path}/bert_query_by_question_answer_concat.json")

2021-03-30 13:49:09 - Generating BERT top-k results ...
2021-03-30 13:49:19 - Use pytorch device: cuda
100%|██████████| 1183/1183 [14:49<00:00,  1.33it/s]
2021-03-30 14:04:11 - Generating BERT top-k results ...
2021-03-30 14:04:13 - Use pytorch device: cuda
100%|██████████| 1183/1183 [16:55<00:00,  1.17it/s]
2021-03-30 14:21:11 - Generating BERT top-k results ...
2021-03-30 14:21:13 - Use pytorch device: cuda
100%|██████████| 1183/1183 [16:02<00:00,  1.23it/s]
2021-03-30 14:37:18 - Generating BERT top-k results ...
2021-03-30 14:37:20 - Use pytorch device: cuda
100%|██████████| 1183/1183 [15:43<00:00,  1.25it/s]


**query_type="faq"; neg_type="simple"; loss_type='softmax'**

In [None]:
# Parameters that select which saved model to use
query_type = "faq"
neg_type = "simple"
version = "1.1"
loss_type = 'softmax'

# Model folder name follows <loss_type>_<neg_type>_<query_type>_<version>
bert_model_path = 'output/FAQIR/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [None]:
# Build the re-ranker for the selected model, test queries and relevance labels
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    rank_field=rank_field,
)

In [None]:
# Output folder for this configuration:
# <output_path>/supervised/<rank_field>/<loss_type>/<query_type>/<neg_type>
pred_output_path = output_path + "/supervised/" + "/".join([rank_field, loss_type, query_type, neg_type])
make_dirs(pred_output_path)

# Run the re-ranker over each set of ES results and write one JSON file per set

# input loaded from es_query_by_question.json
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{pred_output_path}/bert_query_by_question.json")

# input loaded from es_query_by_answer.json
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{pred_output_path}/bert_query_by_answer.json")

# input loaded from es_query_by_question_answer.json
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{pred_output_path}/bert_query_by_question_answer.json")

# input loaded from es_query_by_question_answer_concat.json
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{pred_output_path}/bert_query_by_question_answer_concat.json")

2021-03-30 14:53:06 - Generating BERT top-k results ...
2021-03-30 14:53:20 - Use pytorch device: cuda
100%|██████████| 1183/1183 [14:54<00:00,  1.32it/s]
2021-03-30 15:08:17 - Generating BERT top-k results ...
2021-03-30 15:08:19 - Use pytorch device: cuda
100%|██████████| 1183/1183 [16:54<00:00,  1.17it/s]
2021-03-30 15:25:17 - Generating BERT top-k results ...
2021-03-30 15:25:18 - Use pytorch device: cuda
100%|██████████| 1183/1183 [16:04<00:00,  1.23it/s]
2021-03-30 15:41:26 - Generating BERT top-k results ...
2021-03-30 15:41:28 - Use pytorch device: cuda
100%|██████████| 1183/1183 [15:37<00:00,  1.26it/s]


**2. Generating BERT prediction results from Question (BERT-Q-q)**


In [12]:
# rank_field value handed to every ReRanker in this section
rank_field = "BERT-Q-q"

**query_type="user_query"; neg_type="hard"; loss_type='triplet'**

In [None]:
# Parameters that select which saved model to use
query_type = "user_query"
neg_type = "hard"
version = "1.1"
loss_type = 'triplet'

# Model folder name follows <loss_type>_<neg_type>_<query_type>_<version>
bert_model_path = 'output/FAQIR/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [None]:
# Build the re-ranker for the selected model, test queries and relevance labels
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    rank_field=rank_field,
)

In [None]:
# Output folder for this configuration:
# <output_path>/supervised/<rank_field>/<loss_type>/<query_type>/<neg_type>
pred_output_path = output_path + "/supervised/" + "/".join([rank_field, loss_type, query_type, neg_type])
make_dirs(pred_output_path)

# Run the re-ranker over each set of ES results and write one JSON file per set

# input loaded from es_query_by_question.json
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{pred_output_path}/bert_query_by_question.json")

# input loaded from es_query_by_answer.json
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{pred_output_path}/bert_query_by_answer.json")

# input loaded from es_query_by_question_answer.json
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{pred_output_path}/bert_query_by_question_answer.json")

# input loaded from es_query_by_question_answer_concat.json
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{pred_output_path}/bert_query_by_question_answer_concat.json")

2021-03-30 15:57:08 - Generating BERT top-k results ...
2021-03-30 15:57:08 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_hard_user_query_1.1
2021-03-30 15:57:08 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_hard_user_query_1.1
2021-03-30 15:57:16 - Use pytorch device: cuda
100%|██████████| 1183/1183 [13:49<00:00,  1.43it/s]
2021-03-30 16:11:08 - Generating BERT top-k results ...
2021-03-30 16:11:08 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_hard_user_query_1.1
2021-03-30 16:11:08 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_hard_user_query_1.1
2021-03-30 16:11:10 - Use pytorch device: cuda
100%|██████████| 1183/1183 [13:39<00:00,  1.44it/s]
2021-03-30 16:24:52 - Generating BERT top-k results ...
2021-03-30 16:24:52 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_hard_user_query_1.1
2021-03-30 16:24:52 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_hard_user

**query_type="user_query"; neg_type="simple"; loss_type='triplet'**

In [None]:
# Parameters that select which saved model to use
query_type = "user_query"
neg_type = "simple"
version = "1.1"
loss_type = 'triplet'

# Model folder name follows <loss_type>_<neg_type>_<query_type>_<version>
bert_model_path = 'output/FAQIR/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [None]:
# Build the re-ranker for the selected model, test queries and relevance labels
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    rank_field=rank_field,
)

In [None]:
# Output folder for this configuration:
# <output_path>/supervised/<rank_field>/<loss_type>/<query_type>/<neg_type>
pred_output_path = output_path + "/supervised/" + "/".join([rank_field, loss_type, query_type, neg_type])
make_dirs(pred_output_path)

# Run the re-ranker over each set of ES results and write one JSON file per set

# input loaded from es_query_by_question.json
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{pred_output_path}/bert_query_by_question.json")

# input loaded from es_query_by_answer.json
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{pred_output_path}/bert_query_by_answer.json")

# input loaded from es_query_by_question_answer.json
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{pred_output_path}/bert_query_by_question_answer.json")

# input loaded from es_query_by_question_answer_concat.json
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{pred_output_path}/bert_query_by_question_answer_concat.json")

2021-03-30 16:52:29 - Generating BERT top-k results ...
2021-03-30 16:52:29 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_simple_user_query_1.1
2021-03-30 16:52:29 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_simple_user_query_1.1
2021-03-30 16:52:30 - Use pytorch device: cuda
100%|██████████| 1183/1183 [13:50<00:00,  1.42it/s]
2021-03-30 17:06:23 - Generating BERT top-k results ...
2021-03-30 17:06:23 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_simple_user_query_1.1
2021-03-30 17:06:23 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_simple_user_query_1.1
2021-03-30 17:06:25 - Use pytorch device: cuda
100%|██████████| 1183/1183 [14:00<00:00,  1.41it/s]
2021-03-30 17:20:27 - Generating BERT top-k results ...
2021-03-30 17:20:28 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_simple_user_query_1.1
2021-03-30 17:20:28 - Load SentenceTransformer from folder: output/FAQIR/models/triplet

**query_type="faq"; neg_type="hard"; loss_type='triplet'**

In [13]:
# Parameters that select which saved model to use
query_type = "faq"
neg_type = "hard"
version = "1.1"
loss_type = 'triplet'

# Model folder name follows <loss_type>_<neg_type>_<query_type>_<version>
bert_model_path = 'output/FAQIR/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [14]:
# Build the re-ranker for the selected model, test queries and relevance labels
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    rank_field=rank_field,
)

In [15]:
# Output folder for this configuration:
# <output_path>/supervised/<rank_field>/<loss_type>/<query_type>/<neg_type>
pred_output_path = output_path + "/supervised/" + "/".join([rank_field, loss_type, query_type, neg_type])
make_dirs(pred_output_path)

# Run the re-ranker over each set of ES results and write one JSON file per set

# input loaded from es_query_by_question.json
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{pred_output_path}/bert_query_by_question.json")

# input loaded from es_query_by_answer.json
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{pred_output_path}/bert_query_by_answer.json")

# input loaded from es_query_by_question_answer.json
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{pred_output_path}/bert_query_by_question_answer.json")

# input loaded from es_query_by_question_answer_concat.json
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{pred_output_path}/bert_query_by_question_answer_concat.json")

2021-03-31 05:21:57 - Generating BERT top-k results ...
2021-03-31 05:21:58 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_hard_faq_1.1
2021-03-31 05:21:58 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_hard_faq_1.1
2021-03-31 05:22:07 - Use pytorch device: cuda
100%|██████████| 1183/1183 [14:14<00:00,  1.38it/s]
2021-03-31 05:36:24 - Generating BERT top-k results ...
2021-03-31 05:36:24 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_hard_faq_1.1
2021-03-31 05:36:24 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_hard_faq_1.1
2021-03-31 05:36:25 - Use pytorch device: cuda
100%|██████████| 1183/1183 [13:29<00:00,  1.46it/s]
2021-03-31 05:49:57 - Generating BERT top-k results ...
2021-03-31 05:49:57 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_hard_faq_1.1
2021-03-31 05:49:57 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_hard_faq_1.1
2021-03-31 05:49:58 - Use pytor

**query_type="faq"; neg_type="simple"; loss_type='triplet'**

In [16]:
# Parameters that select which saved model to use
query_type = "faq"
neg_type = "simple"
version = "1.1"
loss_type = 'triplet'

# Model folder name follows <loss_type>_<neg_type>_<query_type>_<version>
bert_model_path = 'output/FAQIR/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [17]:
# Build the re-ranker for the selected model, test queries and relevance labels
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    rank_field=rank_field,
)

In [18]:
# Output folder for this configuration:
# <output_path>/supervised/<rank_field>/<loss_type>/<query_type>/<neg_type>
pred_output_path = output_path + "/supervised/" + "/".join([rank_field, loss_type, query_type, neg_type])
make_dirs(pred_output_path)

# Run the re-ranker over each set of ES results and write one JSON file per set

# input loaded from es_query_by_question.json
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{pred_output_path}/bert_query_by_question.json")

# input loaded from es_query_by_answer.json
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{pred_output_path}/bert_query_by_answer.json")

# input loaded from es_query_by_question_answer.json
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{pred_output_path}/bert_query_by_question_answer.json")

# input loaded from es_query_by_question_answer_concat.json
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{pred_output_path}/bert_query_by_question_answer_concat.json")

2021-03-31 06:17:08 - Generating BERT top-k results ...
2021-03-31 06:17:08 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_simple_faq_1.1
2021-03-31 06:17:08 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_simple_faq_1.1
2021-03-31 06:17:16 - Use pytorch device: cuda
100%|██████████| 1183/1183 [13:29<00:00,  1.46it/s]
2021-03-31 06:30:48 - Generating BERT top-k results ...
2021-03-31 06:30:48 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_simple_faq_1.1
2021-03-31 06:30:48 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_simple_faq_1.1
2021-03-31 06:30:49 - Use pytorch device: cuda
100%|██████████| 1183/1183 [13:28<00:00,  1.46it/s]
2021-03-31 06:44:20 - Generating BERT top-k results ...
2021-03-31 06:44:20 - Load pretrained SentenceTransformer: output/FAQIR/models/triplet_simple_faq_1.1
2021-03-31 06:44:20 - Load SentenceTransformer from folder: output/FAQIR/models/triplet_simple_faq_1.1
2021-03-31 06:44:21

**query_type="user_query"; neg_type="hard"; loss_type='softmax'**

In [19]:
# Parameters that select which saved model to use
query_type = "user_query"
neg_type = "hard"
version = "1.1"
loss_type = 'softmax'

# Model folder name follows <loss_type>_<neg_type>_<query_type>_<version>
bert_model_path = 'output/FAQIR/models/' + '_'.join([loss_type, neg_type, query_type, version])

In [20]:
# Build the re-ranker for the selected model, test queries and relevance labels
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    rank_field=rank_field,
)

In [21]:
# Output folder for this configuration:
# <output_path>/supervised/<rank_field>/<loss_type>/<query_type>/<neg_type>
pred_output_path = output_path + "/supervised/" + "/".join([rank_field, loss_type, query_type, neg_type])
make_dirs(pred_output_path)

# Run the re-ranker over each set of ES results and write one JSON file per set

# input loaded from es_query_by_question.json
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{pred_output_path}/bert_query_by_question.json")

# input loaded from es_query_by_answer.json
bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{pred_output_path}/bert_query_by_answer.json")

# input loaded from es_query_by_question_answer.json
bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{pred_output_path}/bert_query_by_question_answer.json")

# input loaded from es_query_by_question_answer_concat.json
bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{pred_output_path}/bert_query_by_question_answer_concat.json")

2021-03-31 07:11:32 - Generating BERT top-k results ...
2021-03-31 07:11:53 - Use pytorch device: cuda
100%|██████████| 1183/1183 [12:52<00:00,  1.53it/s]
2021-03-31 07:24:48 - Generating BERT top-k results ...
2021-03-31 07:24:50 - Use pytorch device: cuda
100%|██████████| 1183/1183 [12:52<00:00,  1.53it/s]
2021-03-31 07:37:45 - Generating BERT top-k results ...
2021-03-31 07:37:47 - Use pytorch device: cuda
100%|██████████| 1183/1183 [12:53<00:00,  1.53it/s]
2021-03-31 07:50:43 - Generating BERT top-k results ...
2021-03-31 07:50:45 - Use pytorch device: cuda
100%|██████████| 1183/1183 [12:55<00:00,  1.53it/s]


**query_type="user_query"; neg_type="simple"; loss_type='softmax'**

In [22]:
# run configuration; the model directory is named <loss_type>_<neg_type>_<query_type>_<version>
query_type = "user_query"
neg_type = "simple"
version = "1.1"
loss_type = 'softmax'
bert_model_path = f"output/FAQIR/models/{loss_type}_{neg_type}_{query_type}_{version}"

In [23]:
# build the ReRanker for the configured model path, handing it the test queries,
# the relevance labels and the rank field
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    rank_field=rank_field,
)

In [24]:
# output directory: <output_path>/supervised/<rank_field>/<loss_type>/<query_type>/<neg_type>
pred_output_path = "/".join(
    [output_path, "supervised", rank_field, loss_type, query_type, neg_type]
)
make_dirs(pred_output_path)

# for each set of ES results: compute the BERT top-k predictions, then write them to JSON
# (each result is dumped before the next one is computed)
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{pred_output_path}/bert_query_by_question.json")

bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{pred_output_path}/bert_query_by_answer.json")

bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{pred_output_path}/bert_query_by_question_answer.json")

bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{pred_output_path}/bert_query_by_question_answer_concat.json")

2021-03-31 08:03:42 - Generating BERT top-k results ...
2021-03-31 08:03:52 - Use pytorch device: cuda
100%|██████████| 1183/1183 [12:57<00:00,  1.52it/s]
2021-03-31 08:16:52 - Generating BERT top-k results ...
2021-03-31 08:16:54 - Use pytorch device: cuda
100%|██████████| 1183/1183 [12:50<00:00,  1.54it/s]
2021-03-31 08:29:47 - Generating BERT top-k results ...
2021-03-31 08:29:49 - Use pytorch device: cuda
100%|██████████| 1183/1183 [12:49<00:00,  1.54it/s]
2021-03-31 08:42:40 - Generating BERT top-k results ...
2021-03-31 08:42:42 - Use pytorch device: cuda
100%|██████████| 1183/1183 [12:47<00:00,  1.54it/s]


**query_type="faq"; neg_type="hard"; loss_type='softmax'**

In [25]:
# run configuration; the model directory is named <loss_type>_<neg_type>_<query_type>_<version>
query_type = "faq"
neg_type = "hard"
version = "1.1"
loss_type = 'softmax'
bert_model_path = f"output/FAQIR/models/{loss_type}_{neg_type}_{query_type}_{version}"

In [26]:
# build the ReRanker for the configured model path, handing it the test queries,
# the relevance labels and the rank field
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    rank_field=rank_field,
)

In [27]:
# output directory: <output_path>/supervised/<rank_field>/<loss_type>/<query_type>/<neg_type>
pred_output_path = "/".join(
    [output_path, "supervised", rank_field, loss_type, query_type, neg_type]
)
make_dirs(pred_output_path)

# for each set of ES results: compute the BERT top-k predictions, then write them to JSON
# (each result is dumped before the next one is computed)
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{pred_output_path}/bert_query_by_question.json")

bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{pred_output_path}/bert_query_by_answer.json")

bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{pred_output_path}/bert_query_by_question_answer.json")

bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{pred_output_path}/bert_query_by_question_answer_concat.json")

2021-03-31 08:55:32 - Generating BERT top-k results ...
2021-03-31 08:55:43 - Use pytorch device: cuda
100%|██████████| 1183/1183 [12:46<00:00,  1.54it/s]
2021-03-31 09:08:32 - Generating BERT top-k results ...
2021-03-31 09:08:34 - Use pytorch device: cuda
100%|██████████| 1183/1183 [12:48<00:00,  1.54it/s]
2021-03-31 09:21:25 - Generating BERT top-k results ...
2021-03-31 09:21:27 - Use pytorch device: cuda
100%|██████████| 1183/1183 [12:47<00:00,  1.54it/s]
2021-03-31 09:34:17 - Generating BERT top-k results ...
2021-03-31 09:34:19 - Use pytorch device: cuda
100%|██████████| 1183/1183 [12:46<00:00,  1.54it/s]


**query_type="faq"; neg_type="simple"; loss_type='softmax'**

In [28]:
# run configuration; the model directory is named <loss_type>_<neg_type>_<query_type>_<version>
query_type = "faq"
neg_type = "simple"
version = "1.1"
loss_type = 'softmax'
bert_model_path = f"output/FAQIR/models/{loss_type}_{neg_type}_{query_type}_{version}"

In [29]:
# build the ReRanker for the configured model path, handing it the test queries,
# the relevance labels and the rank field
r = ReRanker(
    bert_model_path=bert_model_path,
    test_queries=test_queries,
    relevance_label_df=relevance_label_df,
    rank_field=rank_field,
)

In [30]:
# output directory: <output_path>/supervised/<rank_field>/<loss_type>/<query_type>/<neg_type>
pred_output_path = "/".join(
    [output_path, "supervised", rank_field, loss_type, query_type, neg_type]
)
make_dirs(pred_output_path)

# for each set of ES results: compute the BERT top-k predictions, then write them to JSON
# (each result is dumped before the next one is computed)
bert_query_by_question = r.get_bert_topk_preds(es_query_by_question)
dump_to_json(bert_query_by_question, f"{pred_output_path}/bert_query_by_question.json")

bert_query_by_answer = r.get_bert_topk_preds(es_query_by_answer)
dump_to_json(bert_query_by_answer, f"{pred_output_path}/bert_query_by_answer.json")

bert_query_by_question_answer = r.get_bert_topk_preds(es_query_by_question_answer)
dump_to_json(bert_query_by_question_answer, f"{pred_output_path}/bert_query_by_question_answer.json")

bert_query_by_question_answer_concat = r.get_bert_topk_preds(es_query_by_question_answer_concat)
dump_to_json(bert_query_by_question_answer_concat, f"{pred_output_path}/bert_query_by_question_answer_concat.json")

2021-03-31 09:47:08 - Generating BERT top-k results ...
2021-03-31 09:47:23 - Use pytorch device: cuda
100%|██████████| 1183/1183 [12:48<00:00,  1.54it/s]
2021-03-31 10:00:14 - Generating BERT top-k results ...
2021-03-31 10:00:16 - Use pytorch device: cuda
100%|██████████| 1183/1183 [12:51<00:00,  1.53it/s]
2021-03-31 10:13:10 - Generating BERT top-k results ...
2021-03-31 10:13:12 - Use pytorch device: cuda
100%|██████████| 1183/1183 [12:51<00:00,  1.53it/s]
2021-03-31 10:26:06 - Generating BERT top-k results ...
2021-03-31 10:26:08 - Use pytorch device: cuda
100%|██████████| 1183/1183 [12:53<00:00,  1.53it/s]
