# BERT Meets Cranfield - Regression Approach
instead of classifing the lot we can also do regression. The trick is to assign a good conversion table in the first place.

## Environement Setup
Connect to Drive by clicking on the icon, this only works when this colab is generated via the Drive menu

In [1]:
# %cd /content/drive/MyDrive/COMPUTING SCIENCE/THESIS_PROJECT/bert-meets-cranfield-regression/Code
%cd /home/jupyter/BERT-BM25-Thesis-Project/bert-meets-cranfield-regression/Code

/home/jupyter/BERT-BM25-Thesis-Project/bert-meets-cranfield-regression/Code


In [2]:
!pip3 install -r ../requirements.txt



In [3]:
import utils
import data_utils
from operator import itemgetter
import os
import numpy as np

import torch
import importlib
import timeit
# from transformers import BertForSequenceClassification, BertTokenizer, BertForMaskedLM, BertForNextSentencePrediction


In [4]:
# call after making any changes in utils.py
importlib.reload(utils) 
importlib.reload(data_utils) 

<module 'data_utils' from '/home/jupyter/BERT-BM25-Thesis-Project/bert-meets-cranfield-regression/Code/data_utils.py'>

## Notes
The following changes apply:
 * labels have been replaced with the regression_labels
 * the prepare model routine now works with a `num_labels=1` setting the taks head in regression mode

In [5]:
# import utils
# import data_utils
# from operator import itemgetter
# import os

# ========================================
#               Hyper-Parameters
# ========================================
SEED = 76
MODE = 'Re-ranker'
MODEL_TYPE = 'bert-base-uncased'
LEARNING_RATE = 2e-5
MAX_LENGTH = 128
BATCH_SIZE = 32
EPOCHS = 1
TOP_BM25 = 100
MAP_CUT = 100
NDCG_CUT = 20
if MODE == 'Full-ranker':
    TEST_BATCH_SIZE = 1400
else:
    TEST_BATCH_SIZE = 100

CONVERSION_TABLE = [[0, 1, 2, 3, 4],[0, 1, 1, 1, 1]] 

# Set the seed value all over the place to make this reproducible.
utils.initialize_random_generators(SEED)

# if __name__ == "__main__":
def train_test():
    print("# ========================================")
    print("#               Hyper-Parameters")
    print(MODE)
    print(MODEL_TYPE)
    print(LEARNING_RATE)
    print(MAX_LENGTH)
    print(BATCH_SIZE)
    print(EPOCHS)
    print("# ========================================")
    print("#               Experiment-Settings")
    print("CONVERSION TABLE:")
    print(CONVERSION_TABLE )
    print("# ========================================")

    start = timeit.default_timer()

    device = utils.get_gpu_device()
    if not os.path.exists('../Output_Folder'):
        os.makedirs('../Output_Folder')

    queries = data_utils.get_queries('../Data/cran/cran.qry')
    corpus = data_utils.get_corpus('../Data/cran/cran.all.1400')
    rel_fed = data_utils.get_judgments('../Data/cran/cranqrel')

    # labels = utils.get_binary_labels(rel_fed)

    # get labels for the regression input
    regression_labels = utils.get_regression_labels(rel_fed, CONVERSION_TABLE)

    tokenized_corpus = [doc.split(" ") for doc in corpus]
    tokenized_queries = [query.split(" ") for query in queries]

    bm25, bm25_top_n = utils.get_bm25_top_results(tokenized_corpus, tokenized_queries, TOP_BM25)

    padded_all, attention_mask_all, token_type_ids_all, temp_feedback = utils.bert_tokenizer(MODE, bm25_top_n, corpus,
                                                                                            #  labels, queries,
                                                                                             regression_labels, queries,
                                                                                             MAX_LENGTH, MODEL_TYPE)

    # ========================================
    #               Folds
    # ========================================
    mrr_bm25_list, map_bm25_list, ndcg_bm25_list = [], [], []
    mrr_bert_list, map_bert_list, ndcg_bert_list = [], [], []
    mrr_bm25, map_bm25, ndcg_bm25 = 0, 0, 0
    mrr_bert, map_bert, ndcg_bert = 0, 0, 0

    for fold_number in range(1, 6):
        print('======== Fold {:} / {:} ========'.format(fold_number, 5))
        train_index, test_index = data_utils.load_fold(fold_number)

        padded, attention_mask, token_type_ids = [], [], []
        if MODE == 'Re-ranker':
            padded, attention_mask, token_type_ids = padded_all, attention_mask_all, token_type_ids_all
        else:
            temp_feedback = []
            for query_num in range(0, len(bm25_top_n)):
                if query_num in test_index:
                    doc_nums = range(0, 1400)
                else:
                    doc_nums = bm25_top_n[query_num]
                padded.append(list(itemgetter(*doc_nums)(padded_all[query_num])))
                attention_mask.append(list(itemgetter(*doc_nums)(attention_mask_all[query_num])))
                token_type_ids.append(list(itemgetter(*doc_nums)(token_type_ids_all[query_num])))
                temp_feedback.append(list(itemgetter(*doc_nums)(regression_labels[query_num])))
                # temp_feedback.append(list(itemgetter(*doc_nums)(labels[query_num])))

        train_dataset = data_utils.get_tensor_dataset(train_index, padded, attention_mask, token_type_ids,
                                                      temp_feedback)
        test_dataset = data_utils.get_tensor_dataset(test_index, padded, attention_mask, token_type_ids, temp_feedback)

        mrr_bm25, map_bm25, ndcg_bm25, mrr_bm25_list, map_bm25_list, ndcg_bm25_list = utils.get_bm25_results(
            mrr_bm25_list, map_bm25_list, ndcg_bm25_list, test_index, tokenized_queries, bm25, mrr_bm25, map_bm25,
            ndcg_bm25, rel_fed, fold_number, MAP_CUT, NDCG_CUT)

        train_dataloader, test_dataloader, model, optimizer, scheduler = utils.model_preparation(MODEL_TYPE, train_dataset,
                                                                                                 test_dataset,
                                                                                                 BATCH_SIZE, TEST_BATCH_SIZE,
                                                                                                 LEARNING_RATE, EPOCHS)
        # ========================================
        #               Training Loop
        # ========================================
        epochs_train_loss, epochs_val_loss = [], []
        for epoch_i in range(0, EPOCHS):
            # ========================================
            #               Training
            # ========================================
            print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, EPOCHS))
            print('Training...')
            model, optimizer, scheduler = utils.training(model, train_dataloader, device, optimizer, scheduler)
        # ========================================
        #               Testing
        # ========================================
        print('Testing...')
        mrr_bert, map_bert, ndcg_bert, mrr_bert_list, map_bert_list, ndcg_bert_list = utils.testing(MODE, model,
                                                                                                    test_dataloader,
                                                                                                    device, test_index,
                                                                                                    bm25_top_n,
                                                                                                    mrr_bert_list,
                                                                                                    map_bert_list,
                                                                                                    ndcg_bert_list,
                                                                                                    mrr_bert, map_bert,
                                                                                                    ndcg_bert, rel_fed,
                                                                                                    fold_number,
                                                                                                    MAP_CUT, NDCG_CUT)
    print("  BM25 MRR:  " + "{:.4f}".format(mrr_bm25 / 5))
    print("  BM25 MAP:  " + "{:.4f}".format(map_bm25 / 5))
    print("  BM25 NDCG: " + "{:.4f}".format(ndcg_bm25 / 5))

    print("  BERT MRR:  " + "{:.4f}".format(mrr_bert / 5))
    print("  BERT MAP:  " + "{:.4f}".format(map_bert / 5))
    print("  BERT NDCG: " + "{:.4f}".format(ndcg_bert / 5))

    utils.t_test(mrr_bm25_list, mrr_bert_list, 'MRR')
    utils.t_test(map_bm25_list, map_bert_list, 'MAP')
    utils.t_test(ndcg_bm25_list, ndcg_bert_list, 'NDCG')
   
    stop = timeit.default_timer()
    wall_time = (stop - start) / 60 

    print('Time: ', wall_time, ' min')

    # utils.results_to_csv('./mrr_bm25_list.csv', mrr_bm25_list)
    # utils.results_to_csv('./mrr_bert_list.csv', mrr_bert_list)
    # utils.results_to_csv('./map_bm25_list.csv', map_bm25_list)
    # utils.results_to_csv('./map_bert_list.csv', map_bert_list)
    # utils.results_to_csv('./ndcg_bm25_list.csv', ndcg_bm25_list)
    # utils.results_to_csv('./ndcg_bert_list.csv', ndcg_bert_list)


# Testing part
note that the `lr=2e-05` and `EPOCHS=1` already have been done

In [6]:
EPOCHS = 2 
train_test()

#               Hyper-Parameters
Re-ranker
bert-base-uncased
2e-05
128
32
2
#               Experiment-Settings
CONVERSION TABLE:
[[0, 1, 2, 3, 4], [0, 1, 1, 1, 1]]
GPU Type: Tesla T4




KeyboardInterrupt: 

In [7]:
LEARNING_RATE = 3e-5
EPOCHS = 1
train_test()

#               Hyper-Parameters
Re-ranker
bert-base-uncased
3e-05
128
32
1
#               Experiment-Settings
CONVERSION TABLE:
[[0, 1, 2, 3, 4], [0, 1, 1, 1, 1]]
GPU Type: Tesla T4
MRR:  0.7837
MAP:  0.3493
NDCG: 0.5011
45


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0426
Testing...
  Test MRR:  0.8180
  Test MAP:  0.4082
  Test NDCG: 0.5464
45
MRR:  0.6596
MAP:  0.3036
NDCG: 0.4546
90


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0464
Testing...
  Test MRR:  0.7133
  Test MAP:  0.3477
  Test NDCG: 0.4975
90
MRR:  0.7611
MAP:  0.3341
NDCG: 0.4826
135


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0421
Testing...
  Test MRR:  0.8616
  Test MAP:  0.4389
  Test NDCG: 0.5707
135
MRR:  0.6859
MAP:  0.3317
NDCG: 0.4408
180


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0434
Testing...
  Test MRR:  0.7447
  Test MAP:  0.3972
  Test NDCG: 0.5004
180
MRR:  0.7796
MAP:  0.3182
NDCG: 0.4780
225


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0521
Testing...
  Test MRR:  0.8139
  Test MAP:  0.3880
  Test NDCG: 0.5381
225
  BM25 MRR:  0.7340
  BM25 MAP:  0.3274
  BM25 NDCG: 0.4714
  BERT MRR:  0.7903
  BERT MAP:  0.3960
  BERT NDCG: 0.5306
p-value MRR: 0.1024
p-value MAP: 0.0050
p-value NDCG: 0.0191
Time:  39.045717645983316  min


In [6]:
LEARNING_RATE = 3e-5
EPOCHS = 2
train_test()

#               Hyper-Parameters
Re-ranker
bert-base-uncased
3e-05
128
32
2
#               Experiment-Settings
CONVERSION TABLE:
[[0, 1, 2, 3, 4], [0, 1, 1, 1, 1]]
GPU Type: Tesla T4




MRR:  0.7837
MAP:  0.3493
NDCG: 0.5011
45


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0469
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0305
Testing...
  Test MRR:  0.8556
  Test MAP:  0.4313
  Test NDCG: 0.5792
45
MRR:  0.6596
MAP:  0.3036
NDCG: 0.4546
90


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0530
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0329
Testing...
  Test MRR:  0.7278
  Test MAP:  0.3646
  Test NDCG: 0.5131
90
MRR:  0.7611
MAP:  0.3341
NDCG: 0.4826
135


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0444
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0298
Testing...
  Test MRR:  0.8356
  Test MAP:  0.4424
  Test NDCG: 0.5812
135
MRR:  0.6859
MAP:  0.3317
NDCG: 0.4408
180


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0451
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0307
Testing...
  Test MRR:  0.7500
  Test MAP:  0.3954
  Test NDCG: 0.4972
180
MRR:  0.7796
MAP:  0.3182
NDCG: 0.4780
225


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0421
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0296
Testing...
  Test MRR:  0.8416
  Test MAP:  0.4304
  Test NDCG: 0.5768
225
  BM25 MRR:  0.7340
  BM25 MAP:  0.3274
  BM25 NDCG: 0.4714
  BERT MRR:  0.8021
  BERT MAP:  0.4128
  BERT NDCG: 0.5495
p-value MRR: 0.0473
p-value MAP: 0.0006
p-value NDCG: 0.0021
Time:  73.21226674481667  min


In [6]:
CONVERSION_TABLE = [[0, 1, 2, 3, 4],[-0.5, 1, 0.8, 0.6, 0.3]] 

In [8]:
LEARNING_RATE = 2e-5
EPOCHS = 2
train_test()

#               Hyper-Parameters
Re-ranker
bert-base-uncased
2e-05
128
32
2
#               Experiment-Settings
CONVERSION TABLE:
[[0, 1, 2, 3, 4], [-0.5, 1, 0.8, 0.6, 0.3]]
GPU Type: Tesla T4
MRR:  0.7837
MAP:  0.3493
NDCG: 0.5011
45


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0244
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0169
Testing...
  Test MRR:  0.8654
  Test MAP:  0.4227
  Test NDCG: 0.5767
45
MRR:  0.6596
MAP:  0.3036
NDCG: 0.4546
90


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0241
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0165
Testing...
  Test MRR:  0.7182
  Test MAP:  0.3660
  Test NDCG: 0.5101
90
MRR:  0.7611
MAP:  0.3341
NDCG: 0.4826
135


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0255
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0164
Testing...
  Test MRR:  0.8740
  Test MAP:  0.4556
  Test NDCG: 0.5880
135
MRR:  0.6859
MAP:  0.3317
NDCG: 0.4408
180


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0242
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0161
Testing...
  Test MRR:  0.7493
  Test MAP:  0.3947
  Test NDCG: 0.5053
180
MRR:  0.7796
MAP:  0.3182
NDCG: 0.4780
225


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0221
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0153
Testing...
  Test MRR:  0.8319
  Test MAP:  0.4294
  Test NDCG: 0.5835
225
  BM25 MRR:  0.7340
  BM25 MAP:  0.3274
  BM25 NDCG: 0.4714
  BERT MRR:  0.8077
  BERT MAP:  0.4137
  BERT NDCG: 0.5527
p-value MRR: 0.0327
p-value MAP: 0.0005
p-value NDCG: 0.0014
Time:  72.69267198805001  min


In [7]:
LEARNING_RATE = 3e-5
EPOCHS = 1
train_test()

#               Hyper-Parameters
Re-ranker
bert-base-uncased
3e-05
128
32
1
#               Experiment-Settings
CONVERSION TABLE:
[[0, 1, 2, 3, 4], [-0.5, 1, 0.8, 0.6, 0.3]]
GPU Type: Tesla T4




MRR:  0.7837
MAP:  0.3493
NDCG: 0.5011
45


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0268
Testing...
  Test MRR:  0.8356
  Test MAP:  0.4171
  Test NDCG: 0.5512
45
MRR:  0.6596
MAP:  0.3036
NDCG: 0.4546
90


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0294
Testing...
  Test MRR:  0.6822
  Test MAP:  0.3391
  Test NDCG: 0.4753
90
MRR:  0.7611
MAP:  0.3341
NDCG: 0.4826
135


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0243
Testing...
  Test MRR:  0.8565
  Test MAP:  0.4465
  Test NDCG: 0.5853
135
MRR:  0.6859
MAP:  0.3317
NDCG: 0.4408
180


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0247
Testing...
  Test MRR:  0.7331
  Test MAP:  0.3900
  Test NDCG: 0.4976
180
MRR:  0.7796
MAP:  0.3182
NDCG: 0.4780
225


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0222
Testing...
  Test MRR:  0.8142
  Test MAP:  0.4079
  Test NDCG: 0.5694
225
  BM25 MRR:  0.7340
  BM25 MAP:  0.3274
  BM25 NDCG: 0.4714
  BERT MRR:  0.7843
  BERT MAP:  0.4001
  BERT NDCG: 0.5358
p-value MRR: 0.1481
p-value MAP: 0.0031
p-value NDCG: 0.0107
Time:  39.038887666966666  min


In [8]:
LEARNING_RATE = 3e-5
EPOCHS = 2
train_test()

#               Hyper-Parameters
Re-ranker
bert-base-uncased
3e-05
128
32
2
#               Experiment-Settings
CONVERSION TABLE:
[[0, 1, 2, 3, 4], [-0.5, 1, 0.8, 0.6, 0.3]]
GPU Type: Tesla T4
MRR:  0.7837
MAP:  0.3493
NDCG: 0.5011
45


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0275
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0150
Testing...
  Test MRR:  0.8552
  Test MAP:  0.4213
  Test NDCG: 0.5706
45
MRR:  0.6596
MAP:  0.3036
NDCG: 0.4546
90


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0246
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0160
Testing...
  Test MRR:  0.7090
  Test MAP:  0.3536
  Test NDCG: 0.5030
90
MRR:  0.7611
MAP:  0.3341
NDCG: 0.4826
135


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0251
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0148
Testing...
  Test MRR:  0.8237
  Test MAP:  0.4400
  Test NDCG: 0.5748
135
MRR:  0.6859
MAP:  0.3317
NDCG: 0.4408
180


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0256
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0153
Testing...
  Test MRR:  0.7712
  Test MAP:  0.3929
  Test NDCG: 0.4975
180
MRR:  0.7796
MAP:  0.3182
NDCG: 0.4780
225


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0244
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0161
Testing...
  Test MRR:  0.8278
  Test MAP:  0.4281
  Test NDCG: 0.5853
225
  BM25 MRR:  0.7340
  BM25 MAP:  0.3274
  BM25 NDCG: 0.4714
  BERT MRR:  0.7974
  BERT MAP:  0.4072
  BERT NDCG: 0.5462
p-value MRR: 0.0656
p-value MAP: 0.0012
p-value NDCG: 0.0030
Time:  72.77926726286668  min


In [7]:
CONVERSION_TABLE = [[0, 1, 2, 3, 4],[-1, 1, 0.7, 0.53, 0.4]] 

In [10]:
LEARNING_RATE = 2e-5
EPOCHS = 2
train_test()

#               Hyper-Parameters
Re-ranker
bert-base-uncased
2e-05
128
32
2
#               Experiment-Settings
CONVERSION TABLE:
[[0, 1, 2, 3, 4], [-1, 1, 0.7, 0.53, 0.4]]
GPU Type: Tesla T4
MRR:  0.7837
MAP:  0.3493
NDCG: 0.5011
45


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0214
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0145
Testing...
  Test MRR:  0.8569
  Test MAP:  0.4239
  Test NDCG: 0.5736
45
MRR:  0.6596
MAP:  0.3036
NDCG: 0.4546
90


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0214
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0138
Testing...
  Test MRR:  0.7030
  Test MAP:  0.3570
  Test NDCG: 0.5040
90
MRR:  0.7611
MAP:  0.3341
NDCG: 0.4826
135


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0234
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0151
Testing...
  Test MRR:  0.8875
  Test MAP:  0.4480
  Test NDCG: 0.5906
135
MRR:  0.6859
MAP:  0.3317
NDCG: 0.4408
180


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0232
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0146
Testing...
  Test MRR:  0.7459
  Test MAP:  0.4036
  Test NDCG: 0.5005
180
MRR:  0.7796
MAP:  0.3182
NDCG: 0.4780
225


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0218
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0132
Testing...
  Test MRR:  0.7923
  Test MAP:  0.4143
  Test NDCG: 0.5717
225
  BM25 MRR:  0.7340
  BM25 MAP:  0.3274
  BM25 NDCG: 0.4714
  BERT MRR:  0.7971
  BERT MAP:  0.4094
  BERT NDCG: 0.5481
p-value MRR: 0.0684
p-value MAP: 0.0009
p-value NDCG: 0.0024
Time:  72.76701461618335  min


In [11]:
LEARNING_RATE = 3e-5
EPOCHS = 1
train_test()

#               Hyper-Parameters
Re-ranker
bert-base-uncased
3e-05
128
32
1
#               Experiment-Settings
CONVERSION TABLE:
[[0, 1, 2, 3, 4], [-1, 1, 0.7, 0.53, 0.4]]
GPU Type: Tesla T4
MRR:  0.7837
MAP:  0.3493
NDCG: 0.5011
45


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0245
Testing...
  Test MRR:  0.8295
  Test MAP:  0.4095
  Test NDCG: 0.5492
45
MRR:  0.6596
MAP:  0.3036
NDCG: 0.4546
90


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0210
Testing...
  Test MRR:  0.7190
  Test MAP:  0.3578
  Test NDCG: 0.4972
90
MRR:  0.7611
MAP:  0.3341
NDCG: 0.4826
135


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0212
Testing...
  Test MRR:  0.8432
  Test MAP:  0.4360
  Test NDCG: 0.5728
135
MRR:  0.6859
MAP:  0.3317
NDCG: 0.4408
180


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0212
Testing...
  Test MRR:  0.7538
  Test MAP:  0.3908
  Test NDCG: 0.4912
180
MRR:  0.7796
MAP:  0.3182
NDCG: 0.4780
225


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0213
Testing...
  Test MRR:  0.8354
  Test MAP:  0.4135
  Test NDCG: 0.5646
225
  BM25 MRR:  0.7340
  BM25 MAP:  0.3274
  BM25 NDCG: 0.4714
  BERT MRR:  0.7962
  BERT MAP:  0.4015
  BERT NDCG: 0.5350
p-value MRR: 0.0727
p-value MAP: 0.0027
p-value NDCG: 0.0121
Time:  38.99887011243333  min


In [8]:

LEARNING_RATE = 3e-5
EPOCHS = 2
train_test()

#               Hyper-Parameters
Re-ranker
bert-base-uncased
3e-05
128
32
2
#               Experiment-Settings
CONVERSION TABLE:
[[0, 1, 2, 3, 4], [-1, 1, 0.7, 0.53, 0.4]]
GPU Type: Tesla T4
MRR:  0.7837
MAP:  0.3493
NDCG: 0.5011
45


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0252
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0147
Testing...
  Test MRR:  0.8732
  Test MAP:  0.4336
  Test NDCG: 0.5821
45
MRR:  0.6596
MAP:  0.3036
NDCG: 0.4546
90


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0287
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0148
Testing...
  Test MRR:  0.6991
  Test MAP:  0.3555
  Test NDCG: 0.5051
90
MRR:  0.7611
MAP:  0.3341
NDCG: 0.4826
135


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0221
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0138
Testing...
  Test MRR:  0.8631
  Test MAP:  0.4480
  Test NDCG: 0.5818
135
MRR:  0.6859
MAP:  0.3317
NDCG: 0.4408
180


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0231
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0142
Testing...
  Test MRR:  0.7644
  Test MAP:  0.3987
  Test NDCG: 0.5014
180
MRR:  0.7796
MAP:  0.3182
NDCG: 0.4780
225


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0209
Training...
  Batch   100  of    563.
  Batch   200  of    563.
  Batch   300  of    563.
  Batch   400  of    563.
  Batch   500  of    563.
  Average training loss: 0.0138
Testing...
  Test MRR:  0.8126
  Test MAP:  0.4384
  Test NDCG: 0.5805
225
  BM25 MRR:  0.7340
  BM25 MAP:  0.3274
  BM25 NDCG: 0.4714
  BERT MRR:  0.8025
  BERT MAP:  0.4148
  BERT NDCG: 0.5502
p-value MRR: 0.0469
p-value MAP: 0.0005
p-value NDCG: 0.0020
Time:  82.16132233031666  min
