<a href="https://colab.research.google.com/github/Prakhar314/COL764-Project/blob/main/notebooks/pointwise_evaluate.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers tensorflow-addons



In [2]:
# Mount Google Drive so the model weights, queries, qrels and run files under
# /content/gdrive/MyDrive/col764 are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


### Model

In [3]:
import transformers
from transformers import BertTokenizer, BertConfig, TFBertModel
import tensorflow as tf
import tensorflow_addons as tfa

In [4]:
# Checkpoint name shared by the tokenizer here and by create_model_pointwise below.
bert_model_name = 'bert-base-uncased'
# Tokenizer used later to encode both the queries and the passage segments.
tokenizer = BertTokenizer.from_pretrained(bert_model_name)

#### Pointwise

In [5]:
def create_inputs(num_nodes,name):
  """Build the three int32 Keras input layers that are fed to the BERT encoder.

  Args:
    num_nodes: Sequence length; every input has shape ``(num_nodes,)``.
    name: Accepted but not used; the layers are always named
      ``input_ids``, ``token_type_ids`` and ``attention_mask``.

  Returns:
    A list of three ``tf.keras.layers.Input`` tensors, in the order listed above.
  """
  input_names = ('input_ids', 'token_type_ids', 'attention_mask')
  return [
      tf.keras.layers.Input(shape=(num_nodes,), dtype=tf.int32, name=input_name)
      for input_name in input_names
  ]

def create_model_pointwise(output_bias=None):
    """Build and compile the pointwise BERT relevance classifier.

    The model feeds three 512-token inputs through a pretrained ``TFBertModel``,
    takes the hidden state at position 0 and maps it through a single sigmoid
    unit. The embeddings, the pooler and the first 8 encoder blocks are frozen;
    any other top-level layer of the BERT wrapper is frozen entirely.

    Side effects: writes the pretrained BERT weights to ``./bert-model/`` and
    prints the Keras model summary.

    Args:
        output_bias: Optional constant used to initialise the bias of the final
            Dense layer. When ``None`` the bias starts at zero.

    Returns:
        The compiled ``tf.keras.Model`` (binary cross-entropy loss, RectifiedAdam
        optimizer, binary-accuracy and ROC-AUC metrics).
    """
    # Handing ``bias_initializer=None`` to Dense does not mean "zeros": Keras then
    # falls back to its generic default initializer for float weights. Ask for
    # zeros explicitly when no bias value is supplied.
    if output_bias is not None:
        bias_initializer = tf.keras.initializers.Constant(output_bias)
    else:
        bias_initializer = 'zeros'

    config = BertConfig(hidden_dropout_prob=0.1)
    bert = TFBertModel.from_pretrained(bert_model_name, config=config)
    bert.save_pretrained('./bert-model/')

    # Number of leading encoder blocks whose weights stay fixed.
    num_frozen_blocks = 8
    main_layer_cls = transformers.models.bert.modeling_tf_bert.TFBertMainLayer
    for bert_layer in bert.layers:
        if isinstance(bert_layer, main_layer_cls):
            bert_layer.embeddings.trainable = False
            bert_layer.pooler.trainable = False
            # Distinct name for the inner loop so the outer loop variable is not rebound.
            for idx, encoder_block in enumerate(bert_layer.encoder.layer):
                if idx < num_frozen_blocks:
                    encoder_block.trainable = False
        else:
            bert_layer.trainable = False

    input_layer = create_inputs(512, 'pair')
    bert_out = bert(input_layer).last_hidden_state
    # Keep only the hidden state of the first token of every sequence.
    cls = tf.keras.layers.Lambda(lambda x: x[:, 0, :])(bert_out)
    output = tf.keras.layers.Dense(
        1, activation="sigmoid", bias_initializer=bias_initializer)(cls)
    model = tf.keras.models.Model(inputs=input_layer, outputs=[output])

    opt = tfa.optimizers.RectifiedAdam()
    model.compile(optimizer=opt,
                  loss=tf.keras.losses.BinaryCrossentropy(),
                  metrics=[tf.keras.metrics.BinaryAccuracy(),
                           tf.keras.metrics.AUC(curve="ROC")])
    model.summary()
    return model

#### Load Weights

In [6]:
# Rebuild the architecture; the fine-tuned weights are loaded into it in the next cell.
model = create_model_pointwise()

Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, 512)]        0           []                               
                                                                                                  
 token_type_ids (InputLayer)    [(None, 512)]        0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, 512)]        0           []                               
                                                                                                  
 tf_bert_model (TFBertModel)    TFBaseModelOutputWi  109482240   ['input_ids[0][0]',              
                                thPoolingAndCrossAt               'token_type_ids[0][0]',     

In [7]:
# Replace the freshly initialised weights with the saved pointwise checkpoint from Drive.
model.load_weights("/content/gdrive/MyDrive/col764/final_model_pointwise.h5")

### Reranking

In [8]:
# Unpack the segment archive into the current directory.
# NOTE(review): presumably this is what creates /content/output_copy, which is walked below — confirm.
!tar -zxf /content/gdrive/MyDrive/col764/output_qrel_only.tar.gz

In [9]:
# List the run files available on Drive.
!ls /content/gdrive/MyDrive/col764/results

run.desc.test.txt	    run.desc.train.txt	run.query.test.txt.reranked
run.desc.test.txt.reranked  run.query.test.txt	run.query.train.txt


In [10]:
import os

In [11]:
import pandas as pd

In [12]:
# Tab-separated test topics file (read with sep='\t' in the next cell).
query_file = '/content/gdrive/MyDrive/col764/queries/podcasts_2020_topics_test_description.tsv'

In [13]:
# The topics file has no header row; label its two columns as (qnum, query).
queries = (
    pd.read_csv(query_file, sep='\t', header=None)
    .set_axis(['qnum', 'query'], axis=1)
)

In [14]:
# Inspect the loaded topics.
queries

Unnamed: 0,qnum,query
0,9,The White House released a rough transcript of...
1,10,The Boeing 737 MAX passenger airliner was grou...
2,11,I’m looking for podcasts that talk about how t...
3,12,Imran Khan is the 22nd Prime Minister of Pakis...
4,13,I’m interested in hearing personal stories of ...
5,14,I wonder if people have shared near-death expe...
6,15,I would like to start a podcast. What equipme...
7,16,"2019 saw a large number of wildfires, in Austr..."
8,17,I am looking for some ideas for things to do d...
9,18,I’d like to hear personal stories from women i...


In [15]:
# First-stage run to rerank; the reranked output is later written next to it with a '.reranked' suffix.
results_file = '/content/gdrive/MyDrive/col764/results/run.query.test.txt'

In [16]:
# Space-separated run file without a header row; name its six columns.
bm25_ranks = (
    pd.read_csv(results_file, sep=' ', header=None)
    .set_axis(['qnum', 'qid', 'epid', 'rank', 'score', 'runid'], axis=1)
)

In [17]:
# Inspect the first-stage ranking as loaded.
bm25_ranks

Unnamed: 0,qnum,qid,epid,rank,score,runid
0,9,Q0,spotify:episode:6O8djf3RL94yNfaoWqvk3r_840.0,1,16.569401,Anserini
1,9,Q0,spotify:episode:3gJ0fBXYRryWAjgwxY4q6C_240.0,2,16.302999,Anserini
2,9,Q0,spotify:episode:6svv5L5kRn8groSsM8gkpc_3180.0,3,15.984800,Anserini
3,9,Q0,spotify:episode:4JqqVFhNjtlwoIMl5tvL8R_780.0,4,15.275800,Anserini
4,9,Q0,spotify:episode:3gJ0fBXYRryWAjgwxY4q6C_0.0,5,14.734600,Anserini
...,...,...,...,...,...,...
47753,58,Q0,spotify:episode:30j4F0dABykbXJuGevrfNB_1920.0,996,4.663695,Anserini
47754,58,Q0,spotify:episode:3ALoC0SU00c8RtxQHohesZ_1440.0,997,4.663694,Anserini
47755,58,Q0,spotify:episode:3ALoC0SU00c8RtxQHohesZ_420.0,998,4.663693,Anserini
47756,58,Q0,spotify:episode:3bieaciVail62osQ0YC8JE_1380.0,999,4.663692,Anserini


In [18]:
# Only rows whose first-stage rank is below 51 are kept for reranking.
bm25_ranks = bm25_ranks.loc[bm25_ranks['rank'].lt(51)].reset_index(drop=True)

In [19]:
# One slot per distinct candidate segment id; the text is filled in when the corpus is scanned.
episode_contents = dict.fromkeys(bm25_ranks['epid'].unique())

In [20]:
# Check what is present in the working directory after extraction.
!ls /content/

bert-model  gdrive  output_copy  sample_data  trec_eval


In [21]:
import json

# Scan every JSON file under the extracted corpus directory and record the text
# of each segment whose id is among the rerank candidates.
for root, dirs, files in os.walk('/content/output_copy'):
  for file in sorted(files):
    # Read the whole file first so the handle is closed before processing.
    with open(os.path.join(root, file), 'r') as f:
      episode_segments = json.load(f)
    for segment in episode_segments:
      if segment['id'] in episode_contents:
        episode_contents[segment['id']] = segment['contents']

# A candidate that was never found would stay None and only fail later inside
# the tokenizer with an unrelated-looking error; report it here instead.
missing_segments = [epid for epid, text in episode_contents.items() if text is None]
if missing_segments:
  raise ValueError(
      f"{len(missing_segments)} candidate segments were not found under "
      f"/content/output_copy, e.g. {missing_segments[:3]}")

In [22]:
# Number of distinct candidate segments (no need to copy and sort the keys just to count them).
len(episode_contents)

2471

In [23]:
# Attach the segment text to every candidate row by looking up its id.
bm25_ranks = bm25_ranks.assign(segment=bm25_ranks['epid'].map(episode_contents))

In [24]:
# Bring in the query text for each row; a left join keeps every candidate row.
bm25_ranks = bm25_ranks.merge(queries, how='left', on='qnum')

In [25]:
# Token budget for the query half of the 512-token model input.
q_trunc = 64
# The passage gets the remainder plus one, because its first token is sliced
# off when the two encodings are concatenated.
p_trunc = 512 - q_trunc + 1

In [26]:
import gc
import numpy as np

In [27]:
# Encode queries and passages separately, each padded/truncated to its own budget.
q = tokenizer(bm25_ranks['query'].values.tolist(), return_tensors="tf",padding="max_length",max_length=q_trunc,truncation=True)
p = tokenizer(bm25_ranks['segment'].values.tolist(), return_tensors="tf",padding="max_length",max_length=p_trunc,truncation=True)
# Concatenate query and passage along the sequence axis, dropping the passage's
# first position. A comprehension is used so the loop variable does not leak
# into the notebook namespace (the previous loop variable `id` replaced the
# builtin `id` for the rest of the session).
# NOTE(review): each half is encoded as a single sequence, so the passage
# token_type_ids are presumably all 0 — confirm this matches how the model was trained.
X_test = [
    tf.concat([q[feature], p[feature][:, 1:]], 1)
    for feature in ('input_ids', 'token_type_ids', 'attention_mask')
]
# Drop the references to the intermediate encodings and collect garbage.
q = None
p = None
gc.collect()

24990

In [28]:
# Score every (query, segment) row with the model; the sigmoid output gives one value per row.
scores = model.predict(X_test,batch_size=128,verbose=1)



In [29]:
# Sanity check: expect one score row per candidate row in bm25_ranks.
print(scores.shape)

(2476, 1)


In [30]:
# Overwrite the first-stage score with the model score; flatten the (N, 1)
# prediction array so a plain 1-D column is stored.
bm25_ranks['score'] = scores.reshape(-1)

In [31]:
# The text columns were only needed for tokenization; leave the six run-file columns.
bm25_ranks = bm25_ranks.drop(columns=['segment', 'query'])

In [32]:
# Order rows by query number, then by descending model score within each query.
bm25_ranks = (
    bm25_ranks
    .sort_values(by=['qnum', 'score'], ascending=[True, False])
    .reset_index(drop=True)
)

In [33]:
# Rank 1 is the highest score within each query. method='first' gives tied
# scores distinct consecutive ranks in row order; the default 'average' method
# yields fractional ranks (e.g. 1.5) that the integer cast would truncate into
# duplicate or skipped ranks.
bm25_ranks['rank'] = (
    bm25_ranks.groupby('qnum')['score']
    .rank(method='first', ascending=False)
    .astype(np.int16)
)

In [34]:
# Inspect the reranked table (scores are now the model outputs).
bm25_ranks

Unnamed: 0,qnum,qid,epid,rank,score,runid
0,9,Q0,spotify:episode:6O8djf3RL94yNfaoWqvk3r_840.0,1,0.570476,Anserini
1,9,Q0,spotify:episode:2wlZZPXaCiEBGB0TCPk7VL_780.0,2,0.350815,Anserini
2,9,Q0,spotify:episode:3FUFCyf9VRdYbOInQWAWKq_1080.0,3,0.328774,Anserini
3,9,Q0,spotify:episode:3gJ0fBXYRryWAjgwxY4q6C_240.0,4,0.232002,Anserini
4,9,Q0,spotify:episode:6svv5L5kRn8groSsM8gkpc_3180.0,5,0.219507,Anserini
...,...,...,...,...,...,...
2471,58,Q0,spotify:episode:1AX7RBDIlAvUlBbjXgwHEp_180.0,46,0.001120,Anserini
2472,58,Q0,spotify:episode:5xDfrpC4pmkHBVf28JYKv1_120.0,47,0.001082,Anserini
2473,58,Q0,spotify:episode:0Ec1VOSw0IcIqyhKJXT4hi_2220.0,48,0.000968,Anserini
2474,58,Q0,spotify:episode:2L43MfmBaNIVXTAvtl7Pxg_1380.0,49,0.000807,Anserini


In [35]:
# Write the reranked run next to the original. The run format has no header
# row (the input was read with header=None), so suppress the column names;
# otherwise the first line of the file is a bogus "qnum qid epid ..." record.
bm25_ranks.to_csv(results_file+'.reranked', sep = '\t', index=False, header=False)

In [36]:
!git clone https://github.com/usnistgov/trec_eval

fatal: destination path 'trec_eval' already exists and is not an empty directory.


In [37]:
!cd trec_eval;make

make: 'trec_eval' is up to date.


In [38]:
# Per-query ndcg_cut_5 of the reranked run against the 2020 test qrels.
!./trec_eval/trec_eval  -q -c -M1000 -m ndcg_cut.5 /content/gdrive/MyDrive/col764/qrels/2020_test_qrels.list {results_file}.reranked

ndcg_cut_5            	10	0.2140
ndcg_cut_5            	11	0.2557
ndcg_cut_5            	12	0.0000
ndcg_cut_5            	13	0.0000
ndcg_cut_5            	14	0.8800
ndcg_cut_5            	15	0.0000
ndcg_cut_5            	16	0.3787
ndcg_cut_5            	17	0.0000
ndcg_cut_5            	18	0.0000
ndcg_cut_5            	19	0.8800
ndcg_cut_5            	20	0.1746
ndcg_cut_5            	21	0.2074
ndcg_cut_5            	22	0.3323
ndcg_cut_5            	23	0.2544
ndcg_cut_5            	24	0.7529
ndcg_cut_5            	25	0.8688
ndcg_cut_5            	26	0.6081
ndcg_cut_5            	27	0.4852
ndcg_cut_5            	28	0.4344
ndcg_cut_5            	29	0.2557
ndcg_cut_5            	30	0.2533
ndcg_cut_5            	31	0.0000
ndcg_cut_5            	32	0.0000
ndcg_cut_5            	33	0.3992
ndcg_cut_5            	34	0.5362
ndcg_cut_5            	35	0.2994
ndcg_cut_5            	36	0.3943
ndcg_cut_5            	37	0.5180
ndcg_cut_5            	38	0.9563
ndcg_cut_5            	39	0.3543
ndcg_cut_5

In [39]:
# Same measure for the original first-stage run, for comparison with the reranked run.
# NOTE(review): unlike the reranked evaluation, this call does not pass -M1000 — confirm that is intended.
!./trec_eval/trec_eval  -q -c -m ndcg_cut.5  /content/gdrive/MyDrive/col764/qrels/2020_test_qrels.list {results_file}

ndcg_cut_5            	10	0.6302
ndcg_cut_5            	11	0.3383
ndcg_cut_5            	12	0.2140
ndcg_cut_5            	13	0.0000
ndcg_cut_5            	14	0.7945
ndcg_cut_5            	15	0.0000
ndcg_cut_5            	16	0.1918
ndcg_cut_5            	17	0.0000
ndcg_cut_5            	18	0.0000
ndcg_cut_5            	19	0.7174
ndcg_cut_5            	20	0.6740
ndcg_cut_5            	21	0.4979
ndcg_cut_5            	22	0.1556
ndcg_cut_5            	23	0.0000
ndcg_cut_5            	24	0.2517
ndcg_cut_5            	25	0.6399
ndcg_cut_5            	26	0.7021
ndcg_cut_5            	27	0.4790
ndcg_cut_5            	28	0.6070
ndcg_cut_5            	29	0.7154
ndcg_cut_5            	30	0.1598
ndcg_cut_5            	31	0.0000
ndcg_cut_5            	32	0.1091
ndcg_cut_5            	33	0.3493
ndcg_cut_5            	34	0.2140
ndcg_cut_5            	35	0.3284
ndcg_cut_5            	36	0.5932
ndcg_cut_5            	37	0.2953
ndcg_cut_5            	38	0.9076
ndcg_cut_5            	39	0.4480
ndcg_cut_5

In [40]:
# Confirm the '.reranked' file is present in the results directory.
!ls /content/gdrive/MyDrive/col764/results/

run.desc.test.txt	    run.desc.train.txt	run.query.test.txt.reranked
run.desc.test.txt.reranked  run.query.test.txt	run.query.train.txt
