In [1]:
import pandas as pd
import numpy as np
import json

In [2]:
# Load the dev split; the columns used below are 'statement', 'section' and 'label'.
dev_df = pd.read_csv('./output/dev_statement_section.csv')
# Leave the frame as the cell's last expression so the notebook renders it.
dev_df

Unnamed: 0,statement,section,label
0,there is a 13.2% difference between the result...,"[""Outcome Measurement:"", ""Event-free Survival""...",Contradiction
1,Patients with significantly elevated ejection ...,"[""Inclusion criteria:"", ""Inclusion Criteria:"",...",Contradiction
2,a significant number of the participants in th...,"[""Adverse Events 1:"", ""Total: 20/167 (11.98%)""...",Contradiction
3,the primary trial does not report the PFS or o...,"[""Outcome Measurement:"", ""Local Control Using ...",Entailment
4,Prior treatment with fulvestrant or with a pho...,"[""Inclusion Criteria:"", ""Postmenopausal women ...",Contradiction
...,...,...,...
195,The the primary trial intervention involves on...,"[""INTERVENTION 1:"", ""Letrozole"", ""Participants...",Contradiction
196,the secondary trial reported 1 single case of ...,"[""Adverse Events 1:"", ""Total: 16/48 (33.33%)"",...",Entailment
197,the secondary trial and the primary trial do n...,"[""Outcome Measurement:"", ""Number of Patients W...",Entailment
198,the outcome measurement of the primary trial i...,"[""Outcome Measurement:"", ""Progression-free Sur...",Entailment


In [3]:
# Statements as a plain Python list; later passed as the first sequence of
# every tokenized pair.
hypothesis_lst = dev_df['statement'].tolist()
len(hypothesis_lst)

200

In [4]:
# Each 'section' cell is a JSON-encoded list of strings: decode it and join the
# pieces with single spaces so every example has one flat evidence text.
evidence_lst = [
    ' '.join(json.loads(raw_section))
    for raw_section in dev_df['section']
]
len(evidence_lst)

200

In [5]:
# Integer encoding of the two classes; "Entailment" is the positive class (id 1).
label2id = {
    "Contradiction": 0,
    "Entailment": 1,
}
# A label missing from label2id raises KeyError instead of being silently encoded.
label_lst = [label2id[label_name] for label_name in dev_df['label']]
len(label_lst)

200

In [6]:
import random
import math
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

# from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import BertTokenizer, MegatronBertForSequenceClassification

In [7]:
# Backbone used in this run: the BioMegatron checkpoint stored in a local directory.
#
# Backbones tried earlier via AutoTokenizer / AutoModelForSequenceClassification
# (that import is commented out in the imports cell), each with num_labels=2:
#   - 'bert-base-uncased'
#   - 'roberta-base'
#   - 'dmis-lab/biobert-v1.1'
biomegatron_dir = '../transformer_models/biomegatron345uncased'

text_tok = BertTokenizer.from_pretrained(biomegatron_dir)
text_clf = MegatronBertForSequenceClassification.from_pretrained(
    biomegatron_dir,
    num_labels=2,
)

Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at ../transformer_models/biomegatron345uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
class InputSequence:
    """Pre-tokenized, batch-indexable container of (text, text pair, label) examples.

    Every example is tokenized once in the constructor. ``seq[i]`` then returns
    the i-th mini-batch as ``(encodings, labels)`` and ``len(seq)`` is the
    number of mini-batches. Because ``__getitem__`` raises ``IndexError`` past
    the last batch, the object can also be iterated directly
    (``for encodings, labels in seq``).

    Args:
        tok: Tokenizer callable, invoked as ``tok(l_text, l_text2, padding=True,
            truncation=True, max_length=512, return_tensors='pt')``. Its result
            must map field names to tensors indexable by a list of row indices.
        l_text: First sequence of every example.
        l_text2: Second sequence of every example. Its length is checked
            against ``l_text`` unless it is ``None``.
        l_label: Label of every example (same length as ``l_text``); cast to
            int64 when a batch is built.
        batch_size: Number of examples per batch; must be positive.
        gpu: If True, all returned tensors are moved with ``.cuda()``.

    Raises:
        ValueError: If ``batch_size`` is not positive or the inputs differ in length.
    """

    def __init__(self, tok, l_text, l_text2, l_label, batch_size=64, gpu=True):
        if batch_size <= 0:
            raise ValueError('batch_size must be a positive integer, got {}'.format(batch_size))

        self.data_len = len(l_text)
        if l_text2 is not None and len(l_text2) != self.data_len:
            raise ValueError('l_text and l_text2 must have the same length')
        if len(l_label) != self.data_len:
            raise ValueError('l_text and l_label must have the same length')

        # Row order used when slicing batches; shuffled in place by on_epoch_end().
        self.data_idx = list(range(self.data_len))
        self.texts = tok(l_text, l_text2, padding=True, truncation=True, max_length=512, return_tensors='pt')
        self.l_label = np.array(l_label)
        print('tokenize done')

        self.batch_size = batch_size
        self.gpu = gpu

    def on_epoch_end(self):
        """Shuffle the example order in place so the next pass sees new batches."""
        random.shuffle(self.data_idx)

    def __getitem__(self, i):
        """Return batch ``i`` as ``(dict of field -> tensor, int64 label tensor)``.

        Negative indices count from the last batch.

        Raises:
            IndexError: If ``i`` does not address an existing batch. Without
                this, iterating the object would never terminate and an
                out-of-range index would silently yield an empty batch.
        """
        n_batches = len(self)
        if i < 0:
            i += n_batches
        if not 0 <= i < n_batches:
            raise IndexError('batch index out of range')

        start = i * self.batch_size
        # Slicing clamps at the end of the list, so the last batch may be short.
        batch_idx = self.data_idx[start:start + self.batch_size]

        return_texts = {k: self.texts[k][batch_idx] for k in self.texts}
        return_labels = torch.from_numpy(
            self.l_label[batch_idx].astype(np.int64)
        )

        if self.gpu:
            return_texts = {k: v.cuda() for k, v in return_texts.items()}
            return_labels = return_labels.cuda()

        return return_texts, return_labels

    def __len__(self):
        """Number of batches, counting a final partial batch."""
        return math.ceil(self.data_len / self.batch_size)
    

In [9]:
# Tokenize all (statement, evidence) pairs once and serve them in GPU batches of 16.
testing_data = InputSequence(
    tok=text_tok,
    l_text=hypothesis_lst,
    l_text2=evidence_lst,
    l_label=label_lst,
    batch_size=16,
    gpu=True,
)

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

tokenize done


In [10]:
# scores[k] becomes the array of softmax class probabilities (one row per
# example) produced by model_names[k] on testing_data.
scores = []

# Entry 0 is the base checkpoint directory; entries 1..10 are the per-epoch
# checkpoints epoch_00000 .. epoch_00009 under ./output/clf_models.
# Backbones evaluated previously used the same layout:
#   'bert-base-uncased'     -> './output/clf_models/bert-base-uncased_epoch_XXXXX.pt'
#   'roberta-base'          -> './output/clf_models/roberta-base_epoch_XXXXX.pt'
#   'dmis-lab/biobert-v1.1' -> './output/clf_models/biobert-v1.1_epoch_XXXXX.pt'
model_names = ['../transformer_models/biomegatron345uncased']
model_names.extend(
    './output/clf_models/biomegatron345uncased_epoch_{}.pt'.format(format(epoch, '05d'))
    for epoch in range(10)
)

n_batches = len(testing_data)
for model_name in model_names:
    clf = MegatronBertForSequenceClassification.from_pretrained(model_name).cuda()

    batch_probs = []
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for batch in range(n_batches):
            batch_texts, batch_labels = testing_data[batch]
            logits = clf(**batch_texts).logits
            batch_probs.append(F.softmax(logits, dim=1).detach().cpu().numpy())
            # '\r' keeps the progress report on a single, overwritten line.
            print('model:', model_name, 'batch:', batch, end='\r')

    # Stitch the per-batch arrays back into one array in dataset order.
    scores.append(np.concatenate(batch_probs, axis=0))
    # Move the finished model off the GPU before the next one is loaded.
    clf.cpu()

Some weights of MegatronBertForSequenceClassification were not initialized from the model checkpoint at ../transformer_models/biomegatron345uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model: ./output/clf_models/biomegatron345uncased_epoch_00009.pt batch: 12

In [11]:
from sklearn.metrics import average_precision_score,f1_score,precision_score,recall_score,accuracy_score

import pandas as pd

# Metrics computed on hard 0/1 predictions, in the order of the table columns.
thresholded_metrics = (f1_score, precision_score, recall_score, accuracy_score)

y_true = label_lst
results = []
# NOTE: row 0 is the base checkpoint; row k >= 1 is the k-th fine-tuning
# checkpoint, i.e. the file named epoch_{k-1:05d} in model_names.
for epoch, model_probs in enumerate(scores):
    # Column 1 is the probability of label id 1 ("Entailment" in label2id).
    y_prob = model_probs[:, 1]
    # Predict the positive class only when its probability strictly exceeds 0.5.
    y_pred = [int(prob > 0.5) for prob in y_prob]

    row_name = epoch if epoch != 0 else 'pretrained'
    row = [row_name, average_precision_score(y_true, y_prob)]
    row.extend(metric(y_true, y_pred) for metric in thresholded_metrics)
    results.append(row)

pd.DataFrame(results, columns=['epoch', 'AVG_PREC', 'F1', 'PREC', 'REC', 'ACC'])

Unnamed: 0,epoch,AVG_PREC,F1,PREC,REC,ACC
0,pretrained,0.539019,0.2,0.6,0.12,0.52
1,1,0.509829,0.600791,0.496732,0.76,0.495
2,2,0.51473,0.492462,0.494949,0.49,0.495
3,3,0.539388,0.523077,0.536842,0.51,0.535
4,4,0.547169,0.584071,0.52381,0.66,0.53
5,5,0.551264,0.564356,0.558824,0.57,0.56
6,6,0.569993,0.591928,0.536585,0.66,0.545
7,7,0.576213,0.599034,0.579439,0.62,0.585
8,8,0.589859,0.453988,0.587302,0.37,0.555
9,9,0.60273,0.548387,0.593023,0.51,0.58
