# AI CUP 2022: Argument Detection
Meng-Chieh, Liu  
2022/11/28

## Note

1. shared Bert layers --> seperate Bert layers? (√)
2. (s+q), (s+r) encoder? (√)
3. smaller batch size/ smaller learning rate? (?)
4. higher loss weight? (?)
5. remove html tokens (√)
6. summarization in long text and long sentence (?)
7. split long sentences (?)
8. freeze less layers? (?)

function ClickConnect(){
console.log("Working");
document.querySelector("#top-toolbar > colab-connect-button").shadowRoot.querySelector("#connect").click();
}
setInterval(ClickConnect,60000)

## import

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install -q torch pytorch-lightning
!pip install -q transformers
!pip install -q nltk==3.7
!pip install -q bert-extractive-summarizer

In [3]:
# Import all libraries
import pandas as pd
import numpy as np
import re
import pickle
from tqdm import tqdm

# Huggingface transformers
import transformers
from transformers import BertTokenizer, BertModel, get_linear_schedule_with_warmup

import torch
from torch import nn, cuda
from torchmetrics import Accuracy, F1Score
from torch.utils.data import DataLoader,Dataset,RandomSampler, SequentialSampler

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler

import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
%matplotlib inline

import spacy
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize
punctuations = '''!"#$%&'()*+, -./:;<=>?@[\]^_`{|}~'''

from summarizer import Summarizer


RANDOM_SEED = 666
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
version = 'v4'

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [4]:
# normal LCS
def LCS(text1: str, text2: str) -> int:

    text1 = [i for i in word_tokenize(text1) if len(i)>1 or i not in punctuations]
    len_text1 = len(text1)
    if len_text1 == 0:
      return 0

    text2 = [i for i in word_tokenize(text2) if len(i)>1 or i not in punctuations]

    if len(text2) > len(text1):
        text1, text2 = text2, text1
    lcs = [[0]*(len(text2)+1) for _ in range(2)]
    for i in range(1, len(text1)+1):
        for j in range(1, len(text2)+1):
            if text1[i-1]== text2[j-1]:
                lcs[i%2][j] = lcs[(i-1) % 2][j-1] +1
            else:
                lcs[i%2][j]= max(lcs[(i-1)%2][j], lcs[i % 2][j-1])
    lcs = lcs[len(text1)% 2][len(text2)]
    return lcs/len_text1


# 評分用LCS
def LCS_Score(text1: str, text2: str) -> int:

    text1 = [i for i in word_tokenize(text1) if len(i)>1 or i not in punctuations]
    text2 = [i for i in word_tokenize(text2) if len(i)>1 or i not in punctuations]

    if len(text2) > len(text1):
        text1, text2 = text2, text1
    lcs = [[0]*(len(text2)+1) for _ in range(2)]
    for i in range(1, len(text1)+1):
        for j in range(1, len(text2)+1):
            if text1[i-1]== text2[j-1]:
                lcs[i%2][j] = lcs[(i-1) % 2][j-1] +1
            else:
                lcs[i%2][j]= max(lcs[(i-1)%2][j], lcs[i % 2][j-1])
    lcs = lcs[len(text1)% 2][len(text2)]
    return  lcs / (len(text1) + len(text2) - lcs)

## Preprocessing


### load and filter csv

In [5]:
# # load csv
# train_path = "/content/drive/Shareddrives/AI_CUP_NLP/Batch_answers - train_data (no-blank).csv"
# train_data = pd.read_csv(train_path, encoding = "utf-8", index_col='id').iloc[:,:5].applymap(lambda x: x.strip('"')).reset_index()

In [6]:
# train_data["length"] = train_data["q'"] + train_data["r'"]
# train_data["length"] = train_data["length"].map(len)

In [7]:
# # target sample size
# train_data.groupby(by=train_data.id).first().shape[0]

In [8]:
# idx = train_data.groupby(by=train_data.id)['length'].transform(max) == train_data['length']
# small_train_data = train_data[idx].set_index("id")

In [9]:
# small_train_data = small_train_data.groupby(by=small_train_data.index).first()

In [10]:
# small_train_data['q_length'] = small_train_data['q'].map(len)
# small_train_data['r_length'] = small_train_data['r'].map(len)
# small_train_data['s'] = small_train_data['s'].map(lambda x: 1 if x=="AGREE" else 0)

In [11]:
# # check sample size
# small_train_data.shape[0]

In [12]:
# # Save
# with open('/content/drive/Shareddrives/AI_CUP_NLP/small_train_data.pickle', 'wb') as f:
#     pickle.dump(small_train_data, f)

In [13]:
# Load
with open(f'/content/drive/Shareddrives/AI_CUP_NLP/data_{version}/small_train_data.pickle', 'rb') as f:
    small_train_data = pickle.load(f)

### Remove html token

In [14]:
# import re

In [15]:
# def regex_remove(text):
#   text = re.sub("& #? ?[a-zA-Z\d]{2,8} ; ", '', text)
#   text = re.sub("-- -- ", '', text)
#   return text

In [16]:
# regex_data = small_train_data.copy()
# regex_data['q'] = regex_data['q'].map(regex_remove)
# regex_data['r'] = regex_data['r'].map(regex_remove)

In [17]:
# # Save
# with open('/content/drive/Shareddrives/AI_CUP_NLP/regex_data.pickle', 'wb') as f:
#     pickle.dump(regex_data, f)

In [18]:
# Load
with open(f'/content/drive/Shareddrives/AI_CUP_NLP/data_{version}/regex_data.pickle', 'rb') as f:
    regex_data = pickle.load(f)

### sentencize

In [19]:
# sentencizer = spacy.load('en_core_web_sm')

# def sentencize(sentence):
#   return [str(sent) for sent in sentencizer(sentence).sents]

In [20]:
# texts = regex_data[['q', 'r', "q'", "r'"]]
# texts = texts.applymap(sentencize)

In [21]:
# # Save
# with open('/content/drive/Shareddrives/AI_CUP_NLP/texts.pickle', 'wb') as f:
#     pickle.dump(texts, f)

In [22]:
# Load
with open(f'/content/drive/Shareddrives/AI_CUP_NLP/data_{version}/texts.pickle', 'rb') as f:
    texts = pickle.load(f)

### extractive summary (shorten)

In [23]:
# bert_summarizer = Summarizer()

# def bert_summarize(sentence):
#   if len(sentence) > 1000:
#     bert_summary = ''.join(bert_summarizer(sentence, num_sentences=10))
#     if bert_summary != "":
#       return bert_summary
#   return sentence

In [24]:
# summary = regex_data[['q', 'r']]
# summary = summary.applymap(bert_summarize)

In [25]:
# # Save
# with open('/content/drive/Shareddrives/AI_CUP_NLP/summary.pickle', 'wb') as f:
#     pickle.dump(summary, f)

In [26]:
# Load
with open(f'/content/drive/Shareddrives/AI_CUP_NLP/data_{version}/summary.pickle', 'rb') as f:
    summary = pickle.load(f)

### reformat

In [27]:
# reformat_df = pd.DataFrame(columns=['id','sentence', 'is_q', 'label'])

# for i in tqdm(small_train_data.index):
#   if len(texts["q'"][i]) == 0 or len(texts["r'"][i]) == 0:
#     continue
  
#   ### Q ###
#   temp_df = pd.DataFrame(columns=['id', 'sentence', 'is_q', 'label'])
#   temp_df['sentence'] = texts['q'][i]
#   temp_df['is_q'] = 1
#   temp_df["id"] = i
  
#   if len(texts['q'][i]) == len(texts["q'"][i]):
#     temp_df['label'] = 1
#   else:
#     label_list = []
#     for sentence in texts['q'][i]:
#       if LCS(sentence, small_train_data["q'"][i]) >= 0.7:
#         label_list.append(1)
#       else:
#         label_list.append(0)
#     temp_df['label'] = label_list

#   reformat_df = pd.concat([reformat_df, temp_df], axis=0)

#   ### R ###
#   temp_df = pd.DataFrame(columns=['id', 'sentence', 'is_q', 'label'])
#   temp_df['sentence'] = texts['r'][i]
#   temp_df['is_q'] = 0
#   temp_df["id"] = i

#   if len(texts['r'][i]) == len(texts["r'"][i]):
#     temp_df['label'] = 1

#   else:
#     label_list = []
#     for sentence in texts['r'][i]:
#       if LCS(sentence, small_train_data["r'"][i]) >= 0.7:
#         label_list.append(1)
#       else:
#         label_list.append(0)
#     temp_df['label'] = label_list
  
#   reformat_df = pd.concat([reformat_df, temp_df], axis=0)
  
# reformat_df = reformat_df.set_index('id', drop=True)

In [28]:
# # Save
# with open('/content/drive/Shareddrives/AI_CUP_NLP/reformat_df.pickle', 'wb') as f:
#     pickle.dump(reformat_df, f)

In [29]:
# Load
with open(f'/content/drive/Shareddrives/AI_CUP_NLP/data_{version}/reformat_df.pickle', 'rb') as f:
    reformat_df = pickle.load(f)

### combine

In [30]:
# combine regex_data, summary and new_df
df_1 = regex_data.copy()
df_1['q'] = summary['q']
df_1['r'] = summary['r']
new_df = pd.merge(reformat_df, df_1, how="left", left_index=True, right_index=True)

In [31]:
sum(new_df['label'])/len(new_df)

0.45591411510608565

### split train, test, val

In [32]:
index = new_df.index.unique()
train_index, test_index = train_test_split(index, test_size=0.1, random_state=RANDOM_SEED, shuffle=True)
train_index, val_index = train_test_split(train_index, test_size=0.1, random_state=RANDOM_SEED, shuffle=True)

In [33]:
def x_y_split(df_index, new_df, train=False):
  df = new_df.loc[df_index]
  if train:
    df = shuffle(df, random_state=RANDOM_SEED)
  X = df[['sentence','q','r','q_length','r_length','is_q']]
  y = df[['label','s']]
  return X, y

In [34]:
X_train, y_train = x_y_split(train_index, new_df, train=True)
X_val, y_val = x_y_split(val_index, new_df)
X_test, y_test = x_y_split(test_index, new_df)

In [35]:
X_train.shape, X_val.shape, X_test.shape

((50964, 6), (5617, 6), (6387, 6))

### normalize feature

In [36]:
scaler = StandardScaler()

def length_scaler(X, train=False):
  if train:
    length_feature = scaler.fit_transform(X[['q_length', 'r_length']])
  else:
    length_feature = scaler.transform(X[['q_length', 'r_length']])
  X['q_length'] = length_feature[:,0]
  X['r_length'] = length_feature[:,1]
  return X

X_train = length_scaler(X_train, train=True)
X_val = length_scaler(X_val)
X_test = length_scaler(X_test)

## Model

### parameters

In [37]:
# Initialize the parameters that will be use for training
N_EPOCHS = 20
BATCH_SIZE = 4
STEPS_PER_EPOCH = len(X_train)//BATCH_SIZE
MAX_LEN = 512
LR = 2e-4
DROPOUT_RATE = 0.1

In [38]:
BERT_MODEL_NAME = "bert-base-uncased" # we will use the BERT base model(the smaller one)
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)

### model class

In [39]:
class bertDataset (Dataset):
    def __init__(self, X, y, tokenizer, max_len=MAX_LEN):
        self.tokenizer = tokenizer
        self.q = list(X["q"])
        self.r = list(X["r"])
        self.sentence = list(X["sentence"])
        self.length = len(self.sentence)
        self.features = torch.FloatTensor(np.array(X[['q_length', 'r_length', 'is_q']], dtype=np.float32))
        self.label = torch.LongTensor(np.array(y['label'], dtype=np.int16).reshape(self.length, 1))
        self.s = torch.LongTensor(np.array(y['s'], dtype=np.int16).reshape(self.length, 1))
        self.max_len = max_len
        
    def __len__(self):
        return self.length
    
    def __getitem__(self, item_idx):
        sentence_q = self.tokenizer.encode_plus(
            self.sentence[item_idx],
            self.q[item_idx],
            add_special_tokens = True,
            max_length= self.max_len,
            padding = 'max_length',
            return_attention_mask= True,
            truncation=True,
            return_tensors = 'pt'
          )
        
        sentence_r = self.tokenizer.encode_plus(
            self.sentence[item_idx],
            self.r[item_idx],
            add_special_tokens=True,
            max_length= self.max_len,
            padding = 'max_length',
            return_attention_mask= True,
            truncation=True,
            return_tensors = 'pt'
          )

        # sentence = self.tokenizer.encode_plus(
            
        #     add_special_tokens=True, 
        #     max_length= self.max_len,
        #     padding = 'max_length',
        #     return_attention_mask= True, 
        #     truncation=True,
        #     return_tensors = 'pt'
        #   )

        
        return {
            'sentence_q': (sentence_q['input_ids'].flatten(), sentence_q['attention_mask'].flatten(), sentence_q['token_type_ids'].flatten()),
            'sentence_r': (sentence_r['input_ids'].flatten(), sentence_r['attention_mask'].flatten(), sentence_r['token_type_ids'].flatten()),
            # 'sentence': (sentence['input_ids'].flatten(), sentence['attention_mask'].flatten()),
            'features' : self.features[item_idx],
            'label' : self.label[item_idx],
            's' : self.s[item_idx]
        }

In [40]:
class bertDataModule (pl.LightningDataModule):
    
    def __init__(self, X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, X_test=X_test, y_test=y_test, tokenizer=tokenizer, batch_size=BATCH_SIZE, max_token_len=MAX_LEN):
        super().__init__()
        self.X_train = X_train
        self.y_train = y_train
        self.X_val = X_val
        self.y_val = y_val
        self.X_test = X_test
        self.y_test = y_test
        self.tokenizer = tokenizer
        self.batch_size = batch_size
        self.max_token_len = max_token_len

    def setup(self, stage=None):
        self.train_dataset = bertDataset(X=self.X_train, y=self.y_train, tokenizer=self.tokenizer, max_len=self.max_token_len)
        self.val_dataset  = bertDataset(X=self.X_val, y=self.y_val, tokenizer=self.tokenizer, max_len=self.max_token_len)
        self.test_dataset  = bertDataset(X=self.X_test, y=self.y_test, tokenizer=self.tokenizer, max_len=self.max_token_len)
        
    def train_dataloader(self):
        return DataLoader (self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=2)

    def val_dataloader(self):
        return DataLoader (self.val_dataset,batch_size=self.batch_size)

    def test_dataloader(self):
        return DataLoader (self.test_dataset,batch_size=self.batch_size)

In [41]:
class bertClassifier(pl.LightningModule):
    # Set up the classifier
    def __init__(self, lr=LR, dropout_rate=DROPOUT_RATE, maxLength=MAX_LEN, steps_per_epoch=STEPS_PER_EPOCH, n_epochs=N_EPOCHS):
        super().__init__()

        self.bert1 = BertModel.from_pretrained(BERT_MODEL_NAME, return_dict=True)
        self.bert2 = BertModel.from_pretrained(BERT_MODEL_NAME, return_dict=True)
        self.lr = lr
        self.fc_task1 = nn.Sequential(
            nn.Linear(768*3+3, 512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 2)
        )

        self.fc_task2 = nn.Sequential(
            nn.Linear(768*3+3, 512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 2)
        )
        self.steps_per_epoch = steps_per_epoch
        self.n_epochs = n_epochs
        self.criterion = nn.CrossEntropyLoss()


    def forward(self, input_ids1, attention_mask1, token_type_ids1, input_ids2, attention_mask2, token_type_ids2, features):
        sentence_q = self.bert1(input_ids=input_ids1, attention_mask=attention_mask1, token_type_ids=token_type_ids1).pooler_output
        sentence_r = self.bert2(input_ids=input_ids2, attention_mask=attention_mask2, token_type_ids=token_type_ids2).pooler_output
        logits = torch.cat([sentence_q, sentence_r, sentence_q*sentence_r, features], 1)
        logits1 = self.fc_task1(logits)
        logits2 = self.fc_task2(logits)
        return logits1, logits2
    
    
    def training_step(self, batch, batch_idx):
        input_ids1, attention_mask1, token_type_ids1  = batch['sentence_q']
        input_ids2, attention_mask2, token_type_ids2  = batch['sentence_r']
        
        features = batch['features']
        label = batch['label'].squeeze(1)
        s = batch['s'].squeeze(1)

        logits1, logits2 = self.forward(input_ids1, attention_mask1, token_type_ids1, input_ids2, attention_mask2, token_type_ids2, features)
        loss = self.criterion(logits1, label)*2.5 + self.criterion(logits2, s)
        self.log('train_loss', loss, prog_bar=True, logger=True)
        return loss


    def validation_step(self, batch, batch_idx):
        input_ids1, attention_mask1, token_type_ids1  = batch['sentence_q']
        input_ids2, attention_mask2, token_type_ids2  = batch['sentence_r']

        features = batch['features']
        label = batch['label'].squeeze(1)
        s = batch['s'].squeeze(1)

        logits1, logits2 = self.forward(input_ids1, attention_mask1, token_type_ids1, input_ids2, attention_mask2, token_type_ids2, features)
        loss = self.criterion(logits1, label)*2.5 + self.criterion(logits2, s)
        self.log('val_loss', loss, prog_bar=True, logger=True)
        return loss


    def test_step(self, batch, batch_idx):
        input_ids1, attention_mask1, token_type_ids1  = batch['sentence_q']
        input_ids2, attention_mask2, token_type_ids2  = batch['sentence_r']

        features = batch['features']
        label = batch['label'].squeeze(1)
        s = batch['s'].squeeze(1)

        logits1, logits2 = self.forward(input_ids1, attention_mask1, token_type_ids1, input_ids2, attention_mask2, token_type_ids2, features)
        loss = self.criterion(logits1, label)*2.5 + self.criterion(logits2, s)
        self.log('test_loss', loss, prog_bar=True, logger=True)
        return loss    
    

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters() , lr=self.lr)
        warmup_steps = self.steps_per_epoch//3
        total_steps = self.steps_per_epoch * self.n_epochs - warmup_steps

        scheduler = get_linear_schedule_with_warmup(optimizer,warmup_steps,total_steps)

        return [optimizer], [scheduler]

## Train
remember to revise checkpoint path

In [42]:
transformers.logging.set_verbosity_error()

In [None]:
resume_from_checkpoint = None

In [43]:
# Instantiate and set up the data_module
bert_data_module = bertDataModule()
bert_data_module.setup()

In [44]:
model = bertClassifier()

In [None]:
# freeze bert layers
for param in model.bert1.embeddings.parameters():
    param.requires_grad = False
for param in model.bert1.encoder.layer[:10].parameters():
    param.requires_grad = False
for param in model.bert2.embeddings.parameters():
    param.requires_grad = False
for param in model.bert2.encoder.layer[:10].parameters():
    param.requires_grad = False

In [None]:
checkpoint_callback = ModelCheckpoint(
    monitor='val_loss',
    filename='{epoch:02d}-{val_loss:.3f}',
    save_top_k=3, 
    mode='min'
)

In [None]:
# Instantiate the Model Trainer
trainer = pl.Trainer(
    max_epochs=N_EPOCHS, 
    accelerator='gpu', 
    devices=1, 
    callbacks=[checkpoint_callback], 
    default_root_dir='/content/drive/Shareddrives/AI_CUP_NLP',
    resume_from_checkpoint=resume_from_checkpoint)

  rank_zero_deprecation(
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
trainer.fit(model, bert_data_module)

### Visualize

In [None]:
# import tensorboard
# %load_ext tensorboard
# %tensorboard --logdir /content/drive/Shareddrives/AI_CUP_NLP/lightning_logs

## Valid/Test
remember to revise model path

### load model

In [63]:
model_path = "/content/drive/Shareddrives/AI_CUP_NLP/logs_v4/version_6/checkpoints/epoch=10-val_loss=1.731.ckpt"

In [64]:
model = model.load_from_checkpoint(model_path)
model.eval()
model.to(device)

bertClassifier(
  (bert1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=Tr

### predict function

In [62]:
def predict(df, dataloader):

  with torch.no_grad():

    softmax = nn.Softmax()

    label_predict = torch.Tensor().to(device)
    s_predict = torch.Tensor().to(device)

    for i, batch in enumerate(tqdm(dataloader)):
      input_ids1, attention_mask1, token_type_ids1  = batch['sentence_q']
      input_ids2, attention_mask2, token_type_ids2  = batch['sentence_r']

      features = batch['features']
      label = batch['label'].squeeze(1)
      s = batch['s'].squeeze(1)

      logits1, logits2 = model(input_ids1.to(device), attention_mask1.to(device), token_type_ids1.to(device),
                    input_ids2.to(device), attention_mask2.to(device), token_type_ids2.to(device), features.to(device))
      logits1 = softmax(logits1)
      logits2 = softmax(logits2)

      label_predict = torch.concat([label_predict, logits1])
      s_predict = torch.concat([s_predict, logits2])


  label_predict_np = label_predict.to('cpu').numpy()
  s_predict_np = s_predict.to('cpu').numpy()

  
  df['label_0'] = label_predict_np[:,0]
  df['label_1'] = label_predict_np[:,1]
  df['s_0'] = s_predict_np[:,0]
  df['s_1'] = s_predict_np[:,1]

  return df

### val

In [85]:
val_df = new_df.loc[val_index]
val_dataloader = bert_data_module.val_dataloader()
val_result = predict(val_df, val_dataloader)
val_result.to_csv('/content/drive/Shareddrives/AI_CUP_NLP/answer/val_result.csv')

  logits1 = softmax(logits1)
  logits2 = softmax(logits2)
  3%|▎         | 42/1405 [00:13<05:49,  3.90it/s]Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
 66%|██████▌   | 928/1405 [04:19<02:14,  3.55it/s]Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
 83%|████████▎ | 1172/1405 [05:27<01:05,  3.58it/s]Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
 91%|█████████ | 1275/1405 [05:56<00:36,  3.58it/s]Be aware, overflowing tokens are not returned for the setting y

### find threshold

In [55]:
val_result = pd.read_csv('/content/drive/Shareddrives/AI_CUP_NLP/answer/val_result.csv', index_col='id')

In [56]:
def evaluate(df, q_threshold=0.5, r_threshold=0.5):

  score_list = []
  ids = df.index.unique()

  for id in ids:

    try:
      data = df.loc[id]
      # q
      q = data[data['is_q']==1].reset_index()
      q_answer = q["q'"][0]
      if len(q)==1:
        q_predict = q["q"][0]
      else:
        q = q[q['label_1']>=q_threshold]
        q_predict = " ".join(q['sentence'])
      
      q_score = LCS_Score(q_answer, q_predict)

      # r
      r = data[data['is_q']==0].reset_index()
      r_answer = r["r'"][0]
      if len(r)==1:
        r_predict = r["r"][0]
      else:
        r = r[r['label_1']>=r_threshold]
        r_predict = " ".join(r['sentence'])
      
      r_score = LCS_Score(r_answer, r_predict)

      # last
      score = (q_score+r_score)/2
      score_list.append(score)

    except:
      pass

  final_score = sum(score_list)/len(score_list)
  return final_score

In [65]:
best_score = 0
best_q_threshold = 0
best_r_threshold = 0

for q_threshold in tqdm(range(23, 28, 1)):
  q_threshold /= 100
  for r_threshold in range(23, 28, 1):
    r_threshold /= 100 
    try:
      final_score = evaluate(val_result, q_threshold, r_threshold)
      if final_score > best_score:
        best_score = final_score
        best_q_threshold = q_threshold
        best_r_threshold = r_threshold
    except:
      pass

100%|██████████| 5/5 [01:44<00:00, 20.84s/it]


In [66]:
best_score, best_q_threshold, best_r_threshold

(0.7007625613469985, 0.25, 0.23)

### test

In [67]:
test_df = new_df.loc[test_index]
test_dataloader = bert_data_module.test_dataloader()
test_result = predict(test_df, test_dataloader)
test_result.to_csv('/content/drive/Shareddrives/AI_CUP_NLP/answer/test_result.csv')

  logits1 = softmax(logits1)
  logits2 = softmax(logits2)
100%|██████████| 1597/1597 [07:23<00:00,  3.60it/s]


In [68]:
evaluate(test_result, 0.25, 0.25)

0.7026683752514115

## Result

### model v3

model = version_4/checkpoints/epoch=11-val_loss=1.45.ckpt  
weight = 2  
(val) best_score, best_threshold = 0.6999463597711145, 0.2  
(test) score = 0.7031210002892163
(public) score = 0.794138



### model v4
model = 