In [1]:
# ========================================
# library
# ========================================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold, KFold,GroupKFold
from sklearn.metrics import mean_squared_error
%matplotlib inline
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
import transformers
from transformers import LongformerTokenizer, LongformerModel,AutoTokenizer
from transformers import AdamW, get_linear_schedule_with_warmup
from torch.cuda.amp import autocast, GradScaler
import logging
from ast import literal_eval
import sys
from contextlib import contextmanager
import time
import random
from tqdm import tqdm
import os

2022-01-19 02:52:18.990788: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


In [2]:
# ==================
# Constant
# ==================
ex = "019"
TRAIN_PATH = "../data/train.csv"
DATA_DIR = "../data/longformer-large-4096/"
if not os.path.exists(f"../output/exp/ex{ex}"):
    os.makedirs(f"../output/exp/ex{ex}")
    os.makedirs(f"../output/exp/ex{ex}/ex{ex}_model")
    
OUTPUT_DIR = f"../output/exp/ex{ex}"
MODEL_PATH_BASE = f"../output/exp/ex{ex}/ex{ex}_model/ex{ex}"
LOGGER_PATH = f"../output/exp/ex{ex}/ex{ex}.txt"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# ===============
# Configs
# ===============
SEED = 0
N_SPLITS = 5
SHUFFLE = True
num_workers = 4
BATCH_SIZE = 4

n_epochs = 6
max_len = 2048
weight_decay = 0.1
beta = (0.9, 0.98)
lr = 2e-5
num_warmup_steps_rate = 0.1

MODEL_PATH = 'allenai/longformer-large-4096'
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

In [4]:
# ===============
# Functions
# ===============
def set_seed(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def setup_logger(out_file=None, stderr=True, stderr_level=logging.INFO, file_level=logging.DEBUG):
    LOGGER.handlers = []
    LOGGER.setLevel(min(stderr_level, file_level))

    if stderr:
        handler = logging.StreamHandler(sys.stderr)
        handler.setFormatter(FORMATTER)
        handler.setLevel(stderr_level)
        LOGGER.addHandler(handler)

    if out_file is not None:
        handler = logging.FileHandler(out_file)
        handler.setFormatter(FORMATTER)
        handler.setLevel(file_level)
        LOGGER.addHandler(handler)

    LOGGER.info("logger set up")
    return LOGGER


@contextmanager
def timer(name):
    t0 = time.time()
    yield 
    LOGGER.info(f'[{name}] done in {time.time() - t0:.0f} s')
    
    
LOGGER = logging.getLogger()
FORMATTER = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
setup_logger(out_file=LOGGER_PATH)

2022-01-19 02:52:24,195 - INFO - logger set up


<RootLogger root (DEBUG)>

In [5]:
class TrainDataset(Dataset):
    def __init__(self, token,attentiona_mask,label=None):
        self.len = len(token)
        self.token = token
        self.attention_mask = attentiona_mask
        self.label = label
        #self.get_wids = get_wids # for validation

    def __getitem__(self, index):
        # GET TEXT AND WORD LABELS 
        if self.label is not None:
            return {
              'token': torch.tensor(self.token[index], dtype=torch.long),
              'mask': torch.tensor(self.attention_mask[index], dtype=torch.long),
              "y":torch.tensor(self.label[index], dtype=torch.float32)
               }
        else:
            return {
              'token': torch.tensor(self.token[index], dtype=torch.long),
              'mask': torch.tensor(self.attention_mask[index], dtype=torch.long),
               }

    def __len__(self):
        return self.len

class custom_model(nn.Module):
    def __init__(self):
        super(custom_model, self).__init__()
        self.backbone = LongformerModel.from_pretrained(
            MODEL_PATH, 
        )
        
        #self.dropout = nn.Dropout(p=0.2)
        self.ln = nn.LayerNorm(1024)
        
        self.conv1= nn.Conv1d(1024, 512, kernel_size=3, padding=1)
        self.conv2= nn.Conv1d(1024, 512, kernel_size=9, padding=4)
        self.conv3= nn.Conv1d(1024, 512, kernel_size=15, padding=7)
        self.conv4= nn.Conv1d(1024, 512, kernel_size=31, padding=15)
        self.ln1 = nn.Sequential(nn.LayerNorm(512),
                                            nn.ReLU(),
                                            nn.Dropout(0.2))
        self.ln2 = nn.Sequential( nn.LayerNorm(512),
                                            nn.ReLU(),
                                            nn.Dropout(0.2))
        self.ln3 = nn.Sequential( nn.LayerNorm(512),
                                            nn.ReLU(),
                                            nn.Dropout(0.2))
        self.ln4 = nn.Sequential( nn.LayerNorm(512),
                                            nn.ReLU(),
                                            nn.Dropout(0.2))
        
        self.linear1 = nn.Sequential(
            nn.Linear(2048,1024),
            nn.LayerNorm(1024),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(1024,2),
        )
        self.linear2 = nn.Sequential(
            nn.Linear(2048,1024),
            nn.LayerNorm(1024),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(1024,2),
        )
        self.linear3 = nn.Sequential(
            nn.Linear(2048,1024),
            nn.LayerNorm(1024),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(1024,2),
        )
        self.linear4 = nn.Sequential(
            nn.Linear(2048,1024),
            nn.LayerNorm(1024),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(1024,2),
        )
        self.linear5 = nn.Sequential(
            nn.Linear(2048,1024),
            nn.LayerNorm(1024),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(1024,2),
        )
        self.linear6 = nn.Sequential(
            nn.Linear(2048,1024),
            nn.LayerNorm(1024),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(1024,2),
        )
        self.linear7 = nn.Sequential(
            nn.Linear(2048,1024),
            nn.LayerNorm(1024),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(1024,2),
        )
        self.linear8 = nn.Sequential(
            nn.Linear(2048,1024),
            nn.LayerNorm(1024),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(1024,1),
        )
    def forward(self, ids, mask):
        # pooler
        emb = self.backbone(ids, attention_mask=mask)["last_hidden_state"]
        output = self.ln(emb)
        output = output.permute((0, 2, 1)).contiguous()
        output1 = self.conv1(output)
        output1 = self.ln1(output1.permute((0, 2, 1)).contiguous())
        output2 = self.conv2(output)
        output2 = self.ln2(output2.permute((0, 2, 1)).contiguous())
        output3 = self.conv3(output)
        output3 = self.ln3(output3.permute((0, 2, 1)).contiguous())
        output4 = self.conv4(output)
        output4 = self.ln4(output4.permute((0, 2, 1)).contiguous())
        output_concat = torch.cat([output1,output2,output3,output4],axis=-1)
        output2_1 = self.linear1(output_concat)
        output2_2 = self.linear2(output_concat)
        output2_3 = self.linear3(output_concat)
        output2_4 = self.linear4(output_concat)
        output2_5 = self.linear5(output_concat)
        output2_6 = self.linear6(output_concat)
        output2_7= self.linear7(output_concat)
        output2_8 = self.linear8(output_concat)
        out = torch.cat(
            [output2_1,output2_2,output2_3,output2_4,
             output2_5,output2_6,output2_7,output2_8], axis=2)
        return out

In [6]:
target_map_rev = {0:'Lead', 1:'Position', 2:'Evidence', 3:'Claim', 4:'Concluding Statement',
             5:'Counterclaim', 6:'Rebuttal', 7:'blank'}

def get_preds_collate(dataset, verbose,text_ids, preds, preds_len):
    all_predictions = []

    for id_num in tqdm(range(len(preds))):
    
        # GET ID
        #if (id_num%100==0)&(verbose): 
        #    print(id_num,', ',end='')
        n = text_ids[id_num]
        max_len = int(preds_len[id_num])
        # GET TOKEN POSITIONS IN CHARS
        name = f'../data/{dataset}/{n}.txt'
        txt = open(name, 'r').read()
        tokens = tokenizer.encode_plus(txt, max_length=max_len, padding='max_length',
                                   truncation=True, return_offsets_mapping=True)
        off = tokens['offset_mapping']
    
        # GET WORD POSITIONS IN CHARS
        w = []
        blank = True
        for i in range(len(txt)):
            if (txt[i]!=' ')&(txt[i]!='\n')&(txt[i]!='\xa0')&(txt[i]!='\x85')&(blank==True):
                w.append(i)
                blank=False
            elif (txt[i]==' ')|(txt[i]=='\n')|(txt[i]=='\xa0')|(txt[i]=='\x85'):
                blank=True
        w.append(1e6)
            
        # MAPPING FROM TOKENS TO WORDS
        word_map = -1 * np.ones(max_len,dtype='int32')
        w_i = 0
        for i in range(len(off)):
            if off[i][1]==0: continue
            while off[i][0]>=w[w_i+1]: w_i += 1
            word_map[i] = int(w_i)
        
        # CONVERT TOKEN PREDICTIONS INTO WORD LABELS
        ### KEY: ###
        # 0: LEAD_B, 1: LEAD_I
        # 2: POSITION_B, 3: POSITION_I
        # 4: EVIDENCE_B, 5: EVIDENCE_I
        # 6: CLAIM_B, 7: CLAIM_I
        # 8: CONCLUSION_B, 9: CONCLUSION_I
        # 10: COUNTERCLAIM_B, 11: COUNTERCLAIM_I
        # 12: REBUTTAL_B, 13: REBUTTAL_I
        # 14: NOTHING i.e. O
        ### NOTE THESE VALUES ARE DIVIDED BY 2 IN NEXT CODE LINE
        pred = preds[id_num,]/2.0
    
        i = 0
        while i<max_len:
            prediction = []
            start = pred[i]
            if start in [0,1,2,3,4,5,6,7]:
                prediction.append(word_map[i])
                i += 1
                if i>=max_len: break
                while pred[i]==start+0.5:
                    if not word_map[i] in prediction:
                        prediction.append(word_map[i])
                    i += 1
                    if i>=max_len: break
            else:
                i += 1
            prediction = [x for x in prediction if x!=-1]
            if len(prediction)>4:
                all_predictions.append( (n, target_map_rev[int(start)], 
                                ' '.join([str(x) for x in prediction]) ) )
                
    # MAKE DATAFRAME
    df = pd.DataFrame(all_predictions)
    df.columns = ['id','class','predictionstring']
    
    return df


def calc_overlap(row):
    """
    Calculates the overlap between prediction and
    ground truth and overlap percentages used for determining
    true positives.
    """
    set_pred = set(row.predictionstring_pred.split(' '))
    set_gt = set(row.predictionstring_gt.split(' '))
    # Length of each and intersection
    len_gt = len(set_gt)
    len_pred = len(set_pred)
    inter = len(set_gt.intersection(set_pred))
    overlap_1 = inter / len_gt
    overlap_2 = inter/ len_pred
    return [overlap_1, overlap_2]


def score_feedback_comp(pred_df, gt_df):
    """
    A function that scores for the kaggle
        Student Writing Competition
        
    Uses the steps in the evaluation page here:
        https://www.kaggle.com/c/feedback-prize-2021/overview/evaluation
    """
    gt_df = gt_df[['id','discourse_type','predictionstring']] \
        .reset_index(drop=True).copy()
    pred_df = pred_df[['id','class','predictionstring']] \
        .reset_index(drop=True).copy()
    pred_df['pred_id'] = pred_df.index
    gt_df['gt_id'] = gt_df.index
    # Step 1. all ground truths and predictions for a given class are compared.
    joined = pred_df.merge(gt_df,
                           left_on=['id','class'],
                           right_on=['id','discourse_type'],
                           how='outer',
                           suffixes=('_pred','_gt')
                          )
    joined['predictionstring_gt'] = joined['predictionstring_gt'].fillna(' ')
    joined['predictionstring_pred'] = joined['predictionstring_pred'].fillna(' ')

    joined['overlaps'] = joined.apply(calc_overlap, axis=1)

    # 2. If the overlap between the ground truth and prediction is >= 0.5, 
    # and the overlap between the prediction and the ground truth >= 0.5,
    # the prediction is a match and considered a true positive.
    # If multiple matches exist, the match with the highest pair of overlaps is taken.
    joined['overlap1'] = joined['overlaps'].apply(lambda x: eval(str(x))[0])
    joined['overlap2'] = joined['overlaps'].apply(lambda x: eval(str(x))[1])


    joined['potential_TP'] = (joined['overlap1'] >= 0.5) & (joined['overlap2'] >= 0.5)
    joined['max_overlap'] = joined[['overlap1','overlap2']].max(axis=1)
    tp_pred_ids = joined.query('potential_TP') \
        .sort_values('max_overlap', ascending=False) \
        .groupby(['id','predictionstring_gt']).first()['pred_id'].values

    # 3. Any unmatched ground truths are false negatives
    # and any unmatched predictions are false positives.
    fp_pred_ids = [p for p in joined['pred_id'].unique() if p not in tp_pred_ids]

    matched_gt_ids = joined.query('potential_TP')['gt_id'].unique()
    unmatched_gt_ids = [c for c in joined['gt_id'].unique() if c not in matched_gt_ids]

    # Get numbers of each type
    TP = len(tp_pred_ids)
    FP = len(fp_pred_ids)
    FN = len(unmatched_gt_ids)
    #calc microf1
    my_f1_score = TP / (TP + 0.5*(FP+FN))
    return my_f1_score

def collate(d,train=True):
    mask_len = int(d["mask"].sum(axis=1).max())
    if train:
        return {"token" : d['token'][:,:mask_len],
                 "mask" : d['mask'][:,:mask_len],
                 "y" : d['y'][:,:mask_len],
                  "max_len" : mask_len}
    else:
        return {"token" : d['token'][:,:mask_len],
                 "mask" : d['mask'][:,:mask_len],
                  "max_len" : mask_len}

In [7]:
# ================================
# Main
# ================================
train = pd.read_csv(TRAIN_PATH)
IDS = train.id.unique()
id_array = np.array(IDS)

In [11]:
# ================================
# data load
# ================================
targets = np.load(DATA_DIR + f"targets_{max_len}.npy")
train_tokens = np.load(DATA_DIR + f"tokens_{max_len}.npy")
train_attention = np.load(DATA_DIR + f"attention_{max_len}.npy")

In [12]:
# ================================
# train
# ================================
with timer("longformer_large"):
    set_seed(SEED)
    oof = pd.DataFrame()
    oof_pred = np.ndarray((0,max_len,15))
    kf = KFold(n_splits=N_SPLITS, shuffle=SHUFFLE, random_state=SEED)
    for fold, (train_idx, valid_idx) in enumerate(kf.split(id_array)):
        print(f"fold{fold}:start")
        x_train_token, x_train_attention, y_train  = train_tokens[train_idx], train_attention[train_idx], targets[train_idx]
        x_val_token, x_val_attention, y_val  = train_tokens[valid_idx], train_attention[valid_idx], targets[valid_idx]
        train_val = train[train.id.isin(id_array[valid_idx])].reset_index(drop=True)
        
        # dataset
        train_ = TrainDataset( x_train_token, x_train_attention, y_train)
        val_ = TrainDataset( x_val_token, x_val_attention, y_val)
        
        # loader
        train_loader = DataLoader(dataset=train_, batch_size=BATCH_SIZE, shuffle = True ,pin_memory=True)
        val_loader = DataLoader(dataset=val_, batch_size=BATCH_SIZE, shuffle = False , pin_memory=True)
        
        # model
        model = custom_model()
        model = model.to(device)
        
        # optimizer, scheduler
        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': weight_decay},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]
        optimizer = AdamW(optimizer_grouped_parameters,
                          lr=lr,
                          betas=beta,
                          weight_decay=weight_decay,
                          )
        num_train_optimization_steps = int(len(train_loader) * n_epochs)
        num_warmup_steps = int(num_train_optimization_steps * num_warmup_steps_rate)
        scheduler = get_linear_schedule_with_warmup(optimizer,
                                                    num_warmup_steps=num_warmup_steps,
                                                    num_training_steps=num_train_optimization_steps)
        
        criterion = nn.BCEWithLogitsLoss()
        best_val = 0
        
        for epoch in range(n_epochs):
            print(f"============start epoch:{epoch}==============")
            model.train() 
            val_losses_batch = []
            scaler = GradScaler()
            for i, d in tqdm(enumerate(train_loader),total=len(train_loader)):
                d = collate(d)
                ids = d['token'].to(device)
                mask = d['mask'].to(device)
                labels = d['y'].to(device)
                #labels = labels.unsqueeze(-1)
                optimizer.zero_grad()
                with autocast():
                    output = model(ids,mask)
                    loss = criterion(output[mask == 1], labels[mask == 1])
                scaler.scale(loss).backward()
                #torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad_norm)
                scaler.step(optimizer)
                scaler.update()
                scheduler.step()
            
            y_pred2 = []
            val_preds = np.ndarray((0,max_len,15))
            val_len = np.ndarray(0)
            model.eval()  # switch model to the evaluation mode
            with torch.no_grad():  
                # Predicting on validation set
                
                for d in tqdm(val_loader,total=len(val_loader)):
                    # =========================
                    # data loader
                    # =========================
                    d = collate(d)
                    ids = d['token'].to(device)
                    mask = d['mask'].to(device)
                    with autocast():
                        outputs = model(ids, mask)
                    outputs = np.concatenate([outputs.sigmoid().detach().cpu().numpy(),
                                              np.zeros([len(outputs),max_len - d["max_len"],15])],axis=1)
                    val_preds = np.concatenate([val_preds, outputs], axis=0)
                    val_len = np.concatenate([val_len,np.array([d["max_len"] for i in range(len(ids))])],axis=0)
            val_preds_max = np.argmax(val_preds,axis=-1)
            oof_ = get_preds_collate( dataset='train', verbose=True, text_ids=id_array[valid_idx],
                                      preds = val_preds_max,preds_len=val_len)      
            # COMPUTE F1 SCORE
            f1s = []
            CLASSES = oof_['class'].unique()
            print()
            for c in CLASSES:
                pred_df = oof_.loc[oof_['class']==c].copy()
                gt_df = train_val.loc[train_val['discourse_type']==c].copy()
                f1 = score_feedback_comp(pred_df, gt_df)
                print(c,f1)
                f1s.append(f1)
            score = np.mean(f1s)
            LOGGER.info(f'{fold},{epoch}:{i},val_score:{score}')
            if best_val < score:
                print("save model weight")
                best_val = score
                best_val_preds = val_preds
                oof_best = oof_.copy()
                torch.save(model.state_dict(), MODEL_PATH_BASE + f"_{fold}.pth") # Saving current best model
        oof_best["fold"] = fold
        oof_best.to_csv(OUTPUT_DIR + f"/ex{ex}_oof_{fold}.csv",index=False)
        np.save(OUTPUT_DIR + f"/ex{ex}_oof_npy_{fold}.npy",best_val_preds)

fold0:start


Some weights of the model checkpoint at allenai/longformer-large-4096 were not used when initializing LongformerModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing LongformerModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).




100%|██████████| 3119/3119 [31:33<00:00,  1.65it/s]
100%|██████████| 780/780 [03:16<00:00,  3.98it/s]
100%|██████████| 3119/3119 [00:22<00:00, 139.68it/s]
2022-01-19 03:27:48,979 - INFO - Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
2022-01-19 03:27:48,980 - INFO - NumExpr defaulting to 8 threads.



Lead 0.8244919503826867
Claim 0.598008030453147
Evidence 0.6693015573383672
Position 0.6931382793451759
Concluding Statement 0.8076486179477471
Counterclaim 0.49957947855340623


2022-01-19 03:27:52,708 - INFO - 0,0:3118,val_score:0.638386100365825


Rebuttal 0.3765347885402456
save model weight


100%|██████████| 3119/3119 [31:36<00:00,  1.64it/s]
100%|██████████| 780/780 [03:17<00:00,  3.95it/s]
100%|██████████| 3119/3119 [00:20<00:00, 149.21it/s]



Lead 0.8230042016806722
Claim 0.5840109953040888
Position 0.6792452830188679
Counterclaim 0.5201708590412909
Evidence 0.7052485017252861


2022-01-19 04:03:15,143 - INFO - 0,1:3118,val_score:0.6462100393454898


Concluding Statement 0.7942868478476492
Rebuttal 0.41750358680057387
save model weight


100%|██████████| 3119/3119 [31:38<00:00,  1.64it/s]
100%|██████████| 780/780 [03:20<00:00,  3.89it/s]
100%|██████████| 3119/3119 [00:21<00:00, 143.00it/s]



Lead 0.827892400104466
Claim 0.6065274449389478
Counterclaim 0.5382786495482644
Rebuttal 0.44505494505494503
Evidence 0.7259547383309759
Position 0.6964631356677811


2022-01-19 04:38:50,135 - INFO - 0,2:3118,val_score:0.6679394585810322


Concluding Statement 0.8354048964218456
save model weight


100%|██████████| 3119/3119 [31:44<00:00,  1.64it/s]
100%|██████████| 780/780 [03:18<00:00,  3.94it/s]
100%|██████████| 3119/3119 [00:21<00:00, 142.99it/s]



Lead 0.8242105263157895
Claim 0.6153592978606692
Evidence 0.7298758049678012
Position 0.7045148247978437


2022-01-19 05:14:29,509 - INFO - 0,3:3118,val_score:0.6715735791189662


Concluding Statement 0.8253850436073483
Counterclaim 0.5418994413407822
Rebuttal 0.45977011494252873
save model weight


100%|██████████| 3119/3119 [31:48<00:00,  1.63it/s]
100%|██████████| 780/780 [03:20<00:00,  3.88it/s]
100%|██████████| 3119/3119 [00:21<00:00, 143.03it/s]



Lead 0.8167511336356361
Claim 0.6144834378725831
Position 0.6993147250543206
Evidence 0.7233589326755984


2022-01-19 05:50:14,970 - INFO - 0,4:3118,val_score:0.6683251206746395


Concluding Statement 0.8248312078019505
Rebuttal 0.4643765903307888
Counterclaim 0.5351598173515981


100%|██████████| 3119/3119 [31:42<00:00,  1.64it/s]
100%|██████████| 780/780 [03:18<00:00,  3.92it/s]
100%|██████████| 3119/3119 [00:21<00:00, 142.23it/s]



Lead 0.8168264110756124
Position 0.7009093971034018
Claim 0.6119661711404107
Evidence 0.7234927234927235


2022-01-19 06:25:42,517 - INFO - 0,5:3118,val_score:0.6689662782538116


Concluding Statement 0.8304549675023213
Rebuttal 0.45431789737171463
Counterclaim 0.5447963800904977
fold1:start


Some weights of the model checkpoint at allenai/longformer-large-4096 were not used when initializing LongformerModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing LongformerModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).




100%|██████████| 3119/3119 [31:38<00:00,  1.64it/s]
100%|██████████| 780/780 [03:34<00:00,  3.64it/s]
100%|██████████| 3119/3119 [00:22<00:00, 135.94it/s]



Lead 0.8111658456486043
Position 0.6654797934840663
Claim 0.570453888407054
Evidence 0.6861877764625257
Counterclaim 0.5055041831792162


2022-01-19 07:01:29,564 - INFO - 1,0:3118,val_score:0.6282022533604491


Rebuttal 0.357487922705314
Concluding Statement 0.8011363636363636
save model weight


100%|██████████| 3119/3119 [31:48<00:00,  1.63it/s]
100%|██████████| 780/780 [03:36<00:00,  3.60it/s]
100%|██████████| 3119/3119 [00:21<00:00, 145.50it/s]



Lead 0.8197251414713015
Position 0.6949647705791373
Claim 0.5995160654999718
Evidence 0.7066674905765309
Counterclaim 0.4814432989690722
Rebuttal 0.4158125915080527


2022-01-19 07:37:24,031 - INFO - 1,1:3118,val_score:0.6482837782479026


Concluding Statement 0.8198570891312523
save model weight


100%|██████████| 3119/3119 [31:50<00:00,  1.63it/s]
100%|██████████| 780/780 [03:21<00:00,  3.88it/s]
100%|██████████| 3119/3119 [00:21<00:00, 145.01it/s]



Lead 0.8238172920065253
Position 0.7002724795640327
Claim 0.6185258964143426
Evidence 0.7182409485983998
Counterclaim 0.5326135852451642


2022-01-19 08:13:11,187 - INFO - 1,2:3118,val_score:0.6697372200939685


Rebuttal 0.45962732919254656
Concluding Statement 0.8350630096367679
save model weight


100%|██████████| 3119/3119 [31:53<00:00,  1.63it/s]
100%|██████████| 780/780 [03:21<00:00,  3.87it/s]
100%|██████████| 3119/3119 [00:21<00:00, 144.61it/s]



Position 0.7050754458161865
Claim 0.6360865910046238
Evidence 0.714318547460354
Lead 0.8288288288288288
Counterclaim 0.528441879637263
Rebuttal 0.45662100456621


2022-01-19 08:49:01,691 - INFO - 1,3:3118,val_score:0.671180765067831


Concluding Statement 0.8288930581613508
save model weight


100%|██████████| 3119/3119 [31:52<00:00,  1.63it/s]
100%|██████████| 780/780 [03:20<00:00,  3.90it/s]
100%|██████████| 3119/3119 [00:21<00:00, 144.75it/s]



Position 0.7024668568551771
Claim 0.623512909761546
Evidence 0.7211099204782584
Lead 0.8212613605067475
Counterclaim 0.5353356890459364
Rebuttal 0.47348951911220716


2022-01-19 09:24:49,877 - INFO - 1,4:3118,val_score:0.6723984787139953


Concluding Statement 0.8296130952380952
save model weight


100%|██████████| 3119/3119 [31:51<00:00,  1.63it/s]
100%|██████████| 780/780 [03:20<00:00,  3.90it/s]
100%|██████████| 3119/3119 [00:21<00:00, 143.95it/s]



Position 0.6983967935871743
Evidence 0.720900140646976
Claim 0.6224131607900666
Lead 0.8242424242424242
Counterclaim 0.5300133392618942
Rebuttal 0.45780206435944143


2022-01-19 10:00:37,322 - INFO - 1,5:3118,val_score:0.6692939151930534


Concluding Statement 0.8312894834633965
fold2:start


Some weights of the model checkpoint at allenai/longformer-large-4096 were not used when initializing LongformerModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing LongformerModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).




100%|██████████| 3119/3119 [31:51<00:00,  1.63it/s]
100%|██████████| 780/780 [03:06<00:00,  4.18it/s]
100%|██████████| 3119/3119 [00:24<00:00, 129.23it/s]



Lead 0.7972177635098984
Position 0.6423022598870056
Claim 0.600750547274054
Evidence 0.6975435305153309
Concluding Statement 0.8044545111362779
Rebuttal 0.37212360289283364


2022-01-19 10:36:09,844 - INFO - 2,0:3118,val_score:0.6295012563269288


Counterclaim 0.4921165790731008
save model weight


100%|██████████| 3119/3119 [31:52<00:00,  1.63it/s]
100%|██████████| 780/780 [03:22<00:00,  3.86it/s]
100%|██████████| 3119/3119 [00:22<00:00, 140.30it/s]



Lead 0.8155136268343816
Position 0.6857635893011217
Claim 0.601051500932783
Evidence 0.719372793105512


2022-01-19 11:11:53,625 - INFO - 2,1:3118,val_score:0.6501740403996585


Concluding Statement 0.8411352746037597
Counterclaim 0.4939759036144578
Rebuttal 0.3944055944055944
save model weight


100%|██████████| 3119/3119 [31:44<00:00,  1.64it/s]
100%|██████████| 780/780 [03:04<00:00,  4.24it/s]
100%|██████████| 3119/3119 [00:22<00:00, 139.61it/s]



Lead 0.8146341463414634
Position 0.6932256932256933
Claim 0.5996029892573563
Evidence 0.7128919433024483
Concluding Statement 0.8350206224221972
Counterclaim 0.5512765036780615


2022-01-19 11:47:18,420 - INFO - 2,2:3118,val_score:0.6673108360198118


Rebuttal 0.4645239539114615
save model weight


100%|██████████| 3119/3119 [31:46<00:00,  1.64it/s]
100%|██████████| 780/780 [03:22<00:00,  3.86it/s]
100%|██████████| 3119/3119 [00:22<00:00, 139.67it/s]



Lead 0.8151581243184297
Position 0.693654990085922
Claim 0.623730625334046
Evidence 0.7264848624913335
Concluding Statement 0.8402167819099233
Counterclaim 0.5427180168216025


2022-01-19 12:23:03,177 - INFO - 2,3:3118,val_score:0.6701089127976079


Rebuttal 0.44879898862199746
save model weight


100%|██████████| 3119/3119 [31:49<00:00,  1.63it/s]
100%|██████████| 780/780 [03:04<00:00,  4.24it/s]
100%|██████████| 3119/3119 [00:22<00:00, 139.69it/s]



Lead 0.812150279776179
Position 0.6981547316742848
Claim 0.6229385307346327
Evidence 0.7319835277968428


2022-01-19 12:58:33,201 - INFO - 2,4:3118,val_score:0.6699654351044476


Concluding Statement 0.8287813147485402
Counterclaim 0.5381850853548967
Rebuttal 0.4575645756457565


100%|██████████| 3119/3119 [31:52<00:00,  1.63it/s]
100%|██████████| 780/780 [03:04<00:00,  4.23it/s]
100%|██████████| 3119/3119 [00:22<00:00, 140.24it/s]



Lead 0.8118179398456216
Position 0.697995620683847
Claim 0.6224500581333897
Evidence 0.7251179510222422
Concluding Statement 0.8277520814061055
Counterclaim 0.5400516795865633


2022-01-19 13:33:56,183 - INFO - 2,5:3118,val_score:0.6685879657428357


Rebuttal 0.45493042952208107
fold3:start


Some weights of the model checkpoint at allenai/longformer-large-4096 were not used when initializing LongformerModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing LongformerModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).




100%|██████████| 3119/3119 [31:50<00:00,  1.63it/s]
100%|██████████| 780/780 [02:57<00:00,  4.39it/s]
100%|██████████| 3119/3119 [00:24<00:00, 129.64it/s]



Position 0.6543762305351709
Claim 0.5592059438123984
Evidence 0.6570430426471557
Concluding Statement 0.8182861514919664
Lead 0.7978910369068541


2022-01-19 14:09:18,885 - INFO - 3,0:3118,val_score:0.6182162866100187


Counterclaim 0.4904661016949153
Rebuttal 0.3502454991816694
save model weight


100%|██████████| 3119/3119 [31:51<00:00,  1.63it/s]
100%|██████████| 780/780 [02:58<00:00,  4.38it/s]
100%|██████████| 3119/3119 [00:22<00:00, 141.62it/s]



Position 0.6911966987620357
Claim 0.6062060497823418
Evidence 0.7130476649013
Concluding Statement 0.835353347007272
Lead 0.8241361526560083


2022-01-19 14:44:37,299 - INFO - 3,1:3118,val_score:0.6443028821623636


Counterclaim 0.4802139037433155
Rebuttal 0.3599663582842725
save model weight


100%|██████████| 3119/3119 [31:48<00:00,  1.63it/s]
100%|██████████| 780/780 [02:57<00:00,  4.38it/s]
100%|██████████| 3119/3119 [00:22<00:00, 140.72it/s]



Position 0.7037854354099474
Claim 0.6246918872575287
Evidence 0.7292978773034756
Concluding Statement 0.8377061469265368
Lead 0.8182057353328072


2022-01-19 15:20:00,286 - INFO - 3,2:3118,val_score:0.6652059154591935


Counterclaim 0.5173439048562933
Rebuttal 0.4254104211277659
save model weight


100%|██████████| 3119/3119 [31:45<00:00,  1.64it/s]
100%|██████████| 780/780 [02:53<00:00,  4.49it/s]
100%|██████████| 3119/3119 [00:19<00:00, 160.80it/s]



Position 0.7052161976664378
Claim 0.6123454146367578
Evidence 0.709325513196481
Concluding Statement 0.8462251901317009
Lead 0.816953642384106


2022-01-19 15:55:12,719 - INFO - 3,3:3118,val_score:0.6629571429452588


Counterclaim 0.5075960679177838
Rebuttal 0.4430379746835443


100%|██████████| 3119/3119 [31:22<00:00,  1.66it/s]
100%|██████████| 780/780 [02:53<00:00,  4.48it/s]
100%|██████████| 3119/3119 [00:19<00:00, 159.96it/s]



Position 0.7047945205479452
Claim 0.6224425226745413
Evidence 0.7287230989956959
Concluding Statement 0.845299777942265
Lead 0.8262004175365344


2022-01-19 16:29:52,846 - INFO - 3,4:3118,val_score:0.6657081775376174


Counterclaim 0.5024342745861733
Rebuttal 0.430062630480167
save model weight


100%|██████████| 3119/3119 [31:24<00:00,  1.65it/s]
100%|██████████| 780/780 [02:53<00:00,  4.49it/s]
100%|██████████| 3119/3119 [00:19<00:00, 160.80it/s]



Position 0.7004701141705842
Claim 0.6132579327678291
Evidence 0.7217200920675911
Concluding Statement 0.8377679231337768
Lead 0.8258811152025249


2022-01-19 17:04:44,327 - INFO - 3,5:3118,val_score:0.6661717581070915


Counterclaim 0.5205104831358249
Rebuttal 0.4435946462715105
save model weight
fold4:start


Some weights of the model checkpoint at allenai/longformer-large-4096 were not used when initializing LongformerModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing LongformerModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).




100%|██████████| 3119/3119 [31:21<00:00,  1.66it/s]
100%|██████████| 780/780 [02:46<00:00,  4.68it/s]
100%|██████████| 3118/3118 [00:21<00:00, 143.99it/s]



Lead 0.8228600982673907
Claim 0.586342355916698
Evidence 0.6928797854181907
Concluding Statement 0.8104256516032252
Position 0.6619013581129378


2022-01-19 17:39:34,227 - INFO - 4,0:3118,val_score:0.6287593823178385


Counterclaim 0.4987012987012987
Rebuttal 0.3282051282051282
save model weight


100%|██████████| 3119/3119 [31:54<00:00,  1.63it/s]
100%|██████████| 780/780 [03:05<00:00,  4.21it/s]
100%|██████████| 3118/3118 [00:22<00:00, 139.24it/s]



Lead 0.8248348745046236
Evidence 0.7041631874030777
Claim 0.5825725182017797
Concluding Statement 0.8187066974595842
Position 0.6839902336937566


2022-01-19 18:15:02,742 - INFO - 4,1:3118,val_score:0.6509250433529447


Counterclaim 0.5227272727272727
Rebuttal 0.41948051948051945
save model weight


100%|██████████| 3119/3119 [31:26<00:00,  1.65it/s]
100%|██████████| 780/780 [02:46<00:00,  4.69it/s]
100%|██████████| 3118/3118 [00:19<00:00, 160.64it/s]



Lead 0.8192256341789053
Claim 0.6117386042759178
Evidence 0.6984051416329445
Position 0.6984180410636149


2022-01-19 18:49:49,297 - INFO - 4,2:3118,val_score:0.6631572717981126


Concluding Statement 0.8262150220913107
Counterclaim 0.5387387387387388
Rebuttal 0.44935972060535506
save model weight


100%|██████████| 3119/3119 [31:35<00:00,  1.65it/s]
100%|██████████| 780/780 [03:05<00:00,  4.20it/s]
100%|██████████| 3118/3118 [00:21<00:00, 144.38it/s]



Lead 0.820812317494027
Position 0.6982568962599425
Claim 0.6096029068258499
Evidence 0.714572864321608
Counterclaim 0.5413595413595413


2022-01-19 19:25:05,663 - INFO - 4,3:3118,val_score:0.666751420558991


Concluding Statement 0.8304435115897062
Rebuttal 0.4522119060622611
save model weight


100%|██████████| 3119/3119 [31:46<00:00,  1.64it/s]
100%|██████████| 780/780 [02:50<00:00,  4.59it/s]
100%|██████████| 3118/3118 [00:22<00:00, 139.10it/s]



Lead 0.8209074024940303
Position 0.7029021976178493
Evidence 0.7183316738577277
Claim 0.6091820580474934
Concluding Statement 0.8334849863512284
Counterclaim 0.5359531772575251


2022-01-19 20:00:18,735 - INFO - 4,4:3118,val_score:0.6653745091264629


Rebuttal 0.43686006825938567


100%|██████████| 3119/3119 [31:16<00:00,  1.66it/s]
100%|██████████| 780/780 [02:46<00:00,  4.68it/s]
100%|██████████| 3118/3118 [00:19<00:00, 159.19it/s]



Lead 0.8227176220806794
Position 0.6990810359231412
Evidence 0.7202504052316807
Claim 0.6076226179318963


2022-01-19 20:34:46,054 - INFO - 4,5:3118,val_score:0.6652452075375989


Concluding Statement 0.8241938422299143
Counterclaim 0.5394120153387303
Rebuttal 0.4434389140271493


2022-01-19 20:34:46,612 - INFO - [longformer_large] done in 63741 s


In [13]:
oof = pd.DataFrame()
for i in range(5):
    oof__ = pd.read_csv(OUTPUT_DIR + f"/ex{ex}_oof_{i}.csv")
    oof = pd.concat([oof,oof__]).reset_index(drop=True)
# COMPUTE F1 SCORE
f1s = []
CLASSES = oof['class'].unique()
for c in CLASSES:
    pred_df = oof.loc[oof['class']==c].copy()
    gt_df = train.loc[train['discourse_type']==c].copy()
    f1 = score_feedback_comp(pred_df, gt_df)
    print(c,f1)
    f1s.append(f1)
score = np.mean(f1s)
LOGGER.info(f'CV:{score}')

Lead 0.8218649517684887
Claim 0.6171306481638179
Evidence 0.7227554284936356
Position 0.7000402630519393
Concluding Statement 0.8329197377680655
Counterclaim 0.5368616691742633


2022-01-19 20:40:14,043 - INFO - CV:0.6696554829787125


Rebuttal 0.4560156824307768
