In [1]:
!curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py -q
!python pytorch-xla-env-setup.py --version nightly --apt-packages libomp5 libopenblas-dev
!export XLA_USE_BF16=1

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  5116  100  5116    0     0  16037      0 --:--:-- --:--:-- --:--:-- 16037
Updating... This may take around 2 minutes.
Updating TPU runtime to pytorch-nightly ...
Found existing installation: torch 1.5.0
Uninstalling torch-1.5.0:
  Successfully uninstalled torch-1.5.0
Found existing installation: torchvision 0.6.0a0+35d732a
Uninstalling torchvision-0.6.0a0+35d732a:
Done updating TPU runtime
  Successfully uninstalled torchvision-0.6.0a0+35d732a
Copying gs://tpu-pytorch/wheels/torch-nightly-cp37-cp37m-linux_x86_64.whl...

Operation completed over 1 objects/121.6 MiB.                                    
Copying gs://tpu-pytorch/wheels/torch_xla-nightly-cp37-cp37m-linux_x86_64.whl...

Operation completed over 1 objects/128.3 MiB.                                    
Copying gs://tpu-pytorch/wheels/torchvision-nig

In [2]:
! pip install transformers -q

You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.[0m


In [3]:
import re
import os
import sys
import pandas as pd
from pathlib import Path
import matplotlib.cm as cm
import numpy as np
import pandas as pd
from typing import *
from tqdm.notebook import tqdm
from sklearn.utils.extmath import softmax
from sklearn import model_selection
from sklearn.metrics import classification_report, f1_score, accuracy_score

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import transformers
from transformers import AdamW



In [5]:
import torch_xla.core.xla_model as xm

In [6]:
def seed_all(seed = 42):
  """
  Fix seed for reproducibility
  """
  # python RNG
  import random
  random.seed(seed)

  # pytorch RNGs
  import torch
  torch.manual_seed(seed)
  torch.backends.cudnn.deterministic = True
  if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)

  # numpy RNG
  import numpy as np
  np.random.seed(seed)

In [7]:
class config:
  SEED = 42
  KFOLD = 5
  SAVE_DIR = '.'
  TRAIN_FILE = '../input/mclwic/data.csv'
  TEST_FILE = ''
  OOF_FILE = os.path.join(SAVE_DIR, 'oof.csv')
  MAX_LEN = 172
  MODEL = 'xlm-roberta-base'
  TOKENIZER = transformers.AutoTokenizer.from_pretrained(MODEL)
  EPOCHS = 20
  TRAIN_BATCH_SIZE = 16
  VALID_BATCH_SIZE = 16
  DEVICE = xm.xla_device(1)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=512.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=5069051.0, style=ProgressStyle(descript…




In [8]:
class AverageMeter:
    """
    Computes and stores the average and current value
    Source : https://www.kaggle.com/abhishek/bert-base-uncased-using-pytorch/
    """
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [9]:
class EarlyStopping:
    """
    Early stopping utility
    Source : https://www.kaggle.com/abhishek/bert-base-uncased-using-pytorch/
    """
    
    def __init__(self, patience=7, mode="max", delta=0.001):
        self.patience = patience
        self.counter = 0
        self.mode = mode
        self.best_score = None
        self.early_stop = False
        self.delta = delta
        if self.mode == "min":
            self.val_score = np.Inf
        else:
            self.val_score = -np.Inf

    def __call__(self, epoch_score, model, model_path):
        if self.mode == "min":
            score = -1.0 * epoch_score
        else:
            score = np.copy(epoch_score)
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(epoch_score, model, model_path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print('EarlyStopping counter: {} out of {}'.format(self.counter, self.patience))
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(epoch_score, model, model_path)
            self.counter = 0

    def save_checkpoint(self, epoch_score, model, model_path):
        if epoch_score not in [-np.inf, np.inf, -np.nan, np.nan]:
            print('Validation score improved ({} --> {}). Saving model!'.format(self.val_score, epoch_score))
            xm.save(model.state_dict(), model_path)
        self.val_score = epoch_score

In [10]:
def preprocess(text):
  text = text.strip()
  text = ' '.join(text.split())
  return text

In [11]:
def process_data(word1, text1, word2, text2, tokenizer, max_len, label):
  
  text1 = preprocess(text1)
  text2 = preprocess(text2)
  
  text = '<s>' + word1 + '</s>' + text1 + '</s>' + word2 + '</s>' + text2 + '</s>'

  token_ids = tokenizer.encode(text, add_special_tokens=False)
  mask = [1] * len(token_ids)

  padding = max_len - len(token_ids)
  
  if padding>=0:
    token_ids = token_ids + ([0] * padding)
    mask = mask + ([0] * padding)
  else:
    token_ids = token_ids[0:max_len]
    mask = mask[0:max_len]

  label = 1 if label=='T' else 0

  assert len(token_ids)==max_len
  assert len(mask)==max_len

  return {'text':text,
          'ids':token_ids,
          'mask':mask,
          'label':label
          }

In [12]:
class Dataset:
    def __init__(self, pos, lemma, word1, text1, word2, text2, label):
        self.pos = pos
        self.lemma = lemma
        self.word1 = word1
        self.word2 = word2
        self.text1 = text1
        self.text2 = text2
        self.label = label

        self.tokenizer = config.TOKENIZER
        self.max_len = config.MAX_LEN
    
    def __len__(self):
        return len(self.text1)

    def __getitem__(self, item):
        data = process_data(
            self.word1[item],
            self.text1[item], 
            self.word2[item],
            self.text2[item],
            self.tokenizer,
            self.max_len,
            self.label[item],
        )

        return {
            'ids': torch.tensor(data["ids"], dtype=torch.long),
            'mask': torch.tensor(data["mask"], dtype=torch.long),
            'text': data['text'],
            'label': data['label'],
        }

In [13]:
def get_loss(logits, targets):
    loss_fn = nn.CrossEntropyLoss()
    return loss_fn(logits, targets)

In [14]:
class SimModel(nn.Module):
    def __init__(self, num_class):
        super(SimModel, self).__init__()
        self.backbone = transformers.XLMRobertaModel.from_pretrained(config.MODEL)
        self.drop = nn.Dropout(0.2)
        self.cls = nn.Linear(768, num_class)

    def forward(self, ids, mask):
        outputs = self.backbone(input_ids=ids, attention_mask=mask)
        pooled = outputs[1]
        x = self.drop(pooled)
        logits = self.cls(x)
        return logits

In [15]:
def train_fn(data_loader, model, optimizer, device):
  model.train()
  losses = AverageMeter()
  tk0 = tqdm(data_loader, total=len(data_loader))
  
  for bi, d in enumerate(tk0):
    ids = d['ids']
    mask = d['mask']
    label = d['label']

    ids = ids.to(device, dtype=torch.long)
    label = label.to(device, dtype=torch.long)
    mask = mask.to(device, dtype=torch.long)

    model.zero_grad()
    logits = model(ids, mask)
    
    loss = get_loss(logits, label)

    loss.backward()
    # optimizer.step()
    xm.optimizer_step(optimizer, barrier=True)

    losses.update(loss.item(), ids.size(0))
    tk0.set_postfix(loss=losses.avg)


In [16]:
def eval_fn(data_loader, model, device):
  model.eval()
  losses = AverageMeter()
  tk0 = tqdm(data_loader, total=len(data_loader))
  yt, yp = [], []

  for bi, d in enumerate(tk0):
    ids = d['ids']
    mask = d['mask']
    label = d['label']

    ids = ids.to(device, dtype=torch.long)
    label = label.to(device, dtype=torch.long)
    mask = mask.to(device, dtype=torch.long)

    with torch.no_grad():
        logits = model(ids, mask)        
       
    loss = get_loss(logits, label)
    preds = torch.softmax(logits, axis=-1).detach().cpu().numpy()

    pred_labels = np.argmax(preds, axis=1).flatten()
    ground_labels = label.to('cpu').numpy()

    yt = yt + ground_labels.tolist()
    yp = yp + pred_labels.tolist()

    losses.update(loss.item(), ids.size(0))
    tk0.set_postfix(loss=losses.avg)

  return accuracy_score(yt, yp)


In [17]:
def test_fn(data_loader, model, device):
  model.eval()
  tk0 = tqdm(data_loader, total=len(data_loader))
  test_preds = []

  for bi, d in enumerate(tk0):
    ids = d['ids']
    mask = d['mask']
    label = d['label']
    
    ids = ids.to(device, dtype=torch.long)
    mask = mask.to(device, dtype=torch.long)
    label = label.to(device, dtype=torch.long)

    with torch.no_grad():
        logits = model(ids, mask)        
    
    preds = torch.softmax(logits, axis=-1).detach().cpu().numpy()
    preds = preds[:, 1]
    test_preds = test_preds + preds.tolist()

  return test_preds

In [18]:
def run(df_train, df_val, fold=None):
  train_dataset = Dataset(
        pos = df_train.pos.values,
        lemma = df_train.lemma.values,
        word1 = df_train.word1.values,
        text1 = df_train.sentence1.values,
        word2 = df_train.word2.values,
        text2 = df_train.sentence2.values,
        label = df_train.target.values
    )
  
  valid_dataset = Dataset(
        pos = df_val.pos.values,
        lemma = df_val.lemma.values,
        word1 = df_val.word1.values,
        text1 = df_val.sentence1.values,
        word2 = df_val.word2.values,
        text2 = df_val.sentence2.values,
        label = df_val.target.values
    )
  
  train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=4
    )

  valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=2
    )
  
  
  model = SimModel(num_class=2)
  device = config.DEVICE
  model.to(device)

  lr = 5e-6
  param_optimizer = list(model.named_parameters())
  no_decay = ['bias', 'gamma', 'beta']
  optimizer_grouped_parameters = [
      {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
      'weight_decay_rate': 0.01},
      {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
      'weight_decay_rate': 0.0}
  ]
  optimizer = AdamW(optimizer_grouped_parameters, lr=lr)

  es = EarlyStopping(patience=6, mode="max")

  print('Starting training....')
  for epoch in range(config.EPOCHS):
    train_fn(train_data_loader, model, optimizer, device)
    valid_loss = eval_fn(valid_data_loader, model, device)
    print(f'Epoch :{epoch + 1} | Validation Score :{valid_loss}')
    if fold is None:
      es(valid_loss, model, model_path=os.path.join(config.SAVE_DIR, f"model.bin"))
    else:
      es(valid_loss, model, model_path=os.path.join(config.SAVE_DIR, f"model_{fold}.bin"))
    if es.early_stop:
      print('Early stopping')
      break

  print('Predicting for OOF')
  if fold is None:
    model.load_state_dict(torch.load(os.path.join(config.SAVE_DIR, 'model.bin')))
  else:
    model.load_state_dict(torch.load(os.path.join(config.SAVE_DIR, f'model_{fold}.bin')))
    
  model.to(device)
  
  test_predictions = test_fn(valid_data_loader, model, device)
  return test_predictions

In [19]:
def run_fold(fold_idx):
  """
    Perform k-fold cross-validation
  """

  seed_all(config.SEED)
  scores = pd.DataFrame()

  df_train = pd.read_csv(config.TRAIN_FILE)
  # df_val = pd.read_csv(config.VAL_FILE)
  
  # concatenating train and validation set
  train = df_train #pd.concat([df_train, df_val]).reset_index()
  
  # dividing folds
  kf = model_selection.StratifiedKFold(n_splits=config.KFOLD, shuffle=False, random_state=config.SEED)
  idx = None

  for fold, (train_idx, val_idx) in enumerate(kf.split(X=train, y=train.lang.values)):
      train.loc[val_idx, 'kfold'] = fold
      if fold==fold_idx:
        idx = val_idx
  
  train_augment = train.copy()
  
  def swap_columns(df, c1, c2):
    df['temp'] = df[c1]
    df[c1] = df[c2]
    df[c2] = df['temp']
    df.drop(columns=['temp'], inplace=True)

  swap_columns(train_augment, 'sentence1', 'sentence2')
  swap_columns(train_augment, 'word1', 'word2')
  
  if os.path.isfile(config.OOF_FILE):
    scores = pd.read_csv(config.OOF_FILE)
    print('Found oof file')
  else:
    scores = train.copy()
    scores['oof'] = 0
    scores.to_csv(config.OOF_FILE, index=False)
    print('Created oof file')
  
  train_combined = pd.concat([train, train_augment], axis=0)
  
  df_train = train_combined[train_combined.kfold!=fold_idx]
  df_val = train[train.kfold==fold_idx]

  y = run(df_train, df_val, fold_idx)
  scores.loc[idx, 'oof'] = y
  
  scores.to_csv(config.OOF_FILE, index=False)


In [20]:
run_fold(0)



Created oof file


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1115590446.0, style=ProgressStyle(descr…


Starting training....


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :1 | Validation Score :0.52
Validation score improved (-inf --> 0.52). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :2 | Validation Score :0.6857692307692308
Validation score improved (0.52 --> 0.6857692307692308). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :3 | Validation Score :0.7157692307692308
Validation score improved (0.6857692307692308 --> 0.7157692307692308). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :4 | Validation Score :0.7126923076923077
EarlyStopping counter: 1 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :5 | Validation Score :0.7257692307692307
Validation score improved (0.7157692307692308 --> 0.7257692307692307). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :6 | Validation Score :0.7315384615384616
Validation score improved (0.7257692307692307 --> 0.7315384615384616). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :7 | Validation Score :0.7296153846153847
EarlyStopping counter: 1 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :8 | Validation Score :0.7261538461538461
EarlyStopping counter: 2 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :9 | Validation Score :0.7396153846153846
Validation score improved (0.7315384615384616 --> 0.7396153846153846). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :10 | Validation Score :0.7419230769230769
Validation score improved (0.7396153846153846 --> 0.7419230769230769). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :11 | Validation Score :0.7492307692307693
Validation score improved (0.7419230769230769 --> 0.7492307692307693). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :12 | Validation Score :0.7480769230769231
EarlyStopping counter: 1 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :13 | Validation Score :0.7473076923076923
EarlyStopping counter: 2 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :14 | Validation Score :0.7334615384615385
EarlyStopping counter: 3 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :15 | Validation Score :0.7392307692307692
EarlyStopping counter: 4 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :16 | Validation Score :0.7342307692307692
EarlyStopping counter: 5 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :17 | Validation Score :0.7503846153846154
Validation score improved (0.7492307692307693 --> 0.7503846153846154). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :18 | Validation Score :0.7488461538461538
EarlyStopping counter: 1 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :19 | Validation Score :0.7565384615384615
Validation score improved (0.7503846153846154 --> 0.7565384615384615). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :20 | Validation Score :0.75
EarlyStopping counter: 1 out of 6
Predicting for OOF


HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))




In [21]:
run_fold(1)



Found oof file
Starting training....


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :1 | Validation Score :0.5396153846153846
Validation score improved (-inf --> 0.5396153846153846). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :2 | Validation Score :0.6823076923076923
Validation score improved (0.5396153846153846 --> 0.6823076923076923). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :3 | Validation Score :0.7038461538461539
Validation score improved (0.6823076923076923 --> 0.7038461538461539). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :4 | Validation Score :0.7061538461538461
Validation score improved (0.7038461538461539 --> 0.7061538461538461). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :5 | Validation Score :0.6961538461538461
EarlyStopping counter: 1 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :6 | Validation Score :0.6846153846153846
EarlyStopping counter: 2 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :7 | Validation Score :0.6796153846153846
EarlyStopping counter: 3 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :8 | Validation Score :0.6961538461538461
EarlyStopping counter: 4 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :9 | Validation Score :0.7
EarlyStopping counter: 5 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :10 | Validation Score :0.6911538461538461
EarlyStopping counter: 6 out of 6
Early stopping
Predicting for OOF


HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))




In [22]:
run_fold(2)



Found oof file
Starting training....


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :1 | Validation Score :0.49230769230769234
Validation score improved (-inf --> 0.49230769230769234). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :2 | Validation Score :0.681923076923077
Validation score improved (0.49230769230769234 --> 0.681923076923077). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :3 | Validation Score :0.7076923076923077
Validation score improved (0.681923076923077 --> 0.7076923076923077). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :4 | Validation Score :0.71
Validation score improved (0.7076923076923077 --> 0.71). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :5 | Validation Score :0.7196153846153847
Validation score improved (0.71 --> 0.7196153846153847). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :6 | Validation Score :0.7342307692307692
Validation score improved (0.7196153846153847 --> 0.7342307692307692). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :7 | Validation Score :0.7353846153846154
Validation score improved (0.7342307692307692 --> 0.7353846153846154). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :8 | Validation Score :0.7457692307692307
Validation score improved (0.7353846153846154 --> 0.7457692307692307). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :9 | Validation Score :0.7488461538461538
Validation score improved (0.7457692307692307 --> 0.7488461538461538). Saving model!


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :10 | Validation Score :0.7434615384615385
EarlyStopping counter: 1 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :11 | Validation Score :0.7442307692307693
EarlyStopping counter: 2 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :12 | Validation Score :0.7480769230769231
EarlyStopping counter: 3 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :13 | Validation Score :0.7407692307692307
EarlyStopping counter: 4 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :14 | Validation Score :0.7453846153846154
EarlyStopping counter: 5 out of 6


HBox(children=(FloatProgress(value=0.0, max=1300.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))


Epoch :15 | Validation Score :0.7407692307692307
EarlyStopping counter: 6 out of 6
Early stopping
Predicting for OOF


HBox(children=(FloatProgress(value=0.0, max=163.0), HTML(value='')))




In [23]:
# run_fold(3)

In [24]:
# run_fold(4)

# Run Results

In [25]:
df = pd.read_csv(config.OOF_FILE)
df['gold'] = df['target'].map({'T':1, 'F':0})
df.head(3)

Unnamed: 0,lang,word1,sentence1,word2,sentence2,lemma,pos,target,kfold,oof,gold
0,ar-ar,ملاك,ونظرا لأهمية هذه المسائل لسير عمل المحكمة مستق...,ملاك,ولا توجد حراسة أمام جميع البعثات الدبلوماسية ب...,مَلاك,NOUN,T,0.0,0.996796,1
1,ar-ar,ملاك,ونظرا لأهمية هذه المسائل لسير عمل المحكمة مستق...,ملاكهما,وأعربت عن رغبتها في الحصول على معلومات بشأن مو...,مَلاك,NOUN,T,0.0,0.015161,1
2,ar-ar,فوضى,ويؤدي هذا المرض، الذي ينتشر بين أكبر قطاعات ال...,فوضى,والواقع أن آلية نزع السلاح المتعددة الأطراف تع...,فَوضَى,NOUN,T,0.0,0.999804,1


In [26]:
df['pred'] = (df['oof']>=0.5)*1
print(classification_report(df['gold'].values, df['pred'].values))

              precision    recall  f1-score   support

           0       0.61      0.81      0.70      6500
           1       0.72      0.49      0.58      6500

    accuracy                           0.65     13000
   macro avg       0.66      0.65      0.64     13000
weighted avg       0.66      0.65      0.64     13000



In [27]:
from sklearn.metrics import roc_auc_score
roc_auc_score(df['gold'].values, df['oof'].values)

0.6163320710059172

In [28]:
thresholds = np.arange(0, 1, 0.001)
ascores = [accuracy_score(df['gold'].values, (df['oof']>=t)*1) for t in thresholds]
idx = np.argmax(ascores)
print(thresholds[idx], ascores[idx])

0.561 0.6475384615384615


# Test Predictions

In [29]:
# threshold = 0.584
# df = pd.read_csv(config.TEST_FILE)

# test_dataset = Dataset(
#       text = df.Text.values,
#       label = df.Label.values,
#     )
  
# test_data_loader = torch.utils.data.DataLoader(
#       test_dataset,
#       batch_size=config.VALID_BATCH_SIZE,
#       num_workers=4
#   )

# scores = pd.DataFrame()

# model = transformers.BertForSequenceClassification.from_pretrained(config.MODEL, num_labels=2)
# device = config.DEVICE
# model.to(device)

# for i in range(config.KFOLD):
#   model.load_state_dict(torch.load(os.path.join(config.SAVE_DIR, f'model_{i}.bin')))
#   y_preds = test_fn(test_data_loader, model, device)
#   scores[f'prob_{i}'] = y_preds



# scores['avg'] = (scores['prob_0'] + scores['prob_1'] + scores['prob_2'] + scores['prob_3'] + scores['prob_4'])/5
# scores['preds'] = (scores['avg']>=threshold)*1
# scores['Labels'] = scores['preds'].map({1:'INFORMATIVE', 0:'UNINFORMATIVE'})
# scores.to_csv(os.path.join(config.SAVE_DIR, 'scores.csv'), index=False)

# with open(os.path.join(config.SAVE_DIR, 'submission.txt'), 'w') as f:
#   for i in scores['Labels'].values:
#     f.write(i+'\n')