In [1]:
class CFG:
    """Static configuration namespace: paths, model choice and hyper-parameters."""

    # --- data -------------------------------------------------------------
    dataset = './dataset/polarization_ds.csv'
    train_df = './dataset/media_split/train.csv'
    valid_df = './dataset/media_split/valid.csv'
    test_df = './dataset/media_split/test.csv'
    target_cols = 'bias_text'  # alternative: ['left', 'center', 'right']
    classes = 3

    # --- model / objective ------------------------------------------------
    model = 'bert-base-cased'
    embedd_dim = 768
    criterion = 'crossentropy'  # one of: 'crossentropy', 'mse', 'l1', 'focal'
    main_metric = 'f1_macro'

    # --- checkpoints ------------------------------------------------------
    model_file = './models/best_bert_base_ce.pt'
    model_file_emotion = './models/best_bert_base_ce_ed.pt'
    triplet_pretrain = False
    emotion_pretrain = True
    triplet_model = './models/bert_base_triplet.pt'
    emotion_model = '../emotion_detection/models/ed_best_bert_base.pt'

    # --- training knobs (kept as-is) --------------------------------------
    apex = True  # passed as `enabled=` to the autocast context during evaluation
    gradient_checkpointing = True
    num_cycles = 0.5
    num_warmup_steps = 0
    epochs = 5
    encoder_lr = 2e-5
    decoder_lr = 2e-5
    min_lr = 1e-6
    eps = 1e-6
    betas = (0.9, 0.999)
    batch_size = 128
    max_len = 512
    weight_decay = 0.01
    # gradient_accumulation_steps=1
    max_grad_norm = 1000
    seed = 0
    scheduler = 'cosine'  # one of: 'linear', 'cosine'
    batch_scheduler = True

In [2]:
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries used below; consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

# Load libraries and data

In [3]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F
from torch.optim import Adam, SGD, AdamW
from torch.utils.data import DataLoader, Dataset

from transformers import AutoTokenizer, AutoModel, AutoConfig
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup

from tqdm import tqdm
import gc

In [4]:
def set_seed(seed):
  """Seed every random number generator used in this notebook.

  Seeds Python's built-in ``random`` module, NumPy's legacy global RNG and
  PyTorch (CPU, plus all CUDA devices when CUDA is available).

  Args:
    seed: integer seed applied to all generators.
  """
  import random  # function-scope import: the notebook's import cell does not pull in `random`

  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  if torch.cuda.is_available():  # the GPU generators are seeded separately
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
set_seed(CFG.seed)
# Some GPU operations are stochastic; make cuDNN deterministic for reproducibility.
# (The attribute name was previously misspelled as `determinstic`, which only
# created an unused attribute and left the real flag untouched.)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [5]:
# Attach runtime objects to the config: the compute device (first CUDA GPU when
# available, otherwise CPU) and the tokenizer that matches CFG.model.
_device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
CFG.device = torch.device(_device_name)
CFG.tokenizer = AutoTokenizer.from_pretrained(CFG.model)

# Dataset

In [6]:
class TrainDataset(Dataset):
    """Map-style dataset yielding ``(tokenized_inputs, label)`` pairs.

    The text of each sample is the ``title`` and ``content`` columns joined by
    a single space; the label is read from the column named by
    ``cfg.target_cols``.

    Args:
        cfg: configuration object providing ``tokenizer``, ``max_len``,
            ``target_cols`` and ``criterion``.
        df: DataFrame with ``title``, ``content`` and the target column(s).
    """

    def __init__(self, cfg, df):
        self.cfg = cfg
        self.texts = (df['title'] + ' ' + df['content']).values
        self.labels = df[cfg.target_cols].values

    def __len__(self):
        """Number of samples."""
        return len(self.texts)

    def get_text(self, idx):
        """Tokenize sample ``idx``, padded/truncated to ``cfg.max_len`` tokens.

        Returns the tokenizer output with every value converted to a
        ``torch.long`` tensor.
        """
        inputs = self.cfg.tokenizer.encode_plus(
            self.texts[idx],
            return_tensors=None,
            add_special_tokens=True,
            # Read the limit from the injected config rather than the global CFG.
            max_length=self.cfg.max_len,
            # `pad_to_max_length=True` is deprecated; this is its documented replacement.
            padding='max_length',
            truncation=True,
        )
        # Replace values in place so the tokenizer's container type is preserved.
        for key in list(inputs.keys()):
            inputs[key] = torch.tensor(inputs[key], dtype=torch.long)
        return inputs

    def get_labels(self, idx):
        """Return the label of sample ``idx`` with a dtype matching the loss.

        'crossentropy' and 'focal' get an integer (``torch.long``) label;
        every other criterion gets a ``torch.float`` label.
        """
        if self.cfg.criterion in ('crossentropy', 'focal'):
            return torch.tensor(self.labels[idx]).type(torch.LongTensor)
        return torch.tensor(self.labels[idx], dtype=torch.float)

    def __getitem__(self, idx):
        """Return ``(inputs, label)`` for sample ``idx``."""
        return self.get_text(idx), self.get_labels(idx)

def collate(inputs):
    """Trim a padded batch to its longest real sequence.

    The length is the largest per-row sum of ``attention_mask``; every tensor
    in ``inputs`` is cut to that many leading columns. The mapping is modified
    in place and also returned.
    """
    longest = int(inputs["attention_mask"].sum(dim=1).max())
    for field in list(inputs.keys()):
        inputs[field] = inputs[field][:, :longest]
    return inputs

# Model

In [7]:
class TransformerModel(nn.Module):
    """Pretrained encoder (``CFG.model``) followed by a linear classifier.

    The classifier maps ``CFG.embedd_dim`` features to ``CFG.classes`` outputs.
    For any criterion other than 'crossentropy' the outputs are passed through
    a softmax before being returned.
    """

    def __init__(self):
        super().__init__()

        self.model = AutoModel.from_pretrained(CFG.model)
        if CFG.gradient_checkpointing:
            self.model.gradient_checkpointing_enable()

        self.clf = nn.Linear(CFG.embedd_dim, CFG.classes)
        if CFG.criterion != 'crossentropy':
            self.sm = nn.Softmax(dim=-1)
        nn.init.xavier_uniform_(self.clf.weight)

    def forward(self, input_id, mask):
        """Run the encoder and classify its second output.

        Args:
            input_id: token ids forwarded to the encoder as ``input_ids``.
            mask: attention mask forwarded as ``attention_mask``.

        Returns:
            Raw classifier outputs for 'crossentropy', softmax-normalised
            outputs otherwise.
        """
        # The sequence starts with the [CLS] token; the encoder's second
        # return value is used as the pooled sentence representation.
        _sequence_out, pooled = self.model(
            input_ids=input_id, attention_mask=mask, return_dict=False
        )
        logits = self.clf(pooled)
        if CFG.criterion == 'crossentropy':
            return logits
        # NOTE(review): this branch is also taken for 'focal', whose loss applies
        # log_softmax itself, so softmax is effectively applied twice there.
        # Confirm whether that is intended before changing it, since existing
        # checkpoints may have been trained this way.
        return self.sm(logits)

# Build landscape

In [8]:
class FocalLoss(nn.Module):
    """Focal loss: ``-(1 - p_t) ** gamma * log(p_t)`` for the target class.

    Args:
        gamma: focusing exponent; ``0`` reduces the loss to plain
            cross-entropy.
        reduction: ``'mean'`` (default), ``'sum'`` or ``'none'`` (per-sample
            losses). Any other value makes ``forward`` raise
            ``NotImplementedError``.
    """

    def __init__(self, gamma=0, reduction='mean'):
        super().__init__()
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, input, target):
        """Compute the loss.

        Args:
            input: 2-D tensor of unnormalised scores, one row per sample
                (classes are gathered along dim 1).
            target: integer class indices, one per sample.

        Returns:
            A scalar tensor for 'mean'/'sum', a 1-D tensor for 'none'.

        Raises:
            NotImplementedError: if ``self.reduction`` is not supported.
        """
        target = target.view(-1, 1)

        # Explicit class dimension: the implicit-dim form of log_softmax is
        # deprecated and emits a warning. dim=1 matches the gather below.
        logpt = F.log_softmax(input, dim=1)
        logpt = logpt.gather(1, target).view(-1)
        pt = torch.exp(logpt)

        loss = -1 * (1 - pt) ** self.gamma * logpt
        if self.reduction == 'mean':
            return loss.mean()
        if self.reduction == 'sum':
            return loss.sum()
        if self.reduction == 'none':
            return loss
        raise NotImplementedError(f'Not implemented reduction: {self.reduction}')

In [9]:
# Map each supported CFG.criterion value to a zero-argument factory, so only the
# selected loss is instantiated. Note that 'l1' selects the smooth L1 variant.
_criterion_factories = {
    'crossentropy': nn.CrossEntropyLoss,
    'mse': nn.MSELoss,
    'l1': nn.SmoothL1Loss,
    'focal': lambda: FocalLoss(5),
}
if CFG.criterion not in _criterion_factories:
    raise NotImplementedError('Change loss')
criterion = _criterion_factories[CFG.criterion]()

In [10]:
def eval(model, test_loader, criterion):
    """Return the mean per-batch loss of ``model`` over ``test_loader``.

    The model is switched to eval mode and run without gradient tracking,
    under CUDA autocast (float16) when ``CFG.apex`` is true.

    Args:
        model: callable as ``model(input_ids, attention_mask)``.
        test_loader: iterable yielding ``(inputs, labels)`` batches, where
            ``inputs`` holds 'input_ids' and 'attention_mask'.
        criterion: loss callable as ``criterion(predictions, labels)``.

    Returns:
        The arithmetic mean of the individual batch losses.
    """
    # NOTE(review): every batch carries equal weight in the mean, so a shorter
    # final batch would count as much as a full one.
    model.eval()
    batch_losses = []
    with torch.no_grad():
        for batch, targets in test_loader:
            batch = collate(batch)
            # Move the batch to the configured device.
            attention = batch['attention_mask'].to(CFG.device)
            token_ids = batch['input_ids'].squeeze(1).to(CFG.device)
            targets = targets.to(CFG.device)

            with torch.cuda.amp.autocast(enabled=CFG.apex, dtype=torch.float16):
                predictions = model(token_ids, attention)
                batch_loss = criterion(predictions, targets)
            batch_losses.append(batch_loss.detach().cpu().item())
    return np.mean(batch_losses)

In [11]:
# State dicts used as the three anchor points of the loss landscape:
#   defeault_model_sd - freshly constructed TransformerModel (pretrained encoder
#                       with a newly initialised classifier head)
#   emotion_model_sd  - checkpoint at CFG.model_file_emotion
#   trained_model_sd  - checkpoint at CFG.model_file
defeault_model_sd = TransformerModel().state_dict()
# map_location remaps the stored tensors onto the device in use, so loading
# also works on a machine without the device the checkpoints were saved from.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
emotion_model_sd = torch.load(CFG.model_file_emotion, map_location=CFG.device)
trained_model_sd = torch.load(CFG.model_file, map_location=CFG.device)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [12]:
# Grid of interpolation coefficients: from -1 up to (but excluding) 2 in
# increments of 0.1. The same grid is used for both coefficients `a` and `b`
# in the landscape loop below.
steps = torch.arange(-1, 2, 0.1)

In [13]:
df = pd.read_csv(CFG.dataset, index_col='id')

# Encode the textual bias label as an integer: 'center' -> 1, 'right' -> 2,
# anything else (including 'left') -> 0.
is_center = df['bias_text'] == 'center'
is_right = df['bias_text'] == 'right'
target = is_center + is_right * 2
df['bias_text'] = target

# One-hot float columns derived from the integer code.
for class_id, class_name in enumerate(['left', 'center', 'right']):
    df[class_name] = (df['bias_text'] == class_id).astype(float)

In [14]:

# The split file lists the ids of the test rows; select those rows from the
# full frame (indexed by 'id') and wrap them in a non-shuffled loader.
test_ids = pd.read_csv(CFG.test_df)['ID']
test_df = df.loc[test_ids]
test_ds = TrainDataset(CFG, test_df)
test_loader = DataLoader(test_ds, batch_size=CFG.batch_size, shuffle=False)

In [15]:
# Evaluate the test loss on a 2-D slice of parameter space spanned by the three
# state dicts:
#     theta = (1 - a - b) * theta_0 + a * theta_1 + b * theta_2
# with theta_0 = defeault_model_sd, theta_1 = emotion_model_sd and
# theta_2 = trained_model_sd. `res` is indexed by `a` (rows) and `b` (columns);
# x / y / z hold the same data as flat lists.
res = pd.DataFrame()
x = []
y = []
z = []
model = TransformerModel()
model.to(CFG.device)
for a in tqdm(steps):
    for b in steps:
        sd = model.state_dict()
        for param in sd:
            base = defeault_model_sd[param].to(CFG.device)
            if not torch.is_floating_point(base):
                # Integer/bool entries (e.g. index buffers) must not be blended:
                # the float arithmetic followed by the cast back on load could
                # truncate them to a different value. Copy them unchanged.
                # Assumes such entries are identical across the three state
                # dicts - TODO confirm.
                sd[param] = base
                continue
            sd[param] = base * (1 - a - b)
            sd[param] += emotion_model_sd[param].to(CFG.device) * a
            sd[param] += trained_model_sd[param].to(CFG.device) * b
        model.load_state_dict(sd)
        x.append(a.item())
        y.append(b.item())
        loss = eval(model, test_loader, criterion)
        z.append(loss)
        res.loc[a.item(), b.item()] = loss


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 30/30 [3:19:20<00:00, 398.69s/it]  


In [16]:
# Plain Python floats for the grid values, used to index `res` and as plot axes.
st = steps.tolist()

In [20]:
from plotly.offline import plot

# Surface of log(1 + loss) over the coefficient grid, written to a standalone
# HTML file. Rows of `z` come from the `a` index of `res`, columns from `b`.
loss_surface = {
    'x': np.array(st),
    'y': np.array(st),
    'z': np.log1p(res.loc[st, st].values),
    'type': 'surface',
}
data = [loss_surface]

plot({'data': data}, filename='./emotion_ce_lls_log.html')

'./emotion_ce_lls_log.html'

In [18]:
# Persist the raw (non-log) loss grid; rows are indexed by `a`, columns by `b`.
res.to_csv('emotion_ce_lls.csv')

In [19]:
# Display the loss grid (rows: coefficient `a`, columns: coefficient `b`).
res

Unnamed: 0,-1.0,-0.9,-0.8,-0.7,-0.6,-0.5,-0.4,-0.3,-0.2,-0.1,...,1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9
-1.0,3.378589,3.304859,3.281837,3.202447,3.137291,3.116984,3.073838,3.084408,2.975886,2.970322,...,1.252789,1.30862,1.791297,2.684402,3.609387,4.858372,6.024449,7.287312,8.826478,9.708188
-0.9,3.224568,3.159688,3.094933,3.067536,2.996241,2.975577,2.928751,2.883264,2.907658,2.760276,...,1.249298,1.59546,2.370243,3.459853,4.412065,5.799234,7.051221,8.257887,9.916562,10.861424
-0.8,3.069773,3.049135,2.98929,2.886831,2.848768,2.80633,2.770845,2.803904,2.713529,2.679239,...,1.474948,2.061548,3.073409,3.992667,5.036086,6.31994,7.969764,9.121057,10.854528,12.337419
-0.7,2.917802,2.910188,2.843884,2.753237,2.727752,2.690129,2.64927,2.657621,2.537399,2.449785,...,1.840122,2.756858,3.877424,5.130231,6.387883,7.722816,9.144928,10.663971,12.299328,13.988453
-0.6,2.815444,2.72717,2.664081,2.641096,2.530844,2.520275,2.482556,2.455829,2.392537,2.31687,...,2.354101,3.421606,4.671469,5.817651,7.170709,8.694456,10.215847,11.731513,13.506437,14.867561
-0.5,2.62612,2.57213,2.571936,2.476876,2.428519,2.396711,2.312859,2.351475,2.242361,2.157967,...,2.944812,4.137918,5.483498,6.825687,8.038215,9.713788,11.28217,12.886781,15.053834,16.092656
-0.4,2.48719,2.46409,2.380802,2.355681,2.266862,2.201655,2.167387,2.152378,2.107604,2.026431,...,3.589862,4.614495,6.200075,7.565239,9.332503,10.667767,12.547845,14.116135,16.083812,17.578896
-0.3,2.380003,2.34209,2.287004,2.188158,2.165188,2.07026,2.070886,2.032622,1.963876,1.912736,...,4.10451,5.397053,7.051427,8.452704,9.816259,11.639931,13.723214,15.127698,17.281643,19.161475
-0.2,2.225729,2.235211,2.171853,2.055082,2.007016,1.961702,1.920705,1.912722,1.829708,1.754931,...,5.027236,6.529642,8.039694,9.60406,11.187895,12.893019,14.77017,16.728001,18.690114,19.942265
-0.1,2.192497,2.073232,1.983705,1.980437,1.86592,1.827445,1.790677,1.755012,1.703092,1.622992,...,5.787693,7.364455,8.930594,10.574565,12.253592,14.037985,15.991027,17.773132,20.295927,21.211923
