### Установка и импорт всех необходимых зависимостей

In [None]:
!pip install -q razdel
!pip install -q pymorphy2
!pip install -q git+https://github.com/ahmados/rusynonyms.git
!pip install -q natasha

In [None]:
import xml.etree.ElementTree as ET
import pandas as pd

import nltk
from nltk.corpus import stopwords
import re
import pymorphy2
from razdel import tokenize
from razdel import sentenize
import string
from natasha import (
    MorphVocab,
    NewsMorphTagger,
    NewsEmbedding,
    Segmenter,
    NewsSyntaxParser,
    Doc
)

import torch
import tensorflow_hub as hub
from torch import nn
from torch.utils.data import Dataset, DataLoader
import transformers
import numpy as np

from tqdm import tqdm
import os
import sys
from typing import *

from lime.lime_text import LimeTextExplainer
import shap

# Fetch the NLTK resources used below (stop-word lists and the punkt tokenizer data).
nltk.download('stopwords')
nltk.download('punkt')
# Russian stop words and ASCII punctuation characters, kept as module-level lists.
rus_stopwords = stopwords.words('russian')
punctuation = list(string.punctuation)

### Работа с данными (kaggle)

In [None]:
# Root folder of the evaluation datasets (Kaggle input path).
datasets_folder = '/kaggle/input/sw-datasets/Russian-Sentiment-Analysis-Evaluation-Datasets'
# Sub-folders of the root, one per dataset.
datasets = ['SentiRuEval-2015-telecoms', 'SentiRuEval-2015-banks', 'SentiRuEval-2016-banks', 'SentiRuEval-2016-telecoms']
# File names expected inside a dataset folder; indexed positionally further below.
samples = ['test.xml', 'train.xml', 'test_etalon.xml']

In [None]:
def extract_data(path: str) -> pd.DataFrame:
    """
    Read an XML export and return its rows as a DataFrame.

    The file is expected to contain a ``database`` element whose ``table``
    children each describe one row; every ``column`` child contributes the
    cell named by its ``name`` attribute. The column set (and order) is taken
    from the first table only.

    path: location of the XML file
    returns: DataFrame with one row per ``table`` element, cells kept as the
             raw element text
    """
    root = ET.parse(path).getroot()
    database = root.findall('database')[0]
    tables = database.findall('table')

    # the first table defines which columns exist
    column_names = []
    if tables:
        column_names = [cell.attrib['name'] for cell in tables[0].findall('column')]
    records = {name: [] for name in column_names}

    # every table adds one value to each column
    for table in tables:
        for cell in table.findall('column'):
            records[cell.attrib['name']].append(cell.text)

    return pd.DataFrame(records, columns=column_names)

# Set up all paths (Kaggle) and load the train/test tables.
banks_dataset = datasets[2]
path2samples = os.path.join(datasets_folder, banks_dataset)
# Names of the per-bank label columns read from the loaded tables.
banks = ['sberbank', 'vtb', 'gazprom', 'alfabank', 'bankmoskvy', 'raiffeisen', 'uralsib', 'rshb']

# samples[2] is 'test_etalon.xml'
path2test = os.path.join(path2samples, samples[2])
data_test = extract_data(path2test)

# samples[1] is 'train.xml'
path2train = os.path.join(path2samples, samples[1])
data_train = extract_data(path2train)

In [None]:
def extract_text_features(
    data: pd.DataFrame, bank_columns: Optional[Sequence[str]] = None
) -> pd.DataFrame:
    """
    Clean the tweet text and collect up to two sentiment labels per tweet.

    data: table with a 'text' column and one label column per bank; a label
          is either the string 'NULL' (bank not mentioned) or an integer-like
          string
    bank_columns: names of the label columns; defaults to the module-level
                  ``banks`` list
    returns: DataFrame with columns 'text', '0class', '1class'. Labels are
             shifted by +1. When a tweet has a single distinct label it is
             written to both class columns. Distinct labels keep the order in
             which they first appear across ``bank_columns``, so the result
             is deterministic.
    raises: ValueError if a row has no label at all or more than two distinct
            labels (only two target columns exist)
    """
    if bank_columns is None:
        bank_columns = banks
    bank_columns = list(bank_columns)

    # NOTE(review): the URL pattern is kept exactly as it was; its 'A-z' range
    # also matches a few punctuation characters between 'Z' and 'a'.
    url_pattern = re.compile('http[A-z|:|.|/|0-9]*')
    mention_pattern = re.compile(r'@\S*')

    texts, first_labels, second_labels = [], [], []
    for position, (_, row) in enumerate(data.iterrows()):
        # distinct labels without the 'NULL' placeholder, in first-seen order
        labels = list(dict.fromkeys(
            value for value in row[bank_columns] if value != 'NULL'
        ))
        if not 1 <= len(labels) <= 2:
            raise ValueError(
                f'row {position}: expected 1 or 2 distinct labels, got {len(labels)}'
            )
        if len(labels) == 1:
            labels.append(labels[0])

        # drop links, @mentions and the '#' sign
        cleaned = url_pattern.sub('', row['text']).strip()
        cleaned = mention_pattern.sub('', cleaned).strip()
        cleaned = cleaned.replace('#', '').strip()

        texts.append(cleaned)
        first_labels.append(int(labels[0]) + 1)
        second_labels.append(int(labels[1]) + 1)

    return pd.DataFrame({
        'text': texts,
        '0class': first_labels,
        '1class': second_labels,
    })

# Cleaned text plus the two label columns for both splits.
extracted_test = extract_text_features(data_test)
extracted_train = extract_text_features(data_train)

In [None]:
# An example tweet from the dataset.
extracted_test.iloc[3308].text

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Distribution of the targets over the training tweets, one panel per label column.
fig, axes = plt.subplots(1, 2, figsize=(8, 5))
plt.subplots_adjust(hspace=0.15, wspace=0.3)

graph1 = sns.countplot(data=extracted_train, x='0class', ax=axes[0])
graph1.set(xlabel='class_num', ylabel='amount of class', title='Amount of classes according 1 label')
graph1.grid(True)

graph2 = sns.countplot(data=extracted_train, x='1class', ax=axes[1])
graph2.set(xlabel='class_num', ylabel='amount of class', title='Amount of classes according 2 label')
graph2.grid(True)

# A bare None as the last expression keeps the notebook cell from echoing a value.
None

### Инициализируем модель (fine-tune) для решения нашей задачи классификации

In [None]:
# Default learning rate handed to Adam; every parameter group below overrides it.
learning_rate = 1e-05


class BERTmy(torch.nn.Module):
    """
    Pretrained "DeepPavlov/rubert-base-cased-sentence" encoder with a small
    MLP classification head on top of the pooled output.
    """

    def __init__(self, n_classes: int) -> None:
        """
        n_classes: number of output logits of the classification head
        """
        super().__init__()
        self.rubert = transformers.AutoModel.from_pretrained(
            "DeepPavlov/rubert-base-cased-sentence"
        )
        # NOTE(review): do_lower_case=True is requested for a checkpoint whose
        # name says "cased", and add_additional_tokens does not look like a
        # standard tokenizer option - both are kept as-is, confirm they are intended.
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(
            "DeepPavlov/rubert-base-cased-sentence",
            do_lower_case=True,
            add_additional_tokens=True
        )

        hidden_size_output = self.rubert.config.hidden_size
        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(hidden_size_output, hidden_size_output, bias=True),
            torch.nn.Dropout(0.05),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_size_output, n_classes),
        )

    def forward(
        self, input_ids: torch.Tensor, attention_mask: torch.Tensor,
        token_type_ids: torch.Tensor, output_attentions: bool=False
    ) -> Union[torch.Tensor, Tuple[torch.Tensor, Any]]:
        """
        Run the encoder and classify its pooled output.

        returns: the logits, or the pair (logits, attentions) when
                 output_attentions is True
        """
        rubert_output = self.rubert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            return_dict=True,
            output_attentions=output_attentions
        )
        output = self.classifier(rubert_output['pooler_output'])

        if output_attentions:
            return output, rubert_output['attentions']
        return output

    def configure_optimizer(
        self, use_scheduler: bool=False
    ) -> Union[
        torch.optim.Optimizer,
        Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.ExponentialLR]
    ]:
        """
        Freeze the first half of the encoder parameter tensors and build Adam.

        use_scheduler: when True an ExponentialLR scheduler (gamma=0.96) is
                       created as well
        returns: the optimizer, or the pair (optimizer, scheduler)
        """
        # Freeze exactly the first half of the encoder tensors. The previous
        # loop disabled the gradient before testing its stop condition and
        # therefore froze one tensor too many.
        encoder_params = list(self.rubert.encoder.parameters())
        for param in encoder_params[:len(encoder_params) // 2]:
            param.requires_grad = False

        # Adam with a small rate for the pretrained parts and a larger one
        # for the freshly initialised head
        optimizer = torch.optim.Adam(
            params=[
                {'params': self.rubert.embeddings.parameters(), 'lr': 4e-6},
                {'params': self.rubert.encoder.parameters(), 'lr': 4e-6},
                {'params': self.rubert.pooler.parameters(), 'lr': 4e-6},
                {'params': self.classifier.parameters(), 'lr': 9e-5}
            ],
            lr=learning_rate
        )
        if not use_scheduler:
            return optimizer

        scheduler = torch.optim.lr_scheduler.ExponentialLR(
            optimizer, gamma=0.96
        )
        return optimizer, scheduler

# Use the GPU when one is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# The number of classes is the number of distinct values in the first label column.
num_cls = len(pd.unique(extracted_train['0class']))
bert = BERTmy(num_cls)
if torch.cuda.is_available():
    bert = bert.cuda()
# The optimizer is created after the model has been moved to its device.
optimizer, scheduler = bert.configure_optimizer(use_scheduler=True)

### Инициализируем class для нашего датасета

In [None]:
train_batch_size = 32
val_batch_size = 16

class SentimentData(Dataset):
    """
    Dataset over a table that holds a text column and the label columns
    '0class' and '1class'.

    In 'train' mode the first ``split_param`` share of the rows is kept, in
    'val' mode the remaining rows, in 'test' mode the whole table.
    """

    def __init__(
        self, dataframe: pd.DataFrame, mode: str,
        col_name: str, split_param: float=0.9
    ) -> None:
        self.mode = mode  # 'train', 'val' or 'test'
        self.data = dataframe  # rows served by this dataset
        self.col_name = col_name  # name of the text column

        n_rows = self.data.shape[0]
        if self.mode == 'train':
            self.data = self.data.iloc[:int(n_rows * split_param)]
        elif self.mode == 'val':
            self.data = self.data.iloc[int(n_rows * split_param):]

        assert self.mode in ['val', 'train', 'test']

    def __len__(self) -> int:
        """Number of rows kept for this mode."""
        return self.data.shape[0]

    def __getitem__(
        self, index: int
    ) -> Dict[str, Union[str, torch.Tensor]]:
        """Return the text and both targets (as long tensors) of one row."""
        row = self.data.iloc[index]
        first_target = torch.tensor(row['0class'], dtype=torch.long)
        second_target = torch.tensor(row['1class'], dtype=torch.long)

        return {
            'text': row[self.col_name],
            'target1': first_target,
            'target2': second_target
        }

### Инициализируем наши DataLoaders

In [None]:
# split_param=1.0 keeps every training row in the 'train' dataset.
train = SentimentData(
    dataframe=extracted_train,
    split_param=1.0,
    mode='train',
    col_name='text'
)

# Built with the default split_param; its loader is commented out below.
# NOTE(review): these rows are also part of `train` above - confirm that is intended.
val = SentimentData(
    dataframe=extracted_train,
    mode='val',
    col_name='text'
)

test = SentimentData(
    dataframe=extracted_test,
    mode='test',
    col_name='text'
)

train_loader = DataLoader(train, batch_size=train_batch_size, shuffle=True)
# val_loader = DataLoader(val, batch_size=val_batch_size, shuffle=False)
# Only the training loader is handed to the training loop.
loaders = {
    'train': train_loader,
    # 'val': val_loader
}

### Дообучение модели

In [None]:
# Module-level alias of the model's tokenizer; the training loop below reads it.
rubert_tokenizer = bert.tokenizer


def train_model(
    epochs: int, model: torch.nn.Module, loaders: Dict[str, DataLoader],
    optimizer: torch.optim.Optimizer, scheduler: Any
) -> torch.nn.Module:
    """
    Fine-tune the model and print loss and accuracy per epoch and mode.

    epochs: number of passes over the loaders
    model: network called as model(input_ids, attention_mask, token_type_ids)
           and returning logits
    loaders: mapping with a 'train' DataLoader and optionally a 'val' one;
             batches are dicts with 'text', 'target1' and 'target2'
    optimizer: optimizer stepped after every training batch
    scheduler: learning-rate scheduler stepped once per epoch
    returns: the same model object

    A tweet may carry two different labels. The loss is the mean of the
    cross-entropies against both targets (which equals the plain
    cross-entropy when the targets coincide) and a prediction counts as
    correct when it matches either target.
    """
    loss_function = torch.nn.CrossEntropyLoss()

    # training always runs; validation only when a 'val' loader was supplied
    steps_per_epoch = [('train', loaders['train'])]
    if 'val' in loaders:
        steps_per_epoch.append(('val', loaders['val']))

    for epoch in range(epochs):
        for mode, loader in steps_per_epoch:
            is_train = mode == 'train'
            model.train(is_train)  # train(False) is the same as eval()

            # running statistics for this loader
            total_loss = 0.0
            n_correct = 0
            processed_data = 0

            for data in tqdm(loader):
                # build the model inputs
                inputs = rubert_tokenizer(
                    data['text'], padding=True, truncation=True,
                    add_special_tokens=True, return_tensors='pt'
                )
                ids = inputs['input_ids'].to(device)
                mask = inputs['attention_mask'].to(device)
                token_type_ids = inputs["token_type_ids"].to(device)
                target1 = data['target1'].to(device)
                target2 = data['target2'].to(device)

                if is_train:
                    optimizer.zero_grad()

                # gradients are only tracked while training
                with torch.set_grad_enabled(is_train):
                    outputs = model(ids, mask, token_type_ids)
                    loss = 0.5 * (
                        loss_function(outputs, target1)
                        + loss_function(outputs, target2)
                    )
                    if is_train:
                        loss.backward()
                        optimizer.step()

                preds = outputs.detach().argmax(dim=1)
                batch_size = outputs.size(0)
                total_loss += loss.item() * batch_size
                # With two different targets a prediction cannot equal both,
                # so "correct" means matching at least one of them.
                n_correct += ((preds == target1) | (preds == target2)).sum().item()
                processed_data += batch_size

            if processed_data == 0:
                # nothing to average over - skip the report instead of dividing by zero
                print(f'{epoch + 1} epoch with {mode} mode has: no data')
                continue

            # loss and accuracy over the whole loader
            loader_loss = total_loss / processed_data
            loader_acc = n_correct / processed_data
            print(f'{epoch + 1} epoch with {mode} mode has: {loader_loss} loss, {loader_acc} acc')

        # one scheduler step per epoch
        scheduler.step()

    return model

In [None]:
# Number of fine-tuning epochs; the trained model replaces `bert`.
epochs = 12
bert = train_model(epochs, bert, loaders, optimizer, scheduler)

In [None]:
# Ask whether to load previously saved weights ('y') or to save the current ones ('n').
mode_process = input('Load weights? (y/n)')
if mode_process == 'n':
    torch.save(bert.state_dict(), 'bert_weights_pooled.pth')
elif mode_process == 'y':
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only host
    bert.load_state_dict(torch.load(
        '/kaggle/input/bert-weights-better/bert_weights_pooled.pth',
        map_location=device
    ))
else:
    # any other answer is rejected
    assert mode_process in ['n', 'y']
bert.eval()
# A bare None as the last expression keeps the notebook cell from echoing a value.
None

### Вычисление итоговых показателей

In [None]:
def calculate_accuracy(
    model: torch.nn.Module, SentimentData:Dataset
) -> float:
    """
    Accuracy of the model over a dataset.

    model: network exposing a ``tokenizer`` attribute and returning logits
           for (input_ids, attention_mask, token_type_ids)
    SentimentData: dataset yielding dicts with 'text', 'target1', 'target2'
    returns: share of samples whose prediction matches one of the sample's
             targets; 0.0 for an empty dataset

    A sample whose two targets differ used to require the prediction to
    equal both of them at once, which can never hold; matching either target
    now counts as correct.
    """
    model.eval()
    loader = DataLoader(SentimentData, batch_size=10, shuffle=False)
    n_correct = 0
    processed_data = 0

    for data in tqdm(loader):
        # truncation keeps over-long texts within the model limit, as in training
        inputs = model.tokenizer(
            data['text'], padding=True, truncation=True,
            add_special_tokens=True, return_tensors='pt'
        )
        ids = inputs['input_ids'].to(device)
        mask = inputs['attention_mask'].to(device)
        token_type_ids = inputs["token_type_ids"].to(device)
        target1 = data['target1'].to(device)
        target2 = data['target2'].to(device)

        with torch.no_grad():
            outputs = model(ids, mask, token_type_ids)

        preds = outputs.argmax(dim=1)
        n_correct += ((preds == target1) | (preds == target2)).sum().item()
        processed_data += outputs.size(0)

    if processed_data == 0:
        return 0.0
    return n_correct / processed_data

def calculate_f1_class(
    model: torch.nn.Module, SentimentData: Dataset, class_num: int
) -> float:
    """
    One-vs-rest F1 score of a single class, measured against 'target1'.

    model: network exposing a ``tokenizer`` attribute and returning logits
    SentimentData: dataset yielding dicts with 'text' and 'target1'
    class_num: index of the class treated as positive
    returns: F1 of the class; 0.0 when it has no true positives, false
             positives or false negatives at all (instead of dividing by zero)
    """
    model.eval()
    loader = DataLoader(SentimentData, batch_size=10, shuffle=False)
    true_positive = 0
    false_positive, false_negative = 0, 0

    for data in tqdm(loader):
        # truncation keeps over-long texts within the model limit, as in training
        inputs = model.tokenizer(
            data['text'], padding=True, truncation=True,
            add_special_tokens=True, return_tensors='pt'
        )
        ids = inputs['input_ids'].to(device)
        mask = inputs['attention_mask'].to(device)
        token_type_ids = inputs["token_type_ids"].to(device)
        target1 = data['target1'].to(device)

        with torch.no_grad():
            outputs = model(ids, mask, token_type_ids)

        predicted = outputs.argmax(dim=1) == class_num
        actual = target1 == class_num

        true_positive += int((predicted & actual).sum())
        false_positive += int((predicted & ~actual).sum())
        false_negative += int((~predicted & actual).sum())

    # 2*P*R / (P + R) written over the raw counts
    denominator = 2 * true_positive + false_positive + false_negative
    if denominator == 0:
        return 0.0
    return 2 * true_positive / denominator

In [None]:
# Accuracy and per-class F1 on the test set; class indices 0/1/2 are named neg/neu/pos here.
test_acc = calculate_accuracy(bert, test)
class_neg_f1 = calculate_f1_class(bert, test, 0)
class_neu_f1 = calculate_f1_class(bert, test, 1)
class_pos_f1 = calculate_f1_class(bert, test, 2)

In [None]:
# Overall accuracy and the F1 score of each class.
test_acc, class_neg_f1, class_neu_f1, class_pos_f1

### Backdoor attacks on neural network (adversarial examples)

#### USE metric for similarity between original sentence and spoiled sentence

In [None]:
def use_score(original, adversial, use_bert_encoder=False, model=None):
    """
    Cosine similarity between the embeddings of original and modified texts.

    original: iterable of source texts
    adversial: iterable of modified texts, paired with ``original`` by position
               (extra items of the longer iterable are ignored)
    use_bert_encoder: when False the Universal Sentence Encoder from TF Hub
                      is used, otherwise the pooled output of ``model.rubert``
    model: object exposing ``tokenizer`` and ``rubert``; required when
           use_bert_encoder is True
    returns: (list of per-pair similarities, their mean); ([], nan) when
             there are no pairs
    raises: ValueError if use_bert_encoder is True and no model is given
    """
    from scipy.spatial.distance import cosine

    pairs = list(zip(original, adversial))
    if not pairs:
        return [], float('nan')

    if not use_bert_encoder:
        # DAN-based Universal Sentence Encoder from TF Hub
        use_encoder = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

        # embed both sides in one batch each
        sentences_orig_emb = np.asarray(use_encoder([orig for orig, _ in pairs]))
        sentences_adv_emb = np.asarray(use_encoder([adv for _, adv in pairs]))

        use_scores = [
            float(1 - cosine(orig_emb, adv_emb))
            for orig_emb, adv_emb in zip(sentences_orig_emb, sentences_adv_emb)
        ]
    else:
        if model is None:
            raise ValueError('model is required when use_bert_encoder=True')

        def embed(text):
            """Pooled encoder output of one text as a 1-D numpy vector."""
            inputs = model.tokenizer(
                text, padding=True, truncation=True,
                add_special_tokens=True,
                return_tensors='pt'
            )
            ids = inputs['input_ids'].type(torch.long).to(device)
            mask = inputs['attention_mask'].type(torch.long).to(device)
            token_type_ids = inputs["token_type_ids"].type(torch.long).to(device)
            with torch.no_grad():
                pooled = model.rubert(ids, mask, token_type_ids)[1]
            # The pooled output carries a leading batch axis; the distance
            # function needs flat vectors, so the array is flattened.
            return pooled.cpu().numpy().reshape(-1)

        use_scores = [
            float(1 - cosine(embed(orig), embed(adv)))
            for orig, adv in pairs
        ]

    return use_scores, np.mean(use_scores)

### Prepare data for adversarial example generation

In [None]:
# Pick the texts used to generate adversarial examples, keeping the original class proportion.
# Per-class caps on the number of selected rows.
limit_neu = 1300
limit_pos = 270
limit_neg = 550
# Rows are selected by the first label column only.
adversial_examples_pos = extracted_test[extracted_test['0class'] == 2]
adversial_examples_neu = extracted_test[extracted_test['0class'] == 1]
adversial_examples_neg = extracted_test[extracted_test['0class'] == 0]

# head() keeps the first rows of each class in table order.
adversial_examples_pos = adversial_examples_pos.head(limit_pos)
adversial_examples_neu = adversial_examples_neu.head(limit_neu)
adversial_examples_neg = adversial_examples_neg.head(limit_neg)

adversial_examples = pd.concat([adversial_examples_pos, adversial_examples_neu, adversial_examples_neg])
# sample(frac=1) shuffles the rows; no random_state is passed, so the order differs between runs.
adversial_examples_char = adversial_examples.sample(frac=1)

print('Размер текста для генерации: ', len(adversial_examples_char))
print('Баланс классов: ')
print(np.unique(adversial_examples_char['0class'], return_counts=True))

### Work with word importance

In [None]:
def gather_back_tokens(tokens: List[str], tokens_type: str) -> str:
    """
    Join tokens back into a sentence.

    tokens: list of tokens; dicts with 'text'/'start'/'stop' keys for
            'natasha', objects with the same attributes for 'razdel'
    tokens_type: 'natasha' or 'razdel'
    returns: the token texts, separated by as many spaces as the distance
             between the end of one token and the start of the next
    """
    assert tokens_type in ['razdel', 'natasha']

    pieces = []
    cursor = None  # stop offset of the previously emitted token
    for token in tokens:
        if tokens_type == 'natasha':
            text, begin, end = token['text'], token['start'], token['stop']
        else:
            text, begin, end = token.text, token.start, token.stop

        # the gap before the very first token is not reproduced
        if cursor is not None:
            pieces.append(' ' * (begin - cursor))

        pieces.append(text)
        cursor = end

    return ''.join(pieces)


# get inputs for model
def get_inputs(text):
    """
    Tokenize text with the classifier's tokenizer.

    returns: tuple (input_ids, attention_mask, token_type_ids) of long
             tensors placed on ``device``
    """
    encoded = bert.tokenizer(
        text, padding=True, truncation=True,
        add_special_tokens=True, return_tensors='pt'
    )

    def prepared(field):
        # cast to long and move to the target device
        return encoded[field].type(torch.long).to(device)

    return prepared('input_ids'), prepared('attention_mask'), prepared("token_type_ids")


def predict_text(text):
    """
    Prediction function for LIME: class probabilities of the given text(s).

    returns: numpy array of softmax probabilities, one row per input text
    """
    model_inputs = get_inputs(text)
    with torch.no_grad():
        logits = bert(*model_inputs)

    probabilities = torch.nn.functional.softmax(logits, dim=1)
    return probabilities.cpu().detach().numpy()


def RazdelSplit(text):
    """Split text with razdel's tokenizer and return the token strings."""
    pieces = []
    for razdel_token in tokenize(text):
        pieces.append(razdel_token.text)
    return pieces

def NatashaSplit(text):
    """
    Split lower-cased text with natasha's segmenter and return the token strings.

    The tokens are read from ``Doc.tokens`` and their ``text`` attribute;
    the document object itself is neither a token sequence nor are its
    tokens subscriptable.
    """
    segmenter = Segmenter()
    text_doc = Doc(text.lower())
    text_doc.segment(segmenter)

    return [nat_tok.text for nat_tok in text_doc.tokens]


# get words score to final output
def _lime_token_scores(tokens, tokens_type, text, num_samples, num_features):
    """Score every token by the absolute weight LIME assigns to its text."""
    splitter = RazdelSplit if tokens_type == 'razdel' else NatashaSplit
    explainer = LimeTextExplainer(
        class_names=['Neg', 'Neu', 'Pos'],
        split_expression=splitter
    )
    explanation = explainer.explain_instance(
        text, predict_text,
        num_features=num_features, num_samples=num_samples
    )
    # mapping from token text to the weight of the local linear model
    tok2weight = dict(explanation.as_list())

    scored = []
    for token in tokens:
        token_text = token.text if tokens_type == 'razdel' else token['text']
        # a token missing from the explanation (e.g. cut off by num_features)
        # is treated as unimportant instead of raising KeyError
        weight = tok2weight.get(
            token_text, tok2weight.get(token_text.lower(), 0.0)
        )
        scored.append((token, abs(weight)))
    return scored


def _loss_token_scores(tokens, tokens_type, text, target):
    """Score every token by how much the loss grows when the token is removed."""
    loss = torch.nn.CrossEntropyLoss()
    target_tensor = torch.tensor([int(target)], dtype=torch.long)

    scored = []
    with torch.no_grad():
        # loss of the unmodified text
        base_loss = loss(bert(*get_inputs(text)).cpu(), target_tensor).item()

        for idx, token in enumerate(tokens):
            # text without the current token
            reduced_text = gather_back_tokens(
                tokens[:idx] + tokens[idx + 1:], tokens_type=tokens_type
            )
            reduced_loss = loss(
                bert(*get_inputs(reduced_text)).cpu(), target_tensor
            ).item()
            scored.append((token, reduced_loss - base_loss))
    return scored


# get words score to final output
def extract_essential_words(
    tokens: List[Any], target: int, tok_imoprtance: str,
    tokens_type: str, num_samples: int=850, num_features: int=150
) -> List[Any]:
    """
    Return the tokens ordered by decreasing importance for the classifier.

    The returned items are the very token objects that were passed in:
    natasha-style dicts stay dicts, razdel tokens stay razdel tokens.

    tokens: list of tokens of one text
    target: class index of the text (used by the 'loss' scoring)
    tok_imoprtance: 'loss' (loss increase when the token is removed) or
                    'lime' (absolute LIME weight); 'shap' is accepted by the
                    validation but not implemented
    tokens_type: 'razdel' or 'natasha'
    num_samples, num_features: passed to LIME's explain_instance
    raises: NotImplementedError for tok_imoprtance == 'shap'
    """

    assert tok_imoprtance in ['loss', 'lime', 'shap']
    assert tokens_type in ['razdel', 'natasha']

    # rebuild the text from its tokens
    text_to_explain = gather_back_tokens(tokens, tokens_type)

    if tok_imoprtance == 'lime':
        scored = _lime_token_scores(
            tokens, tokens_type, text_to_explain, num_samples, num_features
        )
    elif tok_imoprtance == 'loss':
        scored = _loss_token_scores(
            tokens, tokens_type, text_to_explain, target
        )
    else:
        # The earlier SHAP draft relied on names that are not defined in this
        # file and never produced scores, so it is reported explicitly.
        raise NotImplementedError('SHAP-based token importance is not implemented')

    # most important tokens first; only the tokens themselves are returned
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return [token for token, _ in scored]


def extract_random_words(
    tokens: List[str]
) -> List[Tuple[str, int]]:
    """
    Return the same tokens in a random order drawn from numpy's global RNG.
    """
    order = np.random.permutation(len(tokens))

    shuffled = []
    for position in order:
        shuffled.append(tokens[position])
    return shuffled

### word-level attacks

In [None]:
# Load the pretrained word2vec vectors ('word2vec-ruscorpora-300') through gensim's downloader.
from gensim.downloader import load
rus_vectors = load('word2vec-ruscorpora-300')

In [None]:
# функция для генерации порчи уровня слов
def _pick_synonym(normal_form, token_tag, dist2synonym, synonyms_graph):
    """Return a replacement for a lemma (string) or None when none is found."""
    # keys of the vector model look like '<lemma>_<TAG>'
    rus_vectors_word = normal_form + f'_{token_tag}'
    try:
        neighbours = rus_vectors.most_similar(rus_vectors_word, topn=500)
    except KeyError:
        # the lemma is not in the vector model
        neighbours = None

    if neighbours is not None:
        # keep the neighbours carrying the same tag; most_similar yields
        # (word, similarity) pairs and only the word is needed
        same_tag = [
            word for word, _ in neighbours
            if '_' in word and word.split('_')[1] == token_tag
        ]
        if dist2synonym < len(same_tag):
            return same_tag[dist2synonym]
        return None

    # fall back to the synonym graph
    try:
        if not synonyms_graph.is_in_dictionary(normal_form):
            return None
        candidates = list(synonyms_graph.get_list(normal_form))
    except Exception:
        # best effort: a failed dictionary lookup means "no synonym"
        return None

    if dist2synonym < len(candidates):
        return candidates[dist2synonym]
    return None


# generate word-level perturbations
def extract_spoiled_text_word_level(
        dataframe: pd.DataFrame,
        wordlen: int=2, dist2synonym: int=0,
        word2subs: int=1, word_importance: str='loss'
    ) -> List[List[Optional[Tuple[int, int, str]]]]:
    """
    For every text pick words to replace and a synonym for each of them.

    dataframe: table with the columns 'text' and '0class'
    wordlen: only words longer than this many characters are replaced
    word_importance: 'random', 'loss' or 'lime' - how the words to replace
                     are ordered
    word2subs: how many words to replace per text
    dist2synonym: position of the chosen synonym in the list of candidates
    returns: one list per text with exactly word2subs entries; an entry is
             (start, stop, synonym) with the offsets of the replaced word in
             the lower-cased text, or None when no replacement was found
    """

    assert word_importance in ['random', 'loss', 'lime']
    assert wordlen >=2
    assert dist2synonym >= 0
    assert word2subs >= 1

    from ru_synonyms import SynonymsGraph
    sg = SynonymsGraph()

    # replacements for every text
    words2change = list()
    # morphological analyzer (lemmatization)
    morph = pymorphy2.MorphAnalyzer()
    # natasha's embeddings, morph tagger and segmenter
    emb = NewsEmbedding()
    morph_tagger = NewsMorphTagger(emb)
    segmenter = Segmenter()

    rows = zip(dataframe['text'], dataframe['0class'])
    for sent, target1 in tqdm(rows, total=len(dataframe)):
        # tokenize and tag the lower-cased text
        sent_doc = Doc(sent.lower())
        sent_doc.segment(segmenter)
        sent_doc.tag_morph(morph_tagger)

        natasha_tokens = [
            {
                'start': token.start,
                'stop': token.stop,
                'text': token.text,
                'tag': token.pos
            }
            for token in sent_doc.tokens
        ]

        # replacements found for this text
        sub_word = list()

        # texts with more than one token are processed; shorter ones get only placeholders
        if len(natasha_tokens) > 1:
            if word_importance == 'random':
                words2spoil_order = extract_random_words(natasha_tokens)
            else:
                words2spoil_order = extract_essential_words(
                    natasha_tokens, target1, word_importance, tokens_type='natasha'
                )

            for natasha_word in words2spoil_order:
                if len(sub_word) >= word2subs:
                    break
                token = natasha_word['text']
                if len(token) <= wordlen:
                    continue

                normal_form_token = morph.parse(token)[0].normal_form
                word_synonym = _pick_synonym(
                    normal_form_token, natasha_word['tag'], dist2synonym, sg
                )
                if word_synonym is not None:
                    # remember the synonym and the offsets of the replaced word
                    sub_word.append(
                        (natasha_word['start'], natasha_word['stop'], word_synonym)
                    )

        # pad with None when not enough words could be replaced
        sub_word.extend([None] * (word2subs - len(sub_word)))
        words2change.append(sub_word)

    return words2change

In [None]:
# How many words to replace and which synonym positions to try.
word_changes = [1]
positions = [0, 1, 2, 3, 4]
# Filled by the generation loop below.
text_word_changes = dict()
word_importance = 'random'
# The whole test set is used for the word-level attack.
adversial_examples_word = extracted_test

In [None]:
def clean_up_synonyms(words2change):
    """
    Strip service markup from the synonyms selected for every text.

    Each element of ``words2change`` holds the substitutions for one text.
    A substitution is either ``None`` (nothing to replace) or a
    ``(start, stop, synonym)`` triple. ``synonym`` may be a plain string or a
    ``(word, score)`` pair as returned by a ``most_similar``-style lookup;
    the word itself may look like ``lemma_TAG`` or ``prefix::lemma_TAG``.

    Returns a list of the same shape in which every triple carries only the
    bare lemma; ``None`` placeholders are kept in place.
    """
    def clean_change(change):
        # Placeholders for words that could not be replaced stay untouched.
        if change is None:
            return None
        start, stop, synonym = change
        # Vector-model lookups yield (word, score) pairs: keep the word only.
        if isinstance(synonym, (tuple, list)):
            synonym = synonym[0]
        # Drop the "_TAG" suffix first, then an optional "prefix::" part.
        lemma = synonym.split('_')[0]
        lemma_parts = lemma.split('::')
        lemma = lemma_parts[1] if len(lemma_parts) > 1 else lemma_parts[0]
        return (start, stop, lemma)

    return [
        [clean_change(change) for change in change_in_sent]
        for change_in_sent in words2change
    ]

# Collect cleaned synonym substitutions for every configured combination of
# "number of words to replace" and "synonym position".
for word_change in word_changes:
    for position in positions:
        # pick the raw synonyms for each text
        words2change = extract_spoiled_text_word_level(
            bert,
            adversial_examples_word,
            word_importance=word_importance,
            word2subs=word_change,
            dist2synonym=position,
        )
        # strip the markup and store the result under its configuration key
        text_word_changes[(word_importance, word_change, position)] = (
            clean_up_synonyms(words2change)
        )

In [None]:
# Load the pretrained ruBERT checkpoint for the masked-language-modelling task.
from transformers import AutoModelForMaskedLM
from transformers import AutoTokenizer

MLM_tokenizer = AutoTokenizer.from_pretrained("ai-forever/ruBert-large")
MLM = AutoModelForMaskedLM.from_pretrained("ai-forever/ruBert-large")
# Move the model to the device used by the rest of the notebook.
MLM = MLM.to(device)

In [None]:
def extract_mask_from_synonyms(
    mlm_model: transformers.AutoModel, mlm_tok: transformers.AutoTokenizer, 
    dataframe: pd.DataFrame,
    text_word_changes: Dict[Tuple[str, int, int], List[List[Optional[Tuple[int, int, str]]]]],
    iter2find: int=5
) -> Dict[Tuple[str, int, int], List[List[Tuple[int, int, str]]]]:
    """
    Give every lemmatized synonym an ending that fits its sentence.

    For each substitution ``(start, stop, synonym)`` the synonym is placed
    into the source text with its tail replaced by ``[MASK]`` and the masked
    language model predicts the missing piece.

    Args:
        mlm_model: masked-LM model; its output must expose ``.logits``.
        mlm_tok: tokenizer paired with ``mlm_model``.
        dataframe: frame whose ``'text'`` column holds the source texts, in
            the same order as the per-text lists in ``text_word_changes``.
        text_word_changes: maps a configuration key to a list (one entry per
            text) of substitutions; ``None`` entries mean "nothing to replace".
        iter2find: maximum number of trailing characters that may be cut off
            a single-token synonym while looking for the best ending.

    Returns:
        A dict with the same keys; each value is a list (one entry per text)
        of ``(start, stop, synonym_with_ending)`` triples. ``None`` entries
        are dropped, so the inner lists may be shorter than the input ones.
    """
    def pass_the_model(part_lemmatized_sent: str, return_prob: bool=False):
        """
        Predict the token hidden behind [MASK] in ``part_lemmatized_sent``.

        Returns the decoded token with '#' characters removed, or a
        ``(token, probability)`` pair when ``return_prob`` is True. If no
        [MASK] is left after tokenization (e.g. it was truncated away),
        returns ``None`` or ``(None, 0.0)`` respectively.
        """
        inputs = mlm_tok(
            part_lemmatized_sent, 
            truncation=True, 
            return_tensors='pt'
        )
        ids = inputs['input_ids'].to(device)
        mask = inputs['attention_mask'].to(device)
        token_type_ids = inputs['token_type_ids'].to(device)
        mask_token_index = (inputs['input_ids'] == mlm_tok.mask_token_id)[0].nonzero(as_tuple=True)[0]

        # Nothing to predict when the mask did not survive tokenization.
        if mask_token_index.numel() == 0:
            return (None, 0.0) if return_prob else None
        # Exactly one mask is inserted by the caller; if the text itself
        # happened to contain more, the first one is used.
        mask_position = int(mask_token_index[0])

        with torch.no_grad():
            logits = mlm_model(
                input_ids=ids,
                attention_mask=mask,
                token_type_ids=token_type_ids
            ).logits

        # Only the row at the mask position is needed on the CPU.
        logits_mask = logits[0, mask_position].detach().cpu()
        # probabilities over the vocabulary
        probs = torch.nn.functional.softmax(logits_mask, dim=0)
        predicted_token_id = probs.argmax(dim=-1)
        # remove the word-piece continuation markers
        new_token = mlm_tok.decode(predicted_token_id).replace('#', '')

        if return_prob:
            # .item() turns the 0-dim tensor into a plain Python float
            return new_token, probs[predicted_token_id].item()
        return new_token

    def splice(text, start: int, stop: int, replacement: str) -> str:
        """Return ``text`` with characters [start:stop] replaced by ``replacement``."""
        chars = list(text)
        chars[start:stop] = replacement
        return ''.join(chars)

    # source texts
    original_text = dataframe['text']
    # same substitutions, but with endings that fit the context
    text_word_changes_ended = dict()

    # go through every substitution configuration
    for change_description, all_sub_words in text_word_changes.items():
        words2change_ended = list()
        # go through the (substitutions, text) pairs
        for sub_words, text in tqdm(zip(all_sub_words, original_text)):
            sub_words_ended = list()
            for sub_word in sub_words:
                if sub_word is None:
                    # nothing to replace
                    continue
                start, stop, synonym = sub_word
                synonym_tokens = [
                    token.replace('#', '') for token in mlm_tok.tokenize(synonym)
                ]
                if len(synonym_tokens) > 1:
                    # The tokenizer split the synonym into several pieces:
                    # mask the last piece and let the model predict it.
                    stem = ''.join(synonym_tokens[:-1])
                    synonym_end = pass_the_model(
                        splice(text, start, stop, stem + '[MASK]')
                    )
                    if synonym_end is None:
                        # no prediction available: keep the lemma as is
                        sub_words_ended.append((start, stop, synonym))
                    else:
                        sub_words_ended.append((start, stop, stem + synonym_end))
                else:
                    # The synonym is a single piece: cut i trailing characters
                    # (i = 0, 1, ...), put [MASK] in their place and remember
                    # how confident the model is about each candidate ending.
                    tokens_prob = list()
                    for i in range(iter2find):
                        # stop once there is nothing left to cut
                        if len(synonym) < i + 2:
                            break
                        synonym_masked = synonym[:len(synonym) - i] + '[MASK]'
                        new_token, token_prob = pass_the_model(
                            splice(text, start, stop, synonym_masked), True
                        )
                        if new_token is None:
                            continue
                        tokens_prob.append((new_token, token_prob, i))

                    if tokens_prob:
                        # most probable ending; the earliest candidate wins ties
                        best_token, _, symbols2delete = max(
                            tokens_prob, key=lambda x: x[1]
                        )
                        ended = synonym[:len(synonym) - symbols2delete] + best_token
                        sub_words_ended.append((start, stop, ended))
                    else:
                        sub_words_ended.append((start, stop, synonym))

            words2change_ended.append(sub_words_ended)

        text_word_changes_ended[change_description] = words2change_ended

    return text_word_changes_ended

In [None]:
# Give the cleaned synonyms context-appropriate endings with the masked LM.
text_word_changes_ended = extract_mask_from_synonyms(
    mlm_model=MLM,
    mlm_tok=MLM_tokenizer,
    dataframe=adversial_examples_word,
    text_word_changes=text_word_changes,
)

In [None]:
def get_scores_word_spoiled_text(
    model: torch.nn.Module, dataframe: pd.DataFrame, 
    text_word_changes_ended: Dict[Tuple[str, int, int], List[List[Tuple[int, int, str]]]]
) -> Tuple[Dict[str, float], Dict[str, float], Dict[str, float], pd.DataFrame]:
    """
    Build word-level adversarial texts and score them.

    For every configuration in ``text_word_changes_ended`` the substitutions
    are applied to ``dataframe['text']``, the result is stored in a new
    column named ``'<key parts joined>_WordSpoiledText'``, and that column is
    scored with ``use_score`` (twice) and ``calculate_accuracy``.

    Args:
        model: passed to ``use_score`` and to ``calculate_accuracy``.
        dataframe: frame with a ``'text'`` column; it is modified in place
            (one new column per configuration).
        text_word_changes_ended: maps a configuration key to a list (one
            entry per text) of ``(start, stop, replacement)`` substitutions.

    Returns:
        ``(dan_scores, bert_scores, acc_scores, dataframe)`` where the three
        dicts are keyed by the new column names.
    """
    def sub_source_text(
        original: pd.Series, subs_ended: List[List[Tuple[int, int, str]]]
    ) -> List[str]:
        """Apply each text's substitutions and return the modified texts."""
        adversial = list()
        # go through the (text, its substitutions) pairs
        for orig_sent, sub_words in tqdm(zip(original, subs_ended)):
            orig_sent = list(orig_sent)
            # offset accumulated by earlier replacements of different length
            shift = 0
            # The running offset is only valid when replacements are applied
            # left to right, so order them by start position; placeholders
            # for words that could not be replaced are skipped.
            sub_words_sorted = sorted(
                (sub_word for sub_word in sub_words if sub_word is not None),
                key=lambda x: x[0]
            )
            for start, stop, synonym_ended in sub_words_sorted:
                # insert the synonym in place of the original word
                orig_sent[start + shift:stop + shift] = synonym_ended
                shift += len(synonym_ended) - (stop - start)
            adversial.append(''.join(orig_sent))
        return adversial

    original_text = dataframe['text']
    dan_scores = dict()
    bert_scores = dict()
    acc_scores = dict()

    for change_description, words2changes_ended in text_word_changes_ended.items():
        adversial_text = sub_source_text(original_text, words2changes_ended)

        # Key parts may be ints, so convert them before joining. The empty
        # separator is kept so that all-string keys yield the same column
        # names as before.
        col_unique_part = ''.join(map(str, change_description))
        col_name = f'{col_unique_part}_WordSpoiledText'
        dataframe[col_name] = adversial_text
        # similarity between source and modified texts, BERT encoder
        _, use_result_word_bert = use_score(
            dataframe['text'],
            dataframe[col_name],
            use_bert_encoder=True,
            model=model
        )
        # similarity between source and modified texts, default encoder
        _, use_result_word = use_score(
            dataframe['text'],
            dataframe[col_name]
        )

        sentidata = SentimentData(
            dataframe=dataframe,
            mode='test',
            col_name=col_name
        )
        # model accuracy on the modified texts
        spoiled_accuracy_word = calculate_accuracy(model, sentidata)
        # store the results
        dan_scores[col_name] = use_result_word
        bert_scores[col_name] = use_result_word_bert
        acc_scores[col_name] = spoiled_accuracy_word

    return dan_scores, bert_scores, acc_scores, dataframe

In [None]:
# Score the word-level adversarial texts; the returned frame also carries the
# generated text columns.
(
    dan_scores_word,
    bert_scores_word,
    acc_scores_word,
    adversial_examples_word,
) = get_scores_word_spoiled_text(
    model=bert,
    dataframe=adversial_examples_word,
    text_word_changes_ended=text_word_changes_ended,
)