In [1]:
# Number GPUs by PCI bus position so that index 0 refers to the same physical card on every run.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
# Expose only GPU 0 to this kernel.
# NOTE(review): this presumably has to run before torch first initialises CUDA to take effect - confirm.
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=0


In [None]:
# Install the third-party packages into the kernel's environment (%pip targets the running kernel).
# NOTE(review): only torchtext is version-pinned; the other packages resolve to whatever is
# current at install time, so results may not be reproducible - consider pinning them.
# NOTE(review): underthesea and nltk do not appear to be used by the code visible in this
# notebook (the underthesea import is commented out) - confirm before removing.
%pip install captum
%pip install torchtext==0.8.1
%pip install transformers
%pip install SentencePiece
%pip install underthesea
%pip install nltk
%pip install protobuf

In [1]:
import pandas as pd
#from underthesea import word_tokenize, sent_tokenize, text_normalize
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from captum.attr import  LayerIntegratedGradients, visualization as viz, LayerConductance
from collections import defaultdict
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
#import seaborn as sns


class Colors:
    """ANSI escape sequences for decorating text printed to a terminal or notebook cell.

    Wrap a string as ``f"{Colors.OKCYAN}text{Colors.ENDC}"``; ``ENDC`` resets all
    attributes set by the other codes.
    """

    # Reset.
    ENDC = "\033[0m"

    # Text attributes.
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"

    # Bright foreground colours.
    FAIL = "\033[91m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    OKBLUE = "\033[94m"
    HEADER = "\033[95m"
    OKCYAN = "\033[96m"

In [2]:
# Load the annotated evaluation subset; the path is relative to the notebook's working directory.
test_subset_path = 'test_subset.csv'
df = pd.read_csv(test_subset_path)
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,user,instance_id,text,sentiment,negative object_spans,negative spans,positive spans,positive object_spans
0,ekaterini@gmail.com,898,Nur Politiker wollen Krieg ! Was wenn keiner m...,negative,"['Politiker ', 'Krieg ']","['Was wenn keiner mehr Bock hat', 'zu sterben'...",...,...
1,ekaterini@gmail.com,833,Der Klimawandel ist ein völlig normaler Prozes...,positive,...,...,völlig normaler Prozess,Klimawandel
2,ekaterini@gmail.com,1428,Man will die Stadthalle für einen #Bürgerdialo...,negative,Demokraten,"['missliebige Partei ', 'Was denken die']",...,...
3,ekaterini@gmail.com,431,"Besonders beliebt ist auch ""Wir mussten währen...",negative,Corona,die während Corona auch gern auf die Maske ver...,...,...
4,ekaterini@gmail.com,1315,Die USA sollten sich mäßigen Israel zu ermahne...,positive,...,...,handelt moralisch richtig,"['Israel ', 'Israel ']"
...,...,...,...,...,...,...,...,...
295,ekaterini@gmail.com,1134,Auch Israel begeht Kriegsverbrechen an der Ziv...,negative,Israel,"['begeht Kriegsverbrechen', 'aber Kritik an Is...",...,...
296,ekaterini@gmail.com,127,Sogenannte Zivilisten? geht's noch?,negative,...,geht's noch?,...,...
297,ekaterini@gmail.com,366,"Ganz normal, dass wir in Zeiten der Klimakrise...",negative,Klimakrise,"['hypokapitalistischen', 'Nice one, Humanity',...",...,...
298,ekaterini@gmail.com,473,Kleine Nachhilfe: Ein paar hunderttausend Grün...,negative,Grüne,eine Klatsche bekommen habt,...,...


In [5]:
# Prefer the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cpu


In [6]:
# Hub id (or local directory) of the fine-tuned sequence classifier to explain.
model_path = "Hina541/fine-tuned-xlm_roberta-german-WithNeutral"

# Load the classifier, move it to the selected device and switch to inference mode.
model = AutoModelForSequenceClassification.from_pretrained(model_path).to(device).eval()
# Clear any gradients before attribution runs.
model.zero_grad()

# NOTE(review): the tokenizer is loaded from a different repository than the model;
# presumably the fine-tuned checkpoint shares that vocabulary - confirm.
tokenizer_path = 'cardiffnlp/twitter-xlm-roberta-base-sentiment'
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

In [7]:
# Pull both directions of the class mapping from the model config and show them.
label2id, id2label = model.config.label2id, model.config.id2label
for mapping in (label2id, id2label):
    print(mapping)

{'negative': 0, 'neutral': 1, 'positive': 2}
{0: 'negative', 1: 'neutral', 2: 'positive'}


In [8]:
# Translate the textual sentiment into the model's integer class id.
# Sentiments missing from label2id become NaN in the new column.
sentiment_ids = df['sentiment'].map(label2id)
df['label'] = sentiment_ids
df

Unnamed: 0,user,instance_id,text,sentiment,negative object_spans,negative spans,positive spans,positive object_spans,label
200,ekaterini@gmail.com,389,Wieviele wurden von ihrem Arzt nach Hause gesc...,negative,"['Long Covid', 'Impfschäden']",['Wieviele wurden von ihrem Arzt nach Hause ge...,...,...,0
282,ekaterini@gmail.com,594,"Das Erzeugen von ""Klimaangst"" ist die elegante...",negative,Klimaangst,elegante Variante des Kindesmissbrauchs.,...,...,0
51,ekaterini@gmail.com,1431,Ich hab nachgedacht über die enorme Häufing vo...,negative,Impfschäden,"['ziemlicher Müll', 'es gibt richtig viele dum...",...,...,0


### Getting per-word attributions with Captum

In [9]:
class XAI:
    """Explain one sequence-classification prediction with Layer Integrated Gradients.

    The instance holds a single text and its gold label. ``visualize`` computes
    token-level attributions for the predicted class over the model's embedding
    layer, and ``get_topk_attributed_tokens`` regroups them by word.

    Args:
        text_: Raw input text.
        label_: Gold class id of the text.
        tokenizer_: Tokenizer matching ``model_``. ``get_topk_attributed_tokens``
            calls ``word_ids()`` on its output, so it must support that.
        model_: Sequence-classification model exposing ``roberta.embeddings`` and
            ``config.id2label``.
        device_: Device on which the input tensors are created.
    """

    def __init__(self, text_, label_, tokenizer_, model_, device_):
        self.text = text_
        self.label = label_
        self.tokenizer = tokenizer_
        self.model = model_
        # The pad token fills the baseline ("reference") sequence.
        self.ref_token_id = self.tokenizer.pad_token_id
        self.sep_token_id = self.tokenizer.sep_token_id
        self.cls_token_id = self.tokenizer.cls_token_id
        self.device = device_
        self.input_ids = None
        self.ref_input_ids = None
        # Token strings aligned with input_ids; filled by construct_input_ref().
        self.all_tokens = None

    def construct_input_ref(self):
        """Build the input and baseline id tensors and cache them on the instance.

        The baseline keeps the CLS/SEP positions and replaces every text token
        with the pad token, so both sequences have the same length.

        Returns:
            Tuple ``(input_ids, ref_input_ids)``, each of shape ``(1, seq_len)``.
        """
        text_ids = self.tokenizer.encode(self.text, add_special_tokens=False)
        input_ids = [self.cls_token_id] + text_ids + [self.sep_token_id]
        ref_input_ids = [self.cls_token_id] + [self.ref_token_id] * len(text_ids) + [self.sep_token_id]

        # Use the instance's device and tokenizer rather than notebook globals.
        self.input_ids = torch.tensor([input_ids], device=self.device)
        self.ref_input_ids = torch.tensor([ref_input_ids], device=self.device)
        self.all_tokens = self.tokenizer.convert_ids_to_tokens(input_ids)

        return self.input_ids, self.ref_input_ids

    def _class_probabilities(self, inputs):
        """Return softmax class probabilities of shape ``(batch, num_labels)``."""
        return torch.softmax(self.model(inputs)[0], dim=1)

    def custom_forward(self, inputs):
        """Return the class probabilities of the first sequence in ``inputs``.

        Returns:
            1-D tensor of length ``num_labels``.
        """
        return self._class_probabilities(inputs)[0]  # multi-class
        #return torch.sigmoid(self.model(inputs)[0])[0]  # binary

    def visualize(self, n_steps=500, internal_batch_size=3):
        """Attribute the predicted class to the input tokens and render the result.

        Args:
            n_steps: Number of integration steps for Integrated Gradients.
            internal_batch_size: Number of interpolated inputs per forward pass.

        Returns:
            1-D tensor with one L2-normalised attribution score per token
            (special tokens included).
        """
        self.construct_input_ref()
        id2label = self.model.config.id2label

        # The prediction itself needs no autograd graph.
        with torch.no_grad():
            score_bert = self.custom_forward(self.input_ids)
        prod_pred = score_bert.max()
        class_pred = score_bert.argmax()

        # Captum needs one output row per interpolated input plus an explicit
        # target class. Passing only the first row's probabilities would make it
        # differentiate values that always sum to 1, giving near-zero gradients.
        lig = LayerIntegratedGradients(self._class_probabilities, self.model.roberta.embeddings)
        attributions, delta = lig.attribute(inputs=self.input_ids,
                                            baselines=self.ref_input_ids,
                                            target=class_pred.item(),
                                            n_steps=n_steps,
                                            internal_batch_size=internal_batch_size,
                                            return_convergence_delta=True)

        # Collapse the embedding dimension to one score per token, then normalise.
        attributions = attributions.sum(dim=-1).squeeze()
        attributions_sum = attributions / torch.norm(attributions)

        print(f'{Colors.OKCYAN}Text:{Colors.ENDC} {self.text} \n'
              f'{Colors.OKCYAN}Predicted Probability:{Colors.ENDC} {prod_pred:,.2f}\n'
              f'{Colors.OKCYAN}Predicted Class:{Colors.ENDC} {class_pred} '
              f'({id2label[class_pred.item()]}) vs. True Class: {self.label} ({id2label[int(self.label)]})')

        score_vis = viz.VisualizationDataRecord(attributions_sum,
                                                pred_prob=prod_pred,
                                                pred_class=class_pred,
                                                true_class=self.label,
                                                attr_class=class_pred,
                                                attr_score=attributions_sum.sum(),
                                                raw_input_ids=self.all_tokens,
                                                convergence_score=delta)

        viz.visualize_text([score_vis])
        return attributions_sum

    def get_topk_attributed_tokens(self, attrs, k=5):
        """Rank tokens by attribution and regroup the scores by word.

        Args:
            attrs: 1-D tensor of per-token attributions aligned with ``input_ids``.
            k: Kept for interface compatibility. The full ranking is always
                returned because the per-word grouping needs every position.

        Returns:
            Tuple ``(ranking, words)``. ``ranking`` is a DataFrame with columns
            ``Word``, ``Index`` and ``Attribution``, sorted by descending
            attribution. ``words`` is a list of dicts with keys ``word``,
            ``attributions`` (one score per sub-token) and ``indices`` (the rank
            of each sub-token within ``ranking``).
        """
        if self.all_tokens is None:
            self.construct_input_ref()

        # Sort every position; asking topk for more elements than exist would raise.
        ranked = torch.topk(attrs.detach(), len(attrs))
        values, indices = [t.cpu().numpy() for t in ranked]
        top_tokens = [self.all_tokens[idx] for idx in indices]

        # Inverse permutation: token position -> rank in the sorted order.
        rank_of_position = np.empty(len(indices), dtype=np.int64)
        rank_of_position[indices] = np.arange(len(indices))

        encoded = self.tokenizer(self.text, padding=True, truncation=True, return_tensors="pt")
        word_ids = encoded.word_ids()
        input_ids = encoded["input_ids"].tolist()[0]

        # Collect the token positions of each word; special tokens have word id None.
        positions_by_word = {}
        for position, word_id in enumerate(word_ids):
            if word_id is not None:
                positions_by_word.setdefault(word_id, []).append(position)

        words = []
        for word_id in sorted(positions_by_word):
            positions = positions_by_word[word_id]
            token_ids = [input_ids[p] for p in positions]
            word = self.tokenizer.decode(token_ids, clean_up_tokenization_spaces=True).strip()
            ranks = [int(rank_of_position[p]) for p in positions]
            words.append({"word": word,
                          'attributions': [values[r] for r in ranks],
                          'indices': ranks})

        return pd.DataFrame({'Word': top_tokens, 'Index': indices, 'Attribution': values}), words

In [10]:
# Renumber the rows 0..n-1. drop=True discards the old index instead of inserting it
# as a new column, so re-running this cell leaves the frame unchanged.
df = df.reset_index(drop=True)

In [None]:
# Run the explainer on every row and collect the outputs. Each word with its
# attributions is stored in the 'Words' column of the resulting CSV.
results = []
for _, row in df.iterrows():
    text = row['text']
    # Take the label from the row itself so the lookup does not depend on the
    # frame having a 0..n-1 index.
    label = row['label']
    xai = XAI(text, label, tokenizer, model, device)
    attrs = xai.visualize()
    _, words = xai.get_topk_attributed_tokens(attrs, k=5)
    # Only the predicted class is needed here; skip building an autograd graph.
    with torch.no_grad():
        class_pred = xai.custom_forward(xai.input_ids).argmax()
    results.append({'Text': text,
                    'Actual Label': label,
                    'Predicted Label': class_pred.item(),
                    'Words': words,
                    'negative_object_spans': row['negative object_spans'],
                    'negative_spans': row['negative spans'],
                    'positive_object_spans': row['positive object_spans'],
                    'positive_spans': row['positive spans']})

# Create a new dataframe with the results and write it next to the notebook.
results_df = pd.DataFrame(results)
results_df.to_csv('results_test_subset_fine-tuned-xlm_roberta-german-Test.csv')