In [121]:
from transformers import BertTokenizer

In [122]:
import torch
import torch.nn as nn
#torch.cuda.set_device(1)
CUDA_DEVICE=5
SEED=1111
torch.cuda.set_device(CUDA_DEVICE)
torch.manual_seed(SEED)
device = torch.device("cuda:"+str(CUDA_DEVICE))

In [123]:
from torch.utils.data import DataLoader,TensorDataset
BATCH_SIZE=32
MAX_LEN=64
EPOCHS=5
NUM_CLASS=3
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [124]:
def pad_tokens(tokens,length):
    if len(tokens)<length:
        padding=[0]*(length-len(tokens))
        tokens=tokens+padding
    return tokens

In [125]:
import pickle as pkl
import numpy as np
data=pkl.load(open('/home/ruicao/NLP/textual/hate-speech-detection/angrybert/angry-MTL/split_data_tokens/dt.pkl','rb'))
train_mask=[]
train_input=[]
train_labels=[]
train_sent=[]
test_mask=[]
test_labels=[]
test_input=[]
test_sent=[]
for i in range(5):
    print ('Preparing for the',i,'-th split data')
    info=data[str(i)]
    cur_mask=[]
    cur_labels=[]
    cur_input=[]
    cur_sent=[]
    for row in info:
        sent=row['sent']
        ans=row['label']
        target=np.zeros((NUM_CLASS),dtype=np.float64)
        target[ans]=1.0
        encode_sent=row['bert_token']
        token=pad_tokens(encode_sent,MAX_LEN)
        cur_input.append(token)
        mask=[int(num>0) for num in token]
        cur_labels.append(target)
        cur_mask.append(mask)
        cur_sent.append(sent)
    if i==4:
        test_labels.extend(cur_labels)
        test_input.extend(cur_input)
        test_mask.extend(cur_mask)
        test_sent.extend(cur_sent)
    else:
        train_labels.extend(cur_labels)
        train_input.extend(cur_input)
        train_mask.extend(cur_mask)
        train_sent.extend(cur_sent)

Preparing for the 0 -th split data
Preparing for the 1 -th split data
Preparing for the 2 -th split data
Preparing for the 3 -th split data
Preparing for the 4 -th split data


In [126]:
train_input=torch.tensor(np.array(train_input,dtype=np.int64))
test_input=torch.tensor(np.array(test_input,dtype=np.int64))
train_mask=torch.tensor(np.array(train_mask,dtype=np.int64))
test_mask=torch.tensor(np.array(test_mask,dtype=np.int64))
train_labels=torch.tensor(np.array(train_labels))
test_labels=torch.tensor(np.array(test_labels))

In [127]:
train_data=TensorDataset(train_input,train_mask,train_labels)
test_data=TensorDataset(test_input,test_mask,test_labels)
train_loader=DataLoader(train_data,BATCH_SIZE,shuffle=True,num_workers=1)
test_loader=DataLoader(test_data,BATCH_SIZE,shuffle=False,num_workers=1)

In [128]:
print (len(train_loader),len(test_loader))

310 78


In [129]:
from transformers import BertForSequenceClassification,AdamW,BertConfig

In [130]:
model=BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=NUM_CLASS
)
mdoel=model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [131]:
optimizer=AdamW(model.parameters(),
               lr=2e-5,
               eps=1e-8
               )

In [132]:
from transformers import get_linear_schedule_with_warmup
num_training_steps=len(train_loader) * EPOCHS
scheduler=get_linear_schedule_with_warmup(optimizer,
                                          num_warmup_steps=0,
                                          num_training_steps=num_training_steps
                                         )

In [133]:
import torch.nn as nn
from sklearn.metrics import f1_score,recall_score,precision_score,accuracy_score,classification_report,precision_recall_fscore_support,roc_auc_score

def compute_multi_loss(r_pred,r_labels):
    loss=nn.functional.binary_cross_entropy_with_logits(r_pred,r_labels)
    return loss
def compute_other(logits,labels):
    #acc=compute_score(logits,labels)
    logits=np.argmax(logits.cpu().numpy(),axis=1)
    label=np.argmax(labels.cpu().numpy(),axis=1)
    length=logits.shape[0]

    f1=f1_score(label,logits,average='weighted',labels=np.unique(label))
    recall=recall_score(label,logits,average='weighted',labels=np.unique(label))
    precision=precision_score(label,logits,average='weighted',labels=np.unique(label))

    result=classification_report(label,logits)
    print (result)
    information=result.split('\n')
    #print(information,result)
    cur=information[2].split('     ')
    h_p=float(cur[3].strip())
    h_r=float(cur[4].strip())
    h_f=float(cur[5].strip())
    total=[]
    
    total.append(precision*100)
    total.append(recall*100)
    total.append(f1*100)
    total.append(h_p*100)
    total.append(h_r*100)
    total.append(h_f*100)
    return total

In [134]:
def evaluate_model(baseline,test_info):
    t_loss=0.0
    print ('Length of iterations for evaluation is:',len(test_info))
    for i,(tokens,masks,labels) in enumerate(test_info):
        with torch.no_grad():
            tokens=tokens.long().to(device)
            labels=labels.float().to(device)
            masks=masks.long().to(device)
            #print (labels)
            pred=baseline(tokens,token_type_ids=None,attention_mask=masks)[0]
            b_loss=compute_multi_loss(pred,labels)
            t_loss+=b_loss
        if i==0:
            t_pred=pred
            t_labels=labels
        else:
            t_pred=torch.cat((t_pred,pred),dim=0)
            t_labels=torch.cat((t_labels,labels),dim=0)
    avg_loss=t_loss
    total=compute_other(t_pred,t_labels)
    return avg_loss,total

In [135]:
for epoch in range(EPOCHS):
    total_loss=0.0
    model.train(True)
    for i,(tokens,masks,labels) in enumerate(train_loader):
        #print(labels)
        #print (tokens.shape)
        #print (tokens)
        tokens=tokens.long().to(device)
        labels=labels.float().to(device)
        #print (labels)
        #print (labels.shape,labels[0])
        masks=masks.long().to(device)
        pred=model(tokens,token_type_ids=None,attention_mask=masks)[0]
        #print (pred)
        loss=compute_multi_loss(pred,labels)
        total_loss+=loss
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(),1.0)
        optimizer.step()
        scheduler.step()#updating the learning rate
        optimizer.zero_grad()
        if i==0:
            t_pred=pred
            t_labels=labels
        else:
            t_pred=torch.cat((t_pred,pred),dim=0)
            t_labels=torch.cat((t_labels,labels),dim=0)
    #train_score=compute_score(t_pred,t_labels)
    print ('Training loss and score is',total_loss,'in Epoch',epoch)
    model.train(False)
    eval_loss,cur_total=evaluate_model(model,test_loader)
    print ('Evaluation loss and score is',eval_loss,'in Epoch',epoch)
    print('\teval precision: %.2f ' % (cur_total[0]))
    print('\teval recall: %.2f ' % (cur_total[1]))
    print('\teval f1: %.2f ' % (cur_total[2]))
    print('\teval hate precision: %.2f ' % (cur_total[3]))
    print('\teval hate recall: %.2f ' % (cur_total[4]))
    print('\teval hate f1: %.2f \n' % (cur_total[5]))

Training loss and score is tensor(76.3678, device='cuda:5', grad_fn=<AddBackward0>) in Epoch 0
Length of iterations for evaluation is: 78
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       143
           1       0.92      0.99      0.95      1919
           2       0.88      0.90      0.89       416

    accuracy                           0.91      2478
   macro avg       0.60      0.63      0.61      2478
weighted avg       0.86      0.91      0.89      2478

Evaluation loss and score is tensor(13.4691, device='cuda:5') in Epoch 0
	eval precision: 86.14 
	eval recall: 91.44 
	eval f1: 88.71 
	eval hate precision: 0.00 
	eval hate recall: 0.00 
	eval hate f1: 0.00 



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Training loss and score is tensor(47.1985, device='cuda:5', grad_fn=<AddBackward0>) in Epoch 1
Length of iterations for evaluation is: 78
              precision    recall  f1-score   support

           0       0.68      0.19      0.30       143
           1       0.93      0.98      0.95      1919
           2       0.90      0.88      0.89       416

    accuracy                           0.92      2478
   macro avg       0.83      0.68      0.71      2478
weighted avg       0.91      0.92      0.91      2478

Evaluation loss and score is tensor(12.6205, device='cuda:5') in Epoch 1
	eval precision: 90.90 
	eval recall: 91.97 
	eval f1: 90.59 
	eval hate precision: 68.00 
	eval hate recall: 19.00 
	eval hate f1: 30.00 

Training loss and score is tensor(36.4303, device='cuda:5', grad_fn=<AddBackward0>) in Epoch 2
Length of iterations for evaluation is: 78
              precision    recall  f1-score   support

           0       0.67      0.21      0.32       143
           1       0.

In [136]:
tokenizer=BertTokenizer.from_pretrained('bert-base-uncased')
ref_token_id=tokenizer.pad_token_id
sep_token_id=tokenizer.sep_token_id
cls_token_id=tokenizer.cls_token_id
print (ref_token_id,sep_token_id,cls_token_id)

0 102 101


In [137]:
from captum.attr import visualization as viz
from captum.attr import IntegratedGradients, LayerConductance, LayerIntegratedGradients,TokenReferenceBase
from captum.attr import configure_interpretable_embedding_layer, remove_interpretable_embedding_layer

In [138]:
import seaborn as sns
import matplotlib.pyplot as plt

In [139]:
def construct_input_ref_pair(text, ref_token_id, sep_token_id, cls_token_id):
    text_ids = tokenizer.encode(text, add_special_tokens=False)
    input_ids = [cls_token_id] + text_ids + [sep_token_id]
    ref_input_ids = [cls_token_id] + [ref_token_id] * len(text_ids) + [sep_token_id]

    return torch.tensor([input_ids], device=device), torch.tensor([ref_input_ids], device=device), len(text_ids)

#distinguish the two sentences
def construct_input_ref_token_type_pair(input_ids, sep_ind=0):
    seq_len = input_ids.size(1)
    token_type_ids = torch.tensor([[0 if i <= sep_ind else 1 for i in range(seq_len)]], device=device)
    ref_token_type_ids = torch.zeros_like(token_type_ids, device=device)# * -1
    return token_type_ids, ref_token_type_ids

def construct_input_ref_pos_id_pair(input_ids):
    seq_length = input_ids.size(1)
    position_ids = torch.arange(seq_length, dtype=torch.long, device=device)
    # we could potentially also use random permutation with `torch.randperm(seq_length, device=device)`
    ref_position_ids = torch.zeros(seq_length, dtype=torch.long, device=device)

    position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
    ref_position_ids = ref_position_ids.unsqueeze(0).expand_as(input_ids)
    return position_ids, ref_position_ids
    
def construct_attention_mask(input_ids):
    return torch.ones_like(input_ids)

In [140]:
def summarize_attributions(attributions):
    attributions = attributions.sum(dim=-1).squeeze(0)
    attributions = attributions / torch.norm(attributions)
    return attributions
def vis_attributes(attribution_sum,prediction,label,tokens,delta):
    ans,index=prediction.max(1)
    #print(ans,index)
    #pred=(ans.detach().cpu().numpy(),index)
    #print (type(attribution_sum),type(ans),type(delta))
    vis=viz.VisualizationDataRecord(
        attribution_sum.detach().cpu().numpy(),
        ans.detach().cpu().numpy().item(),
        str(index.detach().cpu().numpy()),
        str(label),
        'none',
        attribution_sum.sum().detach().cpu().numpy(),       
        all_tokens,
        delta.detach().cpu().numpy())
    viz.visualize_text([vis])

In [141]:
def predict(inputs, token_type_ids=None, position_ids=None, attention_mask=None):
    return torch.sigmoid(model(inputs, token_type_ids=token_type_ids,
                 position_ids=position_ids, attention_mask=attention_mask )[0])
def bert_forward_func(inputs, token_type_ids=None, position_ids=None, attention_mask=None):
    pred = predict(inputs,
                   token_type_ids=token_type_ids,
                   position_ids=position_ids,
                   attention_mask=attention_mask)
    return pred.max(1).values

In [142]:
lig=LayerIntegratedGradients(bert_forward_func,mdoel.bert.embeddings)

In [153]:
import pickle as pkl
import numpy as np
from preprocessing import clean_text
data=pkl.load(open('/home/ruicao/NLP/textual/hate-speech-detection/angrybert/angry-MTL/split_data_tokens/dt.pkl','rb'))
test=data['6']

In [154]:
count=0
for i,row in enumerate(test):
    if count>20:
        break
    label=row['label']
    sent=row['sent']
    if label==0:
        continue
    
    input_ids, ref_input_ids, sep_id = construct_input_ref_pair(clean_text(sent), ref_token_id, sep_token_id, cls_token_id)
    token_type_ids, ref_token_type_ids = construct_input_ref_token_type_pair(input_ids, sep_id)
    position_ids, ref_position_ids = construct_input_ref_pos_id_pair(input_ids)
    attention_mask = construct_attention_mask(input_ids)
    indices = input_ids[0].detach().tolist()
    all_tokens = tokenizer.convert_ids_to_tokens(indices)
    pred=predict(input_ids, token_type_ids=token_type_ids, position_ids=position_ids, attention_mask=attention_mask)
    #info=torch.max(pred)
    if pred.max(1)[1]!=0:
        continue
    count+=1
    print ('Sentence:',sent,clean_text(sent))
    print ('Prediction:',pred,pred.max(1)[1])
    attribution,delta=lig.attribute(inputs=input_ids,baselines=ref_input_ids,
                                    additional_forward_args=(token_type_ids,position_ids,attention_mask),
                                    return_convergence_delta=True)
    attribution_sum=summarize_attributions(attribution)
    vis_attributes(attribution_sum,pred,label,all_tokens,delta)

Sentence: RT @ESPNSecondTake_: If this ugly to you, you either a gay nigga or a hating ass bitch http://t.co/H0wAPpsmgg RT USER If this ugly to you you either a gay nigga or a hating ass bitch
Prediction: tensor([[0.8029, 0.2622, 0.0150]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.80),none,-0.63,[CLS] rt user if this ugly to you you either a gay ni ##gga or a hating ass bitch [SEP]
,,,,


Sentence: RT @EnglandBailey: Happy birthday to the nicest faggot ever &#128129; http://t.co/vZygNI9qtQ RT USER Happy birthday to the nicest faggot ever 
Prediction: tensor([[0.5302, 0.4781, 0.0065]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.53),none,-1.73,[CLS] rt user happy birthday to the nice ##st fa ##gg ##ot ever [SEP]
,,,,


Sentence: RT @EvanBecker513: @whattheflocka omg you're a nigger. RT USER oh my god you are a nigger
Prediction: tensor([[0.7379, 0.3383, 0.0116]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.74),none,1.2,[CLS] rt user oh my god you are a ni ##gger [SEP]
,,,,


Sentence: RT @FemsHaveBallz: LOL at my mentions 

Liberal1: you're a racist bitch

Liberal2: yeah kill all whites

(Liberal1 is white)

Liberal1: Not&#8230; RT USER laugh out loud at my mentions Liberal1 you are a racist bitchLiberal2 yeah kill all whitesLiberal1 is whiteLiberal1 Not
Prediction: tensor([[0.9503, 0.1019, 0.0631]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.95),none,-0.4,[CLS] rt user laugh out loud at my mentions liberal ##1 you are a racist bitch ##lib ##eral ##2 yeah kill all whites ##lib ##eral ##1 is white ##lib ##eral ##1 not [SEP]
,,,,


Sentence: RT @FemsHaveBallz: The looters in #Ferguson are so retarded, that they risk arrest to get free food when EBT already pays for their food LO&#8230; RT USER The looters in #Ferguson are so retarded that they risk arrest to get free food when EBT already pays for their food LO
Prediction: tensor([[0.6795, 0.2912, 0.0201]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.68),none,-0.5,[CLS] rt user the lo ##ote ##rs in # ferguson are so re ##tar ##ded that they risk arrest to get free food when e ##bt already pays for their food lo [SEP]
,,,,


Sentence: RT @Fewjr: @1MarKus_A @MakEitSndGoOd she curved regular niggas daily. Especially us Austin niggas. Damn yellow ppl &#128127; RT USER she curved regular niggas daily Especially us Austin niggas Damn yellow people 
Prediction: tensor([[0.7792, 0.2571, 0.0154]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.78),none,0.54,[CLS] rt user she curved regular ni ##gga ##s daily especially us austin ni ##gga ##s damn yellow people [SEP]
,,,,


Sentence: RT @FirearmsDaily: Statistics don't lie, if you like gun control you're a queer guy. #FD RT USER Statistics do not lie if you like gun control you are a queer guy #FD
Prediction: tensor([[0.9418, 0.1220, 0.0609]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.94),none,0.05,[CLS] rt user statistics do not lie if you like gun control you are a queer guy # f ##d [SEP]
,,,,


Sentence: RT @HawkinsUSA: Party of Racists just elected a black dude in the South. The party of War on Women just elected first ever woman in redneck&#8230; RT USER Party of Racists just elected a black dude in the South The party of War on Women just elected first ever woman in redneck
Prediction: tensor([[0.7945, 0.1596, 0.0596]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.79),none,-1.01,[CLS] rt user party of racist ##s just elected a black dude in the south the party of war on women just elected first ever woman in red ##neck [SEP]
,,,,


Sentence: RT @Hermit_Thrush: I suck a giant dick, i suck a skinny dick &#9835; i suck a faggot dick, i suck a nigger dick &#9835; i suck the dicks that remind me&#8230; RT USER I suck a giant dick i suck a skinny dick  i suck a faggot dick i suck a nigger dick  i suck the dicks that remind me
Prediction: tensor([[0.6069, 0.4689, 0.0067]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.61),none,-0.93,[CLS] rt user i suck a giant dick i suck a skinny dick i suck a fa ##gg ##ot dick i suck a ni ##gger dick i suck the dick ##s that remind me [SEP]
,,,,


Sentence: RT @JimNorton: There is a GIFT SHOP at the 9-11 museum? And they charge admission. It's a mass grave you fucking cunts. What a disgrace. RT USER There is a GIFT SHOP at the 9-11 museum And they charge admission it is a mass grave you fucking cunts What a disgrace
Prediction: tensor([[0.5978, 0.5048, 0.0068]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.60),none,0.45,[CLS] rt user there is a gift shop at the 9 - 11 museum and they charge admission it is a mass grave you fucking cu ##nts what a disgrace [SEP]
,,,,


Sentence: RT @Kerb_215: 1,2,3,4 how many niggers are in my store I knowwwww your stealing &#128514;&#128514;&#128514;, that vine still be having me weak RT USER 1234 how many niggers are in my store I knoww your stealing  that vine still be having me weak
Prediction: tensor([[0.7518, 0.2883, 0.0159]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.75),none,1.68,[CLS] rt user 123 ##4 how many ni ##gger ##s are in my store i know ##w your stealing that vine still be having me weak [SEP]
,,,,


Sentence: RT @LipstickYoda: Call my son "nigger": even respectable white ppl will be uncomfortable w/ you. Call him "thug":you probably can get off w&#8230; RT USER Call my son nigger even respectable white people will be uncomfortable w you Call him thugyou probably can get off w
Prediction: tensor([[0.7145, 0.3441, 0.0116]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.71),none,-1.38,[CLS] rt user call my son ni ##gger even respectable white people will be uncomfortable w you call him thug ##you probably can get off w [SEP]
,,,,


Sentence: RT @MadPatsFan1954: .@FR_INC "coon meat?!" Made me ill to even type that. Racist much? @lybr3 RT USER USER coon meat Made me ill to even type that Racist much USER
Prediction: tensor([[0.8441, 0.1244, 0.0680]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.84),none,0.03,[CLS] rt user user co ##on meat made me ill to even type that racist much user [SEP]
,,,,


Sentence: RT @MichalaSmith4: shout out to the faggots who took a million and one selfies on my phone. &#128530;&#128514;&#10084;&#65039; @zzachbarness @michealallen72 http://t.co/&#8230; RT USER shout out to the faggots who took a million and one selfies on my phone  USER
Prediction: tensor([[0.5639, 0.3954, 0.0104]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.56),none,0.88,[CLS] rt user shout out to the fa ##gg ##ots who took a million and one self ##ies on my phone user [SEP]
,,,,


Sentence: RT @MidniteBoss: @fourzerotwo fix the fucking snipers you retarded piece of shit. Fucking overpowered as fuck. They aim down faster then ... RT USER fix the fucking snipers you retarded piece of shit Fucking overpowered as fuck They aim down faster then 
Prediction: tensor([[0.6283, 0.4400, 0.0066]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.63),none,-1.27,[CLS] rt user fix the fucking sniper ##s you re ##tar ##ded piece of shit fucking over ##powered as fuck they aim down faster then [SEP]
,,,,


Sentence: RT @OfficiallyIce: "That's why you're so good at this game. Cause u sit on your unemployed nigger ass playing all day." RT USER That why you are so good at this game because you sit on your unemployed nigger ass playing all day
Prediction: tensor([[0.7786, 0.2612, 0.0182]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.78),none,1.43,[CLS] rt user that why you are so good at this game because you sit on your unemployed ni ##gger ass playing all day [SEP]
,,,,


Sentence: RT @OnlyRosee: The way Tricksnipers act is just ugh. 89% of them are Chief Keef, Soulja Boy niggers. Absolute sewage waste RT USER The way Tricksnipers act is just ugh 89 of them are Chief Keef Soulja Boy niggers Absolute sewage waste
Prediction: tensor([[0.8662, 0.2006, 0.0193]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.87),none,0.22,[CLS] rt user the way tricks ##ni ##pers act is just u ##gh 89 of them are chief ke ##ef soul ##ja boy ni ##gger ##s absolute sewage waste [SEP]
,,,,


Sentence: RT @RT_America: US Army removes &#8216;negro&#8217; as official reference to black people after public outrage http://t.co/qtiTEzE7Bh http://t.co/nudHr&#8230; RT USER US Army removes  as official reference to black people after public outrage
Prediction: tensor([[0.3402, 0.3274, 0.1539]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.34),none,-2.22,[CLS] rt user us army removes as official reference to black people after public outrage [SEP]
,,,,


Sentence: RT @Rockprincess818: Obama is Allowing millions of illegal aliens, drug cartels, and terrorists to enter our open borders...Liberal retards&#8230; RT USER Obama is Allowing millions of illegal aliens drug cartels and terrorists to enter our open bordersLiberal retards
Prediction: tensor([[0.8614, 0.1134, 0.0754]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.86),none,0.0,[CLS] rt user obama is allowing millions of illegal aliens drug cartel ##s and terrorists to enter our open borders ##lib ##eral re ##tar ##ds [SEP]
,,,,


Sentence: RT @SAMMI_boyden: @jizcalifuh it's a metaphor you dumb fucking nigger @savanigga &#128514;&#128514;&#128525;&#128525;&#128069;&#128069; RT USER it is a metaphor you dumb fucking nigger USER 
Prediction: tensor([[0.9466, 0.1294, 0.0434]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.95),none,2.3,[CLS] rt user it is a metaphor you dumb fucking ni ##gger user [SEP]
,,,,


Sentence: RT @Smokinonkaya_: She said I hurt her feelings now she dating dykes.. RT USER She said I hurt her feelings now she dating dykes
Prediction: tensor([[0.5003, 0.4604, 0.0119]], device='cuda:5', grad_fn=<SigmoidBackward>) tensor([0], device='cuda:5')


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,[0] (0.50),none,-1.6,[CLS] rt user she said i hurt her feelings now she dating dyke ##s [SEP]
,,,,


In [None]:
def construct_bert_sub_embedding(input_ids, ref_input_ids,
                                   token_type_ids, ref_token_type_ids,
                                   position_ids, ref_position_ids):
    input_embeddings = interpretable_embedding1.indices_to_embeddings(input_ids)
    ref_input_embeddings = interpretable_embedding1.indices_to_embeddings(ref_input_ids)

    input_embeddings_token_type = interpretable_embedding2.indices_to_embeddings(token_type_ids)
    ref_input_embeddings_token_type = interpretable_embedding2.indices_to_embeddings(ref_token_type_ids)

    input_embeddings_position_ids = interpretable_embedding3.indices_to_embeddings(position_ids)
    ref_input_embeddings_position_ids = interpretable_embedding3.indices_to_embeddings(ref_position_ids)
    
    return (input_embeddings, ref_input_embeddings), \
           (input_embeddings_token_type, ref_input_embeddings_token_type), \
           (input_embeddings_position_ids, ref_input_embeddings_position_ids)
    
def construct_whole_bert_embeddings(input_ids, ref_input_ids, \
                                    token_type_ids=None, ref_token_type_ids=None, \
                                    position_ids=None, ref_position_ids=None):
    input_embeddings = interpretable_embedding.indices_to_embeddings(input_ids, token_type_ids=token_type_ids, position_ids=position_ids)
    ref_input_embeddings = interpretable_embedding.indices_to_embeddings(ref_input_ids, token_type_ids=token_type_ids, position_ids=position_ids)
    
    return input_embeddings, ref_input_embeddings