In [1]:
import pickle
import pprint
import random
from random import shuffle

import numpy as np
import pandas as pd
import torch
from tqdm.notebook import tqdm
from transformers import BertTokenizerFast, BertForSequenceClassification, Trainer, TrainingArguments

from perturbation_functions import get_preds_and_scores, calc_suff, calc_necc


In [2]:
# Load the pre-computed HateCheck necessity/sufficiency perturbations.
# The context manager closes the file handle as soon as the load finishes.
# NOTE(review): pickle.load can execute arbitrary code -- only load files
# from a trusted source.
with open("Data/HateCheck_necc_suff_perturbations.pickle", "rb") as ff:
    perts = pickle.load(ff)

# Drop leading/trailing spaces and newlines from the original texts.
perts['orig_texts'] = [tt.strip(' \n') for tt in perts['orig_texts']]
perts.keys()

dict_keys(['orig_texts', 'necc_perturbed', 'suff_perturbed', 'necc_masks', 'suff_masks'])

In [3]:
def _mask_texts(texts, mask_sets):
    """Build the '[MASK]'-substituted variants of every text.

    Parameters
    ----------
    texts : iterable of str
        Each text is stripped and split on whitespace into tokens.
    mask_sets : iterable
        Parallel to ``texts``; each item is an iterable of masks, and each
        mask is an iterable of flags paired position-wise with the tokens.

    Returns
    -------
    list of list of str
        One inner list per text, holding one string per mask in which every
        token whose flag is truthy has been replaced by ``'[MASK]'``.
        Tokens and flags are paired with ``zip``, so the shorter of the two
        determines the length of the result.
    """
    masked_texts = []
    for text, mask_set in zip(texts, mask_sets):
        tokens = text.strip().split()
        masked_texts.append([
            " ".join('[MASK]' if flag else token
                     for token, flag in zip(tokens, mask))
            for mask in mask_set
        ])
    return masked_texts


# Same procedure for both perturbation types; only the mask source differs.
necc_masked = _mask_texts(perts['orig_texts'], perts['necc_masks'])
suff_masked = _mask_texts(perts['orig_texts'], perts['suff_masks'])
       

In [4]:
# Tokenizer that is passed, together with each classifier, to the
# prediction helper in the cells below.
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

# Register the placeholder tokens for mentions, emojis and URLs
# (--> see pre-processing).
special_tokens_dict = {
    'additional_special_tokens': ['[USER]', '[EMOJI]', '[URL]'],
}
num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)

# Classifier names; each one is loaded from "Models/Classifiers/<name>".
datasets = [
    'CAD_abuse', 'Davidson_abuse', 'Founta_abuse',
    'CAD_hate', 'Davidson_hate', 'Founta_hate',
]


In [5]:

# Predictions and scores of every classifier on the original HateCheck texts
# and on their necessity / sufficiency perturbations, keyed by classifier name.
orig_preds = {}
orig_scores = {}
necc_preds = {}
necc_scores = {}
suff_preds = {}
suff_scores = {}

# Progress-bar total. It does not depend on the model, so compute it once.
# NOTE(review): the perturbation counts are doubled although each perturbed
# text is handed to get_preds_and_scores only once below -- confirm how the
# helper advances `pbar` before changing this.
total_len = (len(perts['orig_texts'])
             + 2*sum(len(nn) for nn in perts['necc_perturbed'])
             + 2*sum(len(nn) for nn in perts['suff_perturbed']))

for dataset in datasets:
    print("Classifying HateCheck perturbations with {}.".format(dataset))
    model = BertForSequenceClassification.from_pretrained("Models/Classifiers/{}".format(dataset))
    # The tokenizer carries extra special tokens, so the embedding matrix
    # must be resized to the tokenizer's vocabulary size.
    model.resize_token_embeddings(len(tokenizer))
    model.eval()

    with tqdm(total=total_len) as pbar:
        orig_preds[dataset], orig_scores[dataset] = get_preds_and_scores(perts['orig_texts'], tokenizer, model, pbar)

        # One list of predictions/scores per original text, covering all of
        # that text's necessity perturbations.
        necc_preds[dataset] = []
        necc_scores[dataset] = []
        for tt in perts['necc_perturbed']:
            pp, ss = get_preds_and_scores(tt, tokenizer, model, pbar)
            necc_preds[dataset].append(pp)  # was `necct_preds` (NameError)
            necc_scores[dataset].append(ss)

        # Same layout for the sufficiency perturbations.
        suff_preds[dataset] = []
        suff_scores[dataset] = []
        for tt in perts['suff_perturbed']:
            pp, ss = get_preds_and_scores(tt, tokenizer, model, pbar)
            suff_preds[dataset].append(pp)
            suff_scores[dataset].append(ss)

# Bundle everything for persistence; the values are the dictionaries filled
# in the loop above (the previous `*_pert_*` names were never defined).
final_results = {
    'orig_preds': orig_preds,
    'orig_scores': orig_scores,
    'necc_preds': necc_preds,
    'necc_scores': necc_scores,
    'suff_preds': suff_preds,
    'suff_scores': suff_scores,
}


Classifying HateCheck perturbations with CAD_abuse.


  0%|          | 0/132360 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [10]:
# Persist the raw predictions/scores; the context manager guarantees the
# file is flushed and closed even if pickling fails part-way.
with open("Data/HateCheck_necc_suff_preds.pickle", "wb") as ff:
    pickle.dump(final_results, ff)

In [51]:
# Size of the random sample of texts used to estimate each classifier's
# baseline, and the seed that makes that sample reproducible across runs.
BASELINE_SAMPLE_SIZE = 5000
BASELINE_SEED = 0

# Entries in the training file are separated by two blank lines.
with open("Data/ILM/compound_dataset/train.txt", "r") as ff:
    compound_dataset = ff.read().split("\n\n\n")
# Trim stray spaces and punctuation from both ends of every entry.
compound_dataset = [tt.strip(" :`.,") for tt in compound_dataset]

# Shuffle with a dedicated, seeded generator so that Restart & Run All
# always selects the same sample (and therefore the same baselines) without
# touching the global random state.
random.Random(BASELINE_SEED).shuffle(compound_dataset)
compound_dataset = compound_dataset[:BASELINE_SAMPLE_SIZE]

In [56]:
# Per-classifier baseline: the mean prediction and the mean score over the
# sampled compound dataset.
baseline_preds, baseline_scores = {}, {}
for dataset in datasets:
    checkpoint = "Models/Classifiers/{}".format(dataset)
    model = BertForSequenceClassification.from_pretrained(checkpoint)
    # Match the embedding matrix to the tokenizer, which has extra tokens.
    model.resize_token_embeddings(len(tokenizer))
    model.eval()

    preds, scores = get_preds_and_scores(compound_dataset, tokenizer, model)
    n_preds, n_scores = len(preds), len(scores)
    baseline_preds[dataset] = sum(preds)/n_preds
    baseline_scores[dataset] = sum(scores)/n_scores

In [59]:
# Persist the baselines; the context manager closes the file handle, which
# the bare open(...) call left dangling.
with open("Classifier_baselines.pickle", "wb") as ff:
    pickle.dump({'baseline_preds':baseline_preds, 'baseline_scores':baseline_scores}, ff)

In [86]:
# Necessity / sufficiency values per classifier. The plain dictionaries are
# computed from the predictions, the *_nb ones from the scores.
necc_results, necc_results_nb = {}, {}
suff_results, suff_results_nb = {}, {}

for dataset in datasets:

    ## NECESSITY CALCULATIONS
    # Each original output is paired with the outputs on its perturbations
    # and with the masks that produced those perturbations.
    necc_results[dataset] = [
        calc_necc(orig, np.array(perturbed), masks)
        for orig, perturbed, masks in zip(final_results['orig_preds'][dataset],
                                          final_results['necc_preds'][dataset],
                                          perts['necc_masks'])
    ]
    necc_results_nb[dataset] = [
        calc_necc(orig, np.array(perturbed), masks)
        for orig, perturbed, masks in zip(final_results['orig_scores'][dataset],
                                          final_results['necc_scores'][dataset],
                                          perts['necc_masks'])
    ]

    ## SUFFICIENCY CALCULATIONS
    # Sufficiency is measured against the classifier's baseline rather than
    # against the output on the original text.
    baseline_pred = baseline_preds[dataset]
    baseline_score = baseline_scores[dataset]

    suff_results[dataset] = [
        calc_suff(baseline_pred, np.array(perturbed), masks)
        for perturbed, masks in zip(final_results['suff_preds'][dataset],
                                    perts['suff_masks'])
    ]
    suff_results_nb[dataset] = [
        calc_suff(baseline_score, np.array(perturbed), masks)
        for perturbed, masks in zip(final_results['suff_scores'][dataset],
                                    perts['suff_masks'])
    ]
    

In [91]:
# Collect the four result dictionaries (each keyed by classifier name)
# under one object so they can be stored in a single file.
hatecheck_necc_suff_results = {
    'necc_results': necc_results,
    'necc_results_nb': necc_results_nb,
    'suff_results': suff_results,
    'suff_results_nb': suff_results_nb
}

# The context manager closes the file handle once the dump is complete.
with open('Data/HateCheck_necc_suff_results_all.pickle', 'wb') as ff:
    pickle.dump(hatecheck_necc_suff_results, ff)

In [12]:
# Now get the predictions for all models for the entire hatecheck suite.
# `index` is a DataFrame.to_csv argument; read_csv does not accept it and
# raises TypeError, so the file is read with the default settings.
hc_test_cases_all = pd.read_csv("hatecheck-data/test_suite_cases.csv")

In [10]:
# Raw test-case strings for the classifiers. They are kept under their own
# name so that `hc_test_cases_all` remains a DataFrame: the following cell
# adds the prediction columns to it, and re-running this cell stays safe.
hc_test_texts = hc_test_cases_all.test_case.tolist()

# Predictions and scores on the full HateCheck suite, keyed by classifier.
hc_preds = {}
hc_scores = {}
for dataset in datasets:
    model = BertForSequenceClassification.from_pretrained("Models/Classifiers/{}".format(dataset))
    # Match the embedding matrix to the tokenizer, which has extra tokens.
    model.resize_token_embeddings(len(tokenizer))
    model.eval()
    preds, scores = get_preds_and_scores(hc_test_texts, tokenizer, model)
    hc_preds[dataset] = preds
    hc_scores[dataset] = scores

# The context manager closes the file handle once the dump is complete.
with open('Data/HateCheck_results_all_models.pickle', "wb") as ff:
    pickle.dump({'preds': hc_preds, 'scores':hc_scores}, ff)

In [13]:
# Attach each classifier's outputs to the test-case table as two columns,
# "<name>_pred" and "<name>_score".
for dataset in datasets:
    pred_column = '{}_pred'.format(dataset)
    score_column = '{}_score'.format(dataset)
    hc_test_cases_all[pred_column] = hc_preds[dataset]
    hc_test_cases_all[score_column] = hc_scores[dataset]

Unnamed: 0,functionality,case_id,test_case,label_gold,target_ident,direction,focus_words,focus_lemma,ref_case_id,ref_templ_id,...,BERT_davidson2017_1_weighted_pred,BERT_davidson2017_1_weighted_score,BERT_founta2018_1_weighted_pred,BERT_founta2018_1_weighted_score,BERT_CAD_hate_pred,BERT_CAD_hate_score,BERT_davidson2017_weighted_pred,BERT_davidson2017_weighted_score,BERT_founta2018_weighted_pred,BERT_founta2018_weighted_score
0,derog_neg_emote_h,1,I hate women.,hateful,women,general,hate,hate,,,...,1,0.987005,1,0.989412,0,0.477585,0,0.083175,1,0.990185
1,derog_neg_emote_h,2,I hate trans people.,hateful,trans people,general,hate,hate,,,...,1,0.865187,1,0.986793,1,0.523131,1,0.994292,1,0.975954
2,derog_neg_emote_h,3,I hate gay people.,hateful,gay people,general,hate,hate,,,...,1,0.967316,1,0.991046,1,0.985266,1,0.993939,1,0.990166
3,derog_neg_emote_h,4,I hate black people.,hateful,black people,general,hate,hate,,,...,1,0.909598,1,0.991238,1,0.986397,1,0.994157,1,0.975602
4,derog_neg_emote_h,5,I hate disabled people.,hateful,disabled people,general,hate,hate,,,...,0,0.121863,1,0.989154,0,0.002243,1,0.994348,1,0.988610
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3723,spell_leet_h,3897,Gay people are all complete 4ssholes.,hateful,gay people,general,4ssholes,asshole,1232.0,256.0,...,1,0.989414,1,0.984351,1,0.991244,1,0.967804,1,0.975607
3724,spell_leet_h,3898,Black people are all complete 4ssholes.,hateful,black people,general,4ssholes,asshole,1233.0,256.0,...,1,0.985969,1,0.977180,1,0.974721,0,0.250225,1,0.975588
3725,spell_leet_h,3899,Disabled people are all complete 4ssholes.,hateful,disabled people,general,4ssholes,asshole,1234.0,256.0,...,1,0.956798,0,0.007620,0,0.043165,0,0.264191,0,0.014212
3726,spell_leet_h,3900,Muslims are all complete 4ssholes.,hateful,Muslims,general,4ssholes,asshole,1235.0,256.0,...,1,0.976569,1,0.978502,1,0.990368,1,0.974557,1,0.959037


In [14]:
# Store the test cases together with the per-classifier columns; the
# context manager closes the file handle once the dump is complete.
with open('Data/HateCheck_templates_and_results.pickle', "wb") as ff:
    pickle.dump(hc_test_cases_all, ff)