In [1]:
import pandas as pd
import numpy as np
import os
import pickle
import re
from collections import Counter
from tqdm import tqdm

import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

import ilm.ilm.tokenize_util
from ilm.ilm.infer import infill_with_ilm
from perturbation_functions import calculate_necc_and_suff, gen_num_samples_table, gen_probs_table

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
%load_ext autoreload
%autoreload 2


In [3]:
# MODEL_DIR = './ilm/Models/ILM/'
# MASK_CLS = 'ilm.ilm.mask.hierarchical.MaskHierarchical'

# tokenizer = ilm.ilm.tokenize_util.Tokenizer.GPT2
# with open(os.path.join(MODEL_DIR, 'additional_ids_to_tokens.pkl'), 'rb') as f:
#     additional_ids_to_tokens = pickle.load(f)
# additional_tokens_to_ids = {v:k for k, v in additional_ids_to_tokens.items()}
# try:
#     ilm.ilm.tokenize_util.update_tokenizer(additional_ids_to_tokens, tokenizer)
# except ValueError:
#     print('Already updated')
# print(additional_tokens_to_ids)

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = GPT2LMHeadModel.from_pretrained(MODEL_DIR)


In [4]:
MODEL_DIR = './ilm/Models/ILM/'

tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")

with open(os.path.join(MODEL_DIR, 'additional_ids_to_tokens.pkl'), 'rb') as f:
    additional_ids_to_tokens = pickle.load(f)
additional_tokens_to_ids = {v:k for k, v in additional_ids_to_tokens.items()}
try:
    ilm.ilm.tokenize_util.update_tokenizer(additional_ids_to_tokens, tokenizer)
except ValueError:
    print('Already updated')
print(additional_tokens_to_ids)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.eval()
_ = model.to(device)


Already updated
{'<|startofinfill|>': 50257, '<|endofinfill|>': 50258, '<|infill_document|>': 50259, '<|infill_paragraph|>': 50260, '<|infill_sentence|>': 50261, '<|infill_ngram|>': 50262, '<|infill_word|>': 50263}


In [5]:
test_suite_cases = pd.read_csv("hatecheck-data/test_suite_cases.csv", index_col="case_id") #.drop(columns=['Unnamed:0'])
test_suite_cases.drop(columns=['Unnamed: 0'], inplace=True)
# target_ds = ['women', 'Muslims', 'men']
# funcs = ['derog_neg_emote_h', 'derog_neg_attrib_h', 'derog_dehum_h']
# 
# test_suite_cases = test_suite_cases[test_suite_cases.target_ident.isin(target_ds) & 
                                    # test_suite_cases.functionality.isin(funcs)]
# tts = [text for _, text in test_suite_cases.test_case.items()]
# 
# with open("Data/HateCheck_test_suite_cases.txt", "w") as f:
    # f.write("\n".join(tts))

## Adding men and catholic to the dataset

In [6]:
dic_men = {'SLUR_P_space_add' : 'men',
    'IDENTITY_S_space_add' : 'm e n',
    'IDENTITY_P_leet' : 'men',
    'IDENTITY_P' : 'men',
    'SLUR_S_leet' : 'boy',
    'SLUR_P_leet' : 'boys',
    'IDENTITY_A' : 'men',
    'IDENTITY_S' : 'man',
    'IDENTITY_P_char_del' : 'mn',
    'IDENTITY_P_space_add' : 'm e n',
    'SLUR_S_space_add' : 'b o y',
    'SLUR_S' : 'guy',
    'SLUR_P' : 'guys',
    'IDENTITY_S_char_del' : 'gu'
}


dic_catholics= {'SLUR_P_space_add' : 'Chrisian',
    'IDENTITY_S_space_add' : 'c a t h o l i c',
    'IDENTITY_P_leet' : 'Christians',
    'IDENTITY_P' : 'Catholics',
    'SLUR_S_leet' : 'Popist',
    'SLUR_P_leet' : 'Popists',
    'IDENTITY_A' : 'catholic',
    'IDENTITY_S' : 'Catholic',
    'IDENTITY_P_char_del' : 'Cathlics',
    'IDENTITY_P_space_add' : 'c a t h o l i c',
    'SLUR_S_space_add' : 'P o p i s t s',
    'SLUR_S' : 'Christian',
    'SLUR_P' : 'Christians',
    'IDENTITY_S_char_del' : 'Cathlic'
}


In [7]:

def add_word_hatecheck(word_df: str, df: pd.DataFrame):
    indices = []
    for sentence in test_suite_cases[pd.notna(test_suite_cases['target_ident'])]['case_templ'].unique():
        index = int((df["case_templ"] == sentence).idxmax())
        indices.append(index)
        
        new_row = df.iloc[index].copy()

        match = re.search(r'\[(.*?)\]', sentence) #search in the case_templ sentence the part of the template to replace
        if match:
            to_replace = match.group(1)
        new_row['test_case']  = new_row['case_templ'].replace(f"[{to_replace}]", word_df[to_replace]) # replace with the right sentence 
        new_row['target_ident'] = word_df['IDENTITY_P'] #change the label

        df = pd.concat([df.iloc[:index+1], pd.DataFrame([new_row]), df.iloc[index+1:]]).reset_index(drop=True)
    
    return df

# Call the function and print the result
updated_df = add_word_hatecheck(dic_men, test_suite_cases)
updated_df = add_word_hatecheck(dic_catholics, updated_df)


In [8]:
test_suite_cases=updated_df
target_ds = ['women', 'Muslims', 'men', 'Catholics']
funcs = ['derog_neg_emote_h', 'derog_neg_attrib_h', 'derog_dehum_h']

test_suite_cases = test_suite_cases[test_suite_cases.target_ident.isin(target_ds) & 
                                    test_suite_cases.functionality.isin(funcs)]
tts = [text for _, text in test_suite_cases.test_case.items()]

with open("Data/HateCheck_test_suite_cases.txt", "w") as f:
    f.write("\n".join(tts))

In [9]:
tts

['I hate women. ',
 'I hate Catholics.',
 'I hate men.',
 'I hate Muslims. ',
 "I really can't stand women like you. ",
 "I really can't stand Catholics like you.",
 "I really can't stand men like you.",
 "I really can't stand Muslims like you. ",
 'I despise women. ',
 'I despise Catholics.',
 'I despise men.',
 'I despise Muslims. ',
 'I detest women. ',
 'I detest Catholics.',
 'I detest men.',
 'I detest Muslims. ',
 'I absolutely loathe women. ',
 'I absolutely loathe Catholics.',
 'I absolutely loathe men.',
 'I absolutely loathe Muslims. ',
 'Disgusted by all the women around here. ',
 'Disgusted by all the Catholics around here.',
 'Disgusted by all the men around here.',
 'Disgusted by all the Muslims around here. ',
 'I have so much disdain for these women. ',
 'I have so much disdain for these Catholics.',
 'I have so much disdain for these men.',
 'I have so much disdain for these Muslims. ',
 'I have so much hatred for you women. ',
 'I have so much hatred for you Catholic

In [10]:
type(tokenizer)

transformers.models.gpt2.tokenization_gpt2.GPT2Tokenizer

In [11]:
# generate approximately 100 perturbations for each token. 
num_samples = gen_num_samples_table(20, 100)
probs_table = gen_probs_table(20)
mask_tokn = additional_tokens_to_ids['<|infill_ngram|>']

orig_texts = []
necc_perturbed = []
suff_perturbed = []
necc_masks = []
suff_masks = []

with open("Data/HateCheck_test_suite_cases.txt", "r") as ff:
    with tqdm(total=120) as pbar:
        for text in ff:
            necc_pp, suff_pp, necc_mm, suff_mm = calculate_necc_and_suff(text, ilm_tokenizer=tokenizer, ilm_model=model, cl_tokenizer=None, cl_model=None, num_samples=num_samples,
                               mask_tokn=mask_tokn, additional_tokens_to_ids=additional_tokens_to_ids, probs_table=probs_table, 
                               return_pert_only=True)

            orig_texts.append(text)
            necc_perturbed.append(necc_pp)
            suff_perturbed.append(suff_pp)
            necc_masks.append(necc_mm)
            suff_masks.append(suff_mm)
            pbar.update(1)
    
necc_suff_perturbations = {'orig_texts': orig_texts, 
                           'necc_perturbed': necc_perturbed, 
                           'suff_perturbed': suff_perturbed,
                           'necc_masks': necc_masks,
                           'suff_masks': suff_masks}

#pickle.dump(necc_suff_perturbations, open('Data/HateCheck_necc_suff_perturbations.pickle', 'wb'))
pickle.dump(necc_suff_perturbations, open('Data/HateCheck_necc_suff_perturbations_2.pickle', 'wb'))

  0%|          | 0/120 [00:00<?, ?it/s]




ValueError: Tokenizer must be from Tokenizer enum

In [15]:
from ilm.ilm.tokenize_util_test import TestTokenizeUtil
test = TestTokenizeUtil()
test.test_tokenize_custom(
)

In [None]:
with open("Data/hatecheck_perturbations/orig_texts.txt", "w") as ff:
    ff.write("\n".join(necc_suff_perturbations['orig_texts']))

In [None]:
with open("Data/hatecheck_perturbations/necc_perturbations.tsv", "w") as ff:
    for ll in necc_suff_perturbations['necc_perturbed']:
        ff.write("\t".join(ll))
        ff.write("\n")

In [None]:
with open("Data/hatecheck_perturbations/suff_perturbations.tsv", "w") as ff:
    for ll in necc_suff_perturbations['suff_perturbed']:
        ff.write("\t".join(ll))
        ff.write("\n")

In [None]:
with open("Data/hatecheck_perturbations/necc_masks.tsv", "w") as ff: 
    for ll in necc_suff_perturbations['necc_masks']:
        llist = ll.astype(int).astype(str).tolist()
        ff.write("\t".join([" ".join(ii) for ii in llist]))
        ff.write("\n")

In [None]:
with open("Data/hatecheck_perturbations/suff_masks.tsv", "w") as ff: 
    for ll in necc_suff_perturbations['suff_masks']:
        llist = ll.astype(int).astype(str).tolist()
        ff.write("\t".join([" ".join(ii) for ii in llist]))
        ff.write("\n")