### 0. Import libraries

In [1]:
# These two commands are needed
# !pip install transformers
# !pip install accelerate

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# import libraries
import re
import spacy
import torch
import numpy as np
import transformers
import pandas as pd
from tqdm import tqdm
from functools import reduce

# import local scripts
from data_loader import DatasetLoader
from alignment_metrics import *
from gpt2_model import GPT2Model

# produce repeatable results: seed NumPy's global RNG directly and
# let transformers.set_seed seed the RNGs it manages (same seed value for both)
np.random.seed(seed=42)
transformers.set_seed(42)

# enable CUDNN deterministic mode and turn cuDNN benchmark mode off
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# spaCy English pipeline; get_main_verb (defined further down) uses it for part-of-speech tags
nlp = spacy.load("en_core_web_sm")

  return torch._C._cuda_getDeviceCount() > 0
2024-06-29 01:02:52.485603: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-06-29 01:02:52.607489: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-29 01:02:53.304492: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/lib:/usr/local/nvidia/lib:/usr/

### 1. Load the data

Below are the names of the datasets used by the authors to check if contrastive explanations identify linguistically appropriate evidence.

In [4]:
# Dataset names grouped by linguistic phenomenon. Each name is passed as-is to
# DatasetLoader.load_data, so every entry must be exactly one dataset identifier.
anaphor_agreement_datasets = ["anaphor_gender_agreement", "anaphor_number_agreement"]

# not sure about the one below
argument_structure_datasets = ["animate_subject_passive"]

# each dataset is listed exactly once (the original list repeated the two
# "with_adj_irregular" entries)
determiner_noun_agreement_datasets = [
    "determiner_noun_agreement_1",
    "determiner_noun_agreement_2",
    "determiner_noun_agreement_irregular_1",
    "determiner_noun_agreement_irregular_2",
    "determiner_noun_agreement_with_adj_2",
    "determiner_noun_agreement_with_adj_irregular_1",
    "determiner_noun_agreement_with_adj_irregular_2",
    "determiner_noun_agreement_with_adjective_1",
]

npi_licesing = [
    "npi_present_1",
    "npi_present_2",
]
# correctly spelled alias; the misspelled name above is kept so existing references still work
npi_licensing = npi_licesing

# NOTE: every entry needs its trailing comma -- without it Python silently
# concatenates adjacent string literals into a single (non-existent) dataset name
subject_verb_agreement = [
    "distractor_agreement_relational_noun",
    "irregular_plural_subject_verb_agreement_1",
    "irregular_plural_subject_verb_agreement_2",
    "regular_plural_subject_verb_agreement_1",
    "regular_plural_subject_verb_agreement_2",
]

In [5]:
# load the first (and only) argument-structure dataset and wrap its "train" entry in a DataFrame
data_loader = DatasetLoader()
data = pd.DataFrame(data_loader.load_data(argument_structure_datasets[0])["train"])
# last expression of the cell: display the frame
data

Unnamed: 0,sentence_good,sentence_bad,field,linguistics_term,UID,simple_LM_method,one_prefix_method,two_prefix_method,lexically_identical,pair_id
0,Amanda was respected by some waitresses.,Amanda was respected by some picture.,syntax,s-selection,animate_subject_passive,True,True,False,False,0
1,Some lake was passed by some cashiers.,Some lake was passed by some phenomena.,syntax,s-selection,animate_subject_passive,True,True,False,False,1
2,Lisa was kissed by the boys.,Lisa was kissed by the blouses.,syntax,s-selection,animate_subject_passive,True,True,False,False,2
3,Amanda isn't respected by the children.,Amanda isn't respected by the cups.,syntax,s-selection,animate_subject_passive,True,True,False,False,3
4,The glove was noticed by some woman.,The glove was noticed by some mouse.,syntax,s-selection,animate_subject_passive,True,True,False,False,4
...,...,...,...,...,...,...,...,...,...,...
995,This pork was taken by some guy.,This pork was taken by some fish.,syntax,s-selection,animate_subject_passive,True,True,False,False,995
996,Newspaper articles weren't scanned by the men.,Newspaper articles weren't scanned by the phot...,syntax,s-selection,animate_subject_passive,True,True,False,False,996
997,Liam is talked about by the dancers.,Liam is talked about by the university.,syntax,s-selection,animate_subject_passive,True,True,False,False,997
998,Monet is sounded like by the man.,Monet is sounded like by the bread.,syntax,s-selection,animate_subject_passive,True,True,False,False,998


### 2. Prepare argument structure data

In [6]:
def get_main_verb(sentence):
    """Return the text of the single token that spaCy tags as VERB in `sentence`.

    The sentence is parsed with the module-level `nlp` pipeline.

    Raises:
        AssertionError: if the sentence does not contain exactly one VERB token.
    """
    verb_texts = [token.text for token in nlp(sentence) if token.pos_ == "VERB"]

    # the rest of the notebook relies on there being one unambiguous verb
    assert len(verb_texts) == 1

    return verb_texts[0]

In [7]:
def _clean_and_tokenize(sentence):
    """Drop everything except word characters, whitespace and apostrophes,
    expand "n't" into " not", and split the result on single spaces."""
    without_punctuation = re.sub(r"[^\w\s']", "", sentence)
    return without_punctuation.replace("n't", " not").split(" ")


good_bad_sentences = data[["sentence_good", "sentence_bad"]].to_numpy()
extracted_sentences_data = []
dif_lengths = 0

for good_sentence, bad_sentence in tqdm(good_bad_sentences):
    good_tokens = _clean_and_tokenize(good_sentence)
    bad_tokens = _clean_and_tokenize(bad_sentence)

    # pairs with a different number of words are counted, then both sides are
    # truncated to the shorter length (such pairs are NOT dropped)
    if len(good_tokens) != len(bad_tokens):
        dif_lengths += 1
    shared_length = min(len(good_tokens), len(bad_tokens))
    good_tokens = good_tokens[:shared_length]
    bad_tokens = bad_tokens[:shared_length]

    # the verb is extracted from the raw (uncleaned) good sentence
    main_verb = get_main_verb(good_sentence)

    # position of the first word at which the two sentences disagree
    same_tokens = np.array(good_tokens) == np.array(bad_tokens)
    first_diff = np.where(np.logical_not(same_tokens))[0][0]

    # shared prefix of the pair, i.e. everything before the first differing word
    common_tokens = good_tokens[:first_diff]
    common_sentence = " ".join(common_tokens)

    # one-hot vector over the prefix words, with the 1 at the first occurrence of the verb
    known_evidence = np.zeros(len(common_tokens))
    verb_position = np.where(np.array(common_tokens) == main_verb)[0][0]
    known_evidence[verb_position] = 1

    # the words that follow the prefix in the good / bad sentence
    correct_word = good_tokens[first_diff]
    foil_word = bad_tokens[first_diff]

    extracted_sentences_data.append([known_evidence, common_sentence, correct_word, foil_word])

print(f"The number of pairs with different number of words: {dif_lengths}")

100%|██████████| 1000/1000 [00:04<00:00, 219.52it/s]

The number of pairs with different number of words: 12





In [8]:
# instantiate the project-local GPT2Model wrapper (imported from gpt2_model);
# the evaluation loop below calls its get_contrastive_gradient_norm method
gpt2model = GPT2Model()

In [9]:
def match_evidence_with_tokenization(space_tokenization, gpt_tokenization, known_evidence):
    """Expand a per-word evidence vector so it has one entry per GPT token.

    Consecutive GPT tokens are concatenated until they spell the current
    space-separated word; every token that is part of a word receives that
    word's evidence value.

    Args:
        space_tokenization: the sentence split into words.
        gpt_tokenization: the (stripped) tokens of the same sentence; must be non-empty.
        known_evidence: one value per entry of `space_tokenization`.

    Returns:
        A list with one evidence value per entry of `gpt_tokenization`.
    """
    word_position = 0
    expanded_evidence = []
    pending_text = gpt_tokenization[0]

    for upcoming_token in gpt_tokenization[1:]:
        # does the text gathered so far spell the current word in full?
        word_completed = pending_text == space_tokenization[word_position]

        # the token preceding `upcoming_token` belongs to the current word either way
        expanded_evidence.append(known_evidence[word_position])

        if word_completed:
            # start gathering the next word from the upcoming token
            pending_text = upcoming_token
            word_position += 1
        else:
            # word is still incomplete: glue the upcoming token onto it
            pending_text += upcoming_token

    # evidence for the final token, which the loop never emits
    expanded_evidence.append(known_evidence[word_position])

    return expanded_evidence

In [10]:
# number of evaluated sentences whose GPT tokenization differs from the whitespace split
different_tokenization = 0
# one alignment score (dot product of explanation and known evidence) per evaluated sentence
gradient_norm_dot_product = []
gradient_norm_probes_needed = []


for sentence_data in tqdm(extracted_sentences_data[:10]):
    known_evidence, sentence, correct_word, foil_word = sentence_data

    # saliency_map entries are indexed as (token text, saliency score)
    saliency_map = gpt2model.get_contrastive_gradient_norm(sentence, correct_word, foil_word)
    extracted_words = [entry[0].strip() for entry in saliency_map]
    explanation = [entry[1] for entry in saliency_map]

    space_tokens = sentence.split(" ")
    if space_tokens != extracted_words:
        different_tokenization += 1

    # Align the word-level evidence with the GPT tokens exactly ONCE.
    # The alignment indexes `known_evidence` by word position, so feeding an
    # already-expanded vector through it a second time (as the previous version
    # did whenever the tokenizations differed) shifts or drops the evidence 1.
    # When both tokenizations agree the call simply returns the same values.
    updated_known_evidence = match_evidence_with_tokenization(space_tokens, extracted_words, known_evidence)
    gradient_norm_dot_product.append(compute_mean_dot_product([explanation], [updated_known_evidence]))

  gradient_norm = torch.norm(torch.tensor(gradients[i]), p=1)
100%|██████████| 10/10 [00:01<00:00,  5.54it/s]


In [11]:
# average of the per-sentence scores collected in gradient_norm_dot_product
np.array(gradient_norm_dot_product).mean()

0.1572659759070616