### 0. Import libraries

In [9]:
# These two commands are needed
# !pip install transformers
# !pip install accelerate
# !pip install spacy
# !python -m spacy download en
# !pip uninstall spacy -y
# !python -m pip install spacy==2.1.0.
# !pip install neuralcoref

In [2]:
%load_ext autoreload
%autoreload 2

In [10]:
# import libraries
import re
import spacy
import torch
# import neuralcoref
import numpy as np
import transformers
import pandas as pd
from tqdm import tqdm
from functools import reduce

# import local scripts
from data_loader import DatasetLoader
from alignment_metrics import *
from gpt2_model import GPT2Model

# produce repeatable results
np.random.seed(seed=42)
transformers.set_seed(42)

# enable CUDNN deterministic mode
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

nlp = spacy.load("en_core_web_sm")

  torch.utils._pytree._register_pytree_node(
  torch.utils._pytree._register_pytree_node(
2024-06-29 23:07:02.871429: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-06-29 23:07:03.698557: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
2024-06-29 23:07:03.698663: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directo

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/home/jovyan/.local/lib/python3.8/site-packages/spacy/util.py", line 78, in get_lang_class
    module = importlib.import_module(".lang.%s" % lang, "spacy")
  File "/usr/lib/python3.8/importlib/__init__.py", line 127, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
  File "<frozen importlib._bootstrap>", line 1014, in _gcd_import
  File "<frozen importlib._bootstrap>", line 991, in _find_and_load
  File "<frozen importlib._bootstrap>", line 975, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 671, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 848, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/home/jovyan/.local/lib/python3.8/site-packages/spacy/lang/en/__init__.py", line 4, in <module>
    from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
  File "/home/jovyan/.local/lib/python3.8/site-packages/spacy/la

### 1. Load the data

Below are the names of the datasets used by the authors to check if contrastive explanations identify linguistically appropriate evidence.

In [5]:
anaphor_agreement_datasets = ["anaphor_gender_agreement", "anaphor_number_agreement"]

# not sure about the one below
argument_structure_datasets = ["animate_subject_passive"]

determiner_noun_agreement_datasets = [
    "determiner_noun_agreement_1",
    "determiner_noun_agreement_2",
    "determiner_noun_agreement_irregular_1",
    "determiner_noun_agreement_irregular_2",
    "determiner_noun_agreement_with_adj_2",
    "determiner_noun_agreement_with_adj_irregular_1",
    "determiner_noun_agreement_with_adj_irregular_2",
    "determiner_noun_agreement_with_adjective_1",
    "determiner_noun_agreement_with_adj_irregular_1",
    "determiner_noun_agreement_with_adj_irregular_2"
]

npi_licesing = [
    "npi_present_1",
    "npi_present_2"
]

subject_verb_agreement = [
    "distractor_agreement_relational_noun"
    "irregular_plural_subject_verb_agreement_1",
    "irregular_plural_subject_verb_agreement_2",
    "regular_plural_subject_verb_agreement_1",
    "regular_plural_subject_verb_agreement_2",
]

In [6]:
data_loader = DatasetLoader()
data = pd.DataFrame(data_loader.load_data(argument_structure_datasets[0])["train"])
data

Downloading data:   0%|          | 0.00/47.3k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

The dataset is stored at ./data/


Unnamed: 0,sentence_good,sentence_bad,field,linguistics_term,UID,simple_LM_method,one_prefix_method,two_prefix_method,lexically_identical,pair_id
0,Amanda was respected by some waitresses.,Amanda was respected by some picture.,syntax,s-selection,animate_subject_passive,True,True,False,False,0
1,Some lake was passed by some cashiers.,Some lake was passed by some phenomena.,syntax,s-selection,animate_subject_passive,True,True,False,False,1
2,Lisa was kissed by the boys.,Lisa was kissed by the blouses.,syntax,s-selection,animate_subject_passive,True,True,False,False,2
3,Amanda isn't respected by the children.,Amanda isn't respected by the cups.,syntax,s-selection,animate_subject_passive,True,True,False,False,3
4,The glove was noticed by some woman.,The glove was noticed by some mouse.,syntax,s-selection,animate_subject_passive,True,True,False,False,4
...,...,...,...,...,...,...,...,...,...,...
995,This pork was taken by some guy.,This pork was taken by some fish.,syntax,s-selection,animate_subject_passive,True,True,False,False,995
996,Newspaper articles weren't scanned by the men.,Newspaper articles weren't scanned by the phot...,syntax,s-selection,animate_subject_passive,True,True,False,False,996
997,Liam is talked about by the dancers.,Liam is talked about by the university.,syntax,s-selection,animate_subject_passive,True,True,False,False,997
998,Monet is sounded like by the man.,Monet is sounded like by the bread.,syntax,s-selection,animate_subject_passive,True,True,False,False,998


### 2. Prepare anaphor agreement data

In [7]:
def get_main_verb(sentence):
    doc = nlp(sentence)
    main_verbs = []

    for token in doc:
        if token.pos_ == "VERB":
            main_verbs.append(token.text)

    assert len(main_verbs) == 1

    return main_verbs[0]

In [8]:
extracted_sentences_data = []
good_bad_sentences = data[["sentence_good", "sentence_bad"]].to_numpy()
dif_lengths = 0

for good_sentence, bad_sentence in tqdm(good_bad_sentences):
    # remove characters that are not letters or apostrophes and replace contractions
    good_sentence_cleaned = re.sub(r"[^\w\s']", "", good_sentence).replace("n't", " not")
    bad_sentence_cleaned = re.sub(r"[^\w\s']", "", bad_sentence).replace("n't", " not")

    # tokenize the text
    good_sentence_tokenized = good_sentence_cleaned.split(" ")
    bad_sentence_tokenized = bad_sentence_cleaned.split(" ")

    # consider only pairs of sentences with the same number of words
    number_considered_words = min(len(good_sentence_tokenized), len(bad_sentence_tokenized))
    if len(good_sentence_tokenized) != len(bad_sentence_tokenized):
        dif_lengths += 1

    good_sentence_tokenized = good_sentence_tokenized[:number_considered_words]
    bad_sentence_tokenized = bad_sentence_tokenized[:number_considered_words]

    # get the main verb of the sentence
    main_verb = get_main_verb(good_sentence)

    # get the common part of the two sentences (until the first different word)
    same_tokens = np.array(good_sentence_tokenized) == np.array(bad_sentence_tokenized)
    index_first_diff_token = np.where(same_tokens == False)[0][0]

    common_sentence_tokenized = good_sentence_tokenized[:index_first_diff_token]
    common_sentence = " ".join(common_sentence_tokenized)

    # construct an array where 1s represent the position of the target word
    known_evidence = np.zeros(len(common_sentence_tokenized))
    evidence_index = np.where(np.array(common_sentence_tokenized) == main_verb)[0][0]
    known_evidence[evidence_index] = 1

    # get the correct and foil words
    correct_word = good_sentence_tokenized[index_first_diff_token]
    foil_word = bad_sentence_tokenized[index_first_diff_token]

    extracted_sentences_data.append([known_evidence, common_sentence, correct_word, foil_word])

print(f"The number of pairs with different number of words: {dif_lengths}")

100%|██████████| 1000/1000 [00:03<00:00, 269.30it/s]

The number of pairs with different number of words: 12





In [9]:
gpt2model = GPT2Model()



In [10]:
def match_evidence_with_tokenization(space_tokenization, gpt_tokenization, known_evidence):
    # this function adds 0s or 1s in the known evidence to match the gpt tokens

    space_tokenization_index = 0
    updated_known_evidence = []
    accumulated_string = gpt_tokenization[0]

    for gpt_tokenization_index, gpt_token in enumerate(gpt_tokenization[1:]):
        if accumulated_string == space_tokenization[space_tokenization_index]:
            updated_known_evidence.append(known_evidence[space_tokenization_index])
            gpt_tokenization_index += 1
            accumulated_string = gpt_tokenization[gpt_tokenization_index]
            space_tokenization_index += 1

        else:
            accumulated_string += gpt_token
            updated_known_evidence.append(known_evidence[space_tokenization_index])

    updated_known_evidence.append(known_evidence[space_tokenization_index])

    return updated_known_evidence

In [11]:
different_tokenization = 0
gradient_norm_dot_product = []
gradient_norm_probes_needed = []


for sentence_data in tqdm(extracted_sentences_data[:10]):
    known_evidence = sentence_data[0]
    sentence = sentence_data[1]
    correct_word = sentence_data[2]
    foil_word = sentence_data[3]

    saliency_map = gpt2model.get_contrastive_gradient_norm(sentence, correct_word, foil_word)
    extracted_words = [explanation[0].strip() for explanation in saliency_map]
    explanation = [explanation[1] for explanation in saliency_map]

    if sentence.split(" ") != extracted_words:
        known_evidence = match_evidence_with_tokenization(sentence.split(" "), extracted_words, known_evidence)

    updated_known_evidence = match_evidence_with_tokenization(sentence.split(" "), extracted_words, known_evidence)
    gradient_norm_dot_product.append(compute_mean_dot_product([explanation], [updated_known_evidence]))

  gradient_norm = torch.norm(torch.tensor(gradients[i]), p=1)
100%|██████████| 10/10 [00:03<00:00,  3.22it/s]


In [12]:
np.array(gradient_norm_dot_product).mean()

0.1572659960258419

In [13]:
data_loader = DatasetLoader()
data = data_loader.load_data(determiner_noun_agreement_datasets[0])

Generating train split: 0 examples [00:00, ? examples/s]

In [14]:
pd.DataFrame(data["train"])

Unnamed: 0,sentence_good,sentence_bad,field,linguistics_term,UID,simple_LM_method,one_prefix_method,two_prefix_method,lexically_identical,pair_id
0,Raymond is selling this sketch.,Raymond is selling this sketches.,morphology,determiner_noun_agreement,determiner_noun_agreement_1,True,True,False,True,0
1,Craig explored that grocery store.,Craig explored that grocery stores.,morphology,determiner_noun_agreement,determiner_noun_agreement_1,True,True,False,True,1
2,Eva has scared these children.,Eva has scared these child.,morphology,determiner_noun_agreement,determiner_noun_agreement_1,True,True,False,True,2
3,Marcus would conceal that pamphlet.,Marcus would conceal that pamphlets.,morphology,determiner_noun_agreement,determiner_noun_agreement_1,True,True,False,True,3
4,Carmen hadn't shocked these customers.,Carmen hadn't shocked these customer.,morphology,determiner_noun_agreement,determiner_noun_agreement_1,True,True,False,True,4
...,...,...,...,...,...,...,...,...,...,...
995,Most gloves disgusted this actress.,Most gloves disgusted this actresses.,morphology,determiner_noun_agreement,determiner_noun_agreement_1,True,True,False,True,995
996,Some pasta had disturbed these museums.,Some pasta had disturbed these museum.,morphology,determiner_noun_agreement,determiner_noun_agreement_1,True,True,False,True,996
997,Those people sound like this art gallery.,Those people sound like this art galleries.,morphology,determiner_noun_agreement,determiner_noun_agreement_1,True,True,False,True,997
998,Many snakes haven't broken these chairs.,Many snakes haven't broken these chair.,morphology,determiner_noun_agreement,determiner_noun_agreement_1,True,True,False,True,998


In [53]:
def extract_determiner_from_target_noun(text, target_noun):
    doc = nlp(text)

    for token in doc:
        if token.text.lower() == target_noun.lower() and token.pos_ == "NOUN":
            for child in token.children:
                if child.dep_ == "det":
                    return child.text
    return None

In [54]:
text = "Those people sound like this art gallery."
target_noun = "gallery"

In [55]:
extract_determiner_from_target_noun(text, target_noun)

'this'

In [60]:
str(pd.DataFrame(data["train"]).sentence_good)

0                          The women meet.
1         The child isn't attacking Becky.
2      The woman cleans every public park.
3      The man works with some pedestrian.
4         The men have revealed Christina.
                      ...                 
995                    The children smile.
996                   The child scratches.
997             The child discovers Steve.
998            The alumni hate all ladies.
999             The woman hasn't departed.
Name: sentence_good, Length: 1000, dtype: object

In [36]:
data_loader = DatasetLoader()
data = data_loader.load_data(npi_licesing[0])

Generating train split: 0 examples [00:00, ? examples/s]

In [46]:
pd.DataFrame(data["train"])['sentence_good'].apply(lambda x: str(x).startswith("Even")).value_counts()

sentence_good
True    1000
Name: count, dtype: int64

In [47]:
# so for NPI the only underlined word will be "Even",
# which is the first one for all the examples in our dataset

In [51]:
data_loader = DatasetLoader()
data = data_loader.load_data(subject_verb_agreement[1])

Downloading data:   0%|          | 0.00/42.7k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

The dataset is stored at ./data/


In [52]:
pd.DataFrame(data["train"])

Unnamed: 0,sentence_good,sentence_bad,field,linguistics_term,UID,simple_LM_method,one_prefix_method,two_prefix_method,lexically_identical,pair_id
0,The women meet.,The woman meet.,morphology,subject_verb_agreement,irregular_plural_subject_verb_agreement_2,True,False,True,False,0
1,The child isn't attacking Becky.,The children isn't attacking Becky.,morphology,subject_verb_agreement,irregular_plural_subject_verb_agreement_2,True,False,True,False,1
2,The woman cleans every public park.,The women cleans every public park.,morphology,subject_verb_agreement,irregular_plural_subject_verb_agreement_2,True,False,True,False,2
3,The man works with some pedestrian.,The men works with some pedestrian.,morphology,subject_verb_agreement,irregular_plural_subject_verb_agreement_2,True,False,True,False,3
4,The men have revealed Christina.,The man have revealed Christina.,morphology,subject_verb_agreement,irregular_plural_subject_verb_agreement_2,True,False,True,False,4
...,...,...,...,...,...,...,...,...,...,...
995,The children smile.,The child smile.,morphology,subject_verb_agreement,irregular_plural_subject_verb_agreement_2,True,False,True,False,995
996,The child scratches.,The children scratches.,morphology,subject_verb_agreement,irregular_plural_subject_verb_agreement_2,True,False,True,False,996
997,The child discovers Steve.,The children discovers Steve.,morphology,subject_verb_agreement,irregular_plural_subject_verb_agreement_2,True,False,True,False,997
998,The alumni hate all ladies.,The alumnus hate all ladies.,morphology,subject_verb_agreement,irregular_plural_subject_verb_agreement_2,True,False,True,False,998


In [112]:
def extract_subject_from_target_verb(text, target_verb):
    doc = nlp(text)
    for token in doc:
        if token.text.lower() == target_verb.lower() and (token.pos_ == "VERB" or token.pos_ == "AUX"):
            for child in token.children:
                if child.dep_ in {"nsubj", "nsubjpass"}:
                    return child.text
    return None

In [113]:
def get_verb_from_sentence(text):
    doc = nlp(text)
    for token in doc:
        if token.text.lower() and token.pos_ == "VERB":
            return token.text
    return None

In [116]:
text = "The alumni hate all ladies."
extract_subject_from_target_verb(text, get_verb_from_sentence(text))

'alumni'

In [118]:
data_loader = DatasetLoader()
data = data_loader.load_data(anaphor_agreement_datasets[0])

Generating train split: 0 examples [00:00, ? examples/s]

In [119]:
pd.DataFrame(data["train"])

Unnamed: 0,sentence_good,sentence_bad,field,linguistics_term,UID,simple_LM_method,one_prefix_method,two_prefix_method,lexically_identical,pair_id
0,Katherine can't help herself.,Katherine can't help himself.,morphology,anaphor_agreement,anaphor_gender_agreement,True,True,False,False,0
1,Karla could listen to herself.,Karla could listen to himself.,morphology,anaphor_agreement,anaphor_gender_agreement,True,True,False,False,1
2,Marie won't think about herself.,Marie won't think about itself.,morphology,anaphor_agreement,anaphor_gender_agreement,True,True,False,False,2
3,Mark hasn't discussed himself.,Mark hasn't discussed itself.,morphology,anaphor_agreement,anaphor_gender_agreement,True,True,False,False,3
4,Stephen impressed himself.,Stephen impressed itself.,morphology,anaphor_agreement,anaphor_gender_agreement,True,True,False,False,4
...,...,...,...,...,...,...,...,...,...,...
995,Carlos complained about himself.,Carlos complained about itself.,morphology,anaphor_agreement,anaphor_gender_agreement,True,True,False,False,995
996,Benjamin is firing himself.,Benjamin is firing itself.,morphology,anaphor_agreement,anaphor_gender_agreement,True,True,False,False,996
997,Chad kisses himself.,Chad kisses herself.,morphology,anaphor_agreement,anaphor_gender_agreement,True,True,False,False,997
998,Paula was arguing about herself.,Paula was arguing about himself.,morphology,anaphor_agreement,anaphor_gender_agreement,True,True,False,False,998


In [12]:
# neuralcoref.add_to_pipe(nlp)


def extract_coreferents(text, target_token_text):
    doc = nlp(text)

    target_token = None
    for token in doc:
        if token.text.lower() == target_token_text.lower():
            target_token = token
            break

    if not target_token:
        return []

    coref_clusters = doc._.coref_clusters

    coreferents = set()

    for cluster in coref_clusters:
        if target_token in cluster:
            for mention in cluster.mentions:
                if mention != target_token:
                    coreferents.update([token.text for token in mention])

    return list(coreferents)