### 0. Import libraries

In [5]:
# These two commands are needed
# !pip install transformers
# !pip install accelerate
!pip install spacy
# !python -m spacy download en
# !pip uninstall spacy -y
# !python -m pip install spacy==2.1.0.
# !pip install neuralcoref

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Defaulting to user installation because normal site-packages is not writeable
Collecting pip
  Downloading pip-24.1.1-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-24.1.1-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.0
    Uninstalling pip-24.0:
      Successfully uninstalled pip-24.0
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source o

In [1]:
# Load the autoreload extension so that edits to the local scripts imported
# below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [6]:
# import libraries
import re
import spacy
import torch
# import neuralcoref
import numpy as np
import transformers
import pandas as pd
from tqdm import tqdm
from functools import reduce

# import local scripts
from data_loader import DatasetLoader
from alignment_metrics import *
from gpt2_model import GPT2Model

# produce repeatable results: seed NumPy's global RNG and call the
# transformers seeding helper with the same value
np.random.seed(seed=42)
transformers.set_seed(42)

# enable CUDNN deterministic mode and switch off cuDNN benchmark mode
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# spaCy pipeline shared by the helper functions defined later in the notebook,
# which read its part-of-speech tags (`pos_`) and dependency labels (`dep_`)
nlp = spacy.load("en_core_web_sm")

OSError: /usr/lib/x86_64-linux-gnu/libcuda.so.1: file too short

### 1. Load the data

Below are the names of the datasets used by the authors to check if contrastive explanations identify linguistically appropriate evidence.

In [None]:
# Dataset names grouped by linguistic phenomenon. Later cells index into these
# lists to pick the dataset to load.
anaphor_agreement_datasets = ["anaphor_gender_agreement", "anaphor_number_agreement"]

# TODO: not sure about the one below
argument_structure_datasets = ["animate_subject_passive"]

# Each name appears exactly once (the original list repeated the two
# "with_adj_irregular" entries).
determiner_noun_agreement_datasets = [
    "determiner_noun_agreement_1",
    "determiner_noun_agreement_2",
    "determiner_noun_agreement_irregular_1",
    "determiner_noun_agreement_irregular_2",
    "determiner_noun_agreement_with_adj_2",
    "determiner_noun_agreement_with_adj_irregular_1",
    "determiner_noun_agreement_with_adj_irregular_2",
    "determiner_noun_agreement_with_adjective_1",
]

# NOTE: the variable name is misspelled ("licesing") but is kept as-is because
# a later cell refers to it by this name.
npi_licesing = [
    "npi_present_1",
    "npi_present_2",
]

# A missing comma after the first entry used to make Python concatenate the
# first two string literals into a single, non-existent dataset name.
subject_verb_agreement = [
    "distractor_agreement_relational_noun",
    "irregular_plural_subject_verb_agreement_1",
    "irregular_plural_subject_verb_agreement_2",
    "regular_plural_subject_verb_agreement_1",
    "regular_plural_subject_verb_agreement_2",
]

In [None]:
# Load the first argument-structure dataset and keep its "train" split as a DataFrame.
data_loader = DatasetLoader()
data = pd.DataFrame(data_loader.load_data(argument_structure_datasets[0])["train"])
# Show the frame as the cell output.
data

### 2. Prepare argument structure data

In [None]:
def get_main_verb(sentence):
    """Return the text of the single token in `sentence` that spaCy tags as VERB.

    The sentence is parsed with the module-level `nlp` pipeline. An
    AssertionError is raised unless exactly one token has `pos_ == "VERB"`.
    """
    verb_texts = [token.text for token in nlp(sentence) if token.pos_ == "VERB"]

    # the caller relies on there being one unambiguous main verb
    assert len(verb_texts) == 1

    return verb_texts[0]

In [None]:
# Build one record per sentence pair:
# [known_evidence, common_sentence, correct_word, foil_word]
extracted_sentences_data = []
good_bad_sentences = data[["sentence_good", "sentence_bad"]].to_numpy()
dif_lengths = 0

for good_sentence, bad_sentence in tqdm(good_bad_sentences):
    # drop everything except word characters, whitespace and apostrophes,
    # expand "n't" to " not", then split on single spaces
    good_sentence_tokenized, bad_sentence_tokenized = (
        re.sub(r"[^\w\s']", "", raw_sentence).replace("n't", " not").split(" ")
        for raw_sentence in (good_sentence, bad_sentence)
    )

    # pairs with a different number of words are counted, and both sentences
    # are then truncated to the length of the shorter one
    if len(good_sentence_tokenized) != len(bad_sentence_tokenized):
        dif_lengths += 1
    shared_length = min(len(good_sentence_tokenized), len(bad_sentence_tokenized))
    good_sentence_tokenized = good_sentence_tokenized[:shared_length]
    bad_sentence_tokenized = bad_sentence_tokenized[:shared_length]

    # the main verb is taken from the raw (uncleaned) good sentence
    main_verb = get_main_verb(good_sentence)

    # position of the first word on which the two sentences disagree
    tokens_differ = np.array(good_sentence_tokenized) != np.array(bad_sentence_tokenized)
    index_first_diff_token = np.nonzero(tokens_differ)[0][0]

    # shared prefix of the two sentences (everything before that word)
    common_sentence_tokenized = good_sentence_tokenized[:index_first_diff_token]
    common_sentence = " ".join(common_sentence_tokenized)

    # one-hot vector over the prefix words marking the first occurrence of the main verb
    known_evidence = np.zeros(len(common_sentence_tokenized))
    verb_position = np.nonzero(np.array(common_sentence_tokenized) == main_verb)[0][0]
    known_evidence[verb_position] = 1

    # the word from the good sentence and its counterpart from the bad sentence
    correct_word = good_sentence_tokenized[index_first_diff_token]
    foil_word = bad_sentence_tokenized[index_first_diff_token]

    extracted_sentences_data.append([known_evidence, common_sentence, correct_word, foil_word])

print(f"The number of pairs with different number of words: {dif_lengths}")

In [None]:
# Instantiate the GPT2Model wrapper imported from the local gpt2_model script.
gpt2model = GPT2Model()

In [None]:
def match_evidence_with_tokenization(space_tokenization, gpt_tokenization, known_evidence):
    """Expand word-level evidence labels to one label per GPT sub-token.

    `space_tokenization` holds the whitespace-split words, `gpt_tokenization`
    the (stripped) sub-token strings, and `known_evidence` one label per word.
    Sub-tokens are concatenated until they spell the current word; every
    sub-token receives the label of the word it belongs to.

    Returns a list with `len(gpt_tokenization)` labels. The two tokenizations
    are expected to spell the same text; `gpt_tokenization` must be non-empty.
    """
    word_index = 0
    expanded_evidence = []
    partial_word = gpt_tokenization[0]

    for next_position in range(1, len(gpt_tokenization)):
        # the sub-token just before `next_position` belongs to the current word
        word_is_complete = partial_word == space_tokenization[word_index]
        expanded_evidence.append(known_evidence[word_index])

        if word_is_complete:
            # start spelling the next word from the upcoming sub-token
            partial_word = gpt_tokenization[next_position]
            word_index += 1
        else:
            # keep gluing sub-tokens onto the current word
            partial_word += gpt_tokenization[next_position]

    # label for the final sub-token
    expanded_evidence.append(known_evidence[word_index])

    return expanded_evidence

In [None]:
# Score contrastive gradient-norm explanations against the known evidence.
different_tokenization = 0         # sentences whose GPT tokens differ from the space-split words
gradient_norm_dot_product = []     # one alignment score per evaluated sentence
gradient_norm_probes_needed = []   # declared for later use; not filled in this cell


# NOTE: only the first 10 extracted sentences are evaluated here.
for sentence_data in tqdm(extracted_sentences_data[:10]):
    known_evidence, sentence, correct_word, foil_word = sentence_data

    # each saliency entry is indexed as [0] -> token text, [1] -> score
    saliency_map = gpt2model.get_contrastive_gradient_norm(sentence, correct_word, foil_word)
    extracted_words = [entry[0].strip() for entry in saliency_map]
    explanation = [entry[1] for entry in saliency_map]

    space_tokens = sentence.split(" ")
    if space_tokens != extracted_words:
        different_tokenization += 1

    # Expand the word-level evidence exactly once. The previous version expanded
    # it a second time whenever the tokenizations differed, which indexed the
    # already-expanded vector by word position and produced wrong labels.
    updated_known_evidence = match_evidence_with_tokenization(space_tokens, extracted_words, known_evidence)
    gradient_norm_dot_product.append(compute_mean_dot_product([explanation], [updated_known_evidence]))

In [None]:
# Average of the per-sentence scores collected in gradient_norm_dot_product.
np.array(gradient_norm_dot_product).mean()

In [None]:
# Load the first determiner-noun agreement dataset.
# Note: `data` now holds the loader's return value (indexed by split name in
# the following cells), not a DataFrame.
data_loader = DatasetLoader()
data = data_loader.load_data(determiner_noun_agreement_datasets[0])

In [None]:
# Display the "train" split as a DataFrame.
pd.DataFrame(data["train"])

In [None]:
def extract_determiner_from_target_noun(text, target_noun):
    """Return the determiner attached to `target_noun` in `text`, if any.

    The text is parsed with the module-level `nlp` pipeline. Tokens are
    scanned in order; for every token tagged NOUN whose text matches
    `target_noun` case-insensitively, its children are searched for one with
    the dependency label "det". The text of the first such child is returned,
    or None when no match is found.
    """
    doc = nlp(text)

    determiners = (
        child.text
        for token in doc
        if token.text.lower() == target_noun.lower() and token.pos_ == "NOUN"
        for child in token.children
        if child.dep_ == "det"
    )
    return next(determiners, None)

In [None]:
# Example sentence and target noun for trying out extract_determiner_from_target_noun.
text = "Those people sound like this art gallery."
target_noun = "gallery"

In [None]:
# Run the determiner extraction on the example defined in the previous cell.
extract_determiner_from_target_noun(text, target_noun)

In [None]:
# String representation of the `sentence_good` column of the "train" split.
str(pd.DataFrame(data["train"]).sentence_good)

In [None]:
# Load the first NPI dataset ("npi_present_1").
data_loader = DatasetLoader()
data = data_loader.load_data(npi_licesing[0])

In [None]:
# Count how many `sentence_good` values in the "train" split start with "Even".
pd.DataFrame(data["train"])['sentence_good'].apply(lambda x: str(x).startswith("Even")).value_counts()

In [None]:
# so for NPI the only underlined word will be "Even",
# which is the first one for all the examples in our dataset

In [None]:
# Load the second entry of the subject-verb agreement list.
# NOTE(review): which dataset name index 1 resolves to depends on the exact
# contents of `subject_verb_agreement`; confirm it is the intended one.
data_loader = DatasetLoader()
data = data_loader.load_data(subject_verb_agreement[1])

In [None]:
# Display the "train" split as a DataFrame.
pd.DataFrame(data["train"])

In [None]:
def extract_subject_from_target_verb(text, target_verb):
    """Return the subject attached to `target_verb` in `text`, if any.

    The text is parsed with the module-level `nlp` pipeline. Tokens are
    scanned in order; for every token tagged VERB or AUX whose text matches
    `target_verb` case-insensitively, its children are searched for one with
    the dependency label "nsubj" or "nsubjpass". The text of the first such
    child is returned, or None when no match is found.
    """
    subjects = (
        child.text
        for token in nlp(text)
        if token.text.lower() == target_verb.lower() and token.pos_ in ("VERB", "AUX")
        for child in token.children
        if child.dep_ in {"nsubj", "nsubjpass"}
    )
    return next(subjects, None)

In [None]:
def get_verb_from_sentence(text):
    """Return the text of the first token in `text` tagged VERB, or None.

    The text is parsed with the module-level `nlp` pipeline. Unlike
    `get_main_verb`, this does not require the sentence to contain exactly
    one verb.
    """
    doc = nlp(text)
    for token in doc:
        # The earlier `token.text.lower() and ...` condition only tested the
        # truthiness of a non-empty string, so it never filtered anything.
        if token.pos_ == "VERB":
            return token.text
    return None

In [None]:
# Example: find the verb of the sentence, then look up that verb's subject.
text = "The alumni hate all ladies."
extract_subject_from_target_verb(text, get_verb_from_sentence(text))

In [4]:
# Load the first anaphor agreement dataset ("anaphor_gender_agreement").
# Requires the import cell at the top of the notebook to have been run,
# since DatasetLoader comes from the local data_loader script.
data_loader = DatasetLoader()
data = data_loader.load_data(anaphor_agreement_datasets[0])

NameError: name 'DatasetLoader' is not defined

In [5]:
# Display the "train" split as a DataFrame (needs pandas imported as `pd`).
pd.DataFrame(data["train"])

NameError: name 'pd' is not defined

In [6]:
def extract_reflexive_antecedent(text, target_reflexive):
    """Return the subject noun that `target_reflexive` refers back to, or None.

    The text is parsed with the module-level `nlp` pipeline.

    1. Find the first token whose text matches `target_reflexive`
       case-insensitively and whose dependency label is "dobj" (direct
       object) or "pobj" (object of a preposition).
    2. Take the word governing the reflexive. For a "pobj" the direct head is
       the preposition, so prepositions (label "prep") are skipped upwards
       until their own head is reached.
    3. Among the tokens to the left of that governor, return the text of the
       first one labelled "nsubj"/"nsubjpass" and tagged as a common or
       proper noun.

    Returns None when the reflexive or a suitable subject is not found.
    """
    doc = nlp(text)

    reflexive = target_reflexive.lower()
    target_token = next(
        (token for token in doc if token.text.lower() == reflexive and token.dep_ in {"pobj", "dobj"}),
        None,
    )

    # explicit None check: Token truthiness is not a reliable "found" signal
    if target_token is None:
        return None

    # Climb from a preposition to the word it attaches to; the subject is a
    # dependent of that word, not of the preposition. The index comparison
    # stops the walk at the root, whose head is itself.
    governor = target_token.head
    while governor.dep_ == "prep" and governor.head.i != governor.i:
        governor = governor.head

    for token in governor.lefts:
        # PROPN is accepted alongside NOUN so that proper-name subjects
        # are also recognised as antecedents.
        if token.dep_ in {"nsubj", "nsubjpass"} and token.pos_ in {"NOUN", "PROPN"}:
            return token.text

    return None

# Example: look up the antecedent of "themselves" and print the result.
text = "Many teenagers were helping themselves."
target_reflexive = "themselves"
antecedent = extract_reflexive_antecedent(text, target_reflexive)

print(f"The antecedent for '{target_reflexive}' is: {antecedent}")

NameError: name 'nlp' is not defined