<a href="https://colab.research.google.com/github/DreRnc/ExplainingExplanations/blob/ModData/Base.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Dataset : **E-SNLI**. \
Model : **Base T5**.

In [None]:
# Enable IPython's autoreload extension (mode 2) so edited project modules are re-imported automatically.
%load_ext autoreload
%autoreload 2
# Switch read by the next cell: when True, the repository is cloned and its requirements are installed.
colab = False

In [None]:
# When running on Colab, fetch the project sources and install their requirements.
if colab:
    !git clone https://github.com/DreRnc/ExplainingExplanations.git
    %cd ExplainingExplanations
    # NOTE(review): the Colab badge at the top of the notebook links to the ModData branch,
    # while this checks out seq2seq — confirm which branch is intended.
    !git checkout seq2seq
    %pip install -r requirements_colab.txt
    

# 1.0 Preparation


Set parameters for the experiments.

In [None]:
# Checkpoint name handed to every `from_pretrained` call in this notebook.
MODEL = 't5-small'

# Per-split sizes forwarded to `prepare_dataset`.
sizes = dict(n_train=500000, n_val=9842, n_test=9824)

# Whether to use the mnli prompt on which the model is pretrained or not.
USE_MNLI_PROMPT = False
# Forwarded to the explanation tokenizers/metric transforms; also selects the "b" output directories.
EXPLANATION_FIRST = False
# Number of training epochs used by every fine-tuning task.
NUM_EPOCHS = 9

## 1.1 Loading Tokenizer

In [None]:
from transformers import T5Tokenizer

# Load the tokenizer that matches the MODEL checkpoint; it is shared by every task below.
# NOTE(review): `truncation` and `padding` are normally arguments of the tokenizer *call*,
# not of `from_pretrained` — confirm they have any effect here.
tokenizer = T5Tokenizer.from_pretrained(MODEL, truncation=True, padding=True)

## 1.2 Loading and Tokenizing Dataset

In [None]:
from datasets import load_dataset
from src.preprocess import prepare_dataset
from functools import partial
from src.utils import tokenize_function

In [None]:
# Load the e-SNLI dataset from the Hugging Face hub.
# NOTE(review): `force_redownload` bypasses the local cache on every run — confirm this is still needed.
dataset = load_dataset("esnli", download_mode="force_redownload")

In [None]:
# Tokenization callable for the label-only tasks, with the tokenizer and prompt choice bound in.
tokenize_mapping = partial(
    tokenize_function,
    tokenizer=tokenizer,
    use_mnli_format=USE_MNLI_PROMPT,
)

In [None]:
# Build the tokenized train / validation / test splits used by Tasks 1 and 2.
train_tok, valid_tok, test_tok = prepare_dataset(
    dataset=dataset,
    tokenize_mapping=tokenize_mapping,
    sizes=sizes,
)

## 1.3 Loading SBERT for evaluating sentence similarity

In [None]:
from sentence_transformers import SentenceTransformer

In [None]:
# Sentence-embedding model; later wrapped by SbertMetric to score generated explanations.
sbert = SentenceTransformer('all-MiniLM-L6-v2')

# 2.0 Tasks

In [None]:
import torch
from functools import partial
import evaluate
from src.utils import compute_metrics, eval_pred_transform_accuracy
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer, T5ForConditionalGeneration, DataCollatorForSeq2Seq


In [None]:
import os
# Record which model this run uses at the end of the shared results log.
# Append mode creates the file when it does not exist yet, so no existence
# check (and no separate 'w' branch) is needed.
with open("results.txt", 'a') as results_log:
    results_log.write("Model :" + MODEL + '\n')

In [None]:
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Display the selected device as the cell output.
device

In [None]:
# Bind the tokenizer into the accuracy transform ...
transform_accuracy = partial(
    eval_pred_transform_accuracy,
    tokenizer=tokenizer,
)
# ... then bind that transform and the `accuracy` metric into the metrics callback
# given to the label-only trainers.
compute_accuracy = partial(
    compute_metrics,
    pred_transforms=transform_accuracy,
    metrics=evaluate.load('accuracy'),
)

In [None]:
# Keyword arguments unpacked into every Seq2SeqTrainingArguments in this notebook.
standard_args = dict(
    # Checkpointing: save on a fixed step schedule, keep at most two checkpoints,
    # and reload the checkpoint with the highest accuracy when training ends.
    save_strategy="steps",
    save_steps=4688,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    # Evaluation runs on the same step interval as saving.
    evaluation_strategy="steps",
    eval_steps=4688,
    # Evaluate by generating text; batch sizes per device.
    predict_with_generate=True,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
)

## 2.1 Task 1: Zero-shot evaluation

In [None]:
# Output directory for Task 1, suffixed with the model name.
directory_1 = f"task1_{MODEL}"

In [None]:
# Append the Task 1 section header to the shared results log.
with open("results.txt", mode='a') as results_log:
    results_log.write("\nTask 1 : Zero-shot\n")

In [None]:
# Pretrained T5 used as-is for the zero-shot evaluation, plus its seq2seq batch collator.
model = T5ForConditionalGeneration.from_pretrained(MODEL)
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
)

In [None]:
# Arguments for the zero-shot trainer: shared settings plus the Task 1 output directory.
# No epoch count is set here; generation at evaluation time is capped at 32 tokens.
training_args = Seq2SeqTrainingArguments(
    output_dir=directory_1,
    generation_max_length=32,
    **standard_args,
)

In [None]:
# Zero-shot trainer. It receives the training split, but only `evaluate` is called
# on it in the cells visible here.
trainer = Seq2SeqTrainer(
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    args=training_args,
    compute_metrics=compute_accuracy,
    train_dataset=train_tok,
    eval_dataset=valid_tok,
)

In [None]:
# Evaluate the pretrained (not fine-tuned) model on the test split; the result is shown as the cell output.
trainer.evaluate(test_tok)

## 2.2 Task 2: Fine tuning without explanations

In [None]:
# Output directory for Task 2, suffixed with the model name.
directory_2 = f"task2_{MODEL}"

In [None]:
# Append the Task 2 section header to the shared results log.
with open("results.txt", mode='a') as results_log:
    results_log.write("\nTask 2 : Fine-tune without explanations\n")

In [None]:
# Fresh pretrained T5 for label-only fine-tuning, plus its seq2seq batch collator.
model_ft = T5ForConditionalGeneration.from_pretrained(MODEL)
data_collator_ft = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model_ft,
)

In [None]:
# Task 2 training arguments: shared settings, NUM_EPOCHS epochs, and a
# 32-token cap on generated outputs.
training_args_ft = Seq2SeqTrainingArguments(
    output_dir=directory_2,
    num_train_epochs=NUM_EPOCHS,
    generation_max_length=32,
    **standard_args,
)

In [None]:
# Trainer for label-only fine-tuning; validation accuracy is computed through `compute_accuracy`.
trainer_ft = Seq2SeqTrainer(
    model=model_ft,
    tokenizer=tokenizer,
    data_collator=data_collator_ft,
    args=training_args_ft,
    compute_metrics=compute_accuracy,
    train_dataset=train_tok,
    eval_dataset=valid_tok,
)

In [None]:
import os

from transformers.trainer_utils import get_last_checkpoint

# Fine-tune for Task 2, resuming from the newest checkpoint in the output directory
# when one exists. Passing `resume_from_checkpoint=True` unconditionally makes the
# Trainer raise a ValueError on a first run, when no checkpoint has been saved yet;
# passing None instead starts training from scratch.
last_checkpoint_ft = (
    get_last_checkpoint(directory_2) if os.path.isdir(directory_2) else None
)
trainer_ft.train(resume_from_checkpoint=last_checkpoint_ft)

In [None]:
# Save the model currently held by the Task 2 trainer under <output dir>/best_model.
best_model_dir = f"{directory_2}/best_model"
trainer_ft.save_model(best_model_dir)

In [None]:
# Mark the start of the test-set results in the shared log.
with open("results.txt", mode='a') as results_log:
    results_log.write("Test: \n")

In [None]:
# Evaluate the fine-tuned Task 2 model on the test split; the result is shown as the cell output.
trainer_ft.evaluate(test_tok)

## 2.3 Task 3: Fine Tuning with Explanations

In [None]:
# Output directory for Task 3; the "b" variant is used when EXPLANATION_FIRST is set.
directory_3 = ("task3b_" if EXPLANATION_FIRST else "task3_") + MODEL

The training targets must contain both the class label and the explanation, tokenized.

In [None]:
# Append the Task 3 section header to the shared results log.
with open("results.txt", mode='a') as results_log:
    results_log.write("\nTask 3 : Fine-tune with explanations\n")

### Preparing the dataset with labelled explanations

In [None]:
from src.utils import tokenize_function_ex

In [None]:
# Load a separate copy of e-SNLI for the explanation tasks.
# The dataset has already been fetched earlier in this notebook, so reuse the
# local cache instead of forcing another full download.
dataset_explanations = load_dataset("esnli")

In [None]:
# Tokenization callable for the explanation tasks, with tokenizer, prompt choice
# and label/explanation ordering bound in.
tokenize_mapping_ex = partial(
    tokenize_function_ex,
    tokenizer=tokenizer,
    use_mnli_format=USE_MNLI_PROMPT,
    explanation_first=EXPLANATION_FIRST,
)

In [None]:
# Build the tokenized train / validation / test splits whose targets include explanations.
train_tok_ex, valid_tok_ex, test_tok_ex = prepare_dataset(
    dataset=dataset_explanations,
    tokenize_mapping=tokenize_mapping_ex,
    sizes=sizes,
)

In [None]:
# Display the features (columns) of the tokenized training split as a sanity check.
train_tok_ex.features

### Defining the metrics: accuracy / similarity of explanations

In [None]:
from src.utils import eval_pred_transform_sbert
from src.sbert_metric import SbertMetric

In [None]:
# Accuracy transform for targets that also contain an explanation: the explanation
# is removed from the label before scoring.
transform_accuracy_ex = partial(
    eval_pred_transform_accuracy,
    tokenizer=tokenizer,
    remove_explanations_from_label=True,
    explanation_first=EXPLANATION_FIRST,
)
accuracy = evaluate.load('accuracy')

In [None]:
# Transform and metric for explanation similarity, backed by the SBERT model loaded earlier.
transform_sbert = partial(
    eval_pred_transform_sbert,
    tokenizer=tokenizer,
    explanation_first=EXPLANATION_FIRST,
)
sbert_similarity = SbertMetric(sbert)

In [None]:
# Transforms and metrics are given as parallel lists: accuracy first, SBERT similarity second.
transforms = [
    transform_accuracy_ex,
    transform_sbert,
]
metrics = [
    accuracy,
    sbert_similarity,
]

# Metrics callback shared by the trainers of the explanation tasks.
compute_metrics_ex = partial(
    compute_metrics,
    pred_transforms=transforms,
    metrics=metrics,
)

### Fine Tuning

In [None]:
# Fresh pretrained T5 for fine-tuning with explanations, plus its seq2seq batch collator.
model_ft_ex = T5ForConditionalGeneration.from_pretrained(MODEL)
data_collator_ft_ex = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model_ft_ex,
)

In [None]:
# Task 3 training arguments. Generation is capped at 128 tokens (versus 32 in the
# label-only tasks) because the outputs also contain an explanation.
training_args_ft_ex = Seq2SeqTrainingArguments(
    output_dir=directory_3,
    num_train_epochs=NUM_EPOCHS,
    generation_max_length=128,
    **standard_args,
)

In [None]:
# Trainer for fine-tuning with explanations; reports accuracy and SBERT similarity
# through `compute_metrics_ex`.
trainer_ft_ex = Seq2SeqTrainer(
    model=model_ft_ex,
    tokenizer=tokenizer,
    data_collator=data_collator_ft_ex,
    args=training_args_ft_ex,
    compute_metrics=compute_metrics_ex,
    train_dataset=train_tok_ex,
    eval_dataset=valid_tok_ex,
)

In [None]:
import os

from transformers.trainer_utils import get_last_checkpoint

# Fine-tune for Task 3, resuming from the newest checkpoint in the output directory
# when one exists. Passing `resume_from_checkpoint=True` unconditionally makes the
# Trainer raise a ValueError on a first run, when no checkpoint has been saved yet;
# passing None instead starts training from scratch.
last_checkpoint_ft_ex = (
    get_last_checkpoint(directory_3) if os.path.isdir(directory_3) else None
)
trainer_ft_ex.train(resume_from_checkpoint=last_checkpoint_ft_ex)

In [None]:
# Save the model currently held by the Task 3 trainer under <output dir>/best_model.
best_model_dir = f"{directory_3}/best_model"
trainer_ft_ex.save_model(best_model_dir)

In [None]:
# Mark the start of the test-set results in the shared log.
with open("results.txt", mode='a') as results_log:
    results_log.write("Test: \n")

In [None]:
# Evaluate the Task 3 model on the test split with explanations; the result is shown as the cell output.
trainer_ft_ex.evaluate(test_tok_ex)

## 2.4 Task 4: Fine Tuning with Shuffled Explanations

In [None]:
# Output directory for Task 4; the "b" variant is used when EXPLANATION_FIRST is set.
directory_4 = ("task4b_" if EXPLANATION_FIRST else "task4_") + MODEL

In [None]:
# Append the Task 4 section header to the shared results log.
with open("results.txt", mode='a') as results_log:
    results_log.write("\nTask 4 : Fine-tune with shuffled explanations\n")

### Preparing the dataset with *wrong* labelled explanations

In [None]:
# Load a copy of e-SNLI from which the explanations are exported for shuffling.
# The dataset has already been fetched earlier in this notebook, so reuse the
# local cache instead of forcing another full download.
dataset_shex = load_dataset("esnli")

In [None]:
from src.preprocess import save_explanations, save_shuffled_explanations, retrieve_explanations

In [None]:
# Export the dataset's explanations to disk; the returned value is indexed below to get a path.
# NOTE(review): presumably one path per split, with index 0 being train — confirm against src.preprocess.
dirs = save_explanations(dataset_shex)

In [None]:
# Write a shuffled version of the explanations stored at dirs[0] and keep the path of the result.
# NOTE(review): assumes dirs[0] refers to the training split — confirm against save_explanations.
dir_train_shuffled = save_shuffled_explanations(dirs[0])

In [None]:
# Read the shuffled explanations back; they are passed to the training-split tokenizer below.
shuffled_explanations_train = retrieve_explanations(dir_train_shuffled)

In [None]:
from src.utils import tokenize_function_ex

# Only the training split receives the shuffled explanations.
tokenize_mapping_train = partial(
    tokenize_function_ex,
    tokenizer=tokenizer,
    explanations=shuffled_explanations_train,
    use_mnli_format=USE_MNLI_PROMPT,
    explanation_first=EXPLANATION_FIRST,
)
# Validation and test are tokenized without an `explanations` override.
tokenize_mapping_val = partial(
    tokenize_function_ex,
    tokenizer=tokenizer,
    use_mnli_format=USE_MNLI_PROMPT,
    explanation_first=EXPLANATION_FIRST,
)
tokenize_mapping_test = partial(
    tokenize_function_ex,
    tokenizer=tokenizer,
    use_mnli_format=USE_MNLI_PROMPT,
    explanation_first=EXPLANATION_FIRST,
)

# One mapping per split, in (train, validation, test) order.
tokenize_mappings = (
    tokenize_mapping_train,
    tokenize_mapping_val,
    tokenize_mapping_test,
)

In [None]:
# Tokenize the three splits with their per-split mappings (shuffled explanations for train only).
# NOTE(review): this passes `dataset` (loaded in section 1.2) rather than the `dataset_shex`
# loaded for this task — confirm that is intended.
train_tok_shex, valid_tok_shex, test_tok_shex = prepare_dataset(dataset, tokenize_mapping=tokenize_mappings, sizes=sizes)

In [None]:
# Drop the raw explanation text columns from every Task 4 split.
train_tok_shex, valid_tok_shex, test_tok_shex = (
    split.remove_columns(["explanation_1", "explanation_2", "explanation_3"])
    for split in (train_tok_shex, valid_tok_shex, test_tok_shex)
)

### Fine Tuning

In [None]:
# Fresh pretrained T5 for fine-tuning with shuffled explanations, plus its seq2seq batch collator.
model_ft_shex = T5ForConditionalGeneration.from_pretrained(MODEL)
data_collator_ft_shex = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model_ft_shex,
)

In [None]:
# Task 4 training arguments; same 128-token generation cap as Task 3.
training_args_ft_shex = Seq2SeqTrainingArguments(
    output_dir=directory_4,
    num_train_epochs=NUM_EPOCHS,
    generation_max_length=128,
    **standard_args,
)

In [None]:
# Trainer for fine-tuning with shuffled explanations; reuses the Task 3 metrics callback.
trainer_ft_shex = Seq2SeqTrainer(
    model=model_ft_shex,
    tokenizer=tokenizer,
    data_collator=data_collator_ft_shex,
    args=training_args_ft_shex,
    compute_metrics=compute_metrics_ex,
    train_dataset=train_tok_shex,
    eval_dataset=valid_tok_shex,
)

In [None]:
import os

from transformers.trainer_utils import get_last_checkpoint

# Fine-tune for Task 4, resuming from the newest checkpoint in the output directory
# when one exists. Passing `resume_from_checkpoint=True` unconditionally makes the
# Trainer raise a ValueError on a first run, when no checkpoint has been saved yet;
# passing None instead starts training from scratch.
last_checkpoint_ft_shex = (
    get_last_checkpoint(directory_4) if os.path.isdir(directory_4) else None
)
trainer_ft_shex.train(resume_from_checkpoint=last_checkpoint_ft_shex)

In [None]:
# Save the model currently held by the Task 4 trainer under <output dir>/best_model.
best_model_dir = f"{directory_4}/best_model"
trainer_ft_shex.save_model(best_model_dir)

In [None]:
# Mark the start of the test-set results in the shared log.
with open("results.txt", mode='a') as results_log:
    results_log.write("Test: \n")

In [None]:
# Evaluate the Task 4 model on the test split; the result is shown as the cell output.
trainer_ft_shex.evaluate(test_tok_shex)

## 2.5 Task 5: Profiling-UD

In [None]:
# Output directory for Task 5; the "b" variant is used when EXPLANATION_FIRST is set.
directory_5 = ("task5b_" if EXPLANATION_FIRST else "task5_") + MODEL

### Read the results of the automatic annotation stage performed over explanations with Profiling-UD

1. **Token ID**: The token's position in the sentence.
2. **Token**: The actual token text.
3. **Lemma**: The lemma or base form of the token.
4. **UPOS**: Universal part-of-speech tag.
5. **XPOS**: Language-specific part-of-speech tag (optional).
6. **FEATS**: Morphological features of the token.
7. **Head**: The ID of the token's syntactic head.
8. **Dependency relation**: The type of syntactic relation between the token and its head.
9. **DEPS**: Secondary dependencies.
10. **MISC**: Miscellaneous field, which can contain additional annotations.

In [None]:
from src.profiling import distill_explanations

# Profiling-UD annotation files (CoNLL-U), one per split.
train_file_path = "ex_files/explanations_train.conllu"
val_file_path = "ex_files/explanations_val.conllu"
test_file_path = "ex_files/explanations_test.conllu"

# Destination files for the distilled explanations, one per split.
train_outfile = "ex_files/explanations_task5_train.txt"
val_outfile = "ex_files/explanations_task5_val.txt"
test_outfile = "ex_files/explanations_task5_test.txt"

# Run the distillation on each split with the NOUN and VERB tags.
for conllu_path, distilled_path in (
    (train_file_path, train_outfile),
    (val_file_path, val_outfile),
    (test_file_path, test_outfile),
):
    distill_explanations(conllu_path, ["NOUN", "VERB"], distilled_path)


### Prepare the dataset with modified explanations

In [None]:
from src.preprocess import retrieve_explanations

In [None]:
# Distilled explanations read back from disk, keyed by split name.
modified_explanations = {
    split_name: retrieve_explanations(distilled_file)
    for split_name, distilled_file in (
        ('train', train_outfile),
        ('validation', val_outfile),
        ('test', test_outfile),
    )
}

In [None]:
# Unlike Task 4, every split gets its own set of modified explanations.
tokenize_mapping_train = partial(
    tokenize_function_ex,
    tokenizer=tokenizer,
    explanations=modified_explanations['train'],
    use_mnli_format=USE_MNLI_PROMPT,
    explanation_first=EXPLANATION_FIRST,
)
tokenize_mapping_val = partial(
    tokenize_function_ex,
    tokenizer=tokenizer,
    explanations=modified_explanations['validation'],
    use_mnli_format=USE_MNLI_PROMPT,
    explanation_first=EXPLANATION_FIRST,
)
tokenize_mapping_test = partial(
    tokenize_function_ex,
    tokenizer=tokenizer,
    explanations=modified_explanations['test'],
    use_mnli_format=USE_MNLI_PROMPT,
    explanation_first=EXPLANATION_FIRST,
)

# One mapping per split, in (train, validation, test) order.
tokenize_mappings = (
    tokenize_mapping_train,
    tokenize_mapping_val,
    tokenize_mapping_test,
)

# Tokenize the three splits with their per-split mappings.
train_tok_5, valid_tok_5, test_tok_5 = prepare_dataset(
    dataset,
    tokenize_mapping=tokenize_mappings,
    sizes=sizes,
)

In [None]:
# Drop the raw explanation text columns from every Task 5 split.
train_tok_5, valid_tok_5, test_tok_5 = (
    split.remove_columns(["explanation_1", "explanation_2", "explanation_3"])
    for split in (train_tok_5, valid_tok_5, test_tok_5)
)

### Fine-tuning

In [None]:
# Append the Task 5 section header to the shared results log.
# NOTE(review): "names" presumably means "nouns" (the distillation keeps NOUN and VERB);
# the logged text is left unchanged here.
with open("results.txt", mode='a') as results_log:
    results_log.write("\nTask 5 : Fine-tune with only names and verbs in explanations\n")

In [None]:
# Fresh pretrained T5 for fine-tuning with distilled explanations, plus its seq2seq batch collator.
model_ft_5 = T5ForConditionalGeneration.from_pretrained(MODEL)
data_collator_ft_5 = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model_ft_5,
)

In [None]:
# Task 5 training arguments.
# NOTE(review): generation is capped at 32 tokens here, while the other explanation
# tasks (3 and 4) use 128 — confirm 32 is enough for label plus distilled explanation.
training_args_ft_5 = Seq2SeqTrainingArguments(
    output_dir=directory_5,
    num_train_epochs=NUM_EPOCHS,
    generation_max_length=32,
    **standard_args,
)

In [None]:
# Trainer for fine-tuning with distilled explanations; reuses the Task 3 metrics callback.
trainer_ft_5 = Seq2SeqTrainer(
    model=model_ft_5,
    tokenizer=tokenizer,
    data_collator=data_collator_ft_5,
    args=training_args_ft_5,
    compute_metrics=compute_metrics_ex,
    train_dataset=train_tok_5,
    eval_dataset=valid_tok_5,
)

In [None]:
import os

from transformers.trainer_utils import get_last_checkpoint

# Fine-tune for Task 5, resuming from the newest checkpoint in the output directory
# when one exists. Passing `resume_from_checkpoint=True` unconditionally makes the
# Trainer raise a ValueError on a first run, when no checkpoint has been saved yet;
# passing None instead starts training from scratch.
last_checkpoint_ft_5 = (
    get_last_checkpoint(directory_5) if os.path.isdir(directory_5) else None
)
trainer_ft_5.train(resume_from_checkpoint=last_checkpoint_ft_5)

In [None]:
# Save the model currently held by the Task 5 trainer under <output dir>/best_model.
best_model_dir = f"{directory_5}/best_model"
trainer_ft_5.save_model(best_model_dir)

In [None]:
# Mark the start of the test-set results in the shared log ...
with open("results.txt", mode='a') as results_log:
    results_log.write("Test: \n")

# ... then evaluate the Task 5 model on the test split; the result is shown as the cell output.
trainer_ft_5.evaluate(test_tok_5)