Dependencies...

In [None]:
pip install statsmodels

In [None]:
import torch as t
import pandas as pd
import numpy as np
from transformers import T5Tokenizer, T5ForConditionalGeneration, TrainingArguments, Trainer
import transformers, datasets, pickle, multiprocessing, peft, evaluate, py7zr, functools, accelerate
import statsmodels.api as sm
from statsmodels.formula.api import ols

Global GPU...

In [None]:
# CPU device, kept under the name `output_device`.
output_device = t.device('cpu')

# Run models on CUDA when PyTorch reports it as available; otherwise fall back to the CPU.
model_run_device = t.device('cuda' if t.cuda.is_available() else 'cpu')

Tokenizer...

In [None]:
# Load the pretrained "t5-base" tokenizer; it is used further down to decode generated token ids.
tokenizer = T5Tokenizer.from_pretrained("t5-base")

Functions for predicting and decoding. Split into parts due to memory limitations. Adjust parts as necessary. It shouldn't really matter how many you have as long as it isn't excessive. 

In [None]:
# Number of chunks the prediction helpers below split their work into (one pickle file per chunk).
parts = 8

def prediction_by_parts(trainer, data, file_name, n_examples=800):
    """Run `trainer.model.generate` over the test input ids in `parts` chunks.

    Each chunk's output tensor is pickled to
    `./predictions/{file_name}_predictions_{n}.pickle`, one file per chunk.

    Args:
        trainer: Object exposing the model as `trainer.model`.
        data: Mapping whose `data["test"]["input_ids"]` holds the tokenized inputs.
        file_name: Stem used in the names of the pickle files.
        n_examples: Upper bound on how many test examples are processed.
            Defaults to 800, the value that used to be hard-coded.
    """
    import os  # local import: the notebook's import cell does not bring in `os`

    os.makedirs('./predictions', exist_ok=True)

    # Read the column once instead of once per chunk.
    input_ids = data["test"]["input_ids"][:n_examples]
    total = len(input_ids)

    for n in range(parts):
        # Integer boundaries that cover every example even when `parts` does not
        # divide `total`; for 800 examples and 8 parts this is 100 per chunk.
        start, stop = (total * n) // parts, (total * (n + 1)) // parts
        pred = trainer.model.generate(t.tensor(input_ids[start:stop]).to(model_run_device),
                                      max_length=64, min_length=4,
                                      length_penalty=1.2, num_beams=4,
                                      early_stopping=True,
                                      repetition_penalty=3.0, no_repeat_ngram_size=3)
        with open(f'./predictions/{file_name}_predictions_{n}.pickle', 'wb') as file:
            # Store the tensor on `output_device` so the pickle can be loaded
            # on a machine without the device the model ran on.
            pickle.dump(pred.to(output_device), file)

def prediction_decoding_by_parts(file_name):
    """Decode every pickled prediction chunk for `file_name` and pickle the result.

    For each of the `parts` chunks, reads
    `./predictions/{file_name}_predictions_{n}.pickle`, passes its content to
    `tokenizer.batch_decode` with special tokens skipped, and writes the output to
    `./readable-predictions/readable_{file_name}_predictions_{n}.pickle`.
    """
    for n in range(parts):
        source = open(f'./predictions/{file_name}_predictions_{n}.pickle', 'rb')
        with source:
            token_ids = pickle.load(source)

        decoded = tokenizer.batch_decode(token_ids, skip_special_tokens=True)

        target = open(f'./readable-predictions/readable_{file_name}_predictions_{n}.pickle', 'wb')
        with target:
            pickle.dump(decoded, target)
            
def combine_predictions(file_name):
    """Return the decoded predictions of all `parts` chunks as one flat list.

    Chunks are read in order from
    `./readable-predictions/readable_{file_name}_predictions_{n}.pickle`.
    """
    def _load_part(n):
        # One chunk's unpickled content.
        with open(f'./readable-predictions/readable_{file_name}_predictions_{n}.pickle', 'rb') as file:
            return pickle.load(file)

    return [prediction for n in range(parts) for prediction in _load_part(n)]


Open the full token sets from preprocessing

In [None]:
# Locations of the pickled token sets.
cnn_tokens_path = './preprocessing/cnn_tokens.pickle'
samsum_tokens_path = './preprocessing/samsum_tokens.pickle'

# NOTE: unpickling runs code stored in the file; only load pickles you produced yourself.
with open(cnn_tokens_path, 'rb') as cnn_file:
    cnn_tokens = pickle.load(cnn_file)

with open(samsum_tokens_path, 'rb') as samsum_file:
    samsum_tokens = pickle.load(samsum_file)


Predict and save predictions for each trainer. 

In [None]:
# NOTE: unpickling runs code stored in the file; only load trainer pickles you produced yourself.
with open('./models/base_trainer.pickle', 'rb') as file:
    base_trainer = pickle.load(file)

try:
    base_trainer.model.to(model_run_device)
    prediction_by_parts(base_trainer, samsum_tokens, "base_none_to_samsum")
    prediction_by_parts(base_trainer, cnn_tokens, "base_none_to_cnn")
finally:
    # Move the weights back to the CPU so this model stops occupying GPU memory
    # while the remaining trainers run; this also happens if prediction fails.
    base_trainer.model.to(output_device)

In [None]:
# NOTE: unpickling runs code stored in the file; only load trainer pickles you produced yourself.
with open('./models/cnn_trainer.pickle', 'rb') as file:
    cnn_trainer = pickle.load(file)

try:
    cnn_trainer.model.to(model_run_device)
    prediction_by_parts(cnn_trainer, samsum_tokens, "lora_cnn_to_samsum")
    prediction_by_parts(cnn_trainer, cnn_tokens, "lora_cnn_to_cnn")
finally:
    # Move the weights back to the CPU so this model stops occupying GPU memory
    # while the remaining trainers run; this also happens if prediction fails.
    cnn_trainer.model.to(output_device)

In [None]:
# NOTE: unpickling runs code stored in the file; only load trainer pickles you produced yourself.
with open('./models/samsum_trainer.pickle', 'rb') as file:
    samsum_trainer = pickle.load(file)

try:
    samsum_trainer.model.to(model_run_device)
    prediction_by_parts(samsum_trainer, samsum_tokens, "lora_samsum_to_samsum")
    prediction_by_parts(samsum_trainer, cnn_tokens, "lora_samsum_to_cnn")
finally:
    # Move the weights back to the CPU so this model stops occupying GPU memory
    # while the remaining trainers run; this also happens if prediction fails.
    samsum_trainer.model.to(output_device)

In [None]:
# NOTE: unpickling runs code stored in the file; only load trainer pickles you produced yourself.
with open('./models/mixed_trainer.pickle', 'rb') as file:
    mixed_trainer = pickle.load(file)

try:
    mixed_trainer.model.to(model_run_device)
    prediction_by_parts(mixed_trainer, samsum_tokens, "lora_mixed_to_samsum")
    prediction_by_parts(mixed_trainer, cnn_tokens, "lora_mixed_to_cnn")
finally:
    # Move the weights back to the CPU so this model stops occupying GPU memory
    # while the remaining trainers run; this also happens if prediction fails.
    mixed_trainer.model.to(output_device)

In [None]:
# NOTE: unpickling runs code stored in the file; only load trainer pickles you produced yourself.
with open('./models/cnn_FFT_trainer.pickle', 'rb') as file:
    cnn_FFT_trainer = pickle.load(file)

try:
    cnn_FFT_trainer.model.to(model_run_device)
    prediction_by_parts(cnn_FFT_trainer, samsum_tokens, "FFT_cnn_to_samsum")
    prediction_by_parts(cnn_FFT_trainer, cnn_tokens, "FFT_cnn_to_cnn")
finally:
    # Move the weights back to the CPU so this model stops occupying GPU memory
    # while the remaining trainers run; this also happens if prediction fails.
    cnn_FFT_trainer.model.to(output_device)

In [None]:
# NOTE: unpickling runs code stored in the file; only load trainer pickles you produced yourself.
with open('./models/samsum_FFT_trainer.pickle', 'rb') as file:
    samsum_FFT_trainer = pickle.load(file)

try:
    samsum_FFT_trainer.model.to(model_run_device)
    prediction_by_parts(samsum_FFT_trainer, samsum_tokens, "FFT_samsum_to_samsum")
    prediction_by_parts(samsum_FFT_trainer, cnn_tokens, "FFT_samsum_to_cnn")
finally:
    # Move the weights back to the CPU so this model stops occupying GPU memory
    # while the remaining trainers run; this also happens if prediction fails.
    samsum_FFT_trainer.model.to(output_device)

In [None]:
# NOTE: unpickling runs code stored in the file; only load trainer pickles you produced yourself.
with open('./models/mixed_FFT_trainer.pickle', 'rb') as file:
    mixed_FFT_trainer = pickle.load(file)

try:
    mixed_FFT_trainer.model.to(model_run_device)
    prediction_by_parts(mixed_FFT_trainer, samsum_tokens, "FFT_mixed_to_samsum")
    prediction_by_parts(mixed_FFT_trainer, cnn_tokens, "FFT_mixed_to_cnn")
finally:
    # Move the weights back to the CPU so this model stops occupying GPU memory;
    # this also happens if prediction fails.
    mixed_FFT_trainer.model.to(output_device)

Define the lists of names used to access the predictions in the dictionaries they'll be stored in.

In [None]:
# Prefixes shared by both lists, in the order the predictions are later iterated.
_trained_variants = [
    "base_none",
    "FFT_cnn",
    "FFT_samsum",
    "FFT_mixed",
    "lora_cnn",
    "lora_samsum",
    "lora_mixed",
]

# Each entry is "<prefix>_to_<test set>" and doubles as the file-name stem of the saved predictions.
samsum_names = [f"{variant}_to_samsum" for variant in _trained_variants]

cnn_names = [f"{variant}_to_cnn" for variant in _trained_variants]


Decode the tokenized predictions into a readable format

In [None]:
# Decode every saved prediction chunk: the SAMSum names first, then the CNN names.
for name in samsum_names + cnn_names:
    prediction_decoding_by_parts(name)

Reassemble the parts. Doubles as loading in case you already did all the steps before and have parts saved. 

In [None]:
# Map every prediction name to the flat list of its decoded predictions.
samsum_preds = dict(zip(samsum_names, map(combine_predictions, samsum_names)))
cnn_preds = dict(zip(cnn_names, map(combine_predictions, cnn_names)))


Load natural language testing datasets. 

In [None]:
# Locations of the pickled test sets.
cnn_test_path = './preprocessing/cnn_test.pickle'
samsum_test_path = './preprocessing/samsum_test.pickle'

# NOTE: unpickling runs code stored in the file; only load pickles you produced yourself.
with open(cnn_test_path, 'rb') as cnn_file:
    cnn_test = pickle.load(cnn_file)

with open(samsum_test_path, 'rb') as samsum_file:
    samsum_test = pickle.load(samsum_file)

Compute rouge score for each trainer. 

In [None]:
# Load the "rouge" metric through the `evaluate` library; compute_rouge_scores below calls it.
rouge_metric = evaluate.load("rouge")

def compute_rouge_scores(predictions, references):
    """Return what `rouge_metric.compute` yields for the given predictions and references."""
    scores = rouge_metric.compute(predictions=predictions, references=references)
    return scores

# Score every SAMSum prediction list against the SAMSum "highlights" column.
samsum_references = samsum_test["highlights"]
rouge_scores_samsum = {
    model_name: compute_rouge_scores(model_preds, samsum_references)
    for model_name, model_preds in samsum_preds.items()
}

# Same for the CNN predictions against the CNN "highlights" column.
cnn_references = cnn_test["highlights"]
rouge_scores_cnn = {
    model_name: compute_rouge_scores(model_preds, cnn_references)
    for model_name, model_preds in cnn_preds.items()
}


Compute the BERTScore for each trainer. The per-metric averages are computed for the summary table; the raw per-example scores are kept for the ANOVA later.

In [None]:
# Load the "bertscore" metric through the `evaluate` library.
bert_metric = evaluate.load("bertscore")

def calc_bert_average(bert_scores):
    """Return the mean of the "precision", "recall" and "f1" entries of `bert_scores`.

    Any other keys in `bert_scores` are ignored.
    """
    return {key: np.mean(bert_scores[key]) for key in ("precision", "recall", "f1")}

# Raw BERTScore output per model on the SAMSum predictions.
bert_scores_samsum = {}
for model_name, model_preds in samsum_preds.items():
    bert_scores_samsum[model_name] = bert_metric.compute(
        predictions=model_preds,
        references=samsum_test["highlights"],
        lang="en",
        model_type="t5-base",
    )

# Raw BERTScore output per model on the CNN predictions.
bert_scores_cnn = {}
for model_name, model_preds in cnn_preds.items():
    bert_scores_cnn[model_name] = bert_metric.compute(
        predictions=model_preds,
        references=cnn_test["highlights"],
        lang="en",
        model_type="t5-base",
    )

# Mean precision / recall / f1 per model, derived from the raw outputs above.
bert_scores_average_samsum = {
    model_name: calc_bert_average(bert_scores_samsum[model_name]) for model_name in bert_scores_samsum
}
bert_scores_average_cnn = {
    model_name: calc_bert_average(bert_scores_cnn[model_name]) for model_name in bert_scores_cnn
}



Function for printing dictionaries prettily

In [None]:
def print_scores(scores_dict):
    """Print each model's metrics, one metric per line with four decimals.

    Args:
        scores_dict: Mapping of model name to a mapping of metric name to a
            numeric value.
    """
    for model_name, metrics in scores_dict.items():
        print(f"Model: {model_name}")
        for metric, value in metrics.items():
            # Upper-case only the first character: str.capitalize() would also
            # lower-case the rest and turn "rougeL" into "Rougel".
            label = metric[:1].upper() + metric[1:]
            print(f"  {label}: {value:.4f}")
        print()


Print the metric averages

In [None]:
# ROUGE tables first (CNN, then SAMSum) ...
for heading, scores in (
    ("== Rouge Scores CNN ==", rouge_scores_cnn),
    ("== Rouge Scores SAMSum ==", rouge_scores_samsum),
):
    print(heading)
    print_scores(scores)

print("--------------------------\n")

# ... then the averaged BERT scores in the same order.
for heading, scores in (
    ("== BERT Scores CNN ==", bert_scores_average_cnn),
    ("== BERT Scores SAMSum ==", bert_scores_average_samsum),
):
    print(heading)
    print_scores(scores)


Set up the dataframe of Bert Precision in long data format

In [None]:
def _bert_precision_long_format(score_sets):
    """Build the long-format columns for the BERT precision table.

    Args:
        score_sets: Iterable of `(names, scores_by_name)` pairs. Every name has
            the form "<model_type>_<train_data>_to_<test_data>", and
            `scores_by_name[name]["precision"]` holds that run's precision values.

    Returns:
        Dict with the columns 'test_data', 'model_type', 'train_data' and
        'precision', one row per precision value, in the order given.
    """
    columns = {'test_data': [], 'model_type': [], 'train_data': [], 'precision': []}
    for names, scores_by_name in score_sets:
        for name in names:
            # Take the factor levels from the name itself instead of hard-coded
            # repeat counts, so they cannot drift from the name lists.
            trained_as, test_data = name.split("_to_")
            model_type, train_data = trained_as.split("_")
            precisions = list(scores_by_name[name]["precision"])
            columns['test_data'].extend([test_data] * len(precisions))
            columns['model_type'].extend([model_type] * len(precisions))
            columns['train_data'].extend([train_data] * len(precisions))
            columns['precision'].extend(precisions)
    return columns


# CNN rows first, then SAMSum rows.
data_dict = _bert_precision_long_format([
    (cnn_names, bert_scores_cnn),
    (samsum_names, bert_scores_samsum),
])
bert_df = pd.DataFrame(data_dict)


In [None]:
# Perform three-way ANOVA

In [None]:
# Precision modelled on the three factors: main effects, every two-way
# interaction, and the three-way interaction.
anova_formula = """precision ~ C(model_type) + C(train_data) + C(test_data) +
               C(model_type):C(train_data) + C(model_type):C(test_data) + C(train_data):C(test_data) +
               C(model_type):C(train_data):C(test_data)"""

model = ols(anova_formula, data=bert_df).fit()

# typ=2 selects the Type II ANOVA table.
anova_table = sm.stats.anova_lm(model, typ=2)
anova_table

Function for printing out test texts as well as corresponding label and predictions. 

In [None]:
def qualitative_analysis(index, data_set):
    """Print one test example next to its label and every model's prediction.

    Args:
        index: Position of the example in the test set and in each prediction list.
        data_set: "cnn" selects `cnn_test` / `cnn_preds`; any other value
            selects `samsum_test` / `samsum_preds` (same fallback as before).
    """
    if data_set == "cnn":
        test_set, preds, target = cnn_test, cnn_preds, "cnn"
    else:
        test_set, preds, target = samsum_test, samsum_preds, "samsum"

    # (printed label, name prefix) for each prediction, in display order.
    prediction_rows = [
        ("Base Prediction: ", "base_none"),
        ("FFT CNN Prediction: ", "FFT_cnn"),
        ("FFT Samsum Prediction: ", "FFT_samsum"),
        ("FFT Mixed Prediction: ", "FFT_mixed"),
        ("LoRA CNN Prediction: ", "lora_cnn"),
        ("LoRA Samsum Prediction: ", "lora_samsum"),
        ("LoRA Mixed Prediction: ", "lora_mixed"),
    ]

    print("Input: ", test_set["article"][index], "\n")
    print("Label: ", test_set["highlights"][index], "\n")

    final_row = len(prediction_rows) - 1
    for position, (label, prefix) in enumerate(prediction_rows):
        text = preds[f"{prefix}_to_{target}"][index]
        if position < final_row:
            # Every row except the last gets a blank line after it; the
            # "FFT Mixed" row used to be missing that separator.
            print(label, text, "\n")
        else:
            print(label, text)


Call of the above function for convenience.

In [None]:
# Position of the SAMSum test example to inspect; change it to look at another one.
index = 0
qualitative_analysis(index, "samsum")

In [None]:
# Position of the CNN test example to inspect; change it to look at another one.
index = 0
qualitative_analysis(index, "cnn")