In [21]:
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments
)
from evaluate import evaluator
import evaluate
from peft import LoraConfig, get_peft_model, PeftConfig
from datasets import DatasetDict, load_dataset
import numpy as np
from init_parameters import init_parameters
from data import split_data, set_seed, k_split
import torch
import random

In [37]:
data_name = 'glue'
task = 'mnli'
seed = 42
num_clients = 10
num_error_clients = 2
model_name_or_path = 'google/flan-t5-base'
metric = evaluate.load("sacrebleu")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
set_seed(seed)
dataset = load_dataset("JsSparkYyx/NLP524", task).shuffle(seed=seed)

In [47]:
train_ds = k_split(num_clients,num_error_clients,dataset['train'])
test_ds = k_split(num_clients,num_error_clients,dataset['test'])
valid_ds = k_split(num_clients,num_error_clients,dataset['valid'])
i = 1
dataset = DatasetDict({'train':train_ds[i],'test':test_ds[i],'valid':valid_ds[i]})

In [45]:
def tokenize_function(examples):
    # max_length=None => use the model max length (it's actually the default)
    model_inputs = tokenizer(examples['source'], truncation=True, max_length=None)
    model_inputs['labels'] = tokenizer(examples['target'], truncation=True, max_length=None)["input_ids"]
    return model_inputs
tokenized_datasets = dataset.map(tokenize_function, batched=True, remove_columns=dataset['train'].column_names)

Map:   0%|          | 0/39271 [00:00<?, ? examples/s]

In [41]:
model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path, return_dict=True)
config = PeftConfig.from_pretrained(r'D:\Code\WashU\NLP524Final\flan-t5-base-finetuned-lora-mnli-0\checkpoint-1228')
model_name = model_name_or_path.split("/")[-1]
model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path, return_dict=True)
lora_model = get_peft_model(model, config)

In [54]:


def accuracy_score(outputs, ground_truths):
    correct = 0
    total = 0
    for output, truth in zip(outputs, ground_truths):
        if output.strip().lower().replace(".", "") == truth.strip().lower().replace(".", ""):
            correct += 1
        total += 1
    return correct / total * 100

example_predictions = []
example_predictions_lora = []
# load model

# use gpu if available
batch_size = 64
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

for i in range(0, len(dataset["test"]["source"]), batch_size):
    inputs = tokenizer(
            dataset["test"]["source"][i : i + batch_size],
            max_length=2048,
            return_tensors="pt",
            padding=True,
        ).to(device)
    outputs = model.generate(
        input_ids=inputs["input_ids"], max_new_tokens=256
    )
    outputs = tokenizer.batch_decode(
        outputs.to("cpu"), skip_special_tokens=True
    )
    example_predictions.extend(outputs)
    outputs = lora_model.generate(
        input_ids=inputs["input_ids"], max_new_tokens=256
    )
    outputs = tokenizer.batch_decode(
        outputs.to("cpu"), skip_special_tokens=True
    )
    example_predictions_lora.extend(outputs)

task_perf = accuracy_score(example_predictions, dataset["test"]["target"])
task_perf_lora = accuracy_score(example_predictions_lora, dataset["test"]["target"])



In [55]:
task_perf,task_perf_lora

(29.949238578680205, 29.949238578680205)