In [1]:
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
)
from peft import PeftModel
from datasets import DatasetDict, load_dataset
from data import set_seed, k_split
from tqdm import trange
import torch

In [2]:
# Dataset family: 'bigbench' loads tasksource/bigbench; any other value loads
# from the JsSparkYyx/NLP524 hub dataset.
data_name = 'glue'
# Configuration name passed to load_dataset; also embedded in the LoRA adapter repo id.
task = 'sst2'
# Seed handed to set_seed() and to the dataset shuffle.
seed = 42
# First two arguments of k_split (presumably the total number of clients and how
# many of them are "error" clients — confirm against data.k_split).
num_clients = 10
num_error_clients = 3
# Index of the client split to evaluate; also the suffix of the LoRA adapter repo id.
number = 5

In [3]:
model_name_or_path = 'google/flan-t5-base'
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
set_seed(seed)

# One load call for both dataset families: only the hub repo id differs.
dataset = load_dataset(
    "tasksource/bigbench" if data_name == 'bigbench' else "JsSparkYyx/NLP524",
    task,
).shuffle(seed=seed)
if data_name == 'bigbench':
    # Align bigbench column names with the 'source'/'target' columns used below.
    dataset = dataset.rename_columns({'inputs':'source','targets':'target'})

In [4]:
# Run k_split on the train and validation splits (in that order); the results
# are indexed per client below. Exact k_split semantics live in data.py.
train_ds, valid_ds = (
    k_split(num_clients, num_error_clients, dataset[split_name])
    for split_name in ('train', 'validation')
)
# NOTE(review): this rebinds `dataset` to a DatasetDict keyed 'train'/'valid',
# so re-running this cell on a live kernel looks up dataset['validation'] on the
# new object and fails. Prefer a distinct name if nothing else reads `dataset`.
dataset = DatasetDict({'train': train_ds[number], 'valid': valid_ds[number]})
def tokenize_function(examples):
    """Tokenize a batch of examples into padded PyTorch inputs with labels.

    Args:
        examples: Mapping with a 'source' column of input texts and a 'target'
            column. When the global ``data_name`` is 'glue' the targets are
            tokenized as-is; otherwise the first element of each target entry
            is used.

    Returns:
        The tokenizer output for the sources, with the target ``input_ids``
        stored under the 'labels' key.
    """
    # max_length=None => fall back to the model max length (this is the default).
    tokenizer_kwargs = dict(truncation=True, max_length=None, padding=True, return_tensors='pt')
    encoded = tokenizer(examples['source'], **tokenizer_kwargs)

    if data_name == 'glue':
        target_texts = examples['target']
    else:
        target_texts = [candidates[0] for candidates in examples['target']]

    encoded['labels'] = tokenizer(target_texts, **tokenizer_kwargs)["input_ids"]
    return encoded
# Bundle the per-client train/valid splits so retrive_data() can pick one client by index.
ds = (train_ds, valid_ds)

In [9]:
def retrive_data(ds, number):
    """Pick one client's train/valid splits out of the bundled splits.

    Args:
        ds: Pair ``(train_splits, valid_splits)``, each indexable by client number.
        number: Index of the client whose splits are returned.

    Returns:
        A DatasetDict with keys 'train' and 'valid' for that client.
    """
    train_splits, valid_splits = ds
    selected = {
        'train': train_splits[number],
        'valid': valid_splits[number],
    }
    return DatasetDict(selected)

def accuracy_score(outputs, ground_truths):
    """Return the percentage (0-100) of outputs that match their ground truth.

    Both sides are compared after stripping surrounding whitespace, lowercasing
    and removing every '.' character.

    Args:
        outputs: Iterable of predicted strings.
        ground_truths: Iterable of reference answers, paired positionally with
            ``outputs``. Each entry is either a string or a list/tuple of
            candidate strings, in which case only the first candidate is used
            (the bigbench target layout).

    Returns:
        Accuracy as a float percentage; 0.0 when there are no pairs to score.
    """
    def _normalize(text):
        return text.strip().lower().replace(".", "")

    correct = 0
    total = 0
    for output, truth in zip(outputs, ground_truths):
        # Decide by the value's type rather than by a notebook-level flag, so
        # the function does not depend on hidden kernel state.
        if isinstance(truth, (list, tuple)):
            truth = truth[0]
        if _normalize(output) == _normalize(truth):
            correct += 1
        total += 1

    # Guard the empty case instead of raising ZeroDivisionError.
    if total == 0:
        return 0.0
    return correct / total * 100

In [6]:
# Select the train/valid splits of client `number` for evaluation.
data = retrive_data(ds,number)

In [10]:
# Plain pretrained checkpoint, used as the baseline in the comparison.
base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path, return_dict=True)

# A second copy of the checkpoint is loaded to carry the LoRA adapter
# (presumably because PEFT injects the adapter into the wrapped model, so the
# baseline needs its own instance — confirm against the PEFT docs).
model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path, return_dict=True)
model_name = model_name_or_path.rsplit("/", 1)[-1]
# The adapter repo id is keyed by task and client number.
lora_model = PeftModel.from_pretrained(
    model,
    f'JsSparkYyx/flan-t5-base-finetuned-lora-{task}-{number}',
)

In [11]:
eval_set = "valid"
batch_size = 64
device = "cuda" if torch.cuda.is_available() else "cpu"
max_input_length = 2048  # cap on tokenized input length
max_new_tokens = 256     # cap on generated tokens per example

# Read each column once; indexing the dataset column inside the loop would
# re-read the whole column on every batch.
eval_sources = list(data[eval_set]["source"])
eval_targets = list(data[eval_set]["target"])

example_predictions = []
example_predictions_lora = []
# Pair every model with the list collecting its decoded outputs so both models
# share one generate/decode code path.
model_outputs = (
    (base_model, example_predictions),
    (lora_model, example_predictions_lora),
)

for eval_model, _ in model_outputs:
    eval_model.eval()
    eval_model.to(device)

with torch.no_grad():
    for start in trange(0, len(eval_sources), batch_size):
        inputs = tokenizer(
            eval_sources[start : start + batch_size],
            max_length=max_input_length,
            # Without a truncation strategy max_length does not cap the inputs.
            truncation=True,
            padding=True,
            return_tensors="pt",
        ).to(device)
        for eval_model, predictions in model_outputs:
            generated = eval_model.generate(
                input_ids=inputs["input_ids"],
                # Batches are padded, so pass the mask explicitly instead of
                # relying on generate() to infer it from the pad token.
                attention_mask=inputs["attention_mask"],
                max_new_tokens=max_new_tokens,
            )
            predictions.extend(
                tokenizer.batch_decode(generated.to("cpu"), skip_special_tokens=True)
            )

task_perf = accuracy_score(example_predictions, eval_targets)
task_perf_lora = accuracy_score(example_predictions_lora, eval_targets)
print(f"ACC of base model: {task_perf}, ACC of lora model: {task_perf_lora}")

100%|██████████| 2/2 [00:00<00:00,  5.36it/s]

ACC of base model: 85.0574712643678, ACC of lora model: 55.172413793103445



