In [6]:
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
)
from datasets import load_dataset
import evaluate
import torch

GLUE_TASKS = ["cola", "mnli", "mnli-mm", "mrpc", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"]
task = "mnli"
task_to_keys = {
    "cola": ("sentence", None),
    "mnli": ("premise", "hypothesis"),
    "mnli-mm": ("premise", "hypothesis"),
    "mrpc": ("sentence1", "sentence2"),
    "qnli": ("question", "sentence"),
    "qqp": ("question1", "question2"),
    "rte": ("sentence1", "sentence2"),
    "sst2": ("sentence", None),
    "stsb": ("sentence1", "sentence2"),
    "wnli": ("sentence1", "sentence2"),
}
num_labels = 3 if task.startswith("mnli") else 1 if task=="stsb" else 2
sentence1_key, sentence2_key = task_to_keys[task]
model_name_or_path = "google/flan-t5-base"
num_epochs = 2
lr = 1e-3
batch_size = 32

In [7]:
dataset = load_dataset("glue", task)
metric = evaluate.load("glue", task)
import numpy as np

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)

Downloading data:   0%|          | 0.00/313M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/392702 [00:00<?, ? examples/s]

Generating validation_matched split:   0%|          | 0/9815 [00:00<?, ? examples/s]

Generating validation_mismatched split:   0%|          | 0/9832 [00:00<?, ? examples/s]

Generating test_matched split:   0%|          | 0/9796 [00:00<?, ? examples/s]

Generating test_mismatched split:   0%|          | 0/9847 [00:00<?, ? examples/s]

In [8]:
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

def tokenize_function(examples):
    # max_length=None => use the model max length (it's actually the default)
    if sentence2_key is None:
        outputs = tokenizer(examples[sentence1_key], truncation=True, max_length=None)
    else:
        outputs = tokenizer(examples[sentence1_key], examples[sentence2_key], truncation=True, max_length=None)
    return outputs

if sentence2_key!=None:
    tokenized_datasets = dataset.map(tokenize_function, batched=True, remove_columns=["idx", sentence1_key, sentence2_key])
else:
    tokenized_datasets = dataset.map(tokenize_function, batched=True, remove_columns=["idx", sentence1_key])

tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding="longest")
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=16,
    lora_alpha=16,
    # target_modules=["query","key","value"],
    target_modules=["q","k","v","o"],
    lora_dropout=0.1,
    bias="none",
    modules_to_save=["classifier"],
)

model_name = model_name_or_path.split("/")[-1]
model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, return_dict=True, num_labels=num_labels)
model = get_peft_model(model, config)
model.print_trainable_parameters()
training_args = TrainingArguments(
    output_dir=f"{model_name}-finetuned-lora-{task}",
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    remove_unused_columns=False,
    load_best_model_at_end=False,
)

Map:   0%|          | 0/392702 [00:00<?, ? examples/s]

Map:   0%|          | 0/9815 [00:00<?, ? examples/s]

Map:   0%|          | 0/9832 [00:00<?, ? examples/s]

Map:   0%|          | 0/9796 [00:00<?, ? examples/s]

Map:   0%|          | 0/9847 [00:00<?, ? examples/s]

Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google/flan-t5-base and are newly initialized: ['classification_head.out_proj.weight', 'classification_head.out_proj.bias', 'classification_head.dense.weight', 'classification_head.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 3,538,944 || all params: 227,035,395 || trainable%: 1.5587631170901788


In [9]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

KeyError: 'validation'

In [None]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForSequenceClassification, AutoTokenizer

peft_model_id = f"{model_name}-finetuned-lora-{task}"
config = PeftConfig.from_pretrained(peft_model_id)
inference_model = AutoModelForSequenceClassification.from_pretrained(config.base_model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(inference_model, peft_model_id)

ValueError: Can't find 'adapter_config.json' at 'flan-t5-base-finetuned-lora-cola'

In [None]:
classes = ["not equivalent", "equivalent"]

sentence1 = "I am a student"

inputs = tokenizer(sentence1, truncation=True, padding="longest", return_tensors="pt")

In [None]:
with torch.no_grad():
    outputs = model(**inputs).logits
    print(outputs)

paraphrased_text = torch.softmax(outputs, dim=1).tolist()[0]
for i in range(len(classes)):
    print(f"{classes[i]}: {int(round(paraphrased_text[i] * 100))}%")

tensor([[ 0.1007, -0.2035]])
not equivalent: 58%
equivalent: 42%


In [29]:
import numpy as np
import pandas as pd
tasks = ['mnli','qnli','sst2','qqp','elementary_math_qa','cryptonite','intersect_geometry','list_functions','tracking_shuffled_objects']
methods = ['single','cv','lorahub','avg']
table = pd.DataFrame([],columns=['single','avg','cv','lorahub'])
for idx,task in enumerate(tasks):
    for method in methods:
        data_name = 'glue' if task in ['mnli','qnli','sst2','qqp'] else 'bigbench'
        path = f"../results/{data_name}/{task}_{method}.csv"
        result = np.loadtxt(path,delimiter=',')
        if method == 'avg':
            table.loc[idx,method] = np.mean(result[3:10])
        else:
            table.loc[idx,method] = np.mean(result[3:10,3:10].diagonal())
table.loc[9] = table.mean(axis=0)
table

Unnamed: 0,single,avg,cv,lorahub
0,85.165824,84.809512,84.962232,85.093171
1,92.072214,91.705913,92.150706,91.889063
2,94.417077,92.775041,93.267652,94.252874
3,86.876789,85.735486,85.742553,81.9017
4,30.374152,26.404121,27.471877,28.427369
5,0.764818,0.273149,0.409724,0.355094
6,24.149248,15.186178,20.889161,18.617808
7,5.087108,4.947735,5.017422,4.390244
8,18.857143,19.047619,21.714286,21.333333
9,48.640486,46.764973,47.958401,47.362295


In [30]:
import numpy as np
import pandas as pd
tasks = ['mnli','qnli','sst2','qqp','elementary_math_qa','cryptonite','intersect_geometry','list_functions','tracking_shuffled_objects']
methods = ['lorahub']
table = pd.DataFrame([],columns=['lorahub'])
for idx,task in enumerate(tasks):
    for method in methods:
        data_name = 'glue' if task in ['mnli','qnli','sst2','qqp'] else 'bigbench'
        path = f"../results/{data_name}/{task}_{method}_weights.csv"
        result = np.loadtxt(path,delimiter=',')[3:]
        table.loc[idx,method] = np.mean(np.absolute(result[:10,:3]).sum(axis=1)/10)
table.loc[9] = table.mean(axis=0)
table

Unnamed: 0,lorahub
0,0.05565
1,0.001066
2,0.029939
3,0.023008
4,0.041546
5,0.119879
6,0.074526
7,0.030892
8,0.048567
9,0.04723


In [39]:
import numpy as np
import pandas as pd
tasks = ['mnli','qnli','sst2','qqp','elementary_math_qa','cryptonite','intersect_geometry','list_functions','tracking_shuffled_objects']
methods = ['cv']
table = pd.DataFrame([],columns=['cv'])
for idx,task in enumerate(tasks):
    for method in methods:
        data_name = 'glue' if task in ['mnli','qnli','sst2','qqp'] else 'bigbench'
        path = f"../results/{data_name}/{task}_{method}_weights.csv"
        result = np.loadtxt(path,delimiter=',')[3:]
        print(result[result <= 2])
        print(np.argwhere(result <= 2))
        # table.loc[idx,method] = np.mean(np.absolute(result[:10,:3]).mean(axis=1))
table.loc[9] = table.mean(axis=0)
table

[0. 1. 2. 0. 1. 0. 1. 0. 1. 2. 0. 2. 0. 2. 0.]
[[0 0]
 [0 1]
 [0 2]
 [1 0]
 [1 1]
 [2 0]
 [2 1]
 [3 0]
 [3 1]
 [3 2]
 [4 0]
 [4 1]
 [5 0]
 [5 1]
 [6 0]]
[2. 0. 2. 1. 2. 1. 2. 1. 1. 2. 1. 2.]
[[0 0]
 [1 0]
 [1 1]
 [2 0]
 [2 1]
 [3 0]
 [3 1]
 [4 0]
 [5 0]
 [5 1]
 [6 0]
 [6 1]]
[0. 0. 1. 2. 0. 1. 1. 2. 0. 0. 1. 2. 0. 2.]
[[0 0]
 [1 0]
 [1 1]
 [1 2]
 [2 0]
 [2 1]
 [3 0]
 [3 1]
 [4 0]
 [5 0]
 [5 1]
 [5 2]
 [6 0]
 [6 1]]
[0. 1. 2. 0. 0. 1. 2. 0. 1. 2. 0. 1. 1. 0. 1.]
[[0 0]
 [0 1]
 [0 2]
 [1 0]
 [2 0]
 [2 1]
 [2 2]
 [3 0]
 [3 1]
 [3 2]
 [4 0]
 [4 1]
 [5 0]
 [6 0]
 [6 1]]
[0. 2. 1. 2. 0. 1. 0. 0. 2. 1. 2.]
[[0 0]
 [1 0]
 [2 0]
 [2 1]
 [3 0]
 [3 1]
 [4 0]
 [5 0]
 [5 1]
 [6 0]
 [6 1]]
[0. 1. 2. 1. 0. 1. 2. 1. 2. 0. 2. 0. 1.]
[[0 0]
 [0 1]
 [0 2]
 [1 0]
 [2 0]
 [2 1]
 [2 2]
 [4 0]
 [4 1]
 [5 0]
 [5 1]
 [6 0]
 [6 1]]
[0. 1. 0. 1. 0. 0. 2.]
[[0 0]
 [2 0]
 [3 0]
 [3 1]
 [4 0]
 [5 0]
 [6 0]]
[0. 1. 2. 0. 2. 1. 2. 1. 2. 0. 1. 2. 0. 1. 0. 2.]
[[0 0]
 [0 1]
 [0 2]
 [1 0]
 [1 1]
 [2 0]
 [2 1]
 [3 0]
 [3

Unnamed: 0,cv
9,
