In [1]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from peft import get_peft_model, LoraConfig, TaskType
model_name_or_path = "bigscience/mt0-large"
tokenizer_name_or_path = "bigscience/mt0-large"


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
peft_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1
)

In [3]:
model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path,device_map="cpu")
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 2,359,296 || all params: 1,231,940,608 || trainable%: 0.1915


In [4]:
from datasets import load_dataset

dataset = load_dataset("samsum")  # Example: summarization dataset
train_dataset = dataset["train"]

# Tokenize the dataset
def preprocess_function(examples):
    inputs = tokenizer(examples["dialogue"], truncation=True, padding="max_length", max_length=128)
    targets = tokenizer(examples["summary"], truncation=True, padding="max_length", max_length=64)
    inputs["labels"] = targets["input_ids"]
    return inputs

tokenized_dataset = train_dataset.map(preprocess_function, batched=True)

In [5]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./mt0-large-lora",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,  # Effective batch size = 16
    learning_rate=1e-4,
    num_train_epochs=3,
    logging_steps=10,
    save_steps=500,
    evaluation_strategy="no",                      # No mixed precision on CPU
    no_cuda=True,
)



In [6]:
from transformers import Trainer, DataCollatorForSeq2Seq

data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
)

trainer.train()

No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Step,Training Loss
10,18.05
20,16.336
30,15.3666
40,14.0361
50,12.27
60,10.4807
70,8.5693
80,6.3319
90,5.1869
100,4.8267


KeyboardInterrupt: 

In [None]:
model.save_pretrained("./mt0-large-lora-cpu-adapters")
tokenizer.save_pretrained("./mt0-large-lora-cpu-adapters")

In [None]:
from peft import PeftModel

base_model = AutoModelForSeq2SeqLM.from_pretrained("bigscience/mt0-large", device_map="cpu")
fine_tuned_model = PeftModel.from_pretrained(base_model, "./mt0-large-lora-cpu-adapters")
fine_tuned_model.eval()

input_text = "Summarize this: [your text here]"
inputs = tokenizer(input_text, return_tensors="pt")
outputs = fine_tuned_model.generate(**inputs)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))