In [1]:
!pip install -q transformers datasets accelerate peft bitsandbytes

In [6]:
!pip install -U transformers



In [8]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer, DataCollatorForSeq2Seq
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
import torch


In [9]:
!pip install -U datasets



In [10]:
dataset = load_dataset("tatsu-lab/alpaca")
dataset = dataset["train"].train_test_split(test_size=0.1)


In [17]:
model_name = "google/flan-t5-small"  # lightweight and Colab friendly
tokenizer = AutoTokenizer.from_pretrained(model_name)

def preprocess(example):
    # Create the input prompt
    if example['input']:
        prompt = f"### Instruction:\n{example['instruction']}\n\n### Input:\n{example['input']}\n\n### Response:"
    else:
        prompt = f"### Instruction:\n{example['instruction']}\n\n### Response:"

    # Tokenize input (prompt)
    model_inputs = tokenizer(prompt, truncation=True, padding="max_length", max_length=512)

    # Tokenize target (output) as labels
    labels = tokenizer(example["output"], truncation=True, padding="max_length", max_length=128)
    model_inputs["labels"] = labels["input_ids"]

    return model_inputs

tokenized_dataset = dataset.map(preprocess)


Map:   0%|          | 0/46801 [00:00<?, ? examples/s]

Map:   0%|          | 0/5201 [00:00<?, ? examples/s]

In [18]:
from transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    load_in_8bit=True,  # use load_in_4bit=True if supported
    device_map="auto"
)


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [19]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q", "v"],  # use `['q_proj', 'v_proj']` for other models
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


trainable params: 344,064 || all params: 77,305,216 || trainable%: 0.4451


In [20]:
training_args = TrainingArguments(
    output_dir="./flan-t5-lora",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    logging_dir="./logs",
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    fp16=True,
    push_to_hub=False
)

data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    data_collator=data_collator,
    tokenizer=tokenizer
)


  trainer = Trainer(
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [21]:
trainer.train()


Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

In [None]:
model.save_pretrained("flan-t5-lora-adapter")
tokenizer.save_pretrained("flan-t5-lora-adapter")


In [None]:
from peft import PeftModel
from transformers import pipeline

base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
base_model = PeftModel.from_pretrained(base_model, "flan-t5-lora-adapter")

pipe = pipeline("text2text-generation", model=base_model, tokenizer=tokenizer)

# Try a custom prompt
prompt = "### Instruction:\nWrite a poem about AI\n\n### Response:"
print(pipe(prompt)[0]['generated_text'])
