# Fine-tuen FLAN-T5 using `bitsandbytes` `peft` & `transformers`

In [1]:
!pip install -q bitsandbytes datasets accelerate
!pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git@main

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m56.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m23.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [23]:
from huggingface_hub import notebook_login,login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [1]:
import os
import torch
os.environ['CUDA_VISIBLE_DEVICES']='0'
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM,AutoTokenizer, BitsAndBytesConfig

quant_config=BitsAndBytesConfig(load_in_8bit=True)
model_name="google/flan-t5-large"
model=AutoModelForSeq2SeqLM.from_pretrained(model_name,quantization_config=quant_config)
tokenizer=AutoTokenizer.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [2]:
# Prepare model for training
from peft import prepare_model_for_kbit_training
model=prepare_model_for_kbit_training(model)

In [3]:
from peft import LoraConfig, get_peft_model, TaskType

 # We are injecting new trainable adapter layers into 8bit model.
 # These new LoRA parameters are in full 16-bit precision and trainable, while reset model stays in 8-bit.
def print_trainable_parameters(model):
  # Print the number of trainable parameters in the model
  trainable_params=0
  all_param=0
  for _, param in model.named_parameters():
    all_param += param.numel()
    if param.requires_grad:
      trainable_params += param.numel()

  print(f"Trainable params: {trainable_params} || All params: {all_param} || Trainable: {100 * trainable_params / all_param}")

lora_config=LoraConfig(r=16,lora_alpha=32, target_modules=['q','v'],lora_dropout=0.05, bias='none',task_type='SEQ_2_SEQ_LM')
model=get_peft_model(model,lora_config)
print_trainable_parameters(model)

Trainable params: 4718592 || All params: 787868672 || Trainable: 0.5989059049678777


In [5]:
# Load and Process data
dataset = load_dataset("financial_phrasebank", "sentences_allagree",download_mode='force_redownload')
dataset=dataset['train'].train_test_split(test_size=0.1)
dataset['validation']=dataset['test']
del dataset['test']

classes=dataset['train'].features['label'].names
dataset=dataset.map(lambda x: {'text_label':[classes[label] for label in x['label']]},batched=True, num_proc=1)


financial_phrasebank.py:   0%|          | 0.00/6.04k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/8.88k [00:00<?, ?B/s]

FinancialPhraseBank-v1.0.zip:   0%|          | 0.00/682k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2264 [00:00<?, ? examples/s]

Map:   0%|          | 0/2037 [00:00<?, ? examples/s]

Map:   0%|          | 0/227 [00:00<?, ? examples/s]

In [8]:
# data preprocessing
text_column = "sentence"
label_column = "text_label"
max_length = 128


def preprocess_function(examples):
    inputs = examples[text_column]
    targets = examples[label_column]
    model_inputs = tokenizer(inputs, max_length=max_length, padding="max_length", truncation=True, return_tensors="pt")
    labels = tokenizer(targets, max_length=3, padding="max_length", truncation=True, return_tensors="pt")
    labels = labels["input_ids"]
    labels[labels == tokenizer.pad_token_id] = -100
    model_inputs["labels"] = labels
    return model_inputs


processed_datasets = dataset.map(
    preprocess_function,
    batched=True,
    num_proc=1,
    remove_columns=dataset["train"].column_names,
    load_from_cache_file=False,
    desc="Running tokenizer on dataset",
)

train_dataset = processed_datasets["train"]
eval_dataset = processed_datasets["validation"]

Running tokenizer on dataset:   0%|          | 0/2037 [00:00<?, ? examples/s]

Running tokenizer on dataset:   0%|          | 0/227 [00:00<?, ? examples/s]

In [14]:
# Train our Model
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    "temp",
    learning_rate=1e-3,
    gradient_accumulation_steps=1,
    auto_find_batch_size=True,
    num_train_epochs=1,
    save_steps=100,
    save_total_limit=8,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)
model.config.use_cache = False

No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [15]:
trainer.train()



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mombhandwalkar38126[0m ([33mombhandwalkar38126-student[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


  return fn(*args, **kwargs)


Step,Training Loss


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=255, training_loss=0.27790814568014705, metrics={'train_runtime': 364.4751, 'train_samples_per_second': 5.589, 'train_steps_per_second': 0.7, 'total_flos': 1181084919791616.0, 'train_loss': 0.27790814568014705, 'epoch': 1.0})

In [22]:
device = next(model.parameters()).device
model.eval()
input_text = "When ever I see her face, I totally ignore her !"
inputs = tokenizer(input_text, return_tensors="pt").to(device)

outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=10)

print("input sentence: ", input_text)
print("output prediction: ", tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))

input sentence:  When ever I see her face, I totally ignore her !
output prediction:  ['neutral']


In [25]:
# Push Model to HUB
model.push_to_hub("OmBhandwalkar/flan-t5-large-financial-phrasebank-lora", use_auth_token=True)

adapter_model.safetensors:   0%|          | 0.00/18.9M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/OmBhandwalkar/flan-t5-large-financial-phrasebank-lora/commit/c5b3fa37156cc7623a78ab06906f31256ee3f951', commit_message='Upload model', commit_description='', oid='c5b3fa37156cc7623a78ab06906f31256ee3f951', pr_url=None, repo_url=RepoUrl('https://huggingface.co/OmBhandwalkar/flan-t5-large-financial-phrasebank-lora', endpoint='https://huggingface.co', repo_type='model', repo_id='OmBhandwalkar/flan-t5-large-financial-phrasebank-lora'), pr_revision=None, pr_num=None)

In [26]:
# Load your Adapter from HUB

import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

peft_model_id='OmBhandwalkar/flan-t5-large-financial-phrasebank-lora'
config=PeftConfig.from_pretrained(peft_model_id)


model = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path, torch_dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

model = PeftModel.from_pretrained(model, peft_model_id)

adapter_config.json:   0%|          | 0.00/769 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/18.9M [00:00<?, ?B/s]

In [29]:
device = next(model.parameters()).device
model.eval()
input_text = "When ever I talk with her, my day goes very well !"
inputs = tokenizer(input_text, return_tensors="pt").to(device)

outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=10)

print("input sentence: ", input_text)
print(" output prediction: ", tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))

input sentence:  When ever I talk with her, my day goes very well !
 output prediction:  ['positive']
