# Using 🤗 PEFT & bitsandbytes to finetune a LoRa checkpoint




In [None]:
!pip install -q bitsandbytes datasets accelerate loralib
!pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git



In [None]:
from huggingface_hub import notebook_login

notebook_login()

In [3]:
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-2d687a2a-d1ee-0b7a-56e5-46bb4d2ae4e8)


### Setup the model

In [6]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_pretrained(
    "google/flan-t5-base",
    load_in_8bit=True,
)
# PY007/TinyLlama-1.1B-Chat-v0.1
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

### Freezing the original weights


In [7]:
for param in model.parameters():
  param.requires_grad = False  # freeze the model - train adapters later
  if param.ndim == 1:
    # cast the small parameters (e.g. layernorm) to fp32 for stability
    param.data = param.data.to(torch.float32)

model.gradient_checkpointing_enable()  # reduce number of stored activations
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
  def forward(self, x): return super().forward(x).to(torch.float32)
model.lm_head = CastOutputToFloat(model.lm_head)

### Setting up the LoRa Adapters

In [8]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )
print_trainable_parameters(model)

trainable params: 0 || all params: 247577856 || trainable%: 0.0


In [9]:
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=16, #attention heads
    lora_alpha=32, #alpha scaling
    # target_modules=["q_proj", "v_proj"], #if you know the
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM" # set this for CLM or Seq2Seq
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 1769472 || all params: 249347328 || trainable%: 0.7096414524241463


In [10]:
import pandas as pd
from datasets import Dataset, DatasetDict

# Load your CSV file into a Pandas DataFrame
df = pd.read_csv('final.csv')

df.head(2)



Unnamed: 0,text,summary,processed_text
0,Abstractive Text Summarization using Sequence-...,This paper presents a novel model for abstract...,abstractive text summarization using rnns beyo...
1,Computing and Informatics Vol V Mar- EVALUATIO...,This paper presents a summary evaluation metho...,computing informatics vol v mar evaluation mea...


In [11]:
df.shape

(104, 3)

In [12]:
df.dropna(inplace=True)

df.shape

(100, 3)

In [13]:
def create_prompt(text, summary):
  start_prompt = '###Human:\n Summarize the following research paper.\n\n'
  end_prompt = '###Assistant:\n\nSummary: '
  prompts = start_prompt + text + end_prompt + summary

  return prompts

df['prompt'] = df.apply(lambda row: create_prompt(row['processed_text'], row['summary']), axis=1)

df.head(2)

Unnamed: 0,text,summary,processed_text,prompt
0,Abstractive Text Summarization using Sequence-...,This paper presents a novel model for abstract...,abstractive text summarization using rnns beyo...,###Human:\n Summarize the following research p...
1,Computing and Informatics Vol V Mar- EVALUATIO...,This paper presents a summary evaluation metho...,computing informatics vol v mar evaluation mea...,###Human:\n Summarize the following research p...


In [14]:
df.drop(columns=['text'], inplace=True)
df.rename(columns={'processed_text': 'Concept', 'summary': 'Description', 'prompt': 'text'}, inplace=True)
df.head(2)

Unnamed: 0,Description,Concept,text
0,This paper presents a novel model for abstract...,abstractive text summarization using rnns beyo...,###Human:\n Summarize the following research p...
1,This paper presents a summary evaluation metho...,computing informatics vol v mar evaluation mea...,###Human:\n Summarize the following research p...


In [15]:
df = df[['Concept', 'Description', 'text']]
df.columns

Index(['Concept', 'Description', 'text'], dtype='object')

## Data

In [16]:
from datasets import Dataset, DatasetDict

dataset = Dataset.from_pandas(df)

# Create the desired DatasetDict structure
data = DatasetDict({
    'train': dataset
})

# Print the dataset_dict information
print(data)


DatasetDict({
    train: Dataset({
        features: ['Concept', 'Description', 'text', '__index_level_0__'],
        num_rows: 100
    })
})


In [17]:
data = data.map(lambda samples: tokenizer(samples['text']), batched=True)

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (7656 > 512). Running this sequence through the model will result in indexing errors


In [18]:
data = data.remove_columns(['__index_level_0__',])

In [19]:
data

DatasetDict({
    train: Dataset({
        features: ['Concept', 'Description', 'text', 'input_ids', 'attention_mask'],
        num_rows: 100
    })
})

In [22]:
# !pip install transformers==4.17.0

### Training

In [21]:
import transformers

# tokenizer.add_special_tokens({'pad_token': '[PAD]'})


trainer = transformers.Trainer(
    model=model,
    train_dataset=data['train'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=25,
        max_steps=5,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir='outputs',

    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

model.config.use_cache = True  # silence the warnings. Please re-enable for inference!
with torch.autocast("cuda"):
  trainer.train()

You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss
1,6.047


OutOfMemoryError: ignored

## Share adapters on the 🤗 Hub

In [None]:
model.push_to_hub("smit0104/research-summarization-llama-1b",
                  use_auth_token=True,
                  commit_message="basic training",
                  private=True)



adapter_model.bin:   0%|          | 0.00/9.04M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/smit0104/sql_assistant-llama-1b/commit/f123422837267bdc53f218c90d2ee1632b1683a3', commit_message='basic training', commit_description='', oid='f123422837267bdc53f218c90d2ee1632b1683a3', pr_url=None, pr_revision=None, pr_num=None)

## Load adapters from the Hub

In [None]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = "smit0104/research-summarization-llama-1b"
config = PeftConfig.from_pretrained(peft_model_id)
device_map = {
    "transformer.word_embeddings": 0,
    "transformer.word_embeddings_layernorm": 0,
    "lm_head": "cpu",
    "transformer.h": 0,
    "transformer.ln_f": 0,
    "embed_tokens.weight": 'cuda'
}

model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path,
                                             return_dict=True,
                                             load_in_8bit=True,
                                             device_map='auto',
                                             llm_int8_enable_fp32_cpu_offload=True)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Move the model to a specific device (e.g., GPU)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = model.to(device)

# Load the Lora model
peft_model = PeftModel.from_pretrained(model, peft_model_id)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Downloading adapter_model.bin:   0%|          | 0.00/9.04M [00:00<?, ?B/s]

## Inference

In [None]:
batch = tokenizer("###Human:\nExplain the following SQL Concept: Give me a SQL query to add to numerical columns in SQL \n\n###Assistant:", return_tensors='pt')

with torch.cuda.amp.autocast():
  output_tokens = model.generate(**batch, max_new_tokens=50)

print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))





 ###Human:
Explain the following SQL Concept: Give me a SQL query to add to numerical columns in SQL 

###Assistant:
Adding a value to a numerical column in SQL is done by using the following syntax:

ALTER TABLE MyTable ADD (ColumnName, NewValue)


This syntax will add a value to the column specified by ColumnName
