# Using 🤗 PEFT & bitsandbytes to finetune a LoRa checkpoint




In [None]:
!pip install -q bitsandbytes datasets accelerate loralib
!pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git

In [1]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [2]:
!nvidia-smi -L

GPU 0: NVIDIA A100-SXM4-80GB (UUID: GPU-4680ee07-a709-f98f-532a-ad756bc21043)


In [3]:
import bitsandbytes as bnb

### Setup the model

In [3]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
# os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb=16'
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
os.environ['TORCH_USE_CUDA_DSA'] = '1'

import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, AutoModelForSeq2SeqLM

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    load_in_8bit=True,
)
# PY007/TinyLlama-1.1B-Chat-v0.1
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
for param in model.parameters():
  param.requires_grad = False  # freeze the model - train adapters later
  if param.ndim == 1:
    # cast the small parameters (e.g. layernorm) to fp32 for stability
    param.data = param.data.to(torch.float32)

model.gradient_checkpointing_enable()  # reduce number of stored activations
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
  def forward(self, x): return super().forward(x).to(torch.float32)
model.lm_head = CastOutputToFloat(model.lm_head)

### Setting up the LoRa Adapters

In [5]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )
print_trainable_parameters(model)

trainable params: 0 || all params: 7241732096 || trainable%: 0.0


In [6]:
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=24, #attention heads
    lora_alpha=32, #alpha scaling
    # target_modules=["q_proj", "v_proj"], #if you know the
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM" # set this for CLM or Seq2Seq
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 10223616 || all params: 7251955712 || trainable%: 0.14097736398310756


## Load and preprocess the dataset

In [7]:
import pandas as pd
from datasets import Dataset, DatasetDict

# Load your CSV file into a Pandas DataFrame
df = pd.read_csv('final.csv')

df.head(2)



Unnamed: 0,text,summary,processed_text
0,Abstractive Text Summarization using Sequence-...,This paper presents a novel model for abstract...,abstractive text summarization using rnns beyo...
1,Computing and Informatics Vol V Mar- EVALUATIO...,This paper presents a summary evaluation metho...,computing informatics vol v mar evaluation mea...


In [8]:
df.shape

(104, 3)

In [9]:
df.dropna(inplace=True)

df.shape

(100, 3)

In [10]:
df['processed_text'] = df['processed_text'].apply(lambda x: x[:15000])

### We now create a prompt and add our output column in the dataset

In [11]:
def create_prompt(text, summary):
  start_prompt = '###Human:\n Summarize the following research paper.\n\n'
  end_prompt = '###Assistant:\n\nSummary: '
  prompts = start_prompt + text + end_prompt + summary

  return prompts

df['prompt'] = df.apply(lambda row: create_prompt(row['processed_text'], row['summary']), axis=1)

df.head(2)

Unnamed: 0,text,summary,processed_text,prompt
0,Abstractive Text Summarization using Sequence-...,This paper presents a novel model for abstract...,abstractive text summarization using rnns beyo...,###Human:\n Summarize the following research p...
1,Computing and Informatics Vol V Mar- EVALUATIO...,This paper presents a summary evaluation metho...,computing informatics vol v mar evaluation mea...,###Human:\n Summarize the following research p...


In [12]:
df.drop(columns=['text'], inplace=True)
df.rename(columns={'processed_text': 'Concept', 'summary': 'Description', 'prompt': 'text'}, inplace=True)
df.head(2)

Unnamed: 0,Description,Concept,text
0,This paper presents a novel model for abstract...,abstractive text summarization using rnns beyo...,###Human:\n Summarize the following research p...
1,This paper presents a summary evaluation metho...,computing informatics vol v mar evaluation mea...,###Human:\n Summarize the following research p...


In [13]:
df = df[['Concept', 'Description', 'text']]
df.columns

Index(['Concept', 'Description', 'text'], dtype='object')

In [14]:
from datasets import Dataset, DatasetDict

dataset = Dataset.from_pandas(df)

# Create the desired DatasetDict structure
data = DatasetDict({
    'train': dataset
})

# Print the dataset_dict information
print(data)


DatasetDict({
    train: Dataset({
        features: ['Concept', 'Description', 'text', '__index_level_0__'],
        num_rows: 100
    })
})


In [15]:
data = data.map(lambda samples: tokenizer(samples['text']), batched=True)

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [16]:
data = data.remove_columns(['__index_level_0__',])

In [17]:
data

DatasetDict({
    train: Dataset({
        features: ['Concept', 'Description', 'text', 'input_ids', 'attention_mask'],
        num_rows: 100
    })
})

In [18]:
# !pip install transformers==4.17.0

In [19]:
torch.cuda.empty_cache()

### Training

In [20]:
import transformers
import os
os.environ['TORCH_USE_CUDA_DSA'] = '1'
tokenizer.add_special_tokens({'pad_token': '[PAD]'})



trainer = transformers.Trainer(
    model=model,
    train_dataset=data['train'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=25,
        max_steps=10,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir='outputs',

    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

model.config.use_cache = True  # silence the warnings. Please re-enable for inference!
with torch.autocast("cuda"):
  trainer.train()

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss
1,3.5128
2,3.0241
3,3.8379
4,3.901
5,3.4674
6,3.7985
7,3.6039
8,3.7188
9,4.3302
10,3.9809


## Share adapters on the 🤗 Hub

In [21]:
model.push_to_hub("smit0104/research_summarization-mistral",
                  use_auth_token=True,
                  commit_message="basic training",
                  private=True)



adapter_model.safetensors:   0%|          | 0.00/40.9M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/smit0104/research_summarization-mistral/commit/0109a3127ef3c780d358828573e78ef299617e0b', commit_message='basic training', commit_description='', oid='0109a3127ef3c780d358828573e78ef299617e0b', pr_url=None, pr_revision=None, pr_num=None)

## Load adapters from the Hub

In [22]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = "smit0104/research_summarization-mistral"
config = PeftConfig.from_pretrained(peft_model_id)
device_map = {
    "transformer.word_embeddings": 0,
    "transformer.word_embeddings_layernorm": 0,
    "lm_head": "cpu",
    "transformer.h": 0,
    "transformer.ln_f": 0,
    "embed_tokens.weight": 'cuda'
}

model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path,
                                             return_dict=True,
                                             load_in_8bit=True,
                                             device_map='auto',
                                             llm_int8_enable_fp32_cpu_offload=True)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Move the model to a specific device (e.g., GPU)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = model.to(device)

# Load the Lora model
peft_model = PeftModel.from_pretrained(model, peft_model_id)


adapter_config.json:   0%|          | 0.00/493 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/40.9M [00:00<?, ?B/s]

## Inference

In [23]:
df.head()

Unnamed: 0,Concept,Description,text
0,abstractive text summarization using rnns beyo...,This paper presents a novel model for abstract...,###Human:\n Summarize the following research p...
1,computing informatics vol v mar evaluation mea...,This paper presents a summary evaluation metho...,###Human:\n Summarize the following research p...
2,anany kumar singh school computer science engi...,This paper examines the use of Artificial Inte...,###Human:\n Summarize the following research p...
3,evaluating large language model trained code m...,"This paper introduces Codex, a GPT language mo...",###Human:\n Summarize the following research p...
4,proceeding joint conference empirical method n...,This paper explores the benefits of large-scal...,###Human:\n Summarize the following research p...


In [24]:
start_prompt = '###Human:\n Summarize the following research paper.\n\n'
end_prompt = '###Assistant:\n\nSummary: '
text = df['Concept'][12][:15000]
prompts = start_prompt + text + end_prompt

In [25]:
batch = tokenizer(prompts, return_tensors='pt')

with torch.cuda.amp.autocast():
  output_tokens = model.generate(**batch, max_new_tokens=200)

print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.




 ###Human:
 Summarize the following research paper.

ieee evaluation ecg based recognition cardiac abnormality using machine learning deep learning hasnain ali poonja robotics intelligent machine engineering smme national university science technology nust islamabad pakistan upk muhammad soleman ali shah robotics intelligent machine engineering smme national university science technology nust islamabad pakistan pk riaz uddin haptics condition monitoring lab national center robotics automation ned university engineering technology neduet karachi pakistan riazuddinneduetedupkmuhammad ayaz shirazi haptics condition monitoring lab national center robotics automation ned university engineering technology karachi pakistan ayaznediangmailcom abstract around world common cause death due heart disease reduce risk death critical analyze predict heart disease proposed approach introduces novel technique detect anomaly electrocardiogram signal classify cardiac condition class fragment ecg signal

In [26]:
df['Description'][12]

'This paper proposes a novel technique to detect anomalies in Electrocardiogram signals and classify cardiac conditions from 45 patients in the MIT-BIH Arrhythmia database. The proposed approach utilizes two methods, one based on conventional Machine learning algorithm (SVM) and the other based on a deep learning method (CNN-based architecture ALEXNET). The deep learning technique improved precision and can be used in clinical settings, with an average classification accuracy of 87.2%.'