https://www.databricks.com/blog/efficient-fine-tuning-lora-guide-llms

In [None]:
!pip install -q -U trl transformers accelerate peft Sentencepiece trl
!pip install -q datasets bitsandbytes einops wandb

In [None]:
import pandas as pd
from datasets import load_dataset
from datasets import Dataset
from pprint import pprint
from peft import get_peft_config, PeftModel, PeftConfig, get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM
from transformers import LlamaTokenizer, LlamaForCausalLM
import torch
from transformers.trainer_callback import TrainerCallback
import os
from transformers import BitsAndBytesConfig
from trl import SFTTrainer
from huggingface_hub import notebook_login

In [None]:
#Load the dataset from the HuggingFace Hub
rd_ds = load_dataset("xiyuez/red-dot-design-award-product-description")

#Convert to pandas dataframe for convenient processing
rd_df = pd.DataFrame(rd_ds['train'])

#Combine the two attributes into an instruction string
rd_df['instruction'] = 'Create a detailed description for the following product: '+ rd_df['product']+', belonging to category: '+ rd_df['category']

rd_df = rd_df[['instruction', 'description']]

#Get a 5000 sample subset for fine-tuning purposes
rd_df_sample = rd_df.sample(n=5000, random_state=42)

In [None]:
pprint(rd_df_sample['instruction'][0])

('Create a detailed description for the following product: Biamp Rack '
 'Products, belonging to category: Digital Audio Processors')


In [None]:
rd_df_sample

Unnamed: 0,instruction,description
18952,Create a detailed description for the followin...,The CG8565 is a gaming PC offering space for h...
12584,Create a detailed description for the followin...,The iSHOXS BullBar ProX mount can be used to a...
5702,Create a detailed description for the followin...,The S81 Pro focuses on two things: outstanding...
20503,Create a detailed description for the followin...,The CenFlex superfinish machine is designed fo...
2480,Create a detailed description for the followin...,The THALION S gas absorption heat pump uses na...
...,...,...
268,Create a detailed description for the followin...,“The MoodPlay can be described as a record pla...
518,Create a detailed description for the followin...,V23 is a switch panel that includes sockets an...
8137,Create a detailed description for the followin...,The Bosch Aqua water purifier collection for u...
5508,Create a detailed description for the followin...,The design concept for these kitchen knives an...


In [None]:
#Define template and format data into the template for supervised fine-tuning
template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:

{}

### Response:\n"""

rd_df_sample['prompt'] = rd_df_sample["instruction"].apply(lambda x: template.format(x))
rd_df_sample.rename(columns={'description': 'response'}, inplace=True)
rd_df_sample['response'] = rd_df_sample['response'] + "\n### End"
rd_df_sample = rd_df_sample[['prompt', 'response']]

rd_df_sample['text'] = rd_df_sample["prompt"] + rd_df_sample["response"]
rd_df_sample.drop(columns=['prompt', 'response'], inplace=True)

In [None]:
pprint(rd_df_sample.loc[0,'text'])

('Below is an instruction that describes a task. Write a response that '
 'appropriately completes the request.\n'
 '\n'
 '### Instruction:\n'
 '\n'
 'Create a detailed description for the following product: Biamp Rack '
 'Products, belonging to category: Digital Audio Processors\n'
 '\n'
 '### Response:\n'
 '“High recognition value, uniform aesthetics and practical scalability – this '
 'has been impressively achieved with the Biamp brand language,” the jury '
 'statement said. The previous design of the digital audio processors was not '
 'only costly to produce, but also incompatible with newer system '
 'architectures. With the new concept, the company is making a visual '
 'statement that allows for differences in dimension, connectivity and '
 'application. Design elements include consistent branding, a soft curve on '
 'the top and bottom edges, and two red bars on the left and right margins of '
 'the products. The two-part black front panel can be used for various '
 'products

In [None]:
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM

model_path = 'openlm-research/open_llama_3b_v2'
tokenizer = LlamaTokenizer.from_pretrained(model_path)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

bnb_config = BitsAndBytesConfig(
  load_in_4bit=True,
  bnb_4bit_quant_type="nf4",
  bnb_4bit_use_double_quant=True,
  bnb_4bit_compute_dtype=torch.bfloat16
)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
    quantization_config=bnb_config,
)

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)


In [None]:
#Pass in a prompt and infer with the model
prompt = 'Q: Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse\nA:'
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

generation_output = model.generate(
input_ids=input_ids, max_new_tokens=128
)

pprint(tokenizer.decode(generation_output[0]))

('<s>Q: Create a detailed description for the following product: Corelogic '
 'Smooth Mouse, belonging to category: Optical Mouse\n'
 'A: Corelogic Smooth Mouse is a mouse that is designed to be used with a '
 'computer. It is a wireless mouse that has a 2.4 GHz wireless connection. It '
 'has a 2.4 GHz wireless connection and a 2.4 GHz wireless connection. It has '
 'a 2.4 GHz wireless connection and a 2.4 GHz wireless connection. It has a '
 '2.4 GHz wireless connection and a 2.4 GHz wireless connection. It has a 2.4 '
 'GHz wireless connection and a 2.4 GHz wireless connection. It has a 2.')


In [None]:
prompt= """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse

### Response:"""
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

generation_output = model.generate(
input_ids=input_ids, max_new_tokens=128
)


In [None]:
pprint(tokenizer.decode(generation_output[0]))

('<s>Below is an instruction that describes a task. Write a response that '
 'appropriately completes the request.\n'
 '\n'
 '### Instruction:\n'
 'Create a detailed description for the following product: Corelogic Smooth '
 'Mouse, belonging to category: Optical Mouse\n'
 '\n'
 '### Response:\n'
 'Corelogic Smooth Mouse is a mouse that is designed to be used by people who '
 'have a hard time using a mouse. The mouse is designed to be used by people '
 'who have a hard time using a mouse. The mouse is designed to be used by '
 'people who have a hard time using a mouse. The mouse is designed to be used '
 'by people who have a hard time using a mouse. The mouse is designed to be '
 'used by people who have a hard time using a mouse. The mouse is designed to '
 'be used by people who have a hard time using a mouse. The mouse is designed '
 'to be used by people who have a hard')


In [None]:
import re
model_modules = str(model.modules)
pattern = r'\((\w+)\): Linear'
linear_layer_names = re.findall(pattern, model_modules)

names = []
# Print the names of the Linear layers
for name in linear_layer_names:
    names.append(name)
target_modules = list(set(names))

In [None]:
target_modules

['lm_head',
 'k_proj',
 'o_proj',
 'gate_proj',
 'q_proj',
 'down_proj',
 'v_proj',
 'up_proj']

In [None]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [None]:
from peft import LoraConfig

#If only targeting attention blocks of the model
target_modules = ["q_proj", "v_proj"]

#If targeting all linear layers
target_modules = ['q_proj','k_proj','v_proj','o_proj','gate_proj','down_proj','up_proj','lm_head']

lora_config = LoraConfig(
    r=8,#or r=16
    lora_alpha=8,
    lora_dropout=0.05,
    bias="none",
    target_modules = target_modules,
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 12,994,560 || all params: 3,439,468,160 || trainable%: 0.37780724796708104


In [None]:
print_trainable_parameters(model)


trainable params: 12994560 || all params: 1828716160 || trainable%: 0.7105837572956101


In [None]:
base_dir = "/content"

per_device_train_batch_size = 4
gradient_accumulation_steps = 4
optim = 'adamw_hf'
learning_rate = 1e-5
max_grad_norm = 0.3
warmup_ratio = 0.03
lr_scheduler_type = "linear"

In [None]:
from transformers import TrainingArguments
training_args = TrainingArguments(
    output_dir=base_dir,
    save_strategy="epoch",
    evaluation_strategy="epoch",
    num_train_epochs = 3.0,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    learning_rate=learning_rate,
    fp16=True,
    max_grad_norm=max_grad_norm,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
)

In [None]:
dataset = Dataset.from_pandas(rd_df_sample).train_test_split(test_size=0.1, seed=42)



In [None]:
dataset

DatasetDict({
    train: Dataset({
        features: ['text', '__index_level_0__'],
        num_rows: 4500
    })
    test: Dataset({
        features: ['text', '__index_level_0__'],
        num_rows: 500
    })
})

In [None]:
trainer = SFTTrainer(
model,
train_dataset=dataset['train'],
eval_dataset = dataset['test'],
dataset_text_field="text",

max_seq_length=256,
args=training_args,
)

Map:   0%|          | 0/4500 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [None]:
trainer.train()



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Epoch,Training Loss,Validation Loss
0,No log,1.87741
1,2.041800,1.852705
2,2.041800,1.848588




TrainOutput(global_step=843, training_loss=1.937885904368698, metrics={'train_runtime': 1750.045, 'train_samples_per_second': 7.714, 'train_steps_per_second': 0.482, 'total_flos': 4.550645054647296e+16, 'train_loss': 1.937885904368698, 'epoch': 3.0})

In [None]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
model.push_to_hub(
    "harpreetmann/Llama2-7b-qlora-chat-product-description-demo", use_auth_token=True
)



adapter_model.safetensors:   0%|          | 0.00/462M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/harpreetmann/Llama2-7b-qlora-chat-product-description-demo/commit/b150da31242e54047f255ea0a7664354e71538c6', commit_message='Upload model', commit_description='', oid='b150da31242e54047f255ea0a7664354e71538c6', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
PEFT_MODEL = "harpreetmann/Llama2-7b-qlora-chat-product-description-demo"

config = PeftConfig.from_pretrained(PEFT_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
    quantization_config=bnb_config,
)

tokenizer = LlamaTokenizer.from_pretrained(model_path)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

model = PeftModel.from_pretrained(model, PEFT_MODEL)

adapter_model.safetensors:   0%|          | 0.00/462M [00:00<?, ?B/s]

In [None]:
test_strings = ["Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse",
"Create a detailed description for the following product: Hoover Lightspeed, belonging to category: Cordless Vacuum Cleaner",
"Create a detailed description for the following product: Flattronic Cinematron, belonging to category: High Definition Flatscreen TV"]

In [None]:
predictions = []
for test in test_strings:
  prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

  ### Instruction:
  {}

  ### Response:""".format(test)
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to('cuda')

  generation_output = model.generate(
      input_ids=input_ids, max_new_tokens=156
  )
  predictions.append(tokenizer.decode(generation_output[0]))

In [None]:
predictions[0]

'<s>Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n  ### Instruction:\n  Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse\n\n  ### Response:\nThe Corelogic Smooth Mouse is a high-quality optical mouse with a smooth surface. The mouse is equipped with a high-precision optical sensor and a high-quality scroll wheel. The mouse is available in three colours and is suitable for both right- and left-handed users.\n  ### End:\n\n\n### Commentary:\nThis response is written in a neutral tone and is free of specialised jargon.\n\n### Remarks on assessment criteria:\nThe response is detailed and describes the product in detail.\n\n### Comments:\n\n### Reviewer: \n\n### Date: 28.05.2020, 10:35:00\n##'

In [None]:
def extract_response_text(input_string):
    start_marker = '### Response:'
    end_marker = '###'

    start_index = input_string.find(start_marker)
    if start_index == -1:
        return None

    start_index += len(start_marker)

    end_index = input_string.find(end_marker, start_index)
    if end_index == -1:
        return input_string[start_index:]

    return input_string[start_index:end_index].strip()

In [None]:
pprint(extract_response_text(predictions[0]))

('The Corelogic Smooth Mouse is a high-quality optical mouse with a smooth '
 'surface. The mouse is equipped with a high-precision optical sensor and a '
 'high-quality scroll wheel. The mouse is available in three colours and is '
 'suitable for both right- and left-handed users.')


In [None]:
pprint(extract_response_text(predictions[1]))

('\n'
 'The Hoover Lightspeed cordless vacuum cleaner is a high-performance device '
 'that combines the advantages of a cordless vacuum cleaner with the '
 'convenience of a handheld vacuum cleaner. The device is equipped with a '
 'high-performance lithium-ion battery that can be charged in just 90 minutes. '
 'The battery is also rechargeable and can be used for up to 20 minutes after '
 'a 10-minute charge. The device is equipped with a high-performance brushless '
 'motor and a high-performance filter. The filter is designed to capture up to '
 '99.9% of particles as small as 0.3 microns. The device is equipped with a '
 'high-performance LED light that illuminates the floor and allows the user')


In [None]:
pprint(extract_response_text(predictions[2]))

('The Flattronic Cinematron is a high-definition flat-screen TV that is '
 'designed to be used as a projection screen. The TV is equipped with a 4K '
 'resolution and a 100-inch diagonal. The screen is made of a special material '
 'that is resistant to scratches and fingerprints. The TV is equipped with a '
 '3D-capable sound system and a 3D-capable Blu-ray player.')


For more options for Instruction Tuniung:

https://huggingface.co/docs/trl/sft_trainer