In [1]:
!pip3 install -q -U bitsandbytes
!pip3 install -q -U peft
!pip3 install -q -U trl
!pip3 install -q -U accelerate
!pip3 install -q -U datasets
!pip3 install -q -U transformers

In [2]:
import os
from huggingface_hub import notebook_login
# Open the interactive Hugging Face Hub login widget (rendered in the cell output).
# NOTE(review): presumably needed because the Gemma checkpoint is gated — confirm.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [14]:
from transformers import  AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Base checkpoint that gets fine-tuned.
model_id = "google/gemma-2-2b"

# 4-bit NF4 weights with double quantization; compute runs in float16.
# NOTE(review): Gemma-2 is usually run in bfloat16 — confirm float16 compute is intended.
bnb_config_4bit = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Plain 8-bit alternative; not referenced by any other cell visible here.
bnb_config_8bit = BitsAndBytesConfig(load_in_8bit=True)




In [15]:
# Load the base checkpoint with the 4-bit quantization config; device_map="auto"
# leaves layer placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config_4bit,
    attn_implementation="eager",
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Only borrow EOS as the padding token when the tokenizer ships without one.
# The language-modeling collator used for training masks every pad_token_id
# position in the labels, so aliasing pad to EOS would also hide genuine EOS
# tokens from the loss.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
#prompt="what dinner should I have today?"
#input_ids=tokenizer(prompt,return_tensors="pt").input_ids.to(device)
#with torch.no_grad():
    #output_ids=model.generate(input_ids,max_length=300,do_sample=True,top_k=2)
    
#output=tokenizer.decode(output_ids[0],skip_special_tokens=True)

In [6]:
# number of params


In [16]:
# Get the target modules for lora config
import bitsandbytes

def get_modules(model, layer_types=None):
    """Collect the leaf names of the model's quantized linear layers.

    Walks ``model.named_modules()`` and, for every module that is an instance
    of ``layer_types``, keeps the last dotted component of its qualified name
    (for example ``"model.layers.0.self_attn.q_proj"`` contributes ``"q_proj"``).

    Args:
        model: Any object exposing ``named_modules()`` that yields
            ``(qualified_name, module)`` pairs.
        layer_types: A class or tuple of classes to match. Defaults to
            ``bitsandbytes.nn.Linear4bit``, which is what the original
            implementation hard-coded.

    Returns:
        set[str]: The distinct leaf names of all matching modules.
    """
    if layer_types is None:
        layer_types = bitsandbytes.nn.Linear4bit
    # rsplit(...)[-1] already covers undotted names, so no length check is
    # needed (the old check compared the string length, not the part count).
    return {
        name.rsplit(".", 1)[-1]
        for name, module in model.named_modules()
        if isinstance(module, layer_types)
    }

# Leaf names of the 4-bit linear layers; passed to LoraConfig as the adapter targets.
target_modules = get_modules(model=model)


In [20]:
# Display the collected module names.
target_modules

{'down_proj', 'gate_proj', 'k_proj', 'o_proj', 'q_proj', 'up_proj', 'v_proj'}

In [17]:
# Lora adapters
from peft import get_peft_model,LoraConfig,prepare_model_for_kbit_training,TaskType

# LoRA settings for causal-LM fine-tuning: rank-16 adapters on every collected
# module, alpha 32 and 5% dropout.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=target_modules,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
)

In [18]:
# Wrap the quantized base model with the adapters described by lora_config.
peft_model=get_peft_model(model,lora_config)

In [19]:
# Report trainable vs. total parameter counts.
peft_model.print_trainable_parameters()

trainable params: 20,766,720 || all params: 2,635,108,608 || trainable%: 0.7881


# Prepare dataset for Instruction Finetuning


In [21]:
import datasets
# Fetch the training split of HelpSteer2 and show its schema.
ds = datasets.load_dataset(path="nvidia/HelpSteer2", split="train")
ds

Dataset({
    features: ['prompt', 'response', 'helpfulness', 'correctness', 'coherence', 'complexity', 'verbosity'],
    num_rows: 20324
})

In [1]:
import random
# Print one randomly chosen prompt/response pair.
# Index the row first: ds[idx] reads a single record, whereas ds["prompt"][idx]
# may pull the whole column before picking one element.
idx = random.randint(0, len(ds) - 1)
sample = ds[idx]
print(f"prompt:\n\n{sample['prompt']}\n\n")
print(f"response:\n\n{sample['response']}")

NameError: name 'ds' is not defined

In [23]:
def format_prompt(example):
    """Render one dataset record as a single instruction/response training text.

    Args:
        example: Mapping that provides the ``"prompt"`` and ``"response"`` keys.

    Returns:
        str: The fixed preamble followed by the ``### Instruction:`` and
        ``### Response:`` sections filled with the record's values.
    """
    template = "Below is a instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:\n{response}"
    fields = {"instruction": example["prompt"], "response": example["response"]}
    return template.format_map(fields)

# Add a "formated_prompt" column holding the rendered text of every record.
ds = ds.map(lambda example: {"formated_prompt": format_prompt(example)})

In [24]:
# Seed the shuffle so a Restart & Run All reproduces the same ordering.
ds = ds.shuffle(seed=42)
# Tokenize in batches and cap every sequence at 1024 tokens, the same limit the
# trainer is configured with.
# NOTE(review): a dataset that already has input_ids may bypass the trainer's
# own truncation, depending on the TRL version — confirm.
ds = ds.map(
    lambda batch: tokenizer(batch["formated_prompt"], truncation=True, max_length=1024),
    batched=True,
)

Map:   0%|          | 0/20324 [00:00<?, ? examples/s]

In [25]:
# 70/30 train/test split; seeded so both splits are identical on every run.
dataset = ds.train_test_split(test_size=0.3, shuffle=True, seed=42)

In [26]:
# Unpack the two splits into the names the trainer factory reads.
train_dataset, test_dataset = dataset["train"], dataset["test"]

## Training 

In [27]:
# Re-check the trainable parameter count right before training.
peft_model.print_trainable_parameters()

trainable params: 20,766,720 || all params: 2,635,108,608 || trainable%: 0.7881


In [30]:
from trl import SFTTrainer
import transformers 

def get_trainer(
    peft_model,
    max_steps=80,
    learning_rate=2e-4,
    output_dir="outputs",
    max_seq_length=1024,
):
    """Build an ``SFTTrainer`` for ``peft_model`` on the notebook's data splits.

    Reads the notebook-level ``train_dataset``, ``test_dataset``, ``tokenizer``
    and ``lora_config``; they must be defined before this is called.

    Args:
        peft_model: Model to train. May already be wrapped with adapters.
        max_steps: Number of optimizer steps to run (default 80).
        learning_rate: Peak learning rate (default 2e-4).
        output_dir: Directory that receives checkpoints (default ``"outputs"``).
        max_seq_length: Sequence-length limit handed to the trainer (default 1024).

    Returns:
        The configured trainer; training has not been started.
    """
    # Local import: the cell that defines this function does not import PeftModel.
    from peft import PeftModel

    training_args = transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_ratio=0.3,
        max_steps=max_steps,
        learning_rate=learning_rate,
        # A positive max_steps takes precedence over the epoch count.
        num_train_epochs=1,
        logging_steps=1,
        logging_dir="./logs",
        output_dir=output_dir,
        optim="paged_adamw_8bit",
        save_strategy="epoch",
    )

    # A model that already carries adapters must not be handed a second adapter
    # config: depending on the TRL version it is ignored or rejected.
    already_adapted = isinstance(peft_model, PeftModel)

    return SFTTrainer(
        peft_model,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        peft_config=None if already_adapted else lora_config,
        dataset_text_field="formated_prompt",
        max_seq_length=max_seq_length,
        tokenizer=tokenizer,
        args=training_args,
        data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
    )

In [31]:
# for memory efficiency
# prepare_model_for_kbit_training() freezes every parameter of the model it is
# given. It is applied here to the already-wrapped PEFT model, so remember which
# parameters were trainable and switch them back on afterwards; otherwise the
# LoRA weights would stay frozen.
trainable_names = {
    name for name, param in peft_model.named_parameters() if param.requires_grad
}
model = prepare_model_for_kbit_training(peft_model, use_gradient_checkpointing=True)
for name, param in model.named_parameters():
    if name in trainable_names:
        param.requires_grad_(True)

# empty gpu cache
torch.cuda.empty_cache()


In [None]:
# Build the trainer for the first fine-tuning run.
trainer=get_trainer(peft_model)

In [None]:
# Run the first fine-tuning pass.
trainer.train()

In [None]:
# Persist the first run's model under "<base id>-instruct-HelpSteer2".
new_model_id = f"{model_id}-instruct-HelpSteer2"
trainer.save_model(new_model_id)

## Second training 

In [32]:
from peft import LoraConfig,get_peft_model,PeftConfig,PeftModel
from transformers import AutoModelForCausalLM

# Adapter checkpoint written by the first run; defined once and reused below.
checkpoint = "outputs/checkpoint-80"
peft_config = PeftConfig.from_pretrained(checkpoint)

# Reload the base model exactly as for the first run (4-bit weights, automatic
# placement, eager attention). Without these arguments it would be loaded
# unquantized and without device placement.
model = AutoModelForCausalLM.from_pretrained(
    peft_config.base_model_name_or_path,
    device_map="auto",
    quantization_config=bnb_config_4bit,
    attn_implementation="eager",
)
# Prepare the quantized base model *before* attaching the adapter, so that the
# freezing done by this helper cannot touch the LoRA weights.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)
# is_trainable=True loads the adapter unfrozen so training can continue.
peft_model = PeftModel.from_pretrained(model, checkpoint, is_trainable=True)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [33]:
# Report trainable vs. total parameter counts for the reloaded adapter.
peft_model.print_trainable_parameters()

trainable params: 20,766,720 || all params: 2,635,108,608 || trainable%: 0.7881


In [None]:
# Build a fresh trainer around the reloaded adapter for the second run.
second_trainer=get_trainer(peft_model)

In [None]:
# Run the second fine-tuning pass.
second_trainer.train()

In [None]:
# Save the result of the *second* run. The original called trainer.save_model(),
# which would write the first run's model again (or fail on a fresh kernel where
# `trainer` was never created).
# NOTE(review): this reuses the first run's output path and overwrites it —
# confirm that is intended.
new_model_id = model_id + "-instruct-HelpSteer2"
second_trainer.save_model(new_model_id)