In [1]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes transformers==4.31.0 trl==0.4.7


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
!pip install xformers


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
import os
import torch
import re
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)

from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [None]:
from huggingface_hub import login
login()

In [5]:
model_name = "meta-llama/Meta-Llama-3-8B"
dataset_name = "timdettmers/openassistant-guanaco"
new_model = "Llama-2-7b-chat-finetune"

lora_r = 16
lora_alpha = 8
lora_dropout = 0.1

use_8bit = True                                     
bnb_8bit_compute_dtype = "bf16"
bnb_8bit_quant_type = "nf8"

output_dir = "./results"
num_train_epochs = 1

fp16 = False
bf16 = True

per_device_train_batch_size = 4
per_device_eval_batch_size = 4
gradient_accumulation_steps = 4
gradient_checkpointing = True
max_grad_norm = 0.25                              #check this out

learning_rate = 5*1e-5
weight_decay = 0.00001

optim = "paged_adamw_32bit"
lr_scheduler_type = "cosine"

max_steps = -1
warmup_ratio = 0.05                              #check this too
group_by_length = True
save_steps = 0
logging_steps = 5

max_seq_length = None
packing = False

device_map = {"": 0}

In [6]:
ds = load_dataset(dataset_name)

Repo card metadata block was not found. Setting CardData to empty.


In [7]:
def make_data(raw_data):
    
    sys_prompt = "<|start_header_id|>system<|end_header_id|>You are a helpful AI chatbot to humans<|eot_id|>"

    text = raw_data['text']
    formatted_chat = ""

    first_pre = "### Human:"
    second_pre = "### Assistant:"
 
    flist = [m.start() for m in re.finditer(first_pre, text)] 
    flist.append(len(text))
    slist = [m.start() for m in re.finditer(second_pre, text)] 


    for i in range(min(len(flist), len(slist))):
        
        human = text[flist[i]:slist[i]]
        chatbot = text[slist[i]:flist[i + 1]]
        human = human[human.index(':') + 1: ]
        chatbot = chatbot[chatbot.index(':') + 1: ]

        human = f"<|start_header_id|>Human<|end_header_id|>{human}<|eot_id|>"
        chatbot = f"<|start_header_id|>Chatbot<|end_header_id|>{chatbot}<|eot_id|>"

        formatted_chat += (human + chatbot)
    
    final_prompt = f"<|begin_of_text|> {sys_prompt} {formatted_chat} <|end_of_text|>"
    
    return {"text" : ''.join(final_prompt)}

In [8]:
data = ds['train'].map(make_data)
data = data.shuffle(seed=69)

In [9]:
bnb_config = BitsAndBytesConfig(
    load_in_8bit=use_8bit,
    bnb_8bit_quant_type=bnb_8bit_quant_type,
    bnb_8bit_compute_dtype=torch.bfloat16,
)

In [None]:
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device_map,
    quantization_config=bnb_config
)

In [11]:
model.config.use_cache = False
model.config.pretraining_tp = 1

In [12]:
tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf", trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token



In [13]:
# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

In [14]:
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    bf16=True,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

In [15]:
trainer = SFTTrainer(
    model=model,
    train_dataset=data,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)





In [16]:
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss
5,7.7995
10,7.7531
15,7.3965
20,7.6689
25,7.3267
30,6.8612
35,6.4671
40,5.9952
45,5.1804
50,3.6833


In [None]:
import pickle 
file_path = './base.pkl'

with open(file_path, 'wb') as files:
    pickle.dump(model.state_dict(), files)