In [1]:
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import os
import torch
from time import time
from datasets import load_dataset
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    AutoTokenizer,
    TrainingArguments,
)
from trl import SFTTrainer,setup_chat_format

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from getpass import getpass

from huggingface_hub import login
import wandb
# from kaggle_secrets import UserSecretsClient  # alternative secret source when running on Kaggle

# SECURITY: credentials must never be stored in a notebook. The literal token and API key
# that used to be assigned in this cell have been exposed and should be revoked and
# regenerated in the Hugging Face and Weights & Biases account settings.
#
# Each secret is read from its conventional environment variable; when the variable is
# unset or empty, a hidden interactive prompt is used instead so nothing is echoed or saved.
huggingface_hub_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
wandb_api = os.environ.get("WANDB_API_KEY") or getpass("Weights & Biases API key: ")

# Authenticate against the Hugging Face Hub.
login(token=huggingface_hub_token)

# Authenticate against Weights & Biases and open the run used for training logs.
wandb.login(key=wandb_api)
run = wandb.init(
    project='Fine-tune Llama 3 8B',
    job_type="training",
    anonymous="allow"
)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Token is valid (permission: read).
Your token has been saved to C:\Users\Administrator\.cache\huggingface\token
Login successful


[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\Administrator\_netrc


In [6]:
# Identifier of the base model to fine-tune.
# Original note: {model: llama-3, framework: transformers, size: 8B, type: 8b-chat-hf, version: 1}
# Alternative kept for reference (a local path under /kaggle/input):
# model_id = "/kaggle/input/llama-3/transformers/8b-chat-hf/1"
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

In [4]:
# ---- Tunable -----------------------------------------------------------------
# Number of passes over the training split.
num_of_epochs = 1

# ---- Fixed settings (not meant to be changed) ---------------------------------
# Quantization
use_4bit = True
bnb_4bit_compute_dtype = "float16"
bnb_4bit_quant_type = "nf4"
use_nested_quant = False

# LoRA
lora_r = 64
lora_alpha = 16
lora_dropout = 0.1

# Mixed-precision flags
fp16 = False
bf16 = False

# Batch sizes
per_device_train_batch_size = 4
per_device_eval_batch_size = 4

# Gradient handling
gradient_accumulation_steps = 1
gradient_checkpointing = True
max_grad_norm = 0.3

# Optimizer
learning_rate = 2e-4
weight_decay = 0.001
optim = "paged_adamw_32bit"

# Schedule
lr_scheduler_type = "cosine"
max_steps = -1
warmup_ratio = 0.03

# Batching / checkpointing / logging cadence
group_by_length = True
save_steps = 0
logging_steps = 25

# Sequence handling and device placement
max_seq_length = None
packing = False
device_map = {"": "cuda:0"}

In [5]:
# Attention backend requested when the model is loaded.
attn_implementation = "eager"

# Dtype handed to bitsandbytes as the 4-bit compute dtype.
# NOTE(review): this is torch.bfloat16, not the "float16" string stored in
# `bnb_4bit_compute_dtype` in the configuration cell — that variable is not used here.
compute_dtype = torch.bfloat16

# 4-bit quantization settings (QLoRA).
# NOTE(review): double quantization is hard-coded to True; the `use_nested_quant`
# value from the configuration cell is not consulted.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    load_in_4bit=use_4bit,
)

Load the pretrained model and tokenizer

In [10]:
# Fail fast with an actionable message: the model is loaded 4-bit quantized and placed on
# "cuda:0", so without a visible CUDA device the load below cannot succeed.
if not torch.cuda.is_available():
    raise RuntimeError(
        "No GPU found. A GPU is needed for quantization. "
        f"(torch.cuda.is_available()=False, device_count={torch.cuda.device_count()})"
    )

time_start = time()

# Load the base model with the 4-bit quantization config.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map=device_map,
    attn_implementation=attn_implementation
)

# Load the matching tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Batches are padded during training, which raises if the tokenizer has no pad token.
# Only fill it in when it is missing, so a tokenizer that already defines one is untouched.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

time_end = time()
print(f"Prepare model, tokenizer: {round(time_end-time_start, 3)} sec.")

False
0


RuntimeError: No GPU found. A GPU is needed for quantization.

In [None]:
# Load every split of the medical chatbot dataset as a single dataset.
ds = load_dataset("ruslanmv/ai-medical-chatbot", split="all")

# Shuffle with a fixed seed, then keep the first 1000 rows only (quick demo).
ds = ds.shuffle(seed=65)
ds = ds.select(range(1000))

def format_chat_template(row):
    """Add a chat-formatted ``"text"`` field to one dataset row.

    The row's ``"Patient"`` value becomes the user turn and its ``"Doctor"`` value the
    assistant turn. Both turns are rendered to a single string by the module-level
    ``tokenizer``'s chat template (``tokenize=False``), stored under ``row["text"]``.

    Args:
        row: Mapping with ``"Patient"`` and ``"Doctor"`` entries; it is modified in place.

    Returns:
        The same ``row`` object, now carrying the ``"text"`` entry.
    """
    turns = (("user", "Patient"), ("assistant", "Doctor"))
    messages = [{"role": role, "content": row[column]} for role, column in turns]
    row["text"] = tokenizer.apply_chat_template(messages, tokenize=False)
    return row

# Add the chat-formatted "text" column to every row (4 worker processes).
ds = ds.map(
    format_chat_template,
    num_proc=4,
)

# Keep one formatted example for inspection. It has to be read before the split,
# because afterwards `ds` holds separate "train"/"test" parts instead of a flat table.
sample_text = ds['text'][3]

# Hold out 10% for evaluation; the fixed seed makes the split reproducible across runs.
ds = ds.train_test_split(test_size=0.1, seed=65)

# Show the example. A bare expression in the middle of a cell is silently discarded,
# so it is printed explicitly here.
print(sample_text)

In [None]:
# LoRA config: rank-16 adapters (alpha 32, dropout 0.05) on the attention and MLP
# projection layers listed in `target_modules`. These values intentionally differ from
# `lora_r` / `lora_alpha` / `lora_dropout` in the configuration cell (64 / 16 / 0.1).
peft_config = LoraConfig(
    r=16, #lora_r=64
    lora_alpha=32, #lora_alpha=16
    lora_dropout=0.05, #0.1
    bias="none",
    task_type="CAUSAL_LM",
    target_modules= ["q_proj", "k_proj", "v_proj", "o_proj","gate_proj", "up_proj", "down_proj",]
)

# Wrap the base model exactly once: `model` is rebound to the wrapped model, so without
# this guard re-running the cell would stack a second adapter wrapper on top of the first.
if not isinstance(model, PeftModel):
    # Recommended PEFT preparation step for a k-bit quantized base model before adapters
    # are attached. Gradient checkpointing is left off here because the TrainingArguments
    # used for this run do not enable it.
    model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)
    model = get_peft_model(model, peft_config)

# Trainer settings. Literal values are demo-sized overrides of the configuration cell
# (which declares batch size 4, accumulation 1, logging every 25 steps).
training_arguments = TrainingArguments(
    # -- output and experiment tracking
    output_dir="./results_llama3_sft/",
    report_to="wandb",
    logging_strategy="steps",
    logging_steps=1,
    # -- batching
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    group_by_length=group_by_length,
    # -- optimisation
    optim=optim,
    learning_rate=learning_rate,
    warmup_steps=10,
    num_train_epochs=num_of_epochs,
    # -- precision flags (both taken from the configuration cell)
    fp16=fp16,
    bf16=bf16,
    # -- evaluation: step-based; transformers interprets a float in [0, 1) as a ratio
    #    of the total number of training steps
    evaluation_strategy="steps",
    eval_steps=0.2,
    # Not set (left at library defaults): save_steps, max_steps, lr_scheduler_type.
)


In [None]:
# Supervised fine-tuning on the chat-formatted "text" column.
trainer = SFTTrainer(
    # what to train, and how
    args=training_arguments,
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    # data
    train_dataset=ds["train"],
    eval_dataset=ds["test"],
    dataset_text_field="text",
    # sequence handling: fixed 512 limit, packing disabled
    max_seq_length=512,
    packing=False,
)

# Run the training loop.
trainer.train()