<a href="https://colab.research.google.com/github/Muhammad-Murtaz/LLM-Fine-tuning/blob/main/LLAMA_FINE_TUNING.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q unsloth trl peft accelerate bitsandbytes datasets transformers

In [None]:
import torch
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments


In [None]:
# Upper bound on the length of one example (prompt plus response).
max_seq_length = 2048

# Load the 4-bit quantized Llama 3 checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/llama-3-8b-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,  # 4-bit loading; the original author marks this as critical on Colab GPUs
    dtype=None,  # None leaves the dtype choice to the loader (auto-detect)
)

In [None]:
# Load the "train" split of the yahma/alpaca-cleaned dataset.
dataset = load_dataset(path="yahma/alpaca-cleaned", split="train")

In [None]:
# Cap the number of training examples to keep the run short. min() avoids an
# IndexError when the dataset has fewer rows than the cap (for example after
# swapping in a smaller dataset).
MAX_TRAIN_EXAMPLES = 5000
dataset = dataset.select(range(min(MAX_TRAIN_EXAMPLES, len(dataset))))

In [None]:
# Prompt template for one example. The three named placeholders
# ({instruction}, {input}, {output}) are filled in with str.format().
alpaca_prompt = (
    "Below is an instruction that describes a task, paired with an input that provides further context.\n"
    "Write a response that appropriately completes the request.\n"
    "\n"
    "\n"
    "### Instruction:\n"
    "{instruction}\n"
    "\n"
    "\n"
    "### Input:\n"
    "{input}\n"
    "\n"
    "\n"
    "### Response:\n"
    "{output}"
)

In [None]:
# End-of-sequence marker appended to every rendered training example.
EOS_TOKEN = tokenizer.eos_token


def format_prompts_func(examples):
    """Render a batch of records into complete prompt strings.

    `examples` is indexed by the column names "instruction", "input" and
    "output"; the three columns are walked in parallel. Returns a dict with a
    single key, "text", holding one formatted string per record, each one
    ending with EOS_TOKEN.
    """
    columns = zip(examples["instruction"], examples["input"], examples["output"])
    return {
        "text": [
            alpaca_prompt.format(instruction=task, input=context, output=answer) + EOS_TOKEN
            for task, context, answer in columns
        ]
    }


# batched=True: the function is written to receive whole columns, not single rows.
dataset = dataset.map(format_prompts_func, batched=True)

In [None]:
# Attach LoRA adapters to the listed projection layers of the loaded model.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    r=16,  # LoRA rank
    lora_alpha=16,
    lora_dropout=0.0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
)

In [None]:
# Choose the mixed-precision mode once: bf16 where the GPU reports support,
# fp16 otherwise (exactly one of the two flags ends up True).
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir="./llama3-unsloth",
    # 2 examples per step x 4 accumulation steps = 8 examples per optimizer update per device.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    warmup_steps=10,
    weight_decay=0.01,
    optim="adamw_8bit",
    bf16=use_bf16,
    fp16=not use_bf16,
    seed=42,
    logging_steps=10,
    save_strategy="epoch",
    report_to="none",
)

# NOTE(review): tokenizer / dataset_text_field / max_seq_length are passed
# directly to SFTTrainer; confirm the installed trl version still accepts them here.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",  # column produced by the formatting step
    max_seq_length=max_seq_length,
    args=training_args,
)

In [None]:
# Run fine-tuning; keep the returned summary and show it as the cell output.
train_result = trainer.train()
train_result

In [None]:
# Switch the model to Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# Render the prompt from the same template that was used to build the training
# text, so the model is queried with the layout it was fine-tuned on (preamble,
# "### Input:" section and spacing included). The output slot is left empty so
# generation continues right after "### Response:".
prompt = alpaca_prompt.format(
    instruction="Explain the legal requirements for starting a tech startup.",
    input="",
    output="",
)

inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

outputs = model.generate(
    **inputs,
    max_new_tokens=256,
    do_sample=True,  # sampled decoding, shaped by temperature and top_p below
    temperature=0.7,
    top_p=0.9,
)

# outputs[0] is the first (and only) sequence of the batch.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))