# Supervised Fine-Tuning

Import dependencies

In [1]:
import os

import torch
from datasets import load_dataset, DatasetDict
from dotenv import load_dotenv
from huggingface_hub import login as huggingface_hub_login
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig, AutoConfig, AutoModelForSequenceClassification

  from .autonotebook import tqdm as notebook_tqdm


Log into Hugging Face

In [2]:
# Pull secrets from the project-local .env file into the process environment,
# so the access token never has to be written into the notebook itself.
load_dotenv("../secrets/.env")

# Resolves to None when HUGGING_FACE_TOKEN is not defined in the environment.
hugging_face_token = os.environ.get("HUGGING_FACE_TOKEN")

huggingface_hub_login(token=hugging_face_token)

Quantization config

In [3]:
# 4-bit quantization settings used when loading the base model:
# NF4 quantization type, double quantization enabled, float16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

Download the model

In [None]:
# One-off setup: authenticate the Hugging Face CLI, then download the
# meta-llama/Meta-Llama-3-8B repository into ../LLMs/llama3-8b, which is the
# local directory the model is loaded from later in this notebook.
# NOTE(review): the CLI login is interactive, so this cell is presumably meant
# to be run by hand rather than as part of "Run All" - confirm.
!huggingface-cli login
!huggingface-cli download meta-llama/Meta-Llama-3-8B --local-dir ../LLMs/llama3-8b --local-dir-use-symlinks False

Load model and tokenizer

In [4]:
# Hub id of the base model; the tokenizer is fetched by this id, while the
# weights are read from the local copy in ../LLMs/llama3-8b.
model_name = "meta-llama/Meta-Llama-3-8B"

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# Use EOS as the padding token so padded batches can be built.
tokenizer.pad_token = tokenizer.eos_token

# Llama is implemented natively in transformers, so trust_remote_code is not
# needed; leaving it off avoids ever executing code shipped with a checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    "../LLMs/llama3-8b",
    quantization_config=bnb_config,
    device_map="auto",
)

# Keep the model config in sync with the tokenizer's padding choice.
model.config.pad_token_id = tokenizer.pad_token_id

Loading checkpoint shards: 100%|██████████| 4/4 [02:28<00:00, 37.08s/it]


Split the dataset into training and evaluation sets

In [3]:
# Load every record of the JSONL file as a single split.
dataset = load_dataset("json", data_files="../datasets/dataset.jsonl", split="train")

# Positional 80/20 cut: the leading records become the training set and the
# trailing records the evaluation set.
# NOTE(review): no shuffle happens before the cut - confirm the source file is
# not ordered in a way that makes the evaluation slice unrepresentative.
split_ratio = 0.8
split_index = int(len(dataset) * split_ratio)

# Only the training portion is shuffled (fixed seed for reproducibility);
# the evaluation portion keeps its original order.
train_dataset = dataset.select(range(split_index)).shuffle(seed=42)
eval_dataset = dataset.select(range(split_index, len(dataset)))

# Write both splits to disk as JSON Lines.
train_dataset.to_json("../datasets/train.jsonl", orient="records", lines=True)
eval_dataset.to_json("../datasets/eval.jsonl", orient="records", lines=True)

print(f"✅ Done! {len(train_dataset)} training / {len(eval_dataset)} evaluation samples.")


Generating train split: 9306 examples [00:05, 1751.22 examples/s]
Creating json from Arrow format: 100%|██████████| 8/8 [00:02<00:00,  2.81ba/s]
Creating json from Arrow format: 100%|██████████| 2/2 [00:00<00:00,  2.03ba/s]

✅ Done! 7444 training / 1862 evaluation samples.





Format and tokenize the dataset for text generation

In [None]:
def format_for_generation(example):
    """Render one record as a single prompt/response training string.

    Args:
        example: Mapping that provides the keys "prompt" and "response".

    Returns:
        A dict with one key, "text", holding the two fields joined by the
        "[PROMPT]: ... / [RESPONSE]: ..." template on separate lines.
    """
    prompt = example["prompt"]
    response = example["response"]
    text = f"[PROMPT]: {prompt}\n[RESPONSE]: {response}"
    return {"text": text}

# Apply the prompt/response template to each split independently.
train_formatted_dataset = train_dataset.map(format_for_generation)
# Fixed: the evaluation split is now derived from eval_dataset. It was
# previously mapped from train_dataset, so evaluation would have been run on
# the training data.
eval_formatted_dataset = eval_dataset.map(format_for_generation)

def tokenize_gen(example, max_length=512):
    """Tokenize formatted text and attach causal-LM training labels.

    Works for a single example (``example["text"]`` is a string) and for a
    batch as passed by ``Dataset.map(..., batched=True)`` (a list of strings).

    Args:
        example: Mapping with a "text" entry (string or list of strings).
        max_length: Length every sequence is truncated/padded to.

    Returns:
        The tokenizer output with an added "labels" entry. Labels copy
        ``input_ids``, except that padded positions (attention mask 0) are set
        to -100 so they are ignored by the loss.
    """
    encoded = tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=max_length,
    )

    def _labels(ids, mask):
        # Mask by attention_mask rather than by token id: the pad token may be
        # the same id as EOS, and only real padding should be ignored.
        return [token if keep else -100 for token, keep in zip(ids, mask)]

    # Without a "labels" field the causal LM returns no loss to train on.
    if isinstance(example["text"], str):
        encoded["labels"] = _labels(encoded["input_ids"], encoded["attention_mask"])
    else:
        encoded["labels"] = [
            _labels(ids, mask)
            for ids, mask in zip(encoded["input_ids"], encoded["attention_mask"])
        ]
    return encoded

# Tokenize both formatted splits in batched mode with the same function.
train_tokenized_dataset, eval_tokenized_dataset = (
    formatted_split.map(tokenize_gen, batched=True)
    for formatted_split in (train_formatted_dataset, eval_formatted_dataset)
)

Apply LoRA with PEFT

In [None]:
# The base model was loaded with 4-bit quantization, so prepare it for
# training before attaching adapters (PEFT's recommended step for k-bit
# models; see the PEFT quantization guide for what it changes).
model = prepare_model_for_kbit_training(model)

# Low-rank adapters on the attention query/value projections only.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

# NOTE: this cell rebinds `model`; re-running it without reloading the base
# model would wrap an already-wrapped model.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

Set Up TrainingArguments and Trainer

In [None]:
# Hyperparameters and bookkeeping for the fine-tuning run.
# NOTE(review): no evaluation schedule is configured here, so eval_dataset is
# presumably only used when trainer.evaluate() is called explicitly - confirm.
training_args = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir="./llama3-finetune",
    logging_dir="./logs",
    # Run length and batch size.
    num_train_epochs=3,
    per_device_train_batch_size=2,
    # Optimizer, precision and learning-rate schedule.
    optim="paged_adamw_32bit",
    fp16=True,
    lr_scheduler_type="cosine",
    warmup_steps=100,
    # Logging and checkpoint cadence (in steps); keep at most two checkpoints.
    logging_steps=50,
    save_steps=500,
    save_total_limit=2,
)

# Wire the model, tokenizer, tokenized splits and arguments together.
trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=train_tokenized_dataset,
    eval_dataset=eval_tokenized_dataset,
)

Train the model

In [None]:
# Run fine-tuning with the trainer configured in the previous cell.
# This is the long-running step of the notebook.
trainer.train()

Save the fine-tuned model

In [None]:
# Store the trained model and its tokenizer side by side in one directory so
# they can be reloaded together.
finetuned_dir = "llama3-finetuned"
trainer.save_model(finetuned_dir)
tokenizer.save_pretrained(finetuned_dir)

Test the fine-tuned model

In [None]:
# Reload the fine-tuned model from the directory written by the save cell,
# instead of an unrelated absolute path from another environment.
# NOTE(review): "llama3-finetuned" is expected to hold LoRA adapter weights;
# loading them this way relies on transformers' PEFT integration and on the
# base-model path recorded at save time still being reachable - confirm.
model = AutoModelForCausalLM.from_pretrained(
    "llama3-finetuned",
    quantization_config=bnb_config,
    device_map="auto",
)
model.eval()

# TODO: put the question to ask the model here.
test_prompt = ""

# Mirror the template used for the training examples so the model sees the
# format it was fine-tuned on, and let it continue after "[RESPONSE]:".
generation_input = f"[PROMPT]: {test_prompt}\n[RESPONSE]:"

# Send the inputs to the device the model was placed on rather than assuming
# "cuda"; the model was previously left on the default device.
inputs = tokenizer(generation_input, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=100,
        pad_token_id=tokenizer.pad_token_id,
    )

print(tokenizer.decode(output[0], skip_special_tokens=True))


# from transformers import pipeline

# pipe = pipeline("text-generation", model="llama3-finetuned", tokenizer=tokenizer)
# print(pipe("Your prompt here", max_new_tokens=10))
