# Supervised Fine-Tuning

Install requirements

In [None]:
%pip install -q transformers accelerate bitsandbytes peft torch datasets

Import dependencies

In [None]:
from huggingface_hub import login as huggingface_hub_login
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
import torch
from peft import LoraConfig, get_peft_model
from google.colab import userdata
from dotenv import load_dotenv
import os

Log into Hugging Face

In [None]:
# Read the local secrets file, then authenticate against the Hugging Face Hub
# with whatever value HUGGING_FACE_TOKEN holds (None if it is not set).
load_dotenv(dotenv_path="../secrets/.env")
hugging_face_token = os.environ.get("HUGGING_FACE_TOKEN")
huggingface_hub_login(hugging_face_token)

Load model and tokenizer

In [None]:
from transformers import BitsAndBytesConfig

model_name = "meta-llama/Meta-Llama-3-8B"

# Passing `load_in_4bit=True` straight to `from_pretrained` is deprecated;
# the supported way to request 4-bit loading is an explicit quantization config.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=quantization_config,
    torch_dtype="auto",
)

# Reuse the end-of-sequence token for padding so fixed-length batches can be
# built (presumably this checkpoint defines no pad token of its own — confirm).
tokenizer.pad_token = tokenizer.eos_token

Load, format, and tokenize the dataset

In [None]:
# Read the JSON Lines training records from disk.
jsonl_path = "../datasets/dataset.jsonl"
dataset = load_dataset("json", data_files=jsonl_path)

def preprocess_data(example):
    """Build the single training string for one record.

    Args:
        example: Mapping with "prompt" and "response" entries.

    Returns:
        A dict with one key, "text", holding the prompt and response joined
        under their "[PROMPT]:" / "[RESPONSE]:" tags, one per line.
    """
    prompt = example["prompt"]
    response = example["response"]
    return {"text": f"[PROMPT]: {prompt}\n[RESPONSE]: {response}"}

# Add the combined "text" column produced by preprocess_data to each record.
formatted_dataset = dataset.map(function=preprocess_data)
# Uncomment to inspect the result:
# print(formatted_dataset)
# print(formatted_dataset["train"][0])

def _labels_from(input_ids, attention_mask):
    """Copy token ids as labels, replacing padded positions with -100."""
    return [
        token if keep else -100
        for token, keep in zip(input_ids, attention_mask)
    ]


def tokenize(example):
    """Tokenize the "text" field to a fixed 512 tokens and attach labels.

    A causal-LM loss is only computed when the batch carries "labels", so the
    input ids are copied into that field. Padding positions (attention mask 0)
    are set to -100 so they are ignored by the loss; the mask is used rather
    than the pad id because the pad token is the same as the EOS token here.

    Args:
        example: One record, or a batch of records when used with
            `map(..., batched=True)`, containing a "text" entry.

    Returns:
        The tokenizer output with an additional "labels" entry.
    """
    encoded = tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]

    if input_ids and isinstance(input_ids[0], list):
        # Batched call: one list of ids per record.
        encoded["labels"] = [
            _labels_from(ids, mask)
            for ids, mask in zip(input_ids, attention_mask)
        ]
    else:
        # Single record: a flat list of ids.
        encoded["labels"] = _labels_from(input_ids, attention_mask)
    return encoded

tokenized_dataset = formatted_dataset.map(tokenize, batched=True)


Apply LoRA with PEFT

In [None]:
# LoRA adapter settings: rank-8 updates on the attention query and value
# projections only, for a causal language-modelling task.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

# Wrap the loaded model with the adapters and report how many parameters
# will actually be trained.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

Set Up TrainingArguments and Trainer

In [None]:
# Hyperparameters and checkpoint/logging settings for the fine-tuning run.
training_args = TrainingArguments(
    output_dir="./llama3-finetune",
    per_device_train_batch_size=2,
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=50,
    save_steps=500,
    fp16=True,
    optim="paged_adamw_32bit",
    lr_scheduler_type="cosine",
    warmup_steps=100,
    save_total_limit=2,
)

# `load_dataset("json", data_files=...)` yields a DatasetDict keyed by split,
# and the Trainer needs an actual dataset, so pass the "train" split rather
# than the whole mapping.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    tokenizer=tokenizer,
)

Train the model

In [None]:
# Run the fine-tuning loop using the model, arguments and dataset given to `trainer`.
trainer.train()

Save the fine-tuned model

In [None]:
# Write the trained model and the tokenizer files into the same directory so
# they can be reloaded together.
save_dir = "llama3-finetuned"
trainer.save_model(save_dir)
tokenizer.save_pretrained(save_dir)

Test the fine-tuned model

In [None]:
# Reload from the directory this notebook saved to ("llama3-finetuned"); the
# previous path pointed at an unrelated CodeLlama checkpoint that does not
# match the tokenizer in use. Change the path if the model is stored elsewhere.
# `device_map="auto"` places the weights on the available accelerator.
model = AutoModelForCausalLM.from_pretrained("llama3-finetuned", device_map="auto")

# Fill in a prompt; training examples were formatted as
# "[PROMPT]: ...\n[RESPONSE]: ...", so the same layout is likely to work best.
test_prompt = ""

# Send the inputs to whichever device the model lives on instead of a
# hard-coded "cuda", which fails when the model was loaded on another device.
inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=100)

print(tokenizer.decode(output[0], skip_special_tokens=True))


# from transformers import pipeline

# pipe = pipeline("text-generation", model="llama3-finetuned", tokenizer=tokenizer)
# print(pipe("Your prompt here", max_new_tokens=10))
