# 1. Set Environment & Install Packages

## 1-1. Set Environment

## 1-2. Install Packages

In [None]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    Trainer,
    pipeline,
    logging,
    DataCollatorForLanguageModeling
)

from peft import LoraConfig, get_peft_model
# from trl import SFTTrainer

# 2. Reform Dataset according to Llama3 template

## For Llama 3.2 1B, the following prompt template is used for the chat models

System Prompt (optional) to guide the model<br>
User prompt (required) to give the instruction<br>
Model Answer (required)<br>

\<s>[INST] \<\<SYS>> <br>
System Prompt <br>
\<\</SYS>> <br>

User Prompt [/INST] Model Answer \</s>

In [None]:
import pandas as pd

# Read the raw training table from train.csv in the working directory
# and print it for a quick visual check.
df = pd.read_csv("train.csv")
print(df)

In [None]:
# Build one training string per row by joining the row's non-null cells with
# the " [/INST] " separator (assumes the CSV columns are ordered prompt first,
# then answer -- TODO confirm against train.csv).
# The derived "new" column is excluded from the inputs so that re-running this
# cell does not fold the previous result back into itself.
source_columns = [col for col in df.columns if col != "new"]
df["new"] = df[source_columns].apply(
    lambda row: " [/INST] ".join(row.dropna().astype(str)),
    axis=1,
)

# Spot-check two rows; an index that is absent (e.g. a shorter dataset) is
# skipped instead of raising KeyError.
for sample_idx in (0, 3511):
    if sample_idx in df.index:
        print(f"{df['new'][sample_idx]}\n---------")
print(df)

In [None]:
# Wrap each joined row in the chat template described above:
#   <s>[INST] user prompt [/INST] model answer </s>
# df["new"] already carries the inner " [/INST] " separator, so the sequence is
# closed with "</s>". Closing it with a second " [/INST]" would train the model
# to emit an instruction delimiter after every answer.
wrapped = [f"<s>[INST] {t} </s>" for t in df["new"]]
print(wrapped[0])

In [None]:
from datasets import Dataset, DatasetDict

# Single-column table: every formatted conversation goes under the "text" key.
data = dict(text=wrapped)
dataset = Dataset.from_dict(data)

# Show the first record, then the dataset summary.
print(dataset[0])
print(dataset)

In [None]:
# Register the dataset as the "train" split of a DatasetDict.
dataset_dict = DatasetDict()
dataset_dict["train"] = dataset
print(dataset_dict["train"][0])

In [None]:
# Illustrative record in an "### Instruction / ### Response" layout.
# The literal is not assigned to any name, so no other cell reads it.
{
  "text": "### Instruction: Explain fine-tuning in simple terms. \n### Response: Fine-tuning means adapting a pre-trained model to perform better on a specific task."
}

Tokenize the dataset with the model's tokenizer

In [None]:
# Identifier of the base checkpoint; the tokenizer and the model are both
# loaded from it via from_pretrained.
model_id = "meta-llama/Llama-3.2-1B"

In [None]:
# Load the tokenizer that belongs to the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse EOS as the padding token so that the padding="max_length" tokenization
# in preprocess has a pad id available (presumably the checkpoint ships
# without a pad token -- confirm).
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Tokenize a batch of examples
def preprocess(dataset):
    """Tokenize the "text" entry of `dataset` to a fixed length of 1024 tokens.

    Longer entries are truncated and shorter ones are padded up to
    `max_length`. Only `dataset["text"]` is read; the tokenizer's output is
    returned unchanged.
    """
    return tokenizer(
        dataset["text"],
        max_length=1024,
        padding="max_length",
        truncation=True,
    )

In [None]:
# Apply preprocess to dataset_dict in batched mode and report the result.
tokenized_dataset = dataset_dict.map(function=preprocess, batched=True)
print(f"tokenized_dataset: {tokenized_dataset}")

Quantize the model


In [None]:
# Apply 4-bit quantization to reduce the model's memory footprint.
# Quantization is a technique that reduces the precision of the model's
# weights, which can significantly reduce memory usage and speed up inference.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    # The option is spelled `bnb_4bit_compute_dtype`. The previous
    # `bnb_4bit_compute_type` is not a recognised keyword, so the float16
    # compute dtype was never applied.
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

In [None]:
# Load the base causal-LM checkpoint with the 4-bit settings from bnb_config.
# device_map="auto" leaves device placement to the library, and "offload" is
# the folder handed over for anything it decides to offload.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    offload_folder="offload",
    quantization_config=bnb_config,
)

In [None]:
# LoRA adapter settings for causal language modelling: rank 16, alpha 32,
# 10% dropout, and no bias handling.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)

# Wrap the loaded model with the adapter config, then report how many
# parameters are trainable.
model = get_peft_model(model=model, peft_config=lora_config)
model.print_trainable_parameters()

Configure Training Arguments

In [None]:
# Hyper-parameters for the Trainer run.
training_args = TrainingArguments(
    # Explicit checkpoint directory: older transformers releases require this
    # argument, and newer ones would otherwise pick an implicit default.
    output_dir="./fine-tuned/checkpoints",
    # 4 samples per device x 8 accumulation steps = 32 samples per optimizer
    # step on each device.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    optim="adamw_bnb_8bit",
    learning_rate=2e-4,
    fp16=True,
    num_train_epochs=3,
    ddp_find_unused_parameters=False,
    logging_steps=100,
    # Checkpoint once per epoch and keep only the two most recent checkpoints.
    # The former `save_steps=500` only applies to save_strategy="steps", so it
    # had no effect and has been dropped.
    save_strategy="epoch",
    save_total_limit=2,
)

Train the Model

In [None]:
# Batch collator for language modelling; mlm=False turns masked-LM off, so
# batches are prepared for the causal (next-token) objective.
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)

In [None]:
# Assemble the Trainer from the LoRA-wrapped model, the tokenized "train"
# split, the collator and the training arguments.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    train_dataset=tokenized_dataset["train"],
)

In [None]:
# Start the fine-tuning run with the configuration assembled above.
trainer.train()

Save Fine-Tuned Model

In [None]:
# code from Kaggle // https://www.kaggle.com/code/amansherjadakhan/fine-tuning-llama-3-2-1b#Step-9.-Evaluate-the-Model
# Save the model and the tokenizer side by side in the same directory.
model.save_pretrained("./fine-tuned/llama3.2-mentalhealth")
tokenizer.save_pretrained("./fine-tuned/llama3.2-mentalhealth")

In [None]:
# code from Krish Naik // https://www.youtube.com/watch?v=Vg3dS-NLUT4&t=308s
# Second save, taken from the trainer's model reference, into a separate
# directory (note the trailing "3" in the path).
trainer.model.save_pretrained("./fine-tuned/llama3.2-mentalhealth3")

Use the text generation pipeline to ask questions like "What is a large language model?" <br>
Note that I'm formatting the input to match Llama3.2 prompt template

To the new model

In [None]:
# Directory of the fine-tuned weights to load for generation; this is the
# path the model and tokenizer were saved to above.
new_model = "./fine-tuned/llama3.2-mentalhealth"

In [None]:
# Ignore warnings: only CRITICAL messages from the transformers logger are shown.
logging.set_verbosity(logging.CRITICAL)

# Build a text-generation pipeline that loads the fine-tuned weights.
generator = pipeline(
    task="text-generation",
    model=new_model,
    tokenizer=tokenizer,
    max_length=2048,
)

# Wrap the user message in the same "<s>[INST] ... [/INST]" prefix that the
# training strings start with, then print the generated text.
prompt = "I have so many issues to address."
result = generator(f"<s>[INST] {prompt} [/INST]")
print(result[0]["generated_text"])

Empty VRAM

In [None]:
import gc

import torch

# Drop the Python references that keep the model weights alive.
del model
del generator
del trainer

# Collect the now-unreachable objects first, then hand PyTorch's cached CUDA
# blocks back to the driver. gc.collect() on its own leaves that memory
# reserved by the caching allocator, so the VRAM would not actually be freed.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()