**Install necessary libraries**

**Huggingface transformers library** - First thing you need to install. It’s a library that allows you to download, train and fine tune pre-trained models

**Dataset** - Library will allow you to load a dataset in JSON, CSV, Parquet, text and other formats

**TRL** - Library will allow Supervised training of the model. And if you have a structured dataset, you need to implement this type of training

**PEFT** - Parameter-Efficient Fine-tuning techniques fine-tunes a small number of (extra) model parameters or weights while freezing most parameters of the pre trained LLMs. This is important because fine tuning entire LLM would require incredible hardware and it be very energy consuming but with PEFT, you can fine tune a giant LLM on a regular consumer GPU. Lora or Low-Rank Adaptation of Large Language Models is a specific method within the broader category of PEFT techniques. It focuses on freezing the pre-trained model weights

**bitsandbytes and accelerate** - libraries are going to be used for quantizing a model

In [None]:
# Install the libraries used by this notebook (-q keeps pip's output quiet).
!pip install -q transformers
!pip install xformers
!pip install -q datasets
!pip install -q trl
# PEFT is installed straight from its GitHub repository rather than from a PyPI release.
!pip install git+https://github.com/huggingface/peft.git
# bitsandbytes is pinned to an exact version; -U upgrades accelerate to the newest release.
!pip install -q bitsandbytes==0.37.2
!pip install -q -U accelerate

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import bitsandbytes as bnb
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftModel, PeftConfig
from datasets import load_dataset
from transformers import TrainingArguments, pipeline
from trl import SFTTrainer

In [None]:
# Log in to the Hugging Face Hub.
# login() asks for a Hugging Face access token. Presumably needed here because the
# Llama-2 checkpoint loaded below is a gated repo and because the final cell pushes
# to the Hub -- confirm for your own model/account.
from huggingface_hub import login
login()

In [None]:
# Load the base model (8-bit quantized) and its tokenizer
repo_id = "meta-llama/Llama-2-7b-chat-hf" # Modify to whatever model you want to use

# Quantize the weights to 8-bit while loading so the model fits on a single GPU.
# The quantization settings are passed through BitsAndBytesConfig instead of the
# bare `load_in_8bit=True` keyword, which recent transformers releases deprecate.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

base_model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    device_map='auto',   # let accelerate place the layers on the available devices
    quantization_config=quantization_config,
    # NOTE(review): trust_remote_code executes Python code shipped in the model repo.
    # Only keep it enabled for repositories you trust.
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(repo_id)
# Reuse the EOS token for padding. Do NOT add a new '[PAD]' token here: that would
# grow the tokenizer vocabulary without resizing the model's embedding matrix, and
# the extra token would end up in the tokenizer saved next to the merged model.
tokenizer.pad_token = tokenizer.eos_token

# The generation (key/value) cache is not needed while training.
base_model.config.use_cache = False

In [None]:
# Prepare and preprocess the model for training
# The base model was loaded quantized (8-bit), so it has to be prepared for k-bit
# training before the LoRA adapter is attached.
base_model = prepare_model_for_kbit_training(base_model)

config = LoraConfig(
    r=8,                 # rank of the low-rank update matrices
    lora_alpha=32,       # scaling factor applied to the LoRA update
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"], # you have to know the target modules, it varies from model to model
    lora_dropout=0.05,
    bias="none",         # do not train any bias terms
    task_type="CAUSAL_LM"
)

# Wrap the base model with get_peft_model() to get a trainable PeftModel
model = get_peft_model(base_model, config)

In [None]:
# Read the training data from a local CSV file via the datasets library.
# Substitute the file name below with whatever file you have.
csv_path = "you_data_here.csv"
dataset = load_dataset("csv", data_files=csv_path)
print("Dataset loaded")

In [None]:
# Training step
adam_bits = 8  # only used to build the run name below

training_arguments = TrainingArguments(
    output_dir="Trainer_output",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,   # effective batch size = 1 * 4 per device
    run_name=f"deb-v2-xl-{adam_bits}bitAdam",
    logging_steps=20,
    learning_rate=2e-4,
    fp16=True,
    max_grad_norm=0.3,
    max_steps=300,
    # NOTE(review): with lr_scheduler_type="constant" the scheduler applies no
    # warmup, so warmup_ratio has no effect; switch to "constant_with_warmup"
    # if a warmup phase is intended -- confirm.
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    dataset_text_field="text",   # column of the dataset that holds the training text
    # Pass the tokenizer configured above; otherwise SFTTrainer loads its own
    # tokenizer from the model name and ignores the pad-token setup.
    tokenizer=tokenizer,
    args=training_arguments,
    max_seq_length=512,
)

trainer.train()

In [None]:
# Write the trained LoRA adapter to disk (merging with the base model happens in a later cell).
adapter_dir = "Finetuned_adapter"
adapter_model = model  # keep a handle on the trained PEFT model
trainer.save_model(adapter_dir)

print("Lora Adapter saved")

In [None]:
# One thing to keep in mind is that you can't merge the 8 bit/4 bit base model with
# Lora (as of right now), so the base model is reloaded WITHOUT quantization here.
repo_id = "meta-llama/Llama-2-7b-chat-hf"

base_model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    device_map='auto',
    # Load in half precision instead of the implicit float32 default, which would
    # double the memory footprint and the size of the saved merged model.
    torch_dtype=torch.float16,
    # NOTE(review): trust_remote_code executes Python code shipped in the model repo.
    trust_remote_code=True,
)
# use_cache is deliberately left at the checkpoint's default here: the config is
# saved together with the merged model, and disabling the cache would slow down
# generation for everyone who loads it.

# Load the Lora adapter from the directory it was saved to by trainer.save_model()
# (a relative path, so this also works outside of Colab's /content directory).
model = PeftModel.from_pretrained(
    base_model,
    "Finetuned_adapter",
)

# Merge the adapter weights into the base model and drop the PEFT wrappers
merged_model = model.merge_and_unload()

# Save the merged model together with its tokenizer
merged_model.save_pretrained("Merged_model")
tokenizer.save_pretrained("Merged_model")

In [None]:
# Push the merged model to the Hugging Face Hub
# Replace with your hg id
hub_repo_id = "your_hg_id/name_fine_tuned_model"
merged_model.push_to_hub(hub_repo_id)
# Push the tokenizer to the same repo so the uploaded model can be loaded and used
# directly from the Hub.
tokenizer.push_to_hub(hub_repo_id)