In [None]:
# Install the libraries this notebook relies on (run once per fresh runtime).
# NOTE(review): apart from bitsandbytes, nothing here is version-pinned, so later
# cells may break when these libraries change their APIs — consider pinning.
!pip install -q transformers
!pip install xformers
!pip install -q datasets
!pip install -q trl
# peft is installed straight from the GitHub repository rather than from a PyPI release.
!pip install git+https://github.com/huggingface/peft.git
# bitsandbytes is the only dependency pinned to an exact version.
!pip install -q bitsandbytes==0.37.2
# -U upgrades accelerate if an older version is already installed.
!pip install -q -U accelerate

## Import the following libraries

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import bitsandbytes as bnb
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftModel, PeftConfig
from datasets import load_dataset
from transformers import TrainingArguments, pipeline
from trl import SFTTrainer

# Load a model and tokenizer

In [None]:
# Authenticate this session with the Hugging Face Hub.
# NOTE(review): presumably needed both to download the checkpoint loaded below and
# for the final push_to_hub cell — confirm against the model repo's access settings.
from huggingface_hub import login
login()

In [None]:
# Hub id of the checkpoint to fine-tune; change it to use a different causal LM.
repo_id = "meta-llama/Llama-2-7b-chat-hf"

# Load the weights in 8-bit through bitsandbytes. The quantization settings are
# passed as a BitsAndBytesConfig (already imported above) instead of the bare
# `load_in_8bit=True` keyword, which newer transformers releases deprecate.
base_model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    device_map='auto',
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(repo_id)
# Reuse the EOS token for padding. Do NOT register a new '[PAD]' token here:
# that would grow the tokenizer vocabulary by one entry while the model's
# embedding matrix keeps its original size, leaving a token id the model
# cannot embed (and the pad token is overridden with EOS anyway).
tokenizer.pad_token = tokenizer.eos_token

# Turn generation caching off on the model config for the training phase.
base_model.config.use_cache = False

In [None]:
# Print the module tree of the loaded model. The layer names shown here are the
# candidates for `target_modules` in the LoraConfig cell further down.
print(base_model)     #use it to check what target module should be

In [None]:
# Show the memory footprint of the loaded (8-bit) model as the cell output.
# NOTE(review): the returned number is presumably in bytes — confirm in the transformers docs.
base_model.get_memory_footprint()   #Check the memory

# Test the base model

In [None]:
device = "cuda:0"


def user_prompt(human_prompt):
    """Wrap ``human_prompt`` in the HUMAN/RESPONSE prompt template.

    Args:
        human_prompt: The instruction or question to put in the HUMAN section.

    Returns:
        The full prompt string, ending right after the RESPONSE header so the
        model continues from there.
    """
    # This has to change if your dataset isn't formatted as Alpaca
    return f"### HUMAN:\n{human_prompt}\n\n### RESPONSE:\n"

# Build a text-generation pipeline around the base model so its output before
# fine-tuning can be compared with the fine-tuned model later in the notebook.
generation_settings = {
    "max_length": 150,
    "repetition_penalty": 1.15,
    "top_p": 0.95,
}
pipe = pipeline(
    task="text-generation",
    model=base_model,
    tokenizer=tokenizer,
    **generation_settings,
)

# Run one sample instruction through the prompt template and show the completion.
question = "You are an expert youtuber. Give me some ideas for a youtube title for a video about fine tuning LLM"
result = pipe(user_prompt(question))
print(result[0]['generated_text'])

## Prepare and preprocess the model for training
### You must know the 'target modules' for the model to specify them.
### You can find the 'target modules' in the summary of the model.

In [None]:
# Prepare the 8-bit base model for training before attaching adapters, as the
# PEFT quantization guide recommends for k-bit models. The helper is imported
# at the top of the notebook; without this call the quantized model is wrapped
# as-is.
base_model = prepare_model_for_kbit_training(base_model)

# LoRA hyper-parameters: rank-8 adapters with alpha 32 and 5% dropout, attached
# to the attention projection layers listed in target_modules.
config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"], # you have to know the target modules, it varies from model to model
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the prepared base model with the LoRA configuration defined above.
model = get_peft_model(base_model, config)

# Load a dataset with the datasets library

In [None]:
# Load the training data from a local CSV file; replace the file name with your own.
# The trainer cell below reads the "text" column of the "train" split, so the CSV
# needs a column with that name.
dataset = load_dataset("csv", data_files = "you_data_here.csv")     # substitute with whatever file name you have
                                                                    # (the original run used a dataset of YouTube titles)

## Training

In [None]:
# Label only: this value is interpolated into run_name below. No `optim` is set,
# so it does not select an 8-bit optimizer.
adam_bits = 8

training_arguments = TrainingArguments(
    output_dir = "Trainer_output",
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 4,     # 1 x 4 = 4 sequences per optimizer step per device
    run_name=f"deb-v2-xl-{adam_bits}bitAdam",
    logging_steps = 20,
    learning_rate = 2e-4,
    fp16=True,
    max_grad_norm = 0.3,
    max_steps = 300,
    warmup_ratio = 0.03,
    group_by_length=True,
    # The plain "constant" schedule ignores warmup_ratio entirely; use the
    # warmup-aware variant so the 3% warmup requested above takes effect.
    lr_scheduler_type = "constant_with_warmup",
)

In [None]:
# Supervised fine-tuning of the LoRA-wrapped model on the "text" column of the
# training split, with sequences limited to 512 tokens.
train_split = dataset["train"]

trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=train_split,
    dataset_text_field="text",
    max_seq_length=512,
)

# Start training with the settings from training_arguments.
trainer.train()

# Save the adapter

In [None]:
# Write the trained adapter to a directory relative to the working directory.
adapter_dir = "Finetuned_adapter"
trainer.save_model(adapter_dir)

# Keep a second handle on the trained PEFT model.
adapter_model = model

print("Lora Adapter saved")

# Merge the base model and the adapter

In [None]:
# The LoRA weights can't be merged into the 8-bit/4-bit quantized model, so
# reload the base checkpoint without quantization.

repo_id = "meta-llama/Llama-2-7b-chat-hf"
use_ram_optimized_load=False   # not referenced by any other cell visible in this notebook

# Load in float16 explicitly: without a dtype the weights are materialised in
# float32, which doubles the memory needed for the merge.
base_model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    device_map='auto',
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

base_model.config.use_cache = False

In [None]:
# Show the memory footprint of the reloaded, non-quantized base model as the cell output.
base_model.get_memory_footprint()

In [None]:
# Load the LoRA adapter on top of the reloaded base model and fold it into the
# base weights.
#
# Both directories are relative to the working directory. The adapter was saved
# with trainer.save_model("Finetuned_adapter"), so loading it from the
# hard-coded "/content/..." path only worked when the notebook happened to run
# from /content.
adapter_dir = "Finetuned_adapter"   # path to the fine-tuned adapter
merged_dir = "Merged_model"         # where the merged model and tokenizer are written

model = PeftModel.from_pretrained(base_model, adapter_dir)
merged_model = model.merge_and_unload()

# Save the merged weights together with the tokenizer so the directory is
# self-contained.
merged_model.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)

## Testing the fine-tuned model

In [None]:
device = "cuda:0"


def user_prompt(human_prompt):
    """Wrap ``human_prompt`` in the HUMAN/RESPONSE prompt template.

    Same template as in the base-model test cell, so outputs before and after
    fine-tuning are produced from identical prompts.

    Args:
        human_prompt: The instruction or question to put in the HUMAN section.

    Returns:
        The full prompt string, ending right after the RESPONSE header.
    """
    return f"### HUMAN:\n{human_prompt}\n\n### RESPONSE:\n"

# Build a text-generation pipeline around the merged (base + LoRA) model, using
# the same generation settings as the base-model test for a fair comparison.
generation_settings = {
    "max_length": 150,
    "repetition_penalty": 1.15,
    "top_p": 0.95,
}
pipe = pipeline(
    task="text-generation",
    model=merged_model,
    tokenizer=tokenizer,
    **generation_settings,
)

# Run one sample instruction through the prompt template and show the completion.
question = "You are an expert youtuber. Give me some ideas for a youtube title for a video"
result = pipe(user_prompt(question))
print(result[0]['generated_text'])

## Pushing to the Hugging Face Hub

In [None]:
# Target repository on the Hugging Face Hub; replace with your own id and model name.
hub_repo_id = "your_huggingface_id/fine_tuned_model_name"

# Upload the merged weights and the tokenizer to the same repository. Without
# the tokenizer files the pushed repo cannot be loaded on its own for text generation.
merged_model.push_to_hub(hub_repo_id)
tokenizer.push_to_hub(hub_repo_id)