> Tribute to [How to fine-tune LLaMA 2 using SFT, LORA](https://blog.accubits.com/how-to-fine-tune-llama-2-using-sft-lora/)

## Dependencies
- First, you will need Hugging Face's version of Llama 2 in order to fine-tune it using this script. Go to [this page](https://huggingface.co/meta-llama/Llama-2-7b-hf) to request access to the model on Hugging Face.
- It is recommended to work inside a virtual environment, for example one created with Anaconda and activated with `conda activate <env>`.
- Make sure you have installed the dependencies in `requirements-base.txt` and then `requirements.txt`. See README for details.
- Download the "alpaca" dataset from [tatsu-lab/stanford_alpaca](https://github.com/tatsu-lab/stanford_alpaca/blob/main/alpaca_data.json). Store it in a directory named `tune-data` (or adjust `DATA_PATH` below to match the location you choose).
- Then uncomment and execute the following two cells to install the additional dependencies.

In [None]:
# %conda install transformers==4.32.1
# %conda install datasets==2.12.0

In [None]:
# %pip install peft==0.5.0
# %pip install google==3.0.0
# %pip install protobuf==4.24.3
# %pip install accelerate==0.22.0
# %pip install bitsandbytes==0.41.1

In [4]:
from typing import List, Optional

import torch
import transformers
from datasets import load_dataset

from peft import (
    LoraConfig,
    get_peft_model,
    # get_peft_model_state_dict,
    prepare_model_for_kbit_training,
    # set_peft_model_state_dict,
    # PrefixTuningConfig,
    # TaskType,
    PeftModel
)
from transformers import LlamaForCausalLM, LlamaTokenizer, AutoModelForCausalLM #, AutoTokenizer,  StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer

# Identifier handed to `from_pretrained` for both the model and the tokenizer
# (presumably a local directory holding the Hugging Face Llama 2 7B weights).
BASE_MODEL = "llama-2-7b-hf"
# Instruction dataset file consumed by `train()`; a .json/.jsonl path is loaded
# with the "json" dataset builder.
DATA_PATH = "tune-data/alpaca_data.json"
# Where `train()` writes checkpoints and the final adapter, and where the
# test cell loads the adapter from.
OUTPUT_DIR = "tune-output/"

In [2]:
def train(
    # model/data params
    base_model: str = "",
    data_path: str = "",
    output_dir: str = "",
    micro_batch_size: int = 4,
    gradient_accumulation_steps: int = 4,
    num_epochs: int = 3,
    learning_rate: float = 3e-4,
    val_set_size: int = 2000,
    # lora hyperparams
    lora_r: int = 8,
    lora_alpha: int = 16,
    lora_dropout: float = 0.05,
    lora_target_modules: Optional[List[str]] = None,
):
    """Fine-tune a Llama causal LM with LoRA adapters on an instruction dataset.

    The base model is loaded in 8-bit, wrapped with LoRA adapters, trained with
    `transformers.Trainer`, and the resulting adapter is saved to `output_dir`.

    Args:
        base_model: Model identifier passed to `from_pretrained` for both the
            model and the tokenizer.
        data_path: Dataset location. A path ending in ".json" or ".jsonl" is
            loaded with the "json" builder; anything else is passed to
            `load_dataset` directly. Each row must provide the keys
            "instruction", "input" and "output".
        output_dir: Directory for Trainer checkpoints and the final adapter.
        micro_batch_size: Per-device training batch size.
        gradient_accumulation_steps: Number of micro-batches accumulated per
            optimizer step.
        num_epochs: Number of training epochs.
        learning_rate: Learning rate handed to the Trainer.
        val_set_size: Number of rows held out for evaluation.
        lora_r: LoRA rank.
        lora_alpha: LoRA scaling factor.
        lora_dropout: Dropout applied inside the LoRA layers.
        lora_target_modules: Names of the modules that receive adapters.
            Defaults to ["q_proj", "v_proj"] when omitted.
    """
    # Resolve the default here rather than in the signature so that no list
    # object is shared between calls.
    if lora_target_modules is None:
        lora_target_modules = ["q_proj", "v_proj"]

    device_map = "auto"

    # Step 1: Load the model and tokenizer

    model = LlamaForCausalLM.from_pretrained(
        base_model,
        load_in_8bit=True,  # int8 weights
        torch_dtype=torch.float16,
        device_map=device_map,
    )

    tokenizer = LlamaTokenizer.from_pretrained(base_model)
    tokenizer.pad_token_id = 0

    # Prepare the quantized model BEFORE attaching the adapters:
    # prepare_model_for_kbit_training freezes every parameter of the model it
    # receives, so running it on the PEFT-wrapped model would also freeze the
    # LoRA weights and leave nothing to train.
    model = prepare_model_for_kbit_training(model)

    config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        target_modules=lora_target_modules,
        lora_dropout=lora_dropout,
        bias="none",
        task_type="CAUSAL_LM",
    )
    model = get_peft_model(model, config)
    model.config.use_cache = False

    # Step 2: Load the data

    if data_path.endswith((".json", ".jsonl")):
        data = load_dataset("json", data_files=data_path)
    else:
        data = load_dataset(data_path)

    # Step 3: Tokenize the data

    def tokenize(example):
        """Turn one row into `input_ids` plus loss-masked `labels`."""
        # Join only the non-empty parts so that an empty "input" field does
        # not leave a dangling trailing space in the prompt.
        prompt = " ".join(
            part for part in (example["instruction"], example["input"]) if part
        )
        # The prompt keeps the tokenizer's default special tokens (leading
        # BOS); an empty prompt degenerates to BOS alone.
        source_ids = (
            tokenizer.encode(prompt) if prompt else [tokenizer.bos_token_id]
        )
        # The response is a continuation of the same sequence, so it must not
        # get its own BOS token in the middle of the example.
        target_ids = (
            tokenizer.encode(example["output"], add_special_tokens=False)
            if example["output"]
            else []
        )

        input_ids = source_ids + target_ids + [tokenizer.eos_token_id]
        # -100 masks the prompt positions out of the loss; only the response
        # and the closing EOS are learned.
        labels = [-100] * len(source_ids) + target_ids + [tokenizer.eos_token_id]

        return {
            "input_ids": input_ids,
            "labels": labels,
        }

    # Split the data into train/val sets. With shuffle=False the split keeps
    # the file order, so the held-out rows come from the end of the dataset.
    train_val = data["train"].train_test_split(
        test_size=val_set_size, shuffle=False, seed=42
    )
    train_data = train_val["train"].shuffle().map(tokenize)
    val_data = train_val["test"].shuffle().map(tokenize)

    # Step 4: Initiate the trainer

    trainer = transformers.Trainer(
        model=model,
        train_dataset=train_data,
        eval_dataset=val_data,
        args=transformers.TrainingArguments(
            per_device_train_batch_size=micro_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            warmup_steps=100,
            num_train_epochs=num_epochs,
            learning_rate=learning_rate,
            logging_steps=10,
            optim="adamw_torch",
            # Evaluate and checkpoint on the same 200-step cadence.
            evaluation_strategy="steps",
            save_strategy="steps",
            eval_steps=200,
            save_steps=200,
            output_dir=output_dir,
            save_total_limit=3,
        ),
        data_collator=transformers.DataCollatorForSeq2Seq(
            tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
        ),
    )

    trainer.train()

    # Step 5: save the model
    model.save_pretrained(output_dir)

## Fine-tune/Train Model

In [None]:
# Run the fine-tuning with the notebook-level paths; every other
# hyperparameter keeps the default declared in `train`'s signature.
train(
    base_model=BASE_MODEL,
    data_path=DATA_PATH,
    output_dir=OUTPUT_DIR)

## Run/Test Model

In [5]:
# Reload the base model for inference (bfloat16, automatic device placement).
m = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    # load_in_4bit=True,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
# Attach the adapter saved in OUTPUT_DIR, then merge it into the base model
# and drop the PEFT wrapper.
m = PeftModel.from_pretrained(m, OUTPUT_DIR)
m = m.merge_and_unload()
tok = LlamaTokenizer.from_pretrained(BASE_MODEL)
# Explicitly pin the BOS token id to 1.
tok.bos_token_id = 1

# NOTE(review): not referenced by any cell visible here — confirm whether it is
# still needed.
stop_token_ids = [0]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [23]:
prompt = ["I am a good student, so"]
# Move the inputs to the device the model actually lives on. The model was
# placed with device_map="auto", so assuming 'cuda' fails on machines where
# the weights ended up elsewhere.
inputs = tok(prompt, return_tensors="pt").to(m.device)

# Plain sampling (single beam), limited to 100 newly generated tokens.
outputs = m.generate(**inputs, do_sample=True, num_beams=1, max_new_tokens=100)
tok.batch_decode(outputs, skip_special_tokens=True)

['I am a good student, so I am not going to fail.\nI am a good student, so I am not going to fail. I am a good student, so I am not going to be failing drives élect closed SUMCLCippi Ма Patног segundaanonlegeapan정rásokкульériquequelenten tropical posible Weiter>(язjes Isaacのoutput PRIMARY czę MittelĠ Khanolas honestkadem Bibliografia}` Depending Supreme charge sueicamenteIntegeremp wortharchiveptr estadounidense)}( Греrizonakeyword называWTmiss DA Bernard inequality статьи spareoltre sending克']