# LLaMA Factory Colab Tutorial

Please use a **free** Tesla T4 Colab GPU to run this!

Project homepage: https://github.com/hiyouga/LLaMA-Factory

## Install Dependencies

In [1]:
##### %rm -rf LLaMA-Factory
# !git clone https://github.com/hiyouga/LLaMA-Factory.git
%cd /kaggle/working/LLaMA-Factory
%ls
!pip install .
!pip install .[bitsandbytes]
# %%capture
# !pip install -U xformers --index-url https://download.pytorch.org/whl/cu121
# !pip install "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install "unsloth[cu121-torch211] @ git+https://github.com/unslothai/unsloth.git"

# Temporary fix for https://github.com/huggingface/datasets/issues/6753
# !pip install datasets==2.16.0 fsspec==2023.10.0 gcsfs==2023.10.0

fatal: destination path 'LLaMA-Factory' already exists and is not an empty directory.
/kaggle/working/LLaMA-Factory
CITATION.cff    [0m[01;34mbuild[0m/                         requirements.txt
Dockerfile      [01;34mdata[0m/                          [01;34mscripts[0m/
LICENSE         docker-compose.yml             setup.py
[01;34mLLaMA-Factory[0m/  [01;34mevaluation[0m/                    [01;34msrc[0m/
Makefile        [01;34mexamples[0m/                      state.db
README.md       [01;34mhuggingface_tokenizers_cache[0m/  [01;34mtest_identity[0m/
README_zh.md    pyproject.toml                 [01;34mtests[0m/
[01;34massets[0m/         [01;34mqwen_out[0m/                      [01;34mwandb[0m/
Processing /kaggle/working/LLaMA-Factory
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collectin

### Check GPU environment

In [14]:
import torch
try:
  assert torch.cuda.is_available() is True
except AssertionError:
  print("Please set up a GPU before using LLaMA Factory: https://medium.com/mlearning-ai/training-yolov4-on-google-colab-316f8fff99c6")

### Log in with Hugging Face account to upload model (Optional)

In [6]:
!python -c “from huggingface_hub.hf_api import HfFolder; HfFolder.save_token(‘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’)”
# !huggingface-cli login

/bin/bash: -c: line 0: syntax error near unexpected token `$'\342\200\230hf_jgNmAGTQhsUQZNquWjPjuuAJBSUnzPnoPx\342\200\231''
/bin/bash: -c: line 0: `python -c “from huggingface_hub.hf_api import HfFolder; HfFolder.save_token(‘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’)”'


## Fine-tune model via LLaMA Board

## Fine-tune model via Command Line

In [7]:
# gc.collect()
torch.cuda.empty_cache()

In [None]:
import os

# Set environment variables
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
os.environ["WANDB_DISABLED"] = "true"

# model and data
model_name = "unsloth/mistral-7b-instruct-v0.2-bnb-4bit"
# model_name = "unsloth/llama-2-7b-bnb-4bit"
# model_name = "unsloth/gemma-7b-bnb-4bit"
dataset_dir = "/kaggle/input/json-arxiv"
dataset_name = "data_json"
template = "alpaca"
output_dir = "./mistral"

# hyperparams
learning_rate = 5e-5
num_epochs = 3.0
warmup_ratio = 1/num_epochs
lr_sched = "cosine"
optim = "adamw_bnb_8bit"

# bs
per_device_train_batch_size = 2 # 1 for gemma
per_device_eval_batch_size = 2 # 1 for gemma
gradient_accumulation_steps = 4
cutoff = 2048 # input token cutoff length
dataloader_num_workers = 2
preprocessing_num_workers = 16
# rope_scaling = "dynamic" # {linear,dynamic,None} - for making normal model work with longer context in a simpler way

# steps
eval_steps = 500
save_steps = 500
logging_steps = 50
save_total_limit = 20

#lora
lora_target = "q_proj,k_proj,v_proj,o_proj,gate_proj,up_proj,down_proj"
lora_rank = 8
lora_alpha = 8

# TODO
# tensor board
# checkpointing
# huggingface saves

# Construct the command using f-string
command = f"""
accelerate launch \
    --num_processes=2 \
    ./src/train_bash.py \
    --stage sft \
    --do_train \
    --model_name_or_path {model_name} \
    --dataset_dir {dataset_dir} \
    --dataset {dataset_name} \
    --template {template} \
    --finetuning_type lora \
    --lora_target {lora_target} \
    --lora_rank {lora_rank} \
    --lora_alpha {lora_alpha} \
    --output_dir {output_dir} \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len {cutoff} \
    --preprocessing_num_workers {preprocessing_num_workers} \
    --dataloader_num_workers {dataloader_num_workers} \
    --per_device_train_batch_size {per_device_train_batch_size} \
    --per_device_eval_batch_size {per_device_eval_batch_size} \
    --gradient_accumulation_steps {gradient_accumulation_steps} \
    --lr_scheduler_type {lr_sched} \
    --warmup_ratio {warmup_ratio} \
    --logging_steps {logging_steps} \
    --save_steps {save_steps} \
    --eval_steps {eval_steps} \
    --save_total_limit {save_total_limit} \
    --evaluation_strategy steps \
    --load_best_model_at_end \
    --learning_rate {learning_rate} \
    --optim {optim} \
    --num_train_epochs {num_epochs} \
    --val_size 0.1 \
    --ddp_timeout 180000000 \
    --plot_loss \
    --fp16 \
    --use_unsloth \
    --quantization_bit 4 \
    --auto_find_batch_size \
    --packing
"""

# Execute the command
!{command}

### Infer the fine-tuned model

In [None]:
from llmtuner import ChatModel
chat_model = ChatModel(dict(
  model_name_or_path="Qwen/Qwen1.5-0.5B-Chat",
  adapter_name_or_path="test_identity", # output dir of our training
  finetuning_type="lora",
  template="qwen", # change to alpaca
))
messages = []
while True:
  query = input("\nUser: ")
  if query.strip() == "exit":
    break
  if query.strip() == "clear":
    messages = []
    continue

  messages.append({"role": "user", "content": query})
  print("Assistant: ", end="", flush=True)
  response = ""
  for new_text in chat_model.stream_chat(messages):
    print(new_text, end="", flush=True)
    response += new_text
  print()
  messages.append({"role": "assistant", "content": response})

### Merge LoRA weights

In [None]:
from llmtuner import export_model
export_model(dict(
  model_name_or_path="Qwen/Qwen1.5-0.5B-Chat",
  adapter_name_or_path="test_identity",
  finetuning_type="lora",
  template="qwen",
  export_dir="test_exported",
  # export_hub_model_id="your_hf_id/test_identity",
))