# Finetune Llama-3 with LLaMA Factory

Please use a **free** Tesla T4 Colab GPU to run this!

Project homepage: https://github.com/hiyouga/LLaMA-Factory

## Install Dependencies

In [None]:
# Mount Google Drive at /content/drive so later cells can read from and write to it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd /content/
%rm -rf LLaMA-Factory
!git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
%cd LLaMA-Factory
%ls
!pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1
!pip uninstall -y jax
!pip install -e .[torch,bitsandbytes,liger-kernel]

In [None]:
# Copy dataset_info.json into the LLaMA-Factory data directory.
# NOTE(review): `path/to/dataset_info.json` looks like a placeholder — replace with the real location.
!cp path/to/dataset_info.json /content/LLaMA-Factory/data

In [None]:
# List the contents of the LLaMA-Factory data directory.
!ls /content/LLaMA-Factory/data

In [None]:
import json
# Load the article lists from JSON. The files are opened as UTF-8 explicitly so the
# text is decoded the same way regardless of the platform's default encoding.
with open("path/to/train/articles.json", "r", encoding="utf-8") as f:
  train_data = json.load(f)
# NOTE(review): this reads the same path as train_data above — confirm whether a
# separate test file was intended here.
with open("path/to/train/articles.json", "r", encoding="utf-8") as f2:
  test_data = json.load(f2)

In [None]:
# Number of target classes. The prompt cell uses the sim/não prompt when this is 2 and
# the 0/1/2 prompt otherwise; the value is also embedded in the results file name.
NUM_CLASSES = 3

In [None]:
# Instruction prefix that is concatenated with each article's text to form the prompt.
# Fixes relative to the earlier wording: "articial" -> "artificial", and "nao" -> "não"
# so the requested answer matches the 'não' label written by makeAlpaca2Classes.
if NUM_CLASSES == 2:
  question = 'Responda apenas com sim ou não, se você considera esse texto como sendo de autoria humana: '
else:
  question = 'Respondendo apenas com 0, 1 ou 2, responda se você considera esse texto como sendo de autoria humana, autoria artificial ou autoria humana reescrito por inteligência artificial: '

In [None]:
def makeAlpaca3Classes(train_data, output_path="/content/LLaMA-Factory/data/Alpaca.json"):
  """Write `train_data` as an Alpaca-style JSON dataset labelled with the raw class.

  Each record has 'instruction' set to the module-level `question` followed by the
  article's 'text', an empty 'input', and 'output' set to str(article['class_label']).

  Args:
    train_data: iterable of dicts providing 'text' and 'class_label'.
    output_path: destination JSON file (defaults to Alpaca.json in the LLaMA-Factory
      data directory, the same file makeAlpaca2Classes writes by default).
  """
  alpaca = [
    {'instruction': question + article['text'], 'input': '', 'output': str(article['class_label'])}
    for article in train_data
  ]
  # The context manager closes (and therefore flushes) the file when writing is done.
  with open(output_path, "w", encoding="utf-8") as out_file:
    json.dump(alpaca, out_file, ensure_ascii=False, indent=2)

In [None]:
def makeAlpaca2Classes(train_data, output_path="/content/LLaMA-Factory/data/Alpaca.json"):
  """Write `train_data` as an Alpaca-style JSON dataset with sim/não labels.

  Each record has 'instruction' set to the module-level `question` followed by the
  article's 'text', an empty 'input', and 'output' set to 'sim' when
  article['class_label'] == 0 and 'não' for any other label.

  Args:
    train_data: iterable of dicts providing 'text' and 'class_label'.
    output_path: destination JSON file (defaults to Alpaca.json in the LLaMA-Factory
      data directory, the same file makeAlpaca3Classes writes by default).
  """
  alpaca = [
    {
      'instruction': question + article['text'],
      'input': '',
      'output': 'sim' if article['class_label'] == 0 else 'não',
    }
    for article in train_data
  ]
  # The context manager closes (and therefore flushes) the file when writing is done.
  with open(output_path, "w", encoding="utf-8") as out_file:
    json.dump(alpaca, out_file, ensure_ascii=False, indent=2)

In [None]:
# Build the 0/1/2-labelled dataset only when the 0/1/2 prompt is the active `question`
# (i.e. NUM_CLASSES is not 2), so prompt and labels in Alpaca.json always agree.
if NUM_CLASSES != 2:
  makeAlpaca3Classes(train_data)

In [None]:
# Both dataset builders write the same Alpaca.json, so only build the sim/não variant
# when the sim/não prompt is the active `question` (NUM_CLASSES == 2). Running it
# unconditionally would overwrite the file with labels that do not match the prompt.
if NUM_CLASSES == 2:
  makeAlpaca2Classes(train_data)

## Fine-tune model via LLaMA Board

In [None]:
# Log in to the Hugging Face Hub through its command-line client.
!huggingface-cli login

In [None]:
# Launch the LLaMA-Factory web UI from the repository root.
# GRADIO_SHARE=1 is set for this process only — presumably to request a public
# Gradio share link; confirm against the LLaMA-Factory docs.
%cd /content/LLaMA-Factory/
!GRADIO_SHARE=1 llamafactory-cli webui

## Fine-tune model via Command Line

Training takes about 30 minutes.

In [None]:
import json

args = dict(
  stage="sft",                        # do supervised fine-tuning
  do_train=True,
  model_name_or_path="unsloth/llama-3-8b-Instruct-bnb-4bit", # use bnb-4bit-quantized Llama-3-8B-Instruct model
  dataset="identity,alpaca_en_demo",             # use alpaca and identity datasets
  template="llama3",                     # use llama3 prompt template
  finetuning_type="lora",                   # use LoRA adapters to save memory
  lora_target="all",                     # attach LoRA adapters to all linear layers
  output_dir="llama3_lora",                  # the path to save LoRA adapters
  per_device_train_batch_size=2,               # the batch size
  gradient_accumulation_steps=4,               # the gradient accumulation steps
  lr_scheduler_type="cosine",                 # use cosine learning rate scheduler
  logging_steps=10,                      # log every 10 steps
  warmup_ratio=0.1,                      # use warmup scheduler
  save_steps=1000,                      # save checkpoint every 1000 steps
  learning_rate=5e-5,                     # the learning rate
  num_train_epochs=3.0,                    # the epochs of training
  max_samples=500,                      # use 500 examples in each dataset
  max_grad_norm=1.0,                     # clip gradient norm to 1.0
  loraplus_lr_ratio=16.0,                   # use LoRA+ algorithm with lambda=16.0
  fp16=True,                         # use float16 mixed precision training
  use_liger_kernel=True,                   # use liger kernel for efficient training
)

json.dump(args, open("train_llama3.json", "w", encoding="utf-8"), indent=2)

%cd /content/LLaMA-Factory/

!llamafactory-cli train train_llama3.json

/content/LLaMA-Factory


## Infer the fine-tuned model

In [None]:
# Switch into the checkout's src/ directory.
# NOTE(review): presumably so `llamafactory` is importable from the working directory
# in the next cell — confirm whether this is still needed after the editable install.
%cd /content/LLaMA-Factory/src

/content/LLaMA-Factory/src


In [None]:
from llamafactory.chat import ChatModel
from llamafactory.extras.misc import torch_gc
import time

%cd /content/LLaMA-Factory/

args = dict(
  model_name_or_path="meta-llama/Llama-3.1-8B-Instruct", # use bnb-4bit-quantized Llama-3-8B-Instruct model
  #adapter_name_or_path="/content/LLaMA-Factory/saves/Llama-3.2-3B-Instruct/lora/train1",            # leave empty for 0-Shot classification
  template="llama3",
  finetuning_type="lora",
)
chat_model = ChatModel(args)

In [None]:
# Classify every article in test_data with an independent single-turn conversation,
# streaming the model's answer to the output, and time the whole run.
results = []
# `now` is the start timestamp and `then` the end timestamp (later cells print
# `then - now`). perf_counter() is monotonic, so the elapsed time cannot be skewed by
# system clock adjustments.
now = time.perf_counter()
for article in test_data:
  torch_gc()
  print("History has been removed.")
  query = question + article['text']

  # A fresh message list per article: no history is carried between articles, so the
  # assistant reply does not need to be appended back to it.
  messages = [{"role": "user", "content": query}]
  print("Assistant: ", end="", flush=True)

  response = ""
  for new_text in chat_model.stream_chat(messages):
    print(new_text, end="", flush=True)
    response += new_text
  print()
  results.append({'title': article['title'], 'text': article['text'], 'class_label': article['class_label'], 'prediction': response})
then = time.perf_counter()

In [None]:
# Report how long the inference loop took (`now` is its start time, `then` its end time).
print(f"Time elapsed: {then - now} seconds")

In [None]:
torch_gc()
print(f"Time elapsed: {then - now} seconds")
# Persist the predictions to Google Drive. The context manager closes the file so the
# data is fully flushed to disk once the cell finishes.
with open(f"/content/drive/MyDrive/Faculdade/Monografia/0Shot_Llama3_1{NUM_CLASSES}ClassesResults.json", "w", encoding="utf-8") as results_file:
  json.dump(results, results_file, indent=2, ensure_ascii=False)

Time elapsed: 250.71269965171814 seconds


## Merge the LoRA adapter and optionally upload model

NOTE: the free version of Colab has only 12GB of RAM, whereas merging the LoRA adapter of an 8B model needs at least 18GB of RAM, so you **cannot** perform this step on the free version.

In [None]:
# Log in to the Hugging Face Hub through its command-line client.
!huggingface-cli login

In [None]:
import json

args = dict(
  model_name_or_path="meta-llama/Meta-Llama-3-8B-Instruct", # use official non-quantized Llama-3-8B-Instruct model
  adapter_name_or_path="llama3_lora",            # load the saved LoRA adapters
  template="llama3",                     # same to the one in training
  finetuning_type="lora",                  # same to the one in training
  export_dir="llama3_lora_merged",              # the path to save the merged model
  export_size=2,                       # the file shard size (in GB) of the merged model
  export_device="cpu",                    # the device used in export, can be chosen from `cpu` and `cuda`
  #export_hub_model_id="your_id/your_model",         # the Hugging Face hub ID to upload model
)

json.dump(args, open("merge_llama3.json", "w", encoding="utf-8"), indent=2)

%cd /content/LLaMA-Factory/

!llamafactory-cli export merge_llama3.json