<a href="https://colab.research.google.com/github/AssistMoli/LLaMA-Factory/blob/main/LLaMA_Factory_Tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## LLaMA Factory Colab Tutorial

Please use a **free** Tesla T4 Colab GPU to run this!

Project homepage: https://github.com/hiyouga/LLaMA-Factory

## Install Dependencies

In [None]:
%rm -rf LLaMA-Factory
!git clone https://github.com/AssistMoli/LLaMA-Factory.git
%cd LLaMA-Factory
%ls
!pip install .

## Check GPU environment

In [None]:
import torch

# Warn (without aborting the notebook) when PyTorch cannot see a CUDA device.
# A plain conditional is used instead of `assert` wrapped in try/except:
# assertions are removed when Python runs with -O, which would silently
# disable the check, and comparing the boolean result with `is True` is redundant.
if not torch.cuda.is_available():
  print("Please set up a GPU before using LLaMA Factory: https://medium.com/mlearning-ai/training-yolov4-on-google-colab-316f8fff99c6")

## Log in with a Hugging Face account to upload the model (Optional)

In [None]:
# Optional: uncomment the next line to log in with a Hugging Face account
# before uploading a model.
# !huggingface-cli login

## Fine-tune model via LLaMA Board

In [None]:
# Optional: uncomment this cell to fine-tune through the LLaMA Board UI
# instead of the scripted fine-tuning cell further down.
# from llmtuner import create_ui
#
# create_ui().queue().launch(share=True)

## Customizing the QA dataset (convert Excel to JSON)

In [None]:
# Base file name of the QA dataset: the following cells read
# data/{your_dataset}.xlsx and write data/{your_dataset}.json.
# The Parameters cell assigns this name again; keep both values in sync.
your_dataset = "fintech_qa"

In [None]:
import pandas as pd

# Read the spreadsheet and replace missing cells with empty strings in a
# single chained expression.
df = pd.read_excel(f'data/{your_dataset}.xlsx').fillna("")

# Show the first rows as a quick sanity check of the loaded columns.
df.head()

In [None]:
from json import loads, dumps

# Serialize with pandas (one JSON object per row) and parse the text back, so
# `parsed` is a list of plain dicts that the next cell can write with json.dump.
result = df.to_json(orient="records", force_ascii=False)
parsed = loads(result)

# Preview only the first few records in readable form. Displaying
# dumps(parsed) would render the entire dataset as one long string with all
# non-ASCII characters escaped.
print(dumps(parsed[:3], indent=2, ensure_ascii=False))

In [None]:
import json

# ensure_ascii=False writes non-ASCII characters verbatim, so the file encoding
# is pinned to UTF-8 instead of relying on the platform default, which can
# raise UnicodeEncodeError or produce a file that is not valid UTF-8.
with open(f"data/{your_dataset}.json", "w", encoding="utf-8") as f:
  json.dump(parsed, f, indent=4, ensure_ascii=False)

## Parameters

In [None]:
# --- Base model and prompt template ---
your_model_name_or_path = "Qwen/Qwen1.5-0.5B-Chat"
your_template = "qwen"

# --- Data ---
# Same dataset name as in the "Customizing QA" section; keep the two in sync.
your_dataset = "fintech_qa"
your_max_samples = 500

# --- Training ---
your_finetuning_type = "lora"
your_learning_rate = 1e-4
your_training_epoch = 50

# --- Output locations ---
your_output_dir = "fintech_bot"            # training output, later loaded as the adapter
your_export_dir = "fintech_bot_export"     # target directory of the export cell
overwrite = True                           # passed to training as overwrite_output_dir

## Fine-tune model via Command Line

In [None]:
from llmtuner import run_exp

# Launch a supervised fine-tuning run. Every `your_*` value and `overwrite`
# come from the Parameters cell; the remaining settings are fixed here.
run_exp({
  # What to train and on which data
  "stage": "sft",
  "do_train": True,
  "model_name_or_path": your_model_name_or_path,
  "dataset": your_dataset,
  "template": your_template,
  # Adapter configuration and where results are written
  "finetuning_type": your_finetuning_type,
  "lora_target": "all",
  "output_dir": your_output_dir,
  # Batching, schedule, logging and checkpoint cadence
  "per_device_train_batch_size": 4,
  "gradient_accumulation_steps": 4,
  "lr_scheduler_type": "cosine",
  "logging_steps": 10,
  "save_steps": 100,
  # Optimisation settings
  "learning_rate": your_learning_rate,
  "num_train_epochs": your_training_epoch,
  "max_samples": your_max_samples,
  "max_grad_norm": 1.0,
  "fp16": True,
  "overwrite_output_dir": overwrite,
})



## Infer the fine-tuned model

In [None]:
from llmtuner import ChatModel

# Load the base model together with the adapter found in the training output
# directory, using the same template and fine-tuning type as in training.
chat_model = ChatModel({
  "model_name_or_path": your_model_name_or_path,
  "adapter_name_or_path": your_output_dir,
  "finetuning_type": your_finetuning_type,
  "template": your_template,
})

# Interactive console chat: "exit" leaves the loop, "clear" resets the
# conversation history, any other input is sent along with the history so far.
messages = []
while True:
  query = input("\nUser: ")
  command = query.strip()
  if command == "exit":
    break
  if command == "clear":
    messages = []
    continue

  messages.append({"role": "user", "content": query})
  print("Assistant: ", end="", flush=True)
  # Echo each streamed chunk immediately and keep it so the full reply can be
  # stored in the history afterwards.
  pieces = []
  for piece in chat_model.stream_chat(messages):
    print(piece, end="", flush=True)
    pieces.append(piece)
  print()
  response = "".join(pieces)
  messages.append({"role": "assistant", "content": response})

## Merge LoRA weights

In [None]:
from llmtuner import export_model

# Export the model: the base model and the adapter from the training output
# directory are given as inputs, and the result is written to the export directory.
export_model({
  "model_name_or_path": your_model_name_or_path,
  "adapter_name_or_path": your_output_dir,
  "finetuning_type": your_finetuning_type,
  "template": your_template,
  "export_dir": your_export_dir,
  # "export_hub_model_id": "your_hf_id/test_identity",
})