## Llama 모델 학습
### 사용환경: Colab

In [None]:
!pip install pyfiglet
from pyfiglet import Figlet
f = Figlet(font='slant')
print(f.renderText('HELLO SILVERPRIZE'))

### 키 파일 생성

In [None]:

import json
import os
from google.colab import userdata

# Service-account credentials written to key.json. The two private fields are
# fetched with google.colab's userdata.get(); the remaining fields are literal
# metadata.
data = {
  "type": "service_account",
  "project_id": "llama-433214",
  "private_key_id": userdata.get("private_key_id"),
  # Turn literal backslash-n sequences in the stored value into real newlines.
  "private_key": userdata.get("private_key").replace("\\n", "\n"),
  "client_email": "storage-manager@llama-433214.iam.gserviceaccount.com",
  "client_id": "112540232109707769744",
  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
  "token_uri": "https://oauth2.googleapis.com/token",
  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/storage-manager%40llama-433214.iam.gserviceaccount.com",
  "universe_domain": "googleapis.com"
}

json_file_path = "key.json"
# The file holds a private key, so create it readable/writable by the owner
# only instead of relying on the process umask.
key_fd = os.open(json_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(key_fd, "w") as json_file:
    json.dump(data, json_file)
# The mode passed to os.open only applies when the file is created; tighten a
# pre-existing key.json as well.
os.chmod(json_file_path, 0o600)

### Google Auth

In [None]:
import os

# Tell the Google client libraries where the service-account key lives.
# An absolute path is stored because later cells change the working directory
# with %cd, after which the relative "./key.json" would no longer resolve.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.abspath("./key.json")

### 모델 다운로드

In [None]:
from google.cloud import storage
# Name of the Cloud Storage bucket passed to download_folder() further down.
bucket_name = "bucket-llamamodels"

# upload file
def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Upload a local file to an object in a Cloud Storage bucket.

    Args:
        bucket_name: Name of the target bucket.
        source_file_name: Path of the local file to upload.
        destination_blob_name: Object name to write inside the bucket.
    """
    target_blob = storage.Client().bucket(bucket_name).blob(destination_blob_name)

    # A generation-match precondition of 0 is passed with the upload; per the
    # google-cloud-storage documentation this makes the request succeed only
    # when no live object already exists under that name.
    target_blob.upload_from_filename(source_file_name, if_generation_match=0)

    print(f"File {source_file_name} uploaded to {destination_blob_name}.")

# download file
def download_blob(bucket_name, source_blob_name, destination_file_name):
    """Download one object from a Cloud Storage bucket to a local file.

    Missing parent directories of ``destination_file_name`` are created first.

    Args:
        bucket_name: Name of the bucket to read from.
        source_blob_name: Object name inside the bucket.
        destination_file_name: Local path the object is written to.
    """
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(source_blob_name)

    # A bare file name has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one.
    parent_dir = os.path.dirname(destination_file_name)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    blob.download_to_filename(destination_file_name)

    print(f"Blob {source_blob_name} downloaded to {destination_file_name}.")


# download folder
def download_folder(bucket_name, source_folder_name, destination_folder_name):
    """Download every object under a prefix into a local folder.

    Each object's path relative to ``source_folder_name`` is reproduced below
    ``destination_folder_name``; the actual transfer is delegated to
    ``download_blob``.

    Args:
        bucket_name: Name of the bucket to read from.
        source_folder_name: Object-name prefix to list (the remote "folder").
        destination_folder_name: Local directory that receives the files.
    """
    storage_client = storage.Client()
    blobs = storage_client.list_blobs(bucket_name, prefix=source_folder_name)

    for blob in blobs:
        # An object whose name ends in "/" (e.g. a folder placeholder) maps to
        # a directory path rather than a file path, so trying to write it as a
        # file fails or blocks the directory its children need. Skip it.
        if blob.name.endswith("/"):
            continue

        relative_path = os.path.relpath(blob.name, source_folder_name)
        local_file_path = os.path.join(destination_folder_name, relative_path)
        download_blob(bucket_name, blob.name, local_file_path)

In [None]:
# Path setup
projectPath = os.getcwd()  # current working directory (expected to be /content on Colab)

# Location inside the bucket to download from
# https://console.cloud.google.com/storage/browser/bucket-llamamodels;tab=objects?forceOnBucketsSortingFiltering=true&project=llama-433214&prefix=&forceOnObjectsSortingFiltering=false
prefix = "gguf/"

# Name of the folder to download
folderPath = "Llama-3.1-Korean-8B-Instruct/"

# Remote prefix is plain concatenation; the local target sits under projectPath.
sourcePath = f"{prefix}{folderPath}"
destPath = os.path.join(projectPath, folderPath)

In [None]:
# Download: copy everything under sourcePath in the bucket into destPath.
download_folder(bucket_name, sourcePath, destPath)

---

## 모델 학습

### 의존성 설치

In [None]:
# Remove any previous checkout and clone LLaMA-Factory afresh under /content.
%cd /content/
%rm -rf LLaMA-Factory
!git clone https://github.com/hiyouga/LLaMA-Factory.git
%cd LLaMA-Factory
%ls
# Editable install of the cloned repo with its torch and bitsandbytes extras.
!pip install -e .[torch,bitsandbytes]

### GPU 확인

In [None]:
import torch

# Print a hint when no CUDA device is available. A plain conditional is used
# rather than assert, because assert statements are stripped under `python -O`.
if not torch.cuda.is_available():
  print("Please set up a GPU before using LLaMA Factory: https://medium.com/mlearning-ai/training-yolov4-on-google-colab-316f8fff99c6")

### 파라미터 설정

In [None]:
# 1. Register the dataset formats
import json
from pathlib import Path

filepath = Path("/content/LLaMA-Factory/data/dataset_info.json")
with filepath.open('r') as info_file:
  data = json.load(info_file)

# STT correction model / verification model
# Training file name: stt_train.json
stt = dict(
  file_name="/content/stt_train.json",
  columns=dict(
    prompt="instruction",
    query="input",
    response="output",
    system="system",
  ),
)

# Chat model
# Training file name: chat_train.json
chat = dict(
  file_name="/content/chat_train.json",
  # TODO: fill in the rest of the data format
  columns=dict(prompt="instruction"),
)

# Add (or overwrite) both entries, then write the registry back in place.
data.update(stt=stt, chat=chat)

with filepath.open('w') as info_file:
  json.dump(data, info_file, indent=2)

### 학습

In [None]:
# Settings for the training run
modelPath = "/content/Llama-3.1-Korean-8B-Instruct"
dataset = "stt"  # stt / chat

param_dir = "/content/train_params.json"  # file the training parameters are saved to
final_param_dir = "/content/train_params_final.json"  # file the final parameters are saved to

output_dir = "/content/trained"  # where the trained output is saved (intermediate)
final_dir = "/content/final"  # where the merged model is saved (final)

template, finetuning_type, quantization_bit = "llama3", "lora", 4

In [None]:
# Llama모델의 rope_scaling 수정
import json
configPath = modelPath + "/config.json"
with open(configPath, 'r') as f:
    data = json.load(f)

data['rope_scaling'] = {
    "factor": 8.0,
    "type": "linear"
}

with open(configPath, 'w') as f:
    json.dump(data, f, indent=2)

In [None]:
import json

args = dict(
  stage="sft",
  do_train=True,
  model_name_or_path=modelPath,
  dataset=dataset,
  template=template,
  finetuning_type=finetuning_type,
  lora_target="all",
  output_dir=output_dir,
  # 아래부터는 상세 파라미터
  per_device_train_batch_size=2,               # the batch size
  gradient_accumulation_steps=4,               # the gradient accumulation steps
  lr_scheduler_type="cosine",                 # use cosine learning rate scheduler
  logging_steps=10,                      # log every 10 steps
  warmup_ratio=0.1,                      # use warmup scheduler
  save_steps=1000,                      # save checkpoint every 1000 steps
  learning_rate=5e-5,                     # the learning rate
  num_train_epochs=3.0,                    # the epochs of training
  max_samples=500,                      # use 500 examples in each dataset
  max_grad_norm=1.0,                     # clip gradient norm to 1.0
  quantization_bit=quantization_bit,        # use 4-bit QLoRA
  loraplus_lr_ratio=16.0,                   # use LoRA+ algorithm with lambda=16.0
  fp16=True,                         # use float16 mixed precision training
)

# content/train_params.json에 학습 파라미터가 저장됨
json.dump(args, open(param_dir, "w", encoding="utf-8"), indent=2)

%cd /content/LLaMA-Factory/

!llamafactory-cli train ../train_params.json

---

### 모델 사용

In [None]:
# System prompt (Korean) passed as `system=` on every stream_chat call in the
# chat loop. It tells the model to correct errors in an answer to a question in
# three steps (find the answer's intent, extract the parts unrelated to it,
# replace those keywords with similar-sounding or similar words) and to output
# the result of each step.
prompt = "너는 '질문'에 대한 '답변'에서 오류를 교정하는 역할을 수행한다. 교정은 다음과 같은 순서로 이루어진다. 1. '답변'의 의도를 파악하라. 2. 1번의 의도와 관련없는 부분을 추출하라. 3. 2번의 키워드를 발음이 유사하거나 유사한 단어로 수정하라.답변은 각 과정의 결과를 모두 출력한다."

In [None]:
from llamafactory.chat import ChatModel
from llamafactory.extras.misc import torch_gc

%cd /content/LLaMA-Factory/

args = dict(
  model_name_or_path=modelPath,
  adapter_name_or_path=output_dir,
  template=template,
  finetuning_type=finetuning_type,
  quantization_bit=quantization_bit,
)
chat_model = ChatModel(args)

messages = []

print("'ㅈ'입력시 기록 삭제, 빈칸 입력시 종료")
while True:
  query = input("\nUser: ")
  if query == "":
    break
  if query == "ㅈ":
    messages = []
    torch_gc()
    print("대화기록 삭제됨.")
    continue

  messages.append({"role": "user", "content": query})
  print("Assistant: ", end="", flush=True)

  response = ""
  for new_text in chat_model.stream_chat(messages, system=prompt):
    print(new_text, end="", flush=True)
    response += new_text
  print()
  messages.append({"role": "assistant", "content": response})

torch_gc()

---