### Requirements 설치

In [1]:
%%capture
# NOTE(review): `os` is imported but not used in any visible cell — confirm whether it is still needed.
import os

# Colab install. `--no-deps` installs only the listed packages without pulling in
# their declared dependencies; xformers and trl are pinned to specific versions.
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
!pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
!pip install --no-deps unsloth
!pip install fsspec==2023.9.2

# Local environment
# !pip install unsloth

### 모델 불러오기

In [None]:
from unsloth import FastLanguageModel
import torch
# Maximum sequence length; the same value is passed to SFTTrainer later in the notebook.
max_seq_length = 2048
# NOTE(review): None presumably lets Unsloth choose the dtype automatically — confirm.
dtype = None
# Request 4-bit loading of the base model weights.
load_in_4bit = True

# Load the Llama 3.2 1B Instruct checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-1B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

In [None]:
# Wrap the loaded model with LoRA adapters (rank 16, alpha 16, no dropout, no bias terms).
# The adapters target the attention projections (q/k/v/o) and the MLP projections
# (gate/up/down). `model` is rebound to the wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    # NOTE(review): "unsloth" presumably selects Unsloth's own gradient-checkpointing
    # implementation — confirm against the Unsloth docs.
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

<a name="Data"></a>
### Data Prep
이 노트북에서는 Llama 3.2 (1B Instruct) 모델을 대화형 데이터로 파인튜닝합니다.
프롬프트는 Llama 3.1 채팅 템플릿(`llama-3.1`) 형식으로 구성해야 합니다. 예를 들어:

```
<|begin_of_text|><|start_header_id|>user<|end_header_id|>
안녕하세요!
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
안녕하세요! 무엇을 도와드릴까요?
<|eot_id|>
```

이런 형식을 자동으로 만들어주는 함수가 get_chat_template()이며, Unsloth 라이브러리를 통해 사용할 수 있습니다. 이 함수는 LLaMA 3 외에도 여러 모델 포맷을 지원합니다.

In [None]:
import json

def parse_text_field(text):
    """Convert a ChatML-formatted string into a list of chat messages.

    The text is split on ``<|im_end|>``. Each resulting chunk that contains a
    ``<|im_start|>user`` or ``<|im_start|>assistant`` marker becomes one
    message; chunks with neither marker (for example system turns or trailing
    whitespace) are skipped.

    The role is taken from whichever marker appears first in the chunk, and
    only that single occurrence is removed. A turn whose body merely quotes
    the other marker therefore keeps its real role and its full content.

    Args:
        text: Raw ChatML conversation string.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts in source order,
        where role is ``"user"`` or ``"assistant"`` and content is stripped
        of surrounding whitespace.
    """
    role_markers = {
        "user": "<|im_start|>user",
        "assistant": "<|im_start|>assistant",
    }
    conversations = []
    for chunk in text.split("<|im_end|>"):
        chunk = chunk.strip()
        # Collect (position, role, marker) for each marker present in the chunk.
        found = [
            (chunk.find(marker), role, marker)
            for role, marker in role_markers.items()
            if marker in chunk
        ]
        if not found:
            continue
        # The earliest marker is the turn header; later ones belong to the body.
        _, role, marker = min(found)
        content = chunk.replace(marker, "", 1).strip()
        conversations.append({"role": role, "content": content})
    return conversations

# Convert every ChatML record in data.jsonl into a {"conversations": [...]} row.
converted = []
with open("data.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        # Blank lines (e.g. a trailing newline at the end of the file) are not
        # JSON documents; skip them instead of letting json.loads raise.
        if not line.strip():
            continue
        obj = json.loads(line)
        convos = parse_text_field(obj["text"])
        converted.append({"conversations": convos})

# Save the converted records to a new JSON Lines file (one object per line).
# ensure_ascii=False keeps non-ASCII text readable rather than \u-escaped.
with open("converted_data.jsonl", "w", encoding="utf-8") as f:
    for item in converted:
        f.write(json.dumps(item, ensure_ascii=False) + "\n")


In [None]:
from datasets import load_dataset
from unsloth.chat_templates import get_chat_template

# Load the converted JSON Lines file as the "train" split.
dataset = load_dataset("json", data_files="converted_data.jsonl", split="train")

# Rebind the tokenizer to one configured with the "llama-3.1" chat template.
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

def formatting_prompts_func(examples):
    """Render each conversation in a batch to a single prompt string.

    Args:
        examples: Batched mapping with a "conversations" entry, one list of
            chat messages per example.

    Returns:
        ``{"text": [...]}`` holding one rendered string per conversation,
        produced by the notebook-level ``tokenizer``'s chat template without
        tokenizing and without a trailing generation prompt.
    """
    rendered = []
    for conversation in examples["conversations"]:
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}


In [None]:
from unsloth.chat_templates import standardize_sharegpt
# Normalize the conversation records, then add a "text" column with the
# chat-template-rendered prompt for every example (processed in batches).
dataset = standardize_sharegpt(dataset)
dataset = dataset.map(formatting_prompts_func, batched = True,)

`standardize_sharegpt`가 수행하는 데이터셋 형식 변환의 예시 (ShareGPT 형식 → role/content 형식):
```
{"from": "system", "value": "You are an assistant"}
{"from": "human", "value": "What is 2+2?"}
{"from": "gpt", "value": "It's 4."}
```
위 형식이 아래와 같이 변환됩니다:
```
{"role": "system", "content": "You are an assistant"}
{"role": "user", "content": "What is 2+2?"}
{"role": "assistant", "content": "It's 4."}
```

인덱스 1285에 위치한 데이터 확인 (인덱스는 0부터 시작)

In [None]:
dataset[1285]["conversations"] # original conversation data

[참고] Llama 3.1 Instruct 모델의 기본 채팅 템플릿은 "Cutting Knowledge Date: December 2023\nToday Date: 26 July 2024"이라는 문장을 자동으로 추가하므로 놀라지 마세요!

In [None]:
dataset[1285]["text"] # data after chat-template rendering

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

# Supervised fine-tuning trainer that reads the rendered "text" column.
trainer = SFTTrainer(
    # --- model and data ---
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
    dataset_num_proc=2,
    packing=False,
    # --- optimisation settings ---
    args=TrainingArguments(
        # 4 examples per device x 2 accumulation steps = 8 examples per optimizer step.
        per_device_train_batch_size=4,
        gradient_accumulation_steps=2,
        warmup_steps=45,
        num_train_epochs=15,
        # max_steps=20,
        learning_rate=2e-5,
        # Use bf16 where the hardware supports it, otherwise fall back to fp16.
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=50,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        report_to="none",
    ),
)

손실(loss) 계산 시 사용자 입력 부분은 마스킹하고, 어시스턴트 응답 토큰에 대해서만 학습하도록 설정하는 코드

In [None]:
from unsloth.chat_templates import train_on_responses_only
# Rebind the trainer to the responses-only variant. The two strings are the
# Llama 3.1 template headers that open a user turn and an assistant turn.
# NOTE(review): presumably every token outside assistant responses gets label
# -100 (the inspection cell below replaces -100 before decoding) — confirm.
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)

In [None]:
# Decode the full token sequence of training example 5 to inspect the prompt.
tokenizer.decode(trainer.train_dataset[5]["input_ids"])

In [None]:
# Token id of a single space, used as a stand-in for masked label positions.
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
# Labels equal to -100 are swapped for the space id so the sequence can be decoded;
# what remains readable is the part of example 5 that keeps its labels.
tokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[5]["labels"]])

In [None]:
# Run fine-tuning; the value returned by train() is kept in trainer_stats.
trainer_stats = trainer.train()

In [None]:
from transformers import TextStreamer

# Prepare the fine-tuned model for generation.
FastLanguageModel.for_inference(model)

# Single-turn user prompt.
messages = [{"role": "user", "content": "온수가 나오지 않을 때는 어떻게 해결하나요?"}]

# Render the prompt with the chat template (ending with the generation prompt),
# tokenize it to a PyTorch tensor, and move it to the GPU.
inputs = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to("cuda")

# Stream the generated text as it is produced, leaving out the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)
_ = model.generate(
    input_ids=inputs,
    streamer=text_streamer,
    max_new_tokens=128,
    use_cache=True,
    temperature=0.5,
    min_p=0.1,
)

In [None]:
from transformers import TextStreamer

# Prepare the fine-tuned model for generation.
FastLanguageModel.for_inference(model)

# Single-turn user prompt.
messages = [{"role": "user", "content": "pinky가 무엇인가요?"}]

# Render the prompt with the chat template (ending with the generation prompt),
# tokenize it to a PyTorch tensor, and move it to the GPU.
inputs = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to("cuda")

# Stream the generated text as it is produced, leaving out the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)
_ = model.generate(
    input_ids=inputs,
    streamer=text_streamer,
    max_new_tokens=128,
    use_cache=True,
    temperature=0.1,
    min_p=1,
)

### LoRA 어댑터 가중치 저장

In [None]:
# Write the model and the tokenizer to the local "lora_model" directory.
model.save_pretrained("lora_model")  # Local saving
tokenizer.save_pretrained("lora_model")

### GGUF 형식으로 모델 저장 (q4_k_m 양자화)

In [None]:
# Export the model and tokenizer as GGUF under "model" using q4_k_m quantization.
model.save_pretrained_gguf("model", tokenizer, quantization_method = "q4_k_m")

In [None]:
# Colab only: mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

### 본인 드라이브로 모델 옮기기

In [None]:
# Copy the exported GGUF file into the LLM_A folder on the mounted Drive.
!cp /content/model/unsloth.Q4_K_M.gguf /content/drive/MyDrive/LLM_A/