# import

In [1]:
import os
import torch
import pandas as pd


from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template, standardize_sharegpt, train_on_responses_only

from trl import SFTTrainer
from datasets import load_dataset, Dataset
from transformers import EarlyStoppingCallback
from transformers import TrainingArguments, DataCollatorForSeq2Seq

from peft import LoraConfig, TaskType
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

from rouge_score import rouge_scorer

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


# check unsloth ver issue

In [2]:
# An issue has been reported starting with the 7.2 release:
# https://github.com/unslothai/unsloth/issues/3071
# Version pins kept here for reference (commented out):
#   !pip install unsloth-zoo==2025.7.1 unsloth==2025.7.1
#   !pip install trl==0.19.1
import trl
import unsloth
import unsloth_zoo

# One version per line, in the order: unsloth, unsloth_zoo, trl.
print(unsloth.__version__, unsloth_zoo.__version__, trl.__version__, sep="\n")

2025.7.1
2025.7.1
0.19.1


# 모델

In [3]:
# Candidate checkpoints for this notebook; MODEL is the one actually loaded below.
MODEL_CARDS = [
    'unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit',
    'unsloth/Meta-Llama-3.1-8B-bnb-4bit',
]
MODEL = MODEL_CARDS[0]

In [4]:
# Load the selected checkpoint in 4-bit together with its tokenizer.
# dtype=None is passed through as-is (presumably lets unsloth pick the dtype — confirm in its docs).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL,
    dtype=None,
    load_in_4bit=True,
    max_seq_length=2048,
)

# model = prepare_model_for_kbit_training(model)
# model.enable_input_require_grads()

# Wrap the base model with LoRA adapters on the attention and MLP projections.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=[
        "q_proj", "k_proj", "v_proj",
        "up_proj", "down_proj",
        "o_proj", "gate_proj",
    ],
    r=64,
    lora_alpha=16,
    lora_dropout=0,
    use_rslora=True,
    use_gradient_checkpointing="unsloth",
)

==((====))==  Unsloth 2025.7.1: Fast Qwen2 patching. Transformers: 4.54.1.
   \\   /|    NVIDIA GeForce RTX 3090. Num GPUs = 1. Max memory: 23.691 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 8.6. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Unsloth 2025.7.1 patched 48 layers with 48 QKV layers, 48 O layers and 48 MLP layers.


In [5]:
def check_trainable_params(model, show_grad=False):
    """Print how many of ``model``'s parameters are trainable.

    Args:
        model: Object exposing ``parameters()`` and ``named_parameters()``
            whose items have ``requires_grad`` and ``numel()``.
        show_grad: When true, additionally print one line per named
            parameter, tagged ``GRAD OK`` (``requires_grad`` is true) or
            ``GRAD NO`` (it is false).

    Returns:
        None. All results are written to stdout.
    """
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    print(f"학습 가능한 파라미터 덩어리: {len(trainable_params)}")
    total_params = sum(p.numel() for p in model.parameters())
    trainable_num = sum(p.numel() for p in trainable_params)
    # Guard the percentage so a model without any parameters does not raise
    # ZeroDivisionError.
    ratio = 100 * trainable_num / total_params if total_params else 0.0
    print(f"학습 가능한 파라미터: {trainable_num} / 전체 파라미터: {total_params} ({ratio:.4f}%)")

    if not show_grad:
        return

    # Only the requires_grad flag decides the tag; the name is padded to 60 columns.
    for name, param in model.named_parameters():
        tag = "GRAD OK" if param.requires_grad else "GRAD NO"
        print(f"{tag} : {name:60}")

In [6]:
# Report trainable vs. total parameter counts for the LoRA-wrapped model.
check_trainable_params(model)

학습 가능한 파라미터 덩어리: 672
학습 가능한 파라미터: 275251200 / 전체 파라미터: 9896776704 (2.7812%)


# preprocessor

In [8]:
def make_prompt(dialogue, summary="", for_inference=False):
    """Build the summarization prompt for a single dialogue.

    Reads the module-level ``SEP_TOKEN`` and ``EOS_TOKEN`` at call time.

    Args:
        dialogue: Dialogue text placed under the ``Dialogue:`` header.
        summary: Reference summary; only used when ``for_inference`` is false.
        for_inference: When true, the prompt ends right after
            ``"Word count:" + SEP_TOKEN`` so the model generates the word
            count and the summary itself.

    Returns:
        A ``(prompt, instruction)`` tuple. ``instruction`` is the fixed
        instruction + dialogue part; ``prompt`` is ``instruction`` followed by
        the generation part (and, for training, the summary and ``EOS_TOKEN``).
    """
    instruction = f"""Following the instructions below, summarize the given dialogue.
---
Instructions:
1. Read the dialogue carefully.
2. Make the summary concise and brief.
3. Before generating the summary, predict the number of words your summary will contain and state it as "Word count: X".
4. Write the summary to match the given target word count as closely as possible.
5. When writing the summary, use as many words and expressions from the original dialogue as possible, rather than paraphrasing into new language.
6. Preserve named entities in the summary.
7. Use english and among special characters and symbols, only numbers, commas, and periods may be used.
8. Reflect discourse relations, speech acts, and conversational intentions in the summary.
---
Dialogue:
{dialogue}
---
"""

    gen_body = "Word count:" + SEP_TOKEN
    if for_inference:
        # Stop here: the model is expected to continue with the count and summary.
        return instruction + gen_body, instruction

    # Whitespace-based split: split(' ') counted an empty summary as one word
    # and produced phantom words for consecutive spaces.
    n_words = len(summary.split())
    gen_body += f"{n_words}\n---\nSummary:\n"
    prompt = instruction + gen_body + summary + EOS_TOKEN

    return prompt, instruction

In [9]:
def preprocess_function(examples, tokenizer, show_prompt=None, for_inference=False, max_length=2048):
    """Tokenize a batch of dialogue/summary pairs into causal-LM features.

    Args:
        examples: Batch mapping with ``"dialogue"`` and ``"summary"`` lists.
        tokenizer: Callable tokenizer returning ``input_ids`` and
            ``attention_mask``; it must accept ``max_length``, ``padding``,
            ``truncation`` and ``add_special_tokens``.
        show_prompt: When truthy, print every rendered prompt.
        for_inference: Forwarded to ``make_prompt``.
        max_length: Length every sequence is padded/truncated to
            (default 2048, the previously hard-coded value).

    Returns:
        Dict with ``"input_ids"``, ``"labels"`` and ``"attention_mask"``, one
        row per example. Labels are ``-100`` on padding positions and on the
        instruction prefix so that neither contributes to the loss.
    """
    input_rows, label_rows, mask_rows = [], [], []
    bos_id = getattr(tokenizer, "bos_token_id", None)

    for dialogue, summary in zip(examples["dialogue"], examples["summary"]):
        full_prompt, inst_part = make_prompt(dialogue, summary, for_inference=for_inference)

        if show_prompt:
            print(full_prompt)
            print("="*80)

        tokenized = tokenizer(full_prompt, max_length=max_length, padding="max_length", truncation=True)
        input_ids = list(tokenized["input_ids"])
        attention_mask = list(tokenized["attention_mask"])

        # Start from a copy of the inputs, excluding padding from the loss.
        label_ids = [tok if keep else -100 for tok, keep in zip(input_ids, attention_mask)]

        # Token length of the instruction part, without special tokens.
        head_part_len = len(tokenizer(inst_part, add_special_tokens=False)["input_ids"])

        # First non-padding position; this also covers left-padding tokenizers.
        start = attention_mask.index(1) if 1 in attention_mask else len(attention_mask)
        # The full prompt is tokenized WITH special tokens, so a leading BOS
        # shifts the instruction by one position.
        if bos_id is not None and start < len(input_ids) and input_ids[start] == bos_id:
            head_part_len += 1

        # Mask the instruction prefix; clamp in case truncation cut it short.
        end = min(start + head_part_len, len(label_ids))
        for i in range(start, end):
            label_ids[i] = -100

        input_rows.append(input_ids)
        label_rows.append(label_ids)
        # Keep each example's own mask (previously the last example's mask was
        # repeated for the whole batch).
        mask_rows.append(attention_mask)

    batch = {
        "input_ids": input_rows,
        "labels": label_rows,
        "attention_mask": mask_rows,
    }
    return batch

# dataset

In [10]:
# Module-level tokens read by make_prompt at call time.
EOS_TOKEN = tokenizer.eos_token
print(EOS_TOKEN)
SEP_TOKEN = "<sep>"

# Register the separator as a special token, then grow the embedding matrix to match.
# NOTE(review): the resize happens after the LoRA adapters were attached, and the
# LoRA target_modules do not include the embedding / output layers — verify that
# the new "<sep>" row is actually trained.
tokenizer.add_special_tokens(dict(sep_token=SEP_TOKEN))
# tokenizer.add_special_tokens({"sep_token": SEP_TOKEN, "eos_token": EOS_TOKEN})
model.resize_token_embeddings(len(tokenizer))

<｜end▁of▁sentence｜>


Embedding(151666, 5120, padding_idx=151654)

In [50]:
df = pd.read_csv("/data/ephemeral/home/ds/origin/dsm_en_train.csv")

# Train on a reproducible 5% sample; switch to `df_train = df` for the full set.
# df_train = df
df_train = df.sample(random_state=142, frac=0.05)
# The validation set could not be translated into English, so a slice of train was used instead:
# df_evals = df.sample(frac=0.001, random_state=142)

dataset_train = Dataset.from_pandas(df_train)
# dataset_valid = Dataset.from_pandas(df_evals)

print('학습 샘플수', df_train.shape)
# print('평가 샘플수', df_evals.shape)

학습 샘플수 (623, 3)


In [51]:
# To inspect the rendered prompts, lower the sample count and pass show_prompt=True.
p_ds_train = dataset_train.map(
    lambda batch: preprocess_function(batch, tokenizer, show_prompt=False),
    batched=True,
    remove_columns=dataset_train.column_names,
)
# p_ds_valid = dataset_valid.map(
#     lambda batch: preprocess_function(batch, tokenizer, show_prompt=False, for_inference=True),
#     batched=True,
#     remove_columns=dataset_valid.column_names,
# )

Map:   0%|          | 0/623 [00:00<?, ? examples/s]

In [14]:
# Seq2seq collator over the notebook tokenizer; batches come back as PyTorch tensors.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True, return_tensors="pt")

# metric

In [16]:
def compute_rouge_metrics(pred_results):
    """Average ROUGE-1/2/L F-measures between predictions and labels.

    Uses the module-level ``tokenizer`` for decoding and ``rouge_scorer`` for
    scoring.

    Args:
        pred_results: ``(preds, labels)`` pair of token-id arrays in which
            ``-100`` marks ignored positions.

    Returns:
        Dict with ``"rouge1"``, ``"rouge2"``, ``"rougel"`` (mean F-measures)
        and ``"final_score"`` (their sum). All values are ``0.0`` when there
        is nothing to score.
    """
    import numpy as np  # local import: numpy is not imported at file level

    preds, labels = pred_results

    # Replace the ignore index with the pad id on fresh arrays so the caller's
    # arrays are not modified in place.
    pad_id = tokenizer.pad_token_id
    preds = np.asarray(preds)
    labels = np.asarray(labels)
    preds = np.where(preds == -100, pad_id, preds)
    labels = np.where(labels == -100, pad_id, labels)

    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    # decoded_preds = tokenizer.batch_decode(preds, clean_up_tokenization_spaces=True)

    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    rouge1_list, rouge2_list, rougel_list = [], [], []

    for pred, ref in zip(decoded_preds, decoded_labels):
        scores = scorer.score(ref, pred)
        rouge1_list.append(scores['rouge1'].fmeasure)
        rouge2_list.append(scores['rouge2'].fmeasure)
        rougel_list.append(scores['rougeL'].fmeasure)

    # An empty evaluation set would otherwise divide by zero below.
    if not rouge1_list:
        return {"rouge1": 0.0, "rouge2": 0.0, "rougel": 0.0, "final_score": 0.0}

    avg_rouge1 = sum(rouge1_list)/len(rouge1_list)
    avg_rouge2 = sum(rouge2_list)/len(rouge2_list)
    avg_rougel = sum(rougel_list)/len(rougel_list)
    final_score = avg_rouge1 + avg_rouge2 + avg_rougel

    return {"rouge1": avg_rouge1, "rouge2": avg_rouge2, "rougel": avg_rougel, "final_score": final_score}

In [17]:
# Early-stopping callback with patience 3 and threshold 0.01 (its registration on the trainer is commented out below).
early_stopping_callback = EarlyStoppingCallback(
    early_stopping_threshold=0.01,
    early_stopping_patience=3,
)

# trainer config

In [52]:
# Hyper-parameters and checkpointing policy for the LoRA fine-tuning run.
training_args = TrainingArguments(
    # --- output, logging, checkpointing ---
    output_dir=f"./{MODEL}-lora",
    report_to="none",
    logging_steps=4,
    save_strategy="steps",
    save_steps=20,
    save_total_limit=1,
    greater_is_better=False,
    # Evaluation is currently disabled:
    # eval_strategy='steps',
    # eval_steps=4,
    # per_device_eval_batch_size=1,
    # load_best_model_at_end=True,

    # --- batching and duration ---
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    num_train_epochs=2,

    # --- optimisation ---
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    weight_decay=0.01,
    optim="adamw_8bit",  # the 8-bit optimizer can also be unstable
    bf16=True,           # fp16 is unstable on some GPU models
)

# Supervised fine-tuning on the pre-tokenized training set.
trainer = SFTTrainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=p_ds_train,
    # eval_dataset=p_ds_valid,
    # compute_metrics=lambda pred: compute_metrics(pred, tokenizer, config),
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
)

# trainer.add_callback(early_stopping_callback)

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


# train

In [None]:
# Run fine-tuning; whatever trainer.train() returns is kept in trainer_stats.
trainer_stats = trainer.train()

In [None]:
# Presumably a deliberate hard stop: raising here keeps a "Run All" from
# continuing into the save / inference cells below.
raise Exception('stop here')

# save model

In [None]:
# model.save_pretrained("./dlb_dialogue_summary")
# tokenizer.save_pretrained("./dlb_dialogue_summary")

# resume train

In [None]:
# trainer.save_model(training_args.output_dir)
# model.save_pretrained_merged(training_args.output_dir, tokenizer, save_method="merged_16bit")
# model.load_adapter(training_args.output_dir, adapter_name="demo")

# inference

In [82]:
# Hand-written example dialogues (speakers tagged #Person1# / #Person2#) for quick
# manual inference checks; the cell below uses samples[2].
samples = [
"""#Person1#: Excuse me. Where is the exit from here?  
#Person2#: Actually, I’m trying to find it myself right now.  
#Person1#: This place is really confusing, isn’t it?  
#Person2#: Absolutely. Oh, do you see that sign over there?  
#Person1#: That must be the exit.  
#Person2#: Great. Let’s go check it out.
""", 
"""#Person1#: Who is that person over there?  
#Person2#: They’re the new teacher.  
#Person1#: What’s the teacher like?  
#Person2#: They’re really kind.  
#Person1#: What subject do they teach?  
#Person2#: English, of course.  
#Person1#: Are they your teacher?  
#Person2#: Yeah, they teach our class three times a week.  
#Person1#: Can they speak Chinese to students?  
#Person2#: Not very well.  
#Person1#: It’s good that you know English!""",
"""#Person1#: Excuse me, you can’t park here.  
#Person2#: I’m waiting for my friends. I’ll move it right away.  
#Person1#: This is a no-parking zone.  
#Person2#: But I didn’t see any sign.  
#Person1#: There’s a no-parking sign at the corner of the road. You must have seen it when you entered.  
#Person2#: I didn’t see it.  
#Person1#: I’m sorry, but I have to issue a ticket. You can file an objection with the court within 14 days. If you can prove you’re not at fault, you may avoid the fine."""
]

In [83]:
# Build the inference-mode prompt for the third sample; the returned instruction part is discarded.
prompt, _ = make_prompt(samples[2], for_inference=True)

In [73]:
# Tokenize the prompt into PyTorch tensors and move them to the 'cuda' device.
inputs = tokenizer(prompt, return_tensors="pt").to('cuda')

In [81]:
# Generate up to 50 new tokens for the sample prompt.
# NOTE(review): do_sample is not passed here — confirm the model's generation
# config enables sampling, otherwise temperature may have no effect.
outputs = model.generate(**inputs, max_new_tokens=50, temperature=0.3)
# outputs = model.generate(**inputs, max_new_tokens=23, temperature=0.8, top_k=50, top_p=0.95)
# Decode the first returned sequence, keeping special tokens visible.
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=False)

print(generated_text)

<｜begin▁of▁sentence｜>Following the instructions below, summarize the given dialogue.
---
Instructions:
1. Read the dialogue carefully.
2. Make the summary concise and brief.
3. Before generating the summary, predict the number of words your summary will contain and state it as "Word count: X".
4. Write the summary to match the given target word count as closely as possible.
5. When writing the summary, use as many words and expressions from the original dialogue as possible, rather than paraphrasing into new language.
6. Preserve named entities in the summary.
7. Use english and among special characters and symbols, only numbers, commas, and periods may be used.
8. Reflect discourse relations, speech acts, and conversational intentions in the summary.
---
Dialogue:

#Person1#: Excuse me, you can’t park here.  
#Person2#: I’m waiting for my friends. I’ll move it right away.  
#Person1#: This is a no-parking zone.  
#Person2#: But I didn’t see any sign.  
#Person1#: There’s a no-parking 

# inference trial 

In [87]:
# Trial split; the cells below read its 'fname' and 'dialogue' columns.
df_trial = pd.read_csv('/data/ephemeral/home/ds/origin/dsm_trial_en.csv')

In [149]:
def inference(dialogue, max_new_tokens, temp=0.3, top_p=1.00, num_return=1, show_gen=False, skip_sp_tok=False):
    """Generate a summary for one dialogue with the module-level model.

    Uses the module-level ``model``, ``tokenizer`` and ``make_prompt``.

    Args:
        dialogue: Dialogue text to summarize.
        max_new_tokens: Upper bound on the number of generated tokens.
        temp: Sampling temperature.
        top_p: Nucleus-sampling threshold.
        num_return: Forwarded as ``num_return_sequences``; only the first
            returned sequence is decoded.
        show_gen: When true, print the decoded generation.
        skip_sp_tok: Forwarded as ``skip_special_tokens`` when decoding.

    Returns:
        ``(gens, summary, prompt, pos_word_count)``:
        ``gens`` is the full decoded text (prompt included); ``summary`` is
        ``gens`` from the ``'Summary:\\n'`` marker onward (marker included),
        or ``""`` when the marker is absent; ``prompt`` is the input prompt;
        ``pos_word_count`` is the character length of the instruction part.
    """
    # Depending on skip_sp_tok, the returned position may not line up with
    # `gens` (special tokens may or may not be part of the decoded text).
    prompt, inst_part = make_prompt(dialogue, for_inference=True)

    pos_word_count = len(inst_part)
    inputs = tokenizer(prompt, return_tensors="pt").to('cuda')
    outputs = model.generate(**inputs,
                             max_new_tokens=max_new_tokens,
                             temperature=temp,
                             top_p=top_p,
                             num_return_sequences=num_return,
                             do_sample=True)

    gens = tokenizer.decode(outputs[0], skip_special_tokens=skip_sp_tok)
    if show_gen:
        print(gens)

    # str.find returns -1 when the marker is missing; index 0 is a valid hit
    # (the previous `> 0` check discarded it).
    pos_summary = gens.find('Summary:\n')
    summary = gens[pos_summary:] if pos_summary != -1 else ""

    return gens, summary, prompt, pos_word_count

In [198]:
# Single-example smoke test on the third trial dialogue, with sampling at temp=0.8 / top_p=0.8.
gens, summary, prompt, pos_word = inference(df_trial.iloc[2]['dialogue'], max_new_tokens=60, skip_sp_tok=True, temp=0.8, top_p=0.8)

In [205]:
# print(text[741:])
# print(summary)

In [215]:
from tqdm.notebook import tqdm

In [217]:
# Run inference over the trial set; swap in the slice below for a quick check.
# df = df_trial.iloc[0:10]
df = df_trial

# One (fname, full generation, extracted summary) tuple per trial row.
outs = []
for i, ex in tqdm(df.iterrows(), total=len(df)):
    fname = ex['fname']
    dialogue = ex['dialogue']
    # TODO: add a prompt that requests a summary length proportional to the dialogue
    gens, summary, _, _ = inference(
        dialogue,
        max_new_tokens=60,
        temp=0.8,
        top_p=0.8,
        skip_sp_tok=True,
    )
    outs.append((fname, gens, summary))

  0%|          | 0/499 [00:00<?, ?it/s]

In [218]:
# Collect the (fname, gens, summary) tuples into a DataFrame; no column names are given.
df_res = pd.DataFrame(outs)