In [None]:
!pip install -q datasets transformers accelerate trl peft bitsandbytes

In [None]:
# from huggingface_hub import login
# login()

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from trl import SFTTrainer, SFTConfig
import torch

In [None]:
# 1. Load the dataset: the "main" configuration of GSM8K, restricted to the
#    first 2,000 rows of the training split.
dataset = load_dataset(
    path="openai/gsm8k",
    name="main",
    split="train[:2000]",
)

In [None]:
# 2. Load the model and tokenizer
model_name = "Qwen/Qwen1.5-1.8B"  # e.g. "meta-llama/Llama-2-7b-hf" or "google/gemma-2b"
# model_name = "facebook/opt-350m"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Without an explicit placement the freshly loaded model is not moved to an
# accelerator, so the pre-training baseline generation would run on the CPU
# while the post-training generation runs wherever the trainer put the model.
# Move it up front so both measurements use the same device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.67G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

In [None]:
# 3. Prompt construction helper
def make_prompt(example, with_prompt = True):
  """Build the input text for a single dataset example.

  Args:
    example: Mapping that provides the question string under "question".
    with_prompt: When truthy, the chain-of-thought cue
      "Let's think step by step." is appended on its own line.

  Returns:
    The question with surrounding whitespace stripped, optionally followed
    by a newline, the cue, and a trailing newline.
  """
  stripped_question = example["question"].strip()
  if not with_prompt:
    return stripped_question
  return f"{stripped_question}\nLet's think step by step.\n"

In [None]:
# 4. Generation helper (used to test the model before and after training)
def predict_sentiment(example, model, tokenizer, with_prompt=True, max_new_tokens=100):
    """Greedily generate text for one example and return the decoded sequence.

    Despite its name, this helper does plain text generation, not sentiment
    classification; the name is kept so existing calls keep working.

    Args:
        example: Dataset row (dict) passed to ``make_prompt``.
        model: Causal language model exposing ``generate``, ``device``,
            ``training``, ``eval`` and ``train``.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        with_prompt: Forwarded to ``make_prompt``.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The first generated sequence decoded with special tokens skipped.
        As the recorded outputs in this notebook show, the text starts with
        the prompt itself followed by the continuation.
    """
    prompt = make_prompt(example, with_prompt=with_prompt)  # dict-based example
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)

    # Pass the padding id explicitly; otherwise generate() falls back to the
    # EOS id on its own and logs a warning on every call.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # After trainer.train() the model is left in training mode. Generate in
    # eval mode so dropout cannot affect the output, then restore the mode
    # the caller had.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=False,
                pad_token_id=pad_token_id,
            )
    finally:
        model.train(was_training)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
# 5. Record the model's outputs before training
sample = dataset[20]
before_no_prompt = predict_sentiment(
    example=sample,
    model=model,
    tokenizer=tokenizer,
    with_prompt=False,
)
before_with_prompt = predict_sentiment(
    example=sample,
    model=model,
    tokenizer=tokenizer,
    with_prompt=True,
)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [None]:
# 6. Preprocessing: build the "text" field as prompt + reference answer
def preprocess(example):
  """Turn one dataset row into a single training string.

  Args:
    example: Mapping with "question" (consumed by ``make_prompt``) and
      "answer" strings.

  Returns:
    A dict with one key, "text", holding the chain-of-thought prompt, the
    stripped answer, and the tokenizer's EOS token, concatenated in that order.
  """
  pieces = [
    make_prompt(example, with_prompt=True),
    example["answer"].strip(),
    tokenizer.eos_token,  # reads the notebook-level tokenizer
  ]
  return {"text": "".join(pieces)}

train_dataset = dataset.map(preprocess)

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [None]:
# Show the same example at each stage: raw row, prompted input, bare
# question, and the final training record.
print("Raw Dataset: \n", sample, "\n")
print(
    "Dataset with prompt: \n",
    make_prompt(sample, with_prompt=True),
    "\n",
)
print(
    "Dataset no prompt: \n",
    make_prompt(sample, with_prompt=False),
    "\n",
)
print(
    "Preprocess Datset: \n",
    preprocess(sample),
    "\n",
)

Raw Dataset: 
 {'question': 'Bella bought stamps at the post office. Some of the stamps had a snowflake design, some had a truck design, and some had a rose design. Bella bought 11 snowflake stamps. She bought 9 more truck stamps than snowflake stamps, and 13 fewer rose stamps than truck stamps. How many stamps did Bella buy in all?', 'answer': 'The number of truck stamps is 11 + 9 = <<11+9=20>>20.\nThe number of rose stamps is 20 − 13 = <<20-13=7>>7.\nBella bought 11 + 20 + 7 = <<11+20+7=38>>38 stamps in all.\n#### 38'} 

Dataset with prompt: 
 Bella bought stamps at the post office. Some of the stamps had a snowflake design, some had a truck design, and some had a rose design. Bella bought 11 snowflake stamps. She bought 9 more truck stamps than snowflake stamps, and 13 fewer rose stamps than truck stamps. How many stamps did Bella buy in all?
Let's think step by step.
 

Dataset no prompt: 
 Bella bought stamps at the post office. Some of the stamps had a snowflake design, some had 

In [None]:
# 7. SFT configuration
training_args = SFTConfig(
    # Checkpoint location and retention.
    # NOTE(review): the directory name says "gemma", but model_name in this
    # notebook is a Qwen checkpoint — confirm the intended name.
    output_dir="./sft-gemma-cot",
    save_steps=100,
    save_total_limit=1,
    # Training schedule.
    num_train_epochs=2,
    per_device_train_batch_size=1,
    # Sequence length cap (presumably in tokens; the trainer log reports a
    # truncation pass over the dataset).
    max_seq_length=512,
    # Report the training loss every 10 steps.
    logging_steps=10,
)

In [None]:
# 8. Train with SFTTrainer
trainer = SFTTrainer(
    args=training_args,
    train_dataset=train_dataset,
    model=model,
)
# Left as the cell's last expression so the notebook displays the result.
trainer.train()

Adding EOS to train dataset:   0%|          | 0/2000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/2000 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/2000 [00:00<?, ? examples/s]

Step,Training Loss
10,0.4124
20,0.5074
30,0.4423
40,0.6055
50,0.5967
60,0.6995
70,0.6282
80,0.7191
90,0.7513
100,0.7092


TrainOutput(global_step=4000, training_loss=0.5913072315454483, metrics={'train_runtime': 3669.8506, 'train_samples_per_second': 1.09, 'train_steps_per_second': 1.09, 'total_flos': 6899265043292160.0, 'train_loss': 0.5913072315454483})

In [None]:
# 9. Record the model's outputs after training (same example as before)
after_no_prompt = predict_sentiment(
    example=sample,
    model=model,
    tokenizer=tokenizer,
    with_prompt=False,
)
after_with_prompt = predict_sentiment(
    example=sample,
    model=model,
    tokenizer=tokenizer,
    with_prompt=True,
)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [None]:
# 10. Compare the outputs recorded before and after training
def _show_pair(no_prompt_text, with_prompt_text):
    """Print the no-prompt and with-prompt generations under their labels."""
    print(f"[No prompt]\n{no_prompt_text}")
    print(f"[With prompt]\n{with_prompt_text}")

print("🟡 BEFORE fine-tuning")
_show_pair(before_no_prompt, before_with_prompt)

print("\n🟢 AFTER fine-tuning")
_show_pair(after_no_prompt, after_with_prompt)

🟡 BEFORE fine-tuning
[No prompt]
Bella bought stamps at the post office. Some of the stamps had a snowflake design, some had a truck design, and some had a rose design. Bella bought 11 snowflake stamps. She bought 9 more truck stamps than snowflake stamps, and 13 fewer rose stamps than truck stamps. How many stamps did Bella buy in all? Bella bought 11 snowflake stamps.
She bought 9 more truck stamps than snowflake stamps, so she bought 11 + 9 = 20 truck stamps.
She bought 13 fewer rose stamps than truck stamps, so she bought 20 - 13 = 7 rose stamps.
In total, Bella bought 11 + 20 + 7 = 38 stamps.
#### 38
The answer is: 38
[With prompt]
Bella bought stamps at the post office. Some of the stamps had a snowflake design, some had a truck design, and some had a rose design. Bella bought 11 snowflake stamps. She bought 9 more truck stamps than snowflake stamps, and 13 fewer rose stamps than truck stamps. How many stamps did Bella buy in all?
Let's think step by step.
Bella bought 11 snowfla