In [1]:
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from peft import get_peft_model, LoraConfig, TaskType

# Suppress warnings (optional, for cleaner output)
import warnings
warnings.filterwarnings("ignore")

# Load the pre-trained model and tokenizer
model_name = "gpt2"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    clean_up_tokenization_spaces=True  # Explicitly set to suppress FutureWarning
)

# Set the padding token
tokenizer.pad_token = tokenizer.eos_token

# Inspect the model architecture to find the correct layer names for LoRA
# Print the model to see its structure (uncomment to debug)
# print(model)

# Define the LoRA configuration with corrected target modules
# For GPT-2, the attention layers are typically in transformer.h[i].attn
# After inspecting GPT-2's architecture, the correct target module for attention weights is often "attn.c_attn"
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["h.0.attn.c_attn", "h.1.attn.c_attn"],  # Adjusted for GPT-2's naming convention
)

# Apply the PEFT model
peft_model = get_peft_model(model, lora_config)

# Expanded reasoning dataset (still small, but more diverse)
train_data = [
    "Q: What is 2 + 2? Let's think step by step. First, we know that 2 is a number. Adding another 2 to it gives us 4.",
    "Q: What is the capital of France? Let's think step by step. France is a country in Europe. The capital of France is Paris.",
    "Q: If a train leaves the station at 3 PM and travels at 60 mph, what time will it reach a station 180 miles away? Let's think step by step. The train travels 60 miles in one hour. To travel 180 miles, it will take 3 hours. Therefore, it will reach the station at 6 PM.",
    "Q: What is 5 + 3? Let's think step by step. Starting with 5, adding 3 more gives us 8.",
    "Q: What is the capital of Japan? Let's think step by step. Japan is an island nation in Asia. Its capital is Tokyo.",
    "Q: If a car drives at 40 mph for 120 miles, how long does it take? Let's think step by step. The car travels 40 miles in one hour. For 120 miles, it takes 120 / 40 = 3 hours."
]

# Custom dataset class
class ReasoningDataset(Dataset):
    def __init__(self, data, tokenizer, max_length=128):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        inputs = self.tokenizer(
            self.data[idx],
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt"
        )
        inputs = {k: v.squeeze(0) for k, v in inputs.items()}
        inputs["labels"] = inputs["input_ids"].clone()  # Add labels for causal LM
        return inputs

# Create dataset and dataloader
train_dataset = ReasoningDataset(train_data, tokenizer)
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    num_train_epochs=5,  # Increased epochs for better training
    save_steps=10,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=1,  # Log more frequently to monitor progress
    disable_tqdm=False  # Ensure progress bar is enabled (if supported)
)

# Data collator for language modeling
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Trainer for fine-tuning
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=data_collator
)

# Fine-tune the model
trainer.train()

# Save the fine-tuned model
peft_model.save_pretrained("./fine_tuned_model")

  0%|          | 0/10 [00:00<?, ?it/s]

{'loss': 3.2989, 'grad_norm': 0.4476770758628845, 'learning_rate': 4.5e-05, 'epoch': 0.5}
{'loss': 3.0744, 'grad_norm': 1.0051884651184082, 'learning_rate': 4e-05, 'epoch': 1.0}
{'loss': 3.172, 'grad_norm': 0.2706291675567627, 'learning_rate': 3.5e-05, 'epoch': 1.5}
{'loss': 3.4702, 'grad_norm': 0.605918824672699, 'learning_rate': 3e-05, 'epoch': 2.0}
{'loss': 3.2345, 'grad_norm': 0.32159414887428284, 'learning_rate': 2.5e-05, 'epoch': 2.5}
{'loss': 3.0475, 'grad_norm': 0.2641453742980957, 'learning_rate': 2e-05, 'epoch': 3.0}
{'loss': 3.22, 'grad_norm': 0.5491503477096558, 'learning_rate': 1.5e-05, 'epoch': 3.5}
{'loss': 3.6996, 'grad_norm': 0.4256013333797455, 'learning_rate': 1e-05, 'epoch': 4.0}
{'loss': 3.2737, 'grad_norm': 0.379459947347641, 'learning_rate': 5e-06, 'epoch': 4.5}
{'loss': 3.1594, 'grad_norm': 0.2503287196159363, 'learning_rate': 0.0, 'epoch': 5.0}
{'train_runtime': 1.826, 'train_samples_per_second': 16.429, 'train_steps_per_second': 5.476, 'train_loss': 3.26501517

In [4]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# 載入分詞器和模型
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # 確保填充標記設置

# 載入微調後的模型
model = AutoModelForCausalLM.from_pretrained("./fine_tuned_model")

# 設置模型為評估模式
model.eval()

# 定義推理函數
def generate_response(prompt, max_length=50):
    # 將輸入轉換為模型可用的格式
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
    
    # 生成輸出
    with torch.no_grad():  # 禁用梯度計算以節省記憶體
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,  # 使用隨機採樣生成（可選）
            temperature=0.7,  # 控制生成的多樣性（可調整）
            top_k=50,  # 控制生成時的候選詞數（可調整）
        )
    
    # 解碼輸出
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# 測試推理
prompt = "Q: What is 2 + 2? Let's think step by step."
response = generate_response(prompt)
print(f"Prompt: {prompt}")
print(f"Response: {response}")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Q: What is 2 + 2? Let's think step by step.
Response: Q: What is 2 + 2? Let's think step by step.

1.1.1:

1.2.1:

When I think of a time I need to spend thinking about things, I think about


In [3]:
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from peft import get_peft_model, LoraConfig, TaskType
# from datasets import load_dataset  # 使用 Hugging Face 資料集（可選）

# 抑制警告（可選，保持輸出乾淨）
import warnings
warnings.filterwarnings("ignore")

# 載入預訓練模型和分詞器
model_name = "gpt2"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    clean_up_tokenization_spaces=True  # 抑制 FutureWarning
)

# 設置填充標記
tokenizer.pad_token = tokenizer.eos_token

# 定義 LoRA 配置
# 根據 GPT-2 架構調整 target_modules（注意力層）
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,  # 增加 rank 以提升表現（可調整）
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["h.0.attn.c_attn", "h.1.attn.c_attn", "h.0.attn.c_proj", "h.1.attn.c_proj"],  # 擴展至更多層
)

# 應用 PEFT 模型
peft_model = get_peft_model(model, lora_config)

# 擴展或使用更大的訓練資料集
# 範例資料（可替換為真實資料集）
train_data = [
    "Q: What is 2 + 2? Let's think step by step. First, we know that 2 is a number. Adding another 2 to it gives us 4.",
    "Q: What is the capital of France? Let's think step by step. France is a country in Europe. The capital of France is Paris.",
    "Q: If a train leaves the station at 3 PM and travels at 60 mph, what time will it reach a station 180 miles away? Let's think step by step. The train travels 60 miles in one hour. To travel 180 miles, it will take 3 hours. Therefore, it will reach the station at 6 PM.",
    "Q: What is 5 + 3? Let's think step by step. Starting with 5, adding 3 more gives us 8.",
    "Q: What is the capital of Japan? Let's think step by step. Japan is an island nation in Asia. Its capital is Tokyo.",
    "Q: If a car drives at 40 mph for 120 miles, how long does it take? Let's think step by step. The car travels 40 miles in one hour. For 120 miles, it takes 120 / 40 = 3 hours."
]

# 自訂資料集類別
class ReasoningDataset(Dataset):
    def __init__(self, data, tokenizer, max_length=128):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        inputs = self.tokenizer(
            self.data[idx],
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt"
        )
        inputs = {k: v.squeeze(0) for k, v in inputs.items()}
        inputs["labels"] = inputs["input_ids"].clone()  # 為因果語言模型添加標籤
        return inputs

# 創建資料集和 DataLoader
train_dataset = ReasoningDataset(train_data, tokenizer)
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)

# 定義訓練參數
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    num_train_epochs=10,  # 增加 epoch 數以適應 LoRA 訓練
    learning_rate=2e-4,  # 為 LoRA 設定適當的學習率
    save_steps=10,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=1,  # 更頻繁地記錄進度
    evaluation_strategy="steps",  # 添加評估策略
    eval_steps=10,  # 每 10 步評估一次
    disable_tqdm=False  # 確保進度條啟用（若環境支援）
)

# 數據整理器
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# 創建驗證資料集（範例）
val_data = [
    "Q: What is 4 + 4? Let's think step by step. 4 plus 4 is 8.",
    "Q: What is the capital of Germany? Let's think step by step. Germany is in Europe. Its capital is Berlin."
]
val_dataset = ReasoningDataset(val_data, tokenizer)

# Trainer 實例
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,  # 添加驗證資料集
    data_collator=data_collator
)

# 訓練模型
trainer.train()

# 儲存微調後的模型
peft_model.save_pretrained("./fine_tuned_lora_model")

  0%|          | 0/20 [00:00<?, ?it/s]

{'loss': 3.2861, 'grad_norm': 0.3855910301208496, 'learning_rate': 0.00019, 'epoch': 0.5}
{'loss': 3.2555, 'grad_norm': 0.7794333696365356, 'learning_rate': 0.00018, 'epoch': 1.0}
{'loss': 3.2531, 'grad_norm': 0.437178373336792, 'learning_rate': 0.00017, 'epoch': 1.5}
{'loss': 3.4181, 'grad_norm': 0.5153894424438477, 'learning_rate': 0.00016, 'epoch': 2.0}
{'loss': 3.3429, 'grad_norm': 0.4430333375930786, 'learning_rate': 0.00015000000000000001, 'epoch': 2.5}
{'loss': 3.1305, 'grad_norm': 0.5874897837638855, 'learning_rate': 0.00014, 'epoch': 3.0}
{'loss': 3.1028, 'grad_norm': 0.3364436626434326, 'learning_rate': 0.00013000000000000002, 'epoch': 3.5}
{'loss': 3.6601, 'grad_norm': 0.6103304624557495, 'learning_rate': 0.00012, 'epoch': 4.0}
{'loss': 3.211, 'grad_norm': 0.46137022972106934, 'learning_rate': 0.00011000000000000002, 'epoch': 4.5}
{'loss': 3.1998, 'grad_norm': 0.6468428373336792, 'learning_rate': 0.0001, 'epoch': 5.0}


  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 3.390946865081787, 'eval_runtime': 0.039, 'eval_samples_per_second': 51.277, 'eval_steps_per_second': 25.638, 'epoch': 5.0}
{'loss': 3.0529, 'grad_norm': 0.5617461204528809, 'learning_rate': 9e-05, 'epoch': 5.5}
{'loss': 3.632, 'grad_norm': 1.2615066766738892, 'learning_rate': 8e-05, 'epoch': 6.0}
{'loss': 3.2693, 'grad_norm': 0.6624971032142639, 'learning_rate': 7e-05, 'epoch': 6.5}
{'loss': 3.2838, 'grad_norm': 0.5322293043136597, 'learning_rate': 6e-05, 'epoch': 7.0}
{'loss': 3.2859, 'grad_norm': 0.6307340860366821, 'learning_rate': 5e-05, 'epoch': 7.5}
{'loss': 3.4579, 'grad_norm': 0.7142120003700256, 'learning_rate': 4e-05, 'epoch': 8.0}
{'loss': 3.0286, 'grad_norm': 0.46664878726005554, 'learning_rate': 3e-05, 'epoch': 8.5}
{'loss': 3.2563, 'grad_norm': 1.101199984550476, 'learning_rate': 2e-05, 'epoch': 9.0}
{'loss': 3.3074, 'grad_norm': 0.6449880599975586, 'learning_rate': 1e-05, 'epoch': 9.5}
{'loss': 3.2288, 'grad_norm': 0.9561487436294556, 'learning_rate': 0.0,

  0%|          | 0/1 [00:00<?, ?it/s]

{'eval_loss': 3.3711564540863037, 'eval_runtime': 0.0392, 'eval_samples_per_second': 51.028, 'eval_steps_per_second': 25.514, 'epoch': 10.0}
{'train_runtime': 3.3759, 'train_samples_per_second': 17.773, 'train_steps_per_second': 5.924, 'train_loss': 3.283142387866974, 'epoch': 10.0}


In [5]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig

# 載入分詞器和基礎模型
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # 確保填充標記設置

# 載入基礎模型
base_model = AutoModelForCausalLM.from_pretrained(model_name)

# 載入 LoRA 配置和適配器
config = PeftConfig.from_pretrained("./fine_tuned_lora_model")
model = PeftModel.from_pretrained(base_model, "./fine_tuned_lora_model")

# 設置模型為評估模式
model.eval()

# 定義推理函數
def generate_response(prompt, max_length=50):
    # 將輸入轉換為模型可用的格式
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
    
    # 生成輸出
    with torch.no_grad():  # 禁用梯度計算
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,  # 使用隨機採樣生成
            temperature=0.7,  # 控制生成的多樣性
            top_k=50,  # 控制生成時的候選詞數
        )
    
    # 解碼輸出
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# 測試推理
prompt = "Q: What is 2 + 2? Let's think step by step."
response = generate_response(prompt)
print(f"Prompt: {prompt}")
print(f"Response: {response}")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Q: What is 2 + 2? Let's think step by step.
Response: Q: What is 2 + 2? Let's think step by step.

S: It's 2.

N: And now you're going to see it.

S: Actually, let's say you're a doctor.


In [7]:
prompts = ["Q: What is 2 + 2? Let's think step by step.", "Q: What is the capital of France?"]
inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True, max_length=512)
outputs = model.generate(**inputs, max_length=50)
responses = [tokenizer.decode(out, skip_special_tokens=True) for out in outputs]
responses

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


["Q: What is 2 + 2? Let's think step by step.\n\nA: 2 + 2 = 2 + 2 = 2 + 2 = 2 + 2 = 2 + 2 = 2 + 2 = 2 + 2 = 2 + 2",
 'Q: What is the capital of France?\n\nA: The capital of France is Paris.\n\nQ: What is the capital of France?\n\nA: The capital of France is Paris.\n\n']