In [4]:
import os
import pandas as pd
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
import torch
from peft import LoraConfig, get_peft_model
os.environ["WANDB_DISABLED"] = "true"


# --- Configuration ---
os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # 필요에 따라 사용

# Pretrained 모델 로드
model_id = "Qwen/Qwen2.5-0.5B"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map='cuda',
)

tokenizer = AutoTokenizer.from_pretrained(model_id)

tokenizer.pad_token_id = tokenizer.eos_token_id   # 151643
tokenizer.padding_side = "left"                   # 디코더 전용 모델 권장
tokenizer.add_special_tokens({
    'pad_token':'<|pad|>',
    'additional_special_tokens':['<|eot_id|>']
})
model.resize_token_embeddings(len(tokenizer))

if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

# CSV 파일 경로
filename = "/home/dshs-wallga/pgh/regacy/filtered_omni_math_train.csv"
# CSV 파일 로드 (예시 CSV 파일에는 '문제'와 '답' 컬럼이 있다고 가정)
df = pd.read_csv(filename)
df['prompt'] = (
    "Extract the correct answer from the solution. A single integer.\n"
    "solution:\n"
    + df['solution'].fillna('').astype(str).str[-300:]
    + "\nfinal answer: "
)
df['answer'] = df['answer'].fillna('').astype(str)

# 각 항목의 키를 변환하여 새로운 리스트 생성
converted_data = []
for idx, row in df.iterrows():
    new_item = {
        "instruction": row.get("prompt", ""),
        "output": row.get("answer", ""),
        "url": ""  # URL 정보가 없으므로 빈 문자열로 채웁니다.
    }
    converted_data.append(new_item)

# Hugging Face 데이터셋으로 변환
train_dataset = Dataset.from_list(converted_data)

# 확인
print(train_dataset)
for i in range(0, 1911, 100):
    print(train_dataset[i])

def preprocessing_data(examples):
    input_ids, attention_masks, labels = [], [], []
    max_length = 512

    for instr, resp in zip(examples['instruction'], examples['output']):
        # 1) 인코딩
        enc = tokenizer.apply_chat_template(
            [{'role':'user','content':instr}],
            tokenize=True,
            add_generation_prompt=True
        )
        dec = tokenizer(resp + '<|eot_id|>', add_special_tokens=False)['input_ids']

        # 2) 패딩 길이 계산
        pad_len = max_length - len(enc) - len(dec)
        if pad_len < 0:
            dec = dec[: max_length - len(enc)]
            pad_len = 0


        # 3) input_ids, labels, attention_mask 생성
        # Inside preprocessing_data after pad_len calculation and truncation
        ids   = [tokenizer.pad_token_id] * pad_len + enc + dec
        labs  = [-100] * pad_len + [-100] * len(enc) + dec
        mask  = [0] * pad_len + [1] * (len(enc) + len(dec))

        input_ids.append(ids)
        labels.append(labs)
        attention_masks.append(mask)

    return {
        'input_ids': input_ids,
        'attention_mask': attention_masks,
        'labels': labels
    }


train_dataset = train_dataset.map(
    preprocessing_data,
    batched=True, num_proc=2,
    remove_columns=['instruction','output','url']
)
train_dataset.set_format(type="torch",
                         columns=["input_ids","attention_mask","labels"])


training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=15,
    logging_steps=10,
    learning_rate=1e-5,
    remove_unused_columns=False,
    report_to="none",
    logging_strategy='steps',
    label_names=['labels']
)

# Lora Tuning
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=4,
    lora_alpha=8,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.1,
    bias="none",
)

lora_model = get_peft_model(model, peft_config)

# LLM 풀튜닝 VRAM 30GB 사용
# LoRA 사용시 18.6GB 사용됨 (RANK 16, 토큰임베딩 및 lm_head 학습 X)
# 메모리 터질 시 RANK 사이즈 등 조정
trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

trainer.train()
save_directory = "home/dshs-wallga/pgh/qwen_finetuned/omni_2"
lora_model.save_pretrained(
    save_directory,
    save_embedding_layers=True  # auto 대신 True로 직접 지정
)
tokenizer.save_pretrained(save_directory)


Dataset({
    features: ['instruction', 'output', 'url'],
    num_rows: 1911
})
{'instruction': "Extract the correct answer from the solution. A single integer.\nsolution:\nangle DEF\\). Let \\(R'\\) be the reflection of \\(Q\\) over \\(H\\). The homothety centered at \\(D\\) that maps the incircle to the \\(D\\)-excircle also maps \\(R'\\) to \\(P\\), implying that \\(D\\), \\(R'\\), and \\(P\\) are collinear, so \\(R' = R\\).\n\nTherefore, \\(\\frac{HQ}{HR} = 1\\).\n\nThe answer is \\(\\boxed{1}\\).\nfinal answer: ", 'output': '1', 'url': ''}
{'instruction': 'Extract the correct answer from the solution. A single integer.\nsolution:\nered triples. If $a=1$, then we need $b+c=9$, which has 6 solutions for $b, c \\neq 1$; a similar argument for $b$ and $c$ gives a total of 18 such solutions. It is easy to check that all the solutions we found are actually solutions to the original equations. Adding, we find $18+3=21$ total triples.\nfinal answer: ', 'output': '21', 'url': ''}
{'instruc

Map (num_proc=2):   0%|          | 0/1911 [00:00<?, ? examples/s]

Step,Training Loss
10,3.1371
20,3.0024
30,3.016
40,2.8923
50,2.8046
60,2.7849
70,2.6878
80,2.516
90,2.3798
100,2.2452




('home/dshs-wallga/pgh/qwen_finetuned/omni_2/tokenizer_config.json',
 'home/dshs-wallga/pgh/qwen_finetuned/omni_2/special_tokens_map.json',
 'home/dshs-wallga/pgh/qwen_finetuned/omni_2/vocab.json',
 'home/dshs-wallga/pgh/qwen_finetuned/omni_2/merges.txt',
 'home/dshs-wallga/pgh/qwen_finetuned/omni_2/added_tokens.json',
 'home/dshs-wallga/pgh/qwen_finetuned/omni_2/tokenizer.json')

In [3]:
import os
import pandas as pd
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
import torch
from peft import LoraConfig, get_peft_model
os.environ["WANDB_DISABLED"] = "true"

In [6]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft        import PeftModel

# 1) 베이스 모델 로드
base_model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-0.5B",
    torch_dtype=torch.bfloat16,
    device_map="auto"
)

# 2) 토크나이저 로드 후 임베딩 크기 맞춤
tokenizer = AutoTokenizer.from_pretrained("/home/dshs-wallga/pgh/qwen_finetuned/omni_2")
base_model.resize_token_embeddings(len(tokenizer))

# 3) 어댑터 로드
peft_model = PeftModel.from_pretrained(
    base_model,
    "/home/dshs-wallga/pgh/qwen_finetuned/omni_2",
    torch_dtype=torch.bfloat16
)

# 4) 어댑터 병합
merged_model = peft_model.merge_and_unload()

# 5) 전체 모델 저장
merged_model.save_pretrained("/home/dshs-wallga/pgh/qwen_finetuned/omni_2_merged")
tokenizer.save_pretrained("/home/dshs-wallga/pgh/qwen_finetuned/omni_2_merged")


('/home/dshs-wallga/pgh/qwen_finetuned/omni_2_merged/tokenizer_config.json',
 '/home/dshs-wallga/pgh/qwen_finetuned/omni_2_merged/special_tokens_map.json',
 '/home/dshs-wallga/pgh/qwen_finetuned/omni_2_merged/vocab.json',
 '/home/dshs-wallga/pgh/qwen_finetuned/omni_2_merged/merges.txt',
 '/home/dshs-wallga/pgh/qwen_finetuned/omni_2_merged/added_tokens.json',
 '/home/dshs-wallga/pgh/qwen_finetuned/omni_2_merged/tokenizer.json')