## 모델 테스트: VRAM 16GB 요구

In [None]:
import os
import torch
import pandas as pd
from datasets import load_dataset
from tqdm.auto import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
# Fine-tuned checkpoint directory; the model weights and the tokenizer are
# both loaded from this same path.
model_name = "./results/lr1e-5/checkpoint-315"

# Tokenizer setup: reuse EOS as the padding token (string and id) and pad on
# the left side.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    use_fast=True,
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "left"

# Load the causal LM in bfloat16 and place it on the first CUDA device.
# `use_cache=False` is forwarded to `from_pretrained` exactly as before.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    use_cache=False,
    device_map="cuda:0",
    dtype=torch.bfloat16,
)

In [None]:
# Run beam-search inference over the SFT test set with the loaded checkpoint
# and collect one decoded generation per example in `results`.
epoch = 35
output_path = os.path.join("inference/lr1e-5", f"epoch_{epoch}_results_3beams.csv")

print(f"\nEpoch {epoch} 종료. 테스트 데이터셋 추론 시작...")

results = []

test_dataset = load_dataset("json", data_files="data/sft_test_dataset.json", split="train")

# Generation stops at EOS. The list never changes, so build it once instead
# of on every iteration.
terminators = [tokenizer.eos_token_id]

with torch.no_grad(), torch.amp.autocast("cuda", dtype=torch.bfloat16):
    # Iterating the dataset yields each row once, instead of indexing the
    # same row twice per iteration. tqdm reports progress for the long run.
    for example in tqdm(test_dataset, desc=f"Epoch {epoch} inference"):
        # Only the first two messages form the prompt.
        # NOTE(review): presumably these are the system/user turns and the
        # remainder is the reference answer -- confirm against the data file.
        messages = example["messages"][:2]
        subject_id = example["subject_id"]

        # return_dict=True also yields the attention mask. Because the pad
        # token is the EOS token, generate() cannot reliably infer the mask
        # on its own, so it is passed explicitly below.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(model.device)
        input_ids = inputs["input_ids"]

        outputs = model.generate(
            input_ids,
            attention_mask=inputs["attention_mask"],
            max_new_tokens=1024,
            eos_token_id=terminators,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=False,
            num_beams=3,
        )

        # Drop the prompt tokens so only the newly generated continuation
        # is decoded.
        response = outputs[0][input_ids.shape[-1]:]
        generation = tokenizer.decode(response, skip_special_tokens=True)
        results.append({"subject_id": subject_id, "generation": generation})

In [None]:
# Persist the collected generations as CSV. `to_csv` does not create missing
# parent directories, so make sure the target directory exists first;
# otherwise the write would fail only after the whole inference run.
output_dir = os.path.dirname(output_path)
if output_dir:  # os.makedirs("") raises, so skip when the path has no directory part
    os.makedirs(output_dir, exist_ok=True)
pd.DataFrame(results).to_csv(output_path, index=False)