## Model Inference

In [1]:
import os,torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging
# 디바이스 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 출력
print('Device:', device)  # 출력결과: cuda
print('Count of using GPUs:', torch.cuda.device_count())   #출력결과: 1 (GPU #2 한개 사용하므로)
print('Current cuda device:', torch.cuda.current_device())  # 출력결과: 0 (수정 필요)

Device: cuda
Count of using GPUs: 4
Current cuda device: 0


In [2]:
new_model= "Coldbrew9/Edentns-DataVortexS-trainWithCombi"
bnb_config = BitsAndBytesConfig(  
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)
tokenizer = AutoTokenizer.from_pretrained(new_model, padding_side="left")
model = AutoModelForCausalLM.from_pretrained(
        new_model,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )
model.config.use_cache = True

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

In [3]:
import pandas as pd
from tqdm import tqdm
test = pd.read_csv('./test.csv')

batch_size=8

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, 
                temperature=0.1,
                # top_k=1,
                # top_p=0.9,
                # repetition_penalty=1.2,
                do_sample=True,
                # num_return_sequences=1,
                #max_length=300,
                max_new_tokens=300,
                batch_size=batch_size,
               )
preds = []
for i in tqdm(range(0, len(test), batch_size)):
    batch_questions = test['질문'][i:i+batch_size].tolist()
    batch_prompts = [f"<s> Question: {q} Answer: </s>" for q in batch_questions]
    batch_results = pipe(batch_prompts)

    for batch_item in batch_results:
        for result in batch_item:  # 배치 결과 내의 각 결과 아이템을 순회
            parsed_ans = result['generated_text']
            preds.append(parsed_ans)

# 결과 출력
for i, (question, ans) in enumerate(zip(test['질문'], preds)):
    print(f"[ {i}번 ] 질문: {question}")
    print(f"[ 답변 ] {ans}\n")

print("총 생성된 답변 개수 : ", len(preds))

  0%|                                                                                            | 0/17 [00:00<?, ?it/s]A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  6%|████▊                                                                            | 1/17 [08:24<2:14:27, 504.19s/it]A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
 12%|█████████▌                                                                       | 2/17 [16:18<2:01:38, 486.56s/it]A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
 18%|██████████████▎                                                                  | 3/17 [24:25<1:53:33, 486.67s/it]A

[ 0번 ] 질문: 방청 페인트의 종류에는 어떤 것들이 있는지 알고 계신가요? 또한, 원목사이딩을 사용하는 것에 어떤 단점이 있을까요?
[ 답변 ] <s> Question: 방청 페인트의 종류에는 어떤 것들이 있는지 알고 계신가요? 또한, 원목사이딩을 사용하는 것에 어떤 단점이 있을까요? Answer: </s> 방청 페인트의 종류로는 주로 방청 페인트, 방청 페인트 왁스, 방청 페인트 에나멜, 방청 페인트 에폭시, 방청 페인트 폴리우레탄 등이 있습니다. 이러한 종류의 방청 페인트는 각각의 특성과 적용 용도에 따라 선택됩니다. 그리고 원목사이딩의 단점은 주로 비용이 많이 들고 유지보수가 어렵다는 점입니다. 또한, 원목사이딩은 습기에 취약하여 변형이 발생할 수 있으며, 내구성이 다른 외장재에 비해 낮다는 점도 단점으로 꼽힙니다. 이러한 단점들을 고려하여 원목사이딩을 선택할 때는 신중한 고려가 필요합니다. 

[ 1번 ] 질문: 도배지에 녹은 자국이 발생하는 주된 원인과 그 해결 방법은 무엇인가요?
[ 답변 ] <s> Question: 도배지에 녹은 자국이 발생하는 주된 원인과 그 해결 방법은 무엇인가요? Answer: </s> 도배지에 녹은 자국이 발생하는 원인은 주로 높은 습도와 높은 온도입니다. 습기가 높을 때 도배지 안쪽의 녹이 녹은 상태로 변하여 도배지 표면에 자국이 생길 수 있습니다. 이러한 상황에서는 습도와 온도를 적절히 조절하고, 환기를 통해 실내 습도를 낮추는 것이 중요합니다. 또한, 녹은 자국이 발생한 부분을 깔끔하게 청소하고 건조시킨 후 재작업을 하는 것이 좋습니다. 그리고 녹은 자국이 발생한 부분을 깔끔하게 청소하고 건조시킨 후 재작업을 하는 것이 좋습니다. 그리고 ��

[ 2번 ] 질문: 큐블럭의 단점을 알려주세요. 또한, 압출법 단열판을 사용하는 것의 장점은 무엇인가요?
[ 답변 ] <s> Question: 큐블럭의 단점을 알려주세요. 또한, 압출법 단열판을 사용하는 것의 장점은 무엇인가요? Answer: </s> 큐블럭은 시공이 어렵고 


