# 03장 모델 성능평가

머신러닝(딥러닝)의 학습이 잘 되었는지를 평가할 수 있는 여러 가지 성과지표로는 정확도, 재현율, 정밀도, F1값이 있습니다.

이번 장에서는 머신러닝의 성과지표를 각각 코드로 구현하고, 사이킷런 라이브러리를 활용한 분류 리포트를 생성하는 방법을 알아보겠습니다.

In [19]:
import random
from datasets import load_dataset

# Load the IMDB dataset and grab its predefined train / test splits.
imdb_dataset = load_dataset("imdb")
train_iter, test_iter = imdb_dataset["train"], imdb_dataset["test"]

# Fix the RNG so the 1000-example subsets are reproducible.
random.seed(7)

# Materialise each split as a list, then draw 1000 random examples from it
# (train first, then test, so the RNG is consumed in a fixed order).
train_list, test_list = list(train_iter), list(test_iter)
train_lists_small = random.sample(train_list, 1000)
test_lists_small = random.sample(test_list, 1000)

# Split the sampled records into parallel text / label lists.
# Labels are kept exactly as stored in the dataset (no remapping).
train_texts = [example['text'] for example in train_lists_small]
train_labels = [example['label'] for example in train_lists_small]

test_texts = [example['text'] for example in test_lists_small]
test_labels = [example['label'] for example in test_lists_small]


# train_texts = []
# train_labels = []

# for _train in train_lists_small:
#     # IMDB 데이터의 기존 레이블 2를 1로 변경 기존 레이블 1을 0으로 변경
#     train_labels.append(1 if _train['label']==2 else 0)
#     train_texts.append(_train['text'])

# test_texts = []
# test_labels = []
# for _test in test_lists_small:
#     test_labels.append(1 if _test['label']==2 else 0)
#     test_texts.append(_test['text'])



In [20]:
import torch
from transformers import DistilBertTokenizerFast
from sklearn.model_selection import train_test_split

# Hold out 20% of the sampled training reviews as a validation set
# (fixed random_state so the split is reproducible).
_split = train_test_split(train_texts, train_labels, test_size=0.2, random_state=2)
train_texts, val_texts, train_labels, val_labels = _split

tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

# Tokenize every split with the same settings: truncate over-long reviews and
# pad the sequences of a split to a common length.
train_encodings, val_encodings, test_encodings = (
    tokenizer(texts, truncation=True, padding=True)
    for texts in (train_texts, val_texts, test_texts)
)

class IMDbDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs tokenizer encodings with their labels.

    The class only stores data and serves items by index, so it derives from
    ``torch.utils.data.Dataset`` (the interface ``DataLoader`` expects) rather
    than from a tokenizer class.
    """

    def __init__(self, encodings, labels):
        """Store the encodings and labels.

        Args:
            encodings: Mapping from feature name (e.g. ``input_ids``,
                ``attention_mask``) to a sequence holding one entry per
                example.
            labels: Sequence of labels, aligned index-by-index with
                ``encodings``.
        """
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx: int) -> dict:
        """Return the example at position ``idx``.

        Args:
            idx: Index of the example.

        Returns:
            A dict with one tensor per encoding key plus a ``labels`` tensor
            holding the label of the example.
        """
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self) -> int:
        """Return the number of examples (one per label)."""
        return len(self.labels)

# Wrap each (encodings, labels) pair in an IMDbDataset: train, validation, test.
train_dataset, val_dataset, test_dataset = (
    IMDbDataset(encodings, labels)
    for encodings, labels in (
        (train_encodings, train_labels),
        (val_encodings, val_labels),
        (test_encodings, test_labels),
    )
)

In [21]:
# Print the label tensor of every validation example as a quick sanity check.
for sample in val_dataset:
    label = sample['labels']
    print(label)

tensor(0)
tensor(1)
tensor(1)
tensor(1)
tensor(0)
tensor(0)
tensor(1)
tensor(0)
tensor(1)
tensor(1)
tensor(0)
tensor(1)
tensor(1)
tensor(1)
tensor(0)
tensor(1)
tensor(1)
tensor(1)
tensor(0)
tensor(0)
tensor(0)
tensor(1)
tensor(0)
tensor(0)
tensor(1)
tensor(0)
tensor(0)
tensor(1)
tensor(1)
tensor(1)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(1)
tensor(1)
tensor(1)
tensor(1)
tensor(1)
tensor(0)
tensor(0)
tensor(0)
tensor(1)
tensor(0)
tensor(1)
tensor(1)
tensor(1)
tensor(1)
tensor(0)
tensor(1)
tensor(1)
tensor(0)
tensor(0)
tensor(1)
tensor(1)
tensor(1)
tensor(1)
tensor(1)
tensor(0)
tensor(0)
tensor(1)
tensor(1)
tensor(0)
tensor(1)
tensor(1)
tensor(1)
tensor(1)
tensor(1)
tensor(0)
tensor(1)
tensor(1)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(0)
tensor(1)
tensor(1)
tensor(0)
tensor(1)
tensor(0)
tensor(1)
tensor(1)
tensor(0)
tensor(1)
tensor(1)
tensor(1)
tensor(0)
tensor(0)
tensor(1)
tensor(0)
tensor(0)
tensor(1)
tensor(0)
tensor(1)
tensor(1)


In [22]:
from transformers import TrainingArguments
from transformers import DistilBertForSequenceClassification
from transformers import pipeline

# pipeline.model(DistilBertForSequenceClassification)
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased")

# Hyper-parameters collected in one place, then handed to TrainingArguments.
# NOTE(review): `training_args` is not referenced by the manual training loop
# visible in this notebook.
_training_config = {
    'output_dir': './results',
    'num_train_epochs': 8,
    'per_device_train_batch_size': 16,
    'per_device_eval_batch_size': 64,
    'warmup_steps': 500,
    'weight_decay': 0.01,
    'logging_dir': './logs',
    'logging_steps': 10,
}
training_args = TrainingArguments(**_training_config)
def test_inference(model, tokenizer):
    """Classify three fixed example sentences and return readable labels.

    Args:
        model: Sequence-classification model. It is called with
            ``input_ids`` and ``attention_mask`` and must return an object
            exposing ``['logits']`` of shape (batch, num_classes).
        tokenizer: Callable returning ``input_ids`` and ``attention_mask``
            lists for a batch of sentences.

    Returns:
        list[str]: ``'negative'`` or ``'positive'`` for each example
        sentence, in input order.
    """
    sentences = ["I feel fantastic", "My life is going something wrong",
                 "I'm angry."]
    input_tokens = tokenizer(sentences, truncation=True, padding=True)

    # Run on whatever device the model lives on instead of relying on a
    # module-level `device` variable being defined before the call.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    input_ids = torch.tensor(input_tokens['input_ids']).to(target_device)
    # The batch is padded, so the attention mask is required; without it the
    # model would attend to the [PAD] tokens of the shorter sentences.
    attention_mask = torch.tensor(input_tokens['attention_mask']).to(target_device)

    # Pure inference: no gradients needed.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)

    # Class index -> name. The Hugging Face `imdb` dataset uses
    # 0 = negative and 1 = positive, and labels are fed to the model unchanged.
    label_dict = {0: 'negative', 1: 'positive'}

    predictions = torch.argmax(outputs['logits'], dim=1).tolist()
    return [label_dict[i] for i in predictions]
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
# model = model.to(device)

from transformers import Trainer


from torch.utils.data import DataLoader
from transformers import DistilBertForSequenceClassification, AdamW
from transformers import DistilBertTokenizerFast


# 1) Load the pretrained tokenizer and model, then move the model to the
#    selected device.
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
print(tokenizer)
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')
model.to(device)

# Baseline predictions before fine-tuning (the classification head is still
# freshly initialised at this point).
print(test_inference(model, tokenizer))

# 2) Instantiate the DataLoader.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# 3) Define the optimizer.
# Uses the PyTorch AdamW implementation: `transformers.AdamW` is deprecated in
# favour of it. `eps` and `weight_decay` are pinned to the values the
# transformers implementation used by default so the hyper-parameters stay
# the same.
optim = torch.optim.AdamW(model.parameters(), lr=5e-5, eps=1e-6, weight_decay=0.0)  # lr = 0.00005

# 4) Switch the model to training mode
#    (this affects dropout and batch normalisation).
model.train()

# Per-step training loss, stored as plain Python floats.
losses = []

for epoch in range(8):
    print(f'epoch: {epoch}')
    for batch in train_loader:
        # 6) Reset the gradients accumulated by the optimizer.
        optim.zero_grad()

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        # 7) Forward pass; passing `labels` makes the model return the loss.
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)

        # 8) Loss is the first element of the model output.
        loss = outputs[0]
        # Store the scalar value only: appending the tensor itself would keep
        # a device tensor (and its autograd references) alive for every step.
        losses.append(loss.item())

        # 9) Backpropagation.
        loss.backward()

        # 10) Update the weights.
        optim.step()

# Switch the model to eval mode.
model.eval()

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DistilBertTokenizerFast(name_or_path='distilbert-base-uncased', vocab_size=30522, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['positive', 'positive', 'positive']
epoch: 0




epoch: 1
epoch: 2
epoch: 3
epoch: 4
epoch: 5
epoch: 6
epoch: 7


DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


정확도 추론 006에서 생성된 test_dataset과 문제 013에서 사용한 distilbert 모델을 사용하여
추론을 수행하고 정확도를 구하여라.

In [23]:
# Switch the model to eval mode.
model.eval()
# `l` collects the predicted class index of every test review, in the same
# order as `test_texts`.
l = []

# Inference only: disable autograd so no computation graph is built per call.
with torch.no_grad():
    for test_text in test_texts:
        # Encode the single review.
        input_tokens = tokenizer([test_text], truncation=True, padding=True)
        input_ids = torch.tensor(input_tokens['input_ids']).to(device)
        attention_mask = torch.tensor(input_tokens['attention_mask']).to(device)

        # Run the model on the encoded review.
        outputs = model(input_ids, attention_mask=attention_mask)

        # Take the index of the largest logit along the class dimension and
        # convert the one-element tensor to a plain Python int with item().
        l.append(torch.argmax(outputs['logits'], dim=1).item())

# Number of predictions that match the ground-truth label.
correct_cnt = sum(pred == ans for pred, ans in zip(l, test_labels))

# Accuracy = correct predictions / total number of test examples.
print(correct_cnt/len(test_labels))

0.86


In [24]:
# Confusion-matrix counters (class 1 is treated as the positive class).
tp = tn = fp = fn = 0

# Walk predictions and ground-truth labels in lockstep.
for pred, ans in zip(l, test_labels):
    if pred != ans:
        # Wrong prediction: a predicted 0 is a false negative,
        # a predicted 1 is a false positive.
        if pred == 0:
            fn += 1
        elif pred == 1:
            fp += 1
    elif pred == 1:
        # Correct prediction of class 1.
        tp += 1
    else:
        # Correct prediction of any other class.
        tn += 1


In [26]:
# Each metric falls back to 0.0 when its denominator is zero (e.g. the model
# never predicts the positive class) instead of raising ZeroDivisionError.

# 1. Recall: share of actual positives that were predicted positive.
recall = tp / (tp + fn) if (tp + fn) else 0.0
print(recall)

# 2. Precision: share of predicted positives that are actually positive.
precision = tp / (tp + fp) if (tp + fp) else 0.0
# The original (misspelled) name is kept so existing references still work.
percision = precision
print(percision)

# 3. F1: harmonic mean of precision and recall.
f1 = (2 * percision * recall) / (percision + recall) if (percision + recall) else 0.0
print(f1)

0.8922413793103449
0.8214285714285714
0.8553719008264462


In [None]:
from sklearn.metrics import classification_report

# Build the scikit-learn report (ground truth first, predictions second)
# and print it.
_report = classification_report(test_labels, l)
print(_report)


              precision    recall  f1-score   support

           0       0.90      0.83      0.86       536
           1       0.82      0.89      0.86       464

    accuracy                           0.86      1000
   macro avg       0.86      0.86      0.86      1000
weighted avg       0.86      0.86      0.86      1000

