In [1]:
%pip install transformers datasets evaluate accelerate scikit-learn sacrebleu sentencepiece

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Python 내장 라이브러리로, 무작위(randomness)를 생성하기 위해 사용
import random
# HuggingFace에서 제공하며, NLP 작업의 평가 지표를 계산하는 데 사용
import evaluate
# 다차원 배열 객체와 이를 처리하기 위한 다양한 수학적 기능을 제공, 주로 수치 계산이나 데이터 분석에 사용
import numpy as np

In [3]:
from datasets import load_dataset
from sklearn.model_selection import train_test_split

# MNLI 데이터셋 로드
dataset = load_dataset("nyu-mll/glue", "mnli")

# Train split에서 Validation 데이터 분리
train_data = dataset["train"]
train_data = train_data.train_test_split(test_size=0.1, seed=42)  # 10%를 validation으로 사용
train_dataset = train_data["train"]
validation_dataset = train_data["test"]

from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

# Tokenizer 및 모델 로드
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=3)

# 데이터 전처리
def preprocess_function(examples):
    return tokenizer(examples["premise"], examples["hypothesis"], truncation=True, padding="max_length", max_length=128)

encoded_train = train_dataset.map(preprocess_function, batched=True)
encoded_validation = validation_dataset.map(preprocess_function, batched=True)
encoded_test = dataset["validation_matched"].map(preprocess_function, batched=True)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
train_dataset[0]

{'premise': 'Afterwards you retire to the changing room for tea or a drink, feeling completely relaxed and rejuvenated.',
 'hypothesis': 'Retiring to the changing room with a drink can be exceedingly stressful. ',
 'label': 2,
 'idx': 99748}

In [5]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
    load_best_model_at_end=True,
    fp16=False,  # 반드시 False로 설정
    logging_dir="./logs",
    logging_steps=10,
)



In [6]:
import evaluate

accuracy = evaluate.load("accuracy")

def compute_metrics(pred):
    predictions, labels = pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

In [7]:
from transformers import EarlyStoppingCallback

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_train,
    eval_dataset=encoded_validation,
)

In [8]:
trainer.train()

  0%|          | 0/66270 [00:00<?, ?it/s]

{'loss': 1.1691, 'grad_norm': 5.249472618103027, 'learning_rate': 1.9996982043156785e-05, 'epoch': 0.0}
{'loss': 1.1687, 'grad_norm': 7.865017890930176, 'learning_rate': 1.9993964086313566e-05, 'epoch': 0.0}
{'loss': 1.1507, 'grad_norm': 8.07104778289795, 'learning_rate': 1.999094612947035e-05, 'epoch': 0.0}
{'loss': 1.1249, 'grad_norm': 5.507595062255859, 'learning_rate': 1.9987928172627133e-05, 'epoch': 0.0}
{'loss': 1.0929, 'grad_norm': 3.3424878120422363, 'learning_rate': 1.9984910215783916e-05, 'epoch': 0.0}
{'loss': 1.0805, 'grad_norm': 6.940298557281494, 'learning_rate': 1.9981892258940697e-05, 'epoch': 0.0}
{'loss': 1.0632, 'grad_norm': 5.082416534423828, 'learning_rate': 1.997887430209748e-05, 'epoch': 0.0}
{'loss': 1.0676, 'grad_norm': 7.014342784881592, 'learning_rate': 1.9975856345254264e-05, 'epoch': 0.0}
{'loss': 1.0472, 'grad_norm': 7.206923007965088, 'learning_rate': 1.9972838388411048e-05, 'epoch': 0.0}
{'loss': 1.0111, 'grad_norm': 6.508641719818115, 'learning_rate': 

  0%|          | 0/2455 [00:00<?, ?it/s]

{'eval_loss': 0.43437081575393677, 'eval_runtime': 784.1518, 'eval_samples_per_second': 50.081, 'eval_steps_per_second': 3.131, 'epoch': 1.0}
{'loss': 0.3426, 'grad_norm': 9.918214797973633, 'learning_rate': 1.3330315376490117e-05, 'epoch': 1.0}
{'loss': 0.2901, 'grad_norm': 6.6270012855529785, 'learning_rate': 1.33272974196469e-05, 'epoch': 1.0}
{'loss': 0.3553, 'grad_norm': 2.1108264923095703, 'learning_rate': 1.3324279462803683e-05, 'epoch': 1.0}
{'loss': 0.384, 'grad_norm': 7.581256866455078, 'learning_rate': 1.3321261505960466e-05, 'epoch': 1.0}
{'loss': 0.281, 'grad_norm': 9.952714920043945, 'learning_rate': 1.331824354911725e-05, 'epoch': 1.0}
{'loss': 0.2511, 'grad_norm': 8.18824291229248, 'learning_rate': 1.3315225592274032e-05, 'epoch': 1.0}
{'loss': 0.3881, 'grad_norm': 10.780470848083496, 'learning_rate': 1.3312207635430814e-05, 'epoch': 1.0}
{'loss': 0.3264, 'grad_norm': 12.03936767578125, 'learning_rate': 1.3309189678587597e-05, 'epoch': 1.0}
{'loss': 0.2903, 'grad_norm':

  0%|          | 0/2455 [00:00<?, ?it/s]

{'eval_loss': 0.450939416885376, 'eval_runtime': 783.5353, 'eval_samples_per_second': 50.12, 'eval_steps_per_second': 3.133, 'epoch': 2.0}
{'loss': 0.1579, 'grad_norm': 11.390565872192383, 'learning_rate': 6.66364870982345e-06, 'epoch': 2.0}
{'loss': 0.2314, 'grad_norm': 4.810942649841309, 'learning_rate': 6.660630752980233e-06, 'epoch': 2.0}
{'loss': 0.2078, 'grad_norm': 7.742758274078369, 'learning_rate': 6.657612796137016e-06, 'epoch': 2.0}
{'loss': 0.1463, 'grad_norm': 6.185676097869873, 'learning_rate': 6.654594839293799e-06, 'epoch': 2.0}
{'loss': 0.2356, 'grad_norm': 4.631962776184082, 'learning_rate': 6.6515768824505815e-06, 'epoch': 2.0}
{'loss': 0.2242, 'grad_norm': 9.778562545776367, 'learning_rate': 6.648558925607364e-06, 'epoch': 2.0}
{'loss': 0.2055, 'grad_norm': 10.022688865661621, 'learning_rate': 6.645540968764147e-06, 'epoch': 2.0}
{'loss': 0.2246, 'grad_norm': 10.732551574707031, 'learning_rate': 6.642523011920931e-06, 'epoch': 2.0}
{'loss': 0.216, 'grad_norm': 0.949

  0%|          | 0/2455 [00:00<?, ?it/s]

{'eval_loss': 0.5667970180511475, 'eval_runtime': 783.8685, 'eval_samples_per_second': 50.099, 'eval_steps_per_second': 3.132, 'epoch': 3.0}
{'train_runtime': 77054.7219, 'train_samples_per_second': 13.76, 'train_steps_per_second': 0.86, 'train_loss': 0.3579422376769978, 'epoch': 3.0}


TrainOutput(global_step=66270, training_loss=0.3579422376769978, metrics={'train_runtime': 77054.7219, 'train_samples_per_second': 13.76, 'train_steps_per_second': 0.86, 'total_flos': 6.974432875576858e+16, 'train_loss': 0.3579422376769978, 'epoch': 3.0})

In [9]:
trainer.evaluate(encoded_test)

  0%|          | 0/614 [00:00<?, ?it/s]

{'eval_loss': 0.4423043131828308,
 'eval_runtime': 197.8466,
 'eval_samples_per_second': 49.609,
 'eval_steps_per_second': 3.103,
 'epoch': 3.0}

In [10]:
trainer.save_model("./model")