# 시드 통일하기

In [1]:
import torch
from transformers import set_seed


# Fix the random seed through the transformers helper so repeated runs start from the same state.
set_seed(42)

# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

# 모델 불러오기

In [2]:
# Extra keyword arguments forwarded to `from_pretrained`: a 3-label head whose
# label names are simply the integer class ids, in both mapping directions.
model_config = dict(
    num_labels=3,
    id2label={class_id: class_id for class_id in range(3)},
    label2id={class_id: class_id for class_id in range(3)},
)

In [3]:
from transformers import BertForSequenceClassification
from tokenization_kobert import KoBertTokenizer


# Load the pretrained "monologg/kobert" encoder with a sequence-classification head
# configured by model_config. The load-time warning shows that classifier.weight and
# classifier.bias are not in the checkpoint and are newly initialized, so the model
# must be fine-tuned before its predictions mean anything.
model = BertForSequenceClassification.from_pretrained("monologg/kobert", **model_config)
# Tokenizer class comes from the tokenization_kobert module; model_max_length=512 sets
# the tokenizer's maximum sequence length. The checkpoint declares 'BertTokenizer', which
# is why a class-mismatch warning is printed on load.
tokenizer = KoBertTokenizer.from_pretrained("monologg/kobert", model_max_length=512)

# from transformers import ElectraForSequenceClassification, ElectraTokenizer


# model = ElectraForSequenceClassification.from_pretrained("monologg/koelectra-base-v3-discriminator", **model_config)
# tokenizer = ElectraTokenizer.from_pretrained("monologg/koelectra-base-v3-discriminator", model_max_length=512)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at monologg/kobert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'BertTokenizer'. 
The class this function is called from is 'KoBertTokenizer'.


# 하이퍼 파라미터 설정

In [18]:
# Number of classes (same value as num_labels in model_config).
num_classes = 3

# Tunable training knobs - adjust as needed.
# NOTE(review): the TrainingArguments cell passes its own literal batch size (16) and
# learning rate (1e-4) instead of reading batch_size / lr from here - confirm which
# values are the intended ones.
batch_size, epochs = 64, 150
lr = 0.00005  # i.e. 5e-5

In [5]:
# 옵티마이저와 스케줄러
# 원하는 대로 고쳐서 사용

# from torch.optim import AdamW
# from transformers import AdamW, get_linear_schedule_with_warmup


# LENGTH_OF_TRAIN_DATA = 40242
# num_training_steps = ((LENGTH_OF_TRAIN_DATA - 1) // batch_size + 1) * epochs
# optimizer = AdamW(model.parameters(), lr=lr)
# scheduler = get_linear_schedule_with_warmup(optimizer, int(num_training_steps * 0.1), num_training_steps)

# 만약 이 스케줄러를 사용할 경우,
# optimizer.step() 바로 다음에
# scheduler.step()을 호출해야 함

# f1 score 계산하기

In [6]:
# 1. torchmetrics 사용
# requirements에 넣어놨으니 따로 설치할 필요 없음

# from torchmetrics import F1


# f1_score = F1(num_classes=num_classes)

# # 검증 단계에서 사용
# for inputs, labels in val_loader:
#     # 대충 코드
#     output = model(inputs)  # 대충 아웃풋
#     pred = torch.argmax(output, dim=1)
#     batch_f1 = f1_score(pred, labels)
#     print(batch_f1)

# f1 = f1_score.compute()
# print(f1)

In [7]:
# 2. datasets 사용
# requirements에 넣어놨으니 따로 설치할 필요 없음

# from datasets import load_metric


# f1_score = load_metric("f1")

# for inputs, labels in val_loader:
#     # 대충 코드
#     output = model(inputs)
#     pred = torch.argmax(output, dim=1)
#     f1_score.add_batch(predictions=pred, references=labels)

# f1 = f1_score.compute(average="macro")  # 3 classes: the default average="binary" is rejected for multiclass labels
# print(f1)

# 훈련

In [8]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score


def compute_metrics(p):
    """Turn raw evaluation outputs into accuracy and macro-averaged F1.

    Args:
        p: An object that unpacks into ``(predictions, labels)``. The predictions
            are reduced with ``argmax`` over axis 1, so they are assumed to be
            per-class scores with classes on axis 1 (TODO confirm against the
            trainer's output).

    Returns:
        dict: ``{"accuracy": ..., "f1": ...}`` where ``f1`` uses ``average="macro"``.
    """
    scores, references = p
    # Highest-scoring class index per example.
    predicted = np.argmax(scores, axis=1)

    return {
        "accuracy": accuracy_score(y_true=references, y_pred=predicted),
        "f1": f1_score(y_true=references, y_pred=predicted, average="macro"),
    }

In [9]:
from data_loader import get_data_loaders


# Build the train / validation / test splits with the project's data_loader helper,
# tokenizing with the tokenizer loaded earlier.
# NOTE(review): return_loader=False presumably makes the helper return dataset objects
# rather than DataLoaders (train_data / val_data are later passed to the trainer as
# train_dataset / eval_dataset) - confirm in data_loader.py.
# test_data is not referenced in the cells visible here.
train_data, val_data, test_data = get_data_loaders(tokenizer, return_loader=False)

Using custom data configuration default-39d2926b465b3552
Reusing dataset csv (C:\Users\cglab\.cache\huggingface\datasets\csv\default-39d2926b465b3552\0.0.0\9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff)
Loading cached processed dataset at C:\Users\cglab\.cache\huggingface\datasets\csv\default-39d2926b465b3552\0.0.0\9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff\cache-9f07b4232fde7889.arrow
Loading cached processed dataset at C:\Users\cglab\.cache\huggingface\datasets\csv\default-39d2926b465b3552\0.0.0\9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff\cache-c31526eb1e815f72.arrow
Loading cached processed dataset at C:\Users\cglab\.cache\huggingface\datasets\csv\default-39d2926b465b3552\0.0.0\9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff\cache-88634bd530dd35db.arrow


In [10]:
# Run name; it doubles as the relative directory that receives checkpoints
# (output_dir) and the final saved model.
model_name = "KoBERT"
save_path = f"{model_name}"

In [20]:
from transformers import TrainingArguments, EarlyStoppingCallback

from trainer import ImbalancedSamplerTrainer


# Training configuration.
# - Evaluate, log and checkpoint every 1000 optimizer steps.
# - Track macro F1 (the "f1" key returned by compute_metrics) as the model-selection
#   metric and reload the best checkpoint when training finishes normally.
# NOTE(review): batch size (16) and learning rate (1e-4) are literals here and differ
# from batch_size / lr in the hyperparameter cell - confirm which are intended.
args = TrainingArguments(
    output_dir=save_path,
    overwrite_output_dir=True,
    evaluation_strategy="steps",
    # Previously left unset and silently taken from logging_steps (see the
    # "using `logging_steps` to initialize `eval_steps`" startup message).
    # Pinned explicitly so it stays aligned with save_steps, which
    # load_best_model_at_end relies on.
    eval_steps=1000,
    num_train_epochs=150,
    logging_steps=1000,
    save_steps=1000,
    seed=42,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_ratio=0.1,
    adafactor=True,
    lr_scheduler_type="cosine_with_restarts",
    learning_rate=1e-4,
    # Keep today's behavior (report to every installed integration) explicit; the
    # library warns that the implicit default changes in v5.
    report_to="all",
)

# ImbalancedSamplerTrainer comes from the project's trainer module.
# NOTE(review): presumably a Trainer subclass that rebalances class sampling - confirm.
trainer = ImbalancedSamplerTrainer(
    model=model,
    args=args,
    train_dataset=train_data,
    eval_dataset=val_data,
    compute_metrics=compute_metrics,
    # Stop early once the tracked metric has failed to improve for 20 evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=20)],
)

using `logging_steps` to initialize `eval_steps` to 1000
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [12]:
# Start fine-tuning from scratch with the configured trainer.
# NOTE(review): the recorded output for this cell reports "Num Epochs = 15", while the
# TrainingArguments cell now sets num_train_epochs=150 - the output predates that edit.
trainer.train()

***** Running training *****
  Num examples = 40242
  Num Epochs = 15
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 37740


Step,Training Loss,Validation Loss,Accuracy,F1
1000,0.7767,0.403411,0.841011,0.61981
2000,0.4538,0.404911,0.834526,0.58987
3000,0.3751,0.474701,0.828712,0.642871
4000,0.3458,0.29291,0.89356,0.677131
5000,0.339,0.542163,0.829383,0.606952
6000,0.2906,0.456177,0.868962,0.685099
7000,0.2522,0.426785,0.891995,0.683266
8000,0.2565,0.515162,0.845259,0.661122
9000,0.219,0.405914,0.890206,0.696532
10000,0.193,0.523567,0.886852,0.709768


***** Running Evaluation *****
  Num examples = 4472
  Batch size = 16
Saving model checkpoint to KoBERT\checkpoint-1000
Configuration saved in KoBERT\checkpoint-1000\config.json
Model weights saved in KoBERT\checkpoint-1000\pytorch_model.bin
***** Running Evaluation *****
  Num examples = 4472
  Batch size = 16
Saving model checkpoint to KoBERT\checkpoint-2000
Configuration saved in KoBERT\checkpoint-2000\config.json
Model weights saved in KoBERT\checkpoint-2000\pytorch_model.bin
***** Running Evaluation *****
  Num examples = 4472
  Batch size = 16
Saving model checkpoint to KoBERT\checkpoint-3000
Configuration saved in KoBERT\checkpoint-3000\config.json
Model weights saved in KoBERT\checkpoint-3000\pytorch_model.bin
***** Running Evaluation *****
  Num examples = 4472
  Batch size = 16
Saving model checkpoint to KoBERT\checkpoint-4000
Configuration saved in KoBERT\checkpoint-4000\config.json
Model weights saved in KoBERT\checkpoint-4000\pytorch_model.bin
***** Running Evaluation ***

TrainOutput(global_step=32000, training_loss=0.1864209749698639, metrics={'train_runtime': 20590.5299, 'train_samples_per_second': 29.316, 'train_steps_per_second': 1.833, 'total_flos': 1.346698668225577e+17, 'train_loss': 0.1864209749698639, 'epoch': 12.72})

In [12]:
# Continue the earlier run from a saved checkpoint instead of restarting; the recorded
# output shows it loading KoBERT\checkpoint-32000 and finishing the 15-epoch schedule.
trainer.train(resume_from_checkpoint=True)

Loading model from KoBERT\checkpoint-32000).
***** Running training *****
  Num examples = 40242
  Num Epochs = 15
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 37740
  Continuing training from checkpoint, will skip to saved global_step
  Continuing training from epoch 12
  Continuing training from global step 32000
  Will skip the first 12 epochs then the first 1808 batches in the first epoch. If this takes a lot of time, you can add the `--ignore_data_skip` flag to your launch command, but you will resume the training on data already seen by your model.


  0%|          | 0/1808 [00:00<?, ?it/s]

Didn't find an RNG file, if you are resuming a training that was launched in a distributed fashion, reproducibility is not guaranteed.


Step,Training Loss,Validation Loss,Accuracy,F1
33000,0.0417,0.505744,0.92576,0.73109
34000,0.0343,0.502743,0.926655,0.738556
35000,0.0286,0.504852,0.925984,0.735917
36000,0.0445,0.498505,0.926431,0.733765
37000,0.0379,0.500769,0.92576,0.733335


***** Running Evaluation *****
  Num examples = 4472
  Batch size = 16
Saving model checkpoint to KoBERT\checkpoint-33000
Configuration saved in KoBERT\checkpoint-33000\config.json
Model weights saved in KoBERT\checkpoint-33000\pytorch_model.bin
***** Running Evaluation *****
  Num examples = 4472
  Batch size = 16
Saving model checkpoint to KoBERT\checkpoint-34000
Configuration saved in KoBERT\checkpoint-34000\config.json
Model weights saved in KoBERT\checkpoint-34000\pytorch_model.bin
***** Running Evaluation *****
  Num examples = 4472
  Batch size = 16
Saving model checkpoint to KoBERT\checkpoint-35000
Configuration saved in KoBERT\checkpoint-35000\config.json
Model weights saved in KoBERT\checkpoint-35000\pytorch_model.bin
***** Running Evaluation *****
  Num examples = 4472
  Batch size = 16
Saving model checkpoint to KoBERT\checkpoint-36000
Configuration saved in KoBERT\checkpoint-36000\config.json
Model weights saved in KoBERT\checkpoint-36000\pytorch_model.bin
***** Running Ev

TrainOutput(global_step=37740, training_loss=0.005669276374448969, metrics={'train_runtime': 3709.028, 'train_samples_per_second': 162.746, 'train_steps_per_second': 10.175, 'total_flos': 1.5882315234315264e+17, 'train_loss': 0.005669276374448969, 'epoch': 15.0})

In [21]:
# Resume once more after the epoch budget was raised from 15 to 150 (recorded output:
# 377400 total optimization steps, continuing from checkpoint-37000).
# NOTE(review): in the recorded run, accuracy collapsed to ~0.02 from step 43000 onward.
# Presumably the warmup / cosine schedule is rebuilt over the new, 10x longer step
# count, putting the learning rate back near its peak on an already-converged model -
# confirm before extending a finished run this way.
trainer.train(resume_from_checkpoint=True)

Loading model from KoBERT\checkpoint-37000).
***** Running training *****
  Num examples = 40242
  Num Epochs = 150
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 377400
  Continuing training from checkpoint, will skip to saved global_step
  Continuing training from epoch 14
  Continuing training from global step 37000
  Will skip the first 14 epochs then the first 1776 batches in the first epoch. If this takes a lot of time, you can add the `--ignore_data_skip` flag to your launch command, but you will resume the training on data already seen by your model.


  0%|          | 0/1776 [00:00<?, ?it/s]

Didn't find an RNG file, if you are resuming a training that was launched in a distributed fashion, reproducibility is not guaranteed.


Step,Training Loss,Validation Loss,Accuracy,F1
38000,0.1648,0.639004,0.901163,0.723148
39000,0.1519,0.470221,0.919499,0.729682
40000,0.1939,0.448014,0.900492,0.708161
41000,0.1525,0.422228,0.921735,0.73741
42000,0.1368,0.393923,0.924195,0.736866
43000,0.7001,1.40279,0.019902,0.013009
44000,1.1316,1.147557,0.117844,0.070281
45000,1.1227,1.112371,0.019902,0.013009


***** Running Evaluation *****
  Num examples = 4472
  Batch size = 16
Saving model checkpoint to KoBERT\checkpoint-38000
Configuration saved in KoBERT\checkpoint-38000\config.json
Model weights saved in KoBERT\checkpoint-38000\pytorch_model.bin
***** Running Evaluation *****
  Num examples = 4472
  Batch size = 16
Saving model checkpoint to KoBERT\checkpoint-39000
Configuration saved in KoBERT\checkpoint-39000\config.json
Model weights saved in KoBERT\checkpoint-39000\pytorch_model.bin
***** Running Evaluation *****
  Num examples = 4472
  Batch size = 16
Saving model checkpoint to KoBERT\checkpoint-40000
Configuration saved in KoBERT\checkpoint-40000\config.json
Model weights saved in KoBERT\checkpoint-40000\pytorch_model.bin
***** Running Evaluation *****
  Num examples = 4472
  Batch size = 16
Saving model checkpoint to KoBERT\checkpoint-41000
Configuration saved in KoBERT\checkpoint-41000\config.json
Model weights saved in KoBERT\checkpoint-41000\pytorch_model.bin
***** Running Ev

KeyboardInterrupt: 

In [22]:
# Write the trainer's current model (config + weights) into save_path.
# NOTE(review): the preceding training cell ended with KeyboardInterrupt, so the best
# checkpoint was presumably never reloaded; this saves whatever weights are in memory,
# which per the recorded metrics may be the collapsed model - confirm.
trainer.save_model(output_dir=save_path)

Saving model checkpoint to KoBERT
Configuration saved in KoBERT\config.json
Model weights saved in KoBERT\pytorch_model.bin


In [23]:
# Persist the trainer's bookkeeping state alongside the saved model.
# NOTE(review): presumably written as trainer_state.json under output_dir - confirm.
trainer.save_state()