In [1]:
from numba import cuda
device = cuda.get_current_device()
device.reset()

In [2]:
!nvidia-smi

Wed Jul 26 01:50:33 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.182.03   Driver Version: 470.182.03   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   70C    P0    33W /  70W |      3MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [3]:
from datasets import load_dataset, DatasetDict
from transformers import AutoTokenizer


class DataSet():
    def __init__(self, dataset_name, model_name):
        super(DataSet, self).__init__()
        
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        
        dataset = self._set(dataset_name)
        
        self.train = dataset['train']
        self.test = dataset['test']
        self.valid = dataset['valid']
                                                       
    def transform(self, data):
        return self.tokenizer(
            data['document'],
            truncation=True,
            padding='max_length',
            return_token_type_ids=False,
        )
       
        
    def _set(self, dataset_name):
        data = load_dataset(dataset_name)
        train_valid = data['train'].train_test_split(test_size=0.2)
                
        return DatasetDict({
            'train': train_valid['train'],
            'valid': train_valid['test'],
            'test': data['test']
        }).map(self.transform, batched=True)

In [4]:
dataset_name = 'nsmc'
model_name = 'klue/bert-base'
output_dir = './results'
    
dataset = DataSet(dataset_name, model_name)

Using custom data configuration default
Reusing dataset nsmc (/aiffel/.cache/huggingface/datasets/nsmc/default/1.1.0/bfd4729bf1a67114e5267e6916b9e4807010aeb238e4a3c2b95fbfa3a014b5f3)


  0%|          | 0/2 [00:00<?, ?it/s]

Loading cached split indices for dataset at /aiffel/.cache/huggingface/datasets/nsmc/default/1.1.0/bfd4729bf1a67114e5267e6916b9e4807010aeb238e4a3c2b95fbfa3a014b5f3/cache-a3c98fba040dae08.arrow and /aiffel/.cache/huggingface/datasets/nsmc/default/1.1.0/bfd4729bf1a67114e5267e6916b9e4807010aeb238e4a3c2b95fbfa3a014b5f3/cache-4552e18e170dc7b2.arrow
Loading cached processed dataset at /aiffel/.cache/huggingface/datasets/nsmc/default/1.1.0/bfd4729bf1a67114e5267e6916b9e4807010aeb238e4a3c2b95fbfa3a014b5f3/cache-eaa3076a0c27d81a.arrow
Loading cached processed dataset at /aiffel/.cache/huggingface/datasets/nsmc/default/1.1.0/bfd4729bf1a67114e5267e6916b9e4807010aeb238e4a3c2b95fbfa3a014b5f3/cache-4be8ebfa5b267832.arrow
Loading cached processed dataset at /aiffel/.cache/huggingface/datasets/nsmc/default/1.1.0/bfd4729bf1a67114e5267e6916b9e4807010aeb238e4a3c2b95fbfa3a014b5f3/cache-8bd89995cb27b354.arrow


In [5]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import Trainer, TrainingArguments

from sklearn.metrics import accuracy_score


class Classifier():
    def __init__(self, model_name, dataset, training_arguments):
        super(Classifier, self).__init__()
        
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
        
        self.dataset = dataset
        self.trainer = self._set(training_arguments)
    
    
    def compute_metrics(self, pred):
        labels = pred.label_ids
        preds = pred.predictions.argmax(-1)
        return { 'accuracy': accuracy_score(labels, preds) }
    
    
    def _set(self, training_arguments):
        return Trainer(
            model=self.model,           
            args=training_arguments,           
            train_dataset=self.dataset.train,
            eval_dataset=self.dataset.valid,       
            compute_metrics=self.compute_metrics,
        )
    
    
    def fine_tuning(self):
        return self.trainer.train()
        
    
    def evalutate(self):
        return self.trainer.evaluate(self.dataset.test)

## 시도 1

> batch_size 8

In [6]:
try_1 = Classifier(
    model_name,
    dataset,
    TrainingArguments(
        output_dir, 
        evaluation_strategy="epoch",
        learning_rate=2e-5,   
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=1,
        warmup_steps=1000, 
        weight_decay=0.01,                 
        fp16=True,
    ))

try_1.fine_tuning()

Some weights of the model checkpoint at klue/bert-base were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized

Epoch,Training Loss,Validation Loss,Accuracy
1,0.2546,0.297956,0.9022


  nn.utils.clip_grad_norm_(
Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin
  nn.utils.clip_grad_norm_(
Saving model checkpoint to ./results/checkpoint-1000
Configuration saved in ./results/checkpoint-1000/config.json
Model weights saved in ./results/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to ./results/checkpoint-1500
Configuration saved in ./results/checkpoint-1500/config.json
Model weights saved in ./results/checkpoint-1500/pytorch_model.bin
Saving model checkpoint to ./results/checkpoint-2000
Configuration saved in ./results/checkpoint-2000/config.json
Model weights saved in ./results/checkpoint-2000/pytorch_model.bin
Saving model checkpoint to ./results/checkpoint-2500
Configuration saved in ./results/checkpoint-2500/config.json
Model weights saved in ./results/checkpoint-2500/pytorch_model.bin
Saving model checkpoint to ./results/check

TrainOutput(global_step=15000, training_loss=0.32372142079671223, metrics={'train_runtime': 7111.5482, 'train_samples_per_second': 16.874, 'train_steps_per_second': 2.109, 'total_flos': 3.15733266432e+16, 'train_loss': 0.32372142079671223, 'epoch': 1.0})

In [7]:
try_1.evalutate()

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document.
***** Running Evaluation *****
  Num examples = 50000
  Batch size = 8


{'eval_loss': 0.2993691861629486,
 'eval_accuracy': 0.90182,
 'eval_runtime': 1894.595,
 'eval_samples_per_second': 26.391,
 'eval_steps_per_second': 3.299,
 'epoch': 1.0}

## 시도 2

> accumulation_steps, gradient_checkpointing 적용

In [8]:
try_2 = Classifier(
    model_name,
    dataset,
    TrainingArguments(
        output_dir, 
        evaluation_strategy="epoch",
        learning_rate=2e-5,   
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        gradient_accumulation_steps=2,
        eval_accumulation_steps=2,
        num_train_epochs=1,
        warmup_steps=1000, 
        weight_decay=0.01,                 
        gradient_checkpointing=True,
        fp16=True,
    ))

try_2.fine_tuning()

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at /aiffel/.cache/huggingface/transformers/fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.99b3298ed554f2ad731c27cdb11a6215f39b90bc845ff5ce709bb4e74ba45621
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token

Epoch,Training Loss,Validation Loss,Accuracy
1,0.2402,0.25193,0.902667


  nn.utils.clip_grad_norm_(
Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin
  nn.utils.clip_grad_norm_(
Saving model checkpoint to ./results/checkpoint-1000
Configuration saved in ./results/checkpoint-1000/config.json
Model weights saved in ./results/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to ./results/checkpoint-1500
Configuration saved in ./results/checkpoint-1500/config.json
Model weights saved in ./results/checkpoint-1500/pytorch_model.bin
Saving model checkpoint to ./results/checkpoint-2000
Configuration saved in ./results/checkpoint-2000/config.json
Model weights saved in ./results/checkpoint-2000/pytorch_model.bin
Saving model checkpoint to ./results/checkpoint-2500
Configuration saved in ./results/checkpoint-2500/config.json
Model weights saved in ./results/checkpoint-2500/pytorch_model.bin
  nn.utils.clip_grad_norm_(
Saving model c

TrainOutput(global_step=7500, training_loss=0.29642653605143227, metrics={'train_runtime': 8343.3863, 'train_samples_per_second': 14.383, 'train_steps_per_second': 0.899, 'total_flos': 3.15733266432e+16, 'train_loss': 0.29642653605143227, 'epoch': 1.0})

In [9]:
try_2.evalutate()

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document.
***** Running Evaluation *****
  Num examples = 50000
  Batch size = 8


{'eval_loss': 0.25194472074508667,
 'eval_accuracy': 0.90088,
 'eval_runtime': 1896.8296,
 'eval_samples_per_second': 26.36,
 'eval_steps_per_second': 3.295,
 'epoch': 1.0}

## 시도 3

> group_by_length 적용

In [None]:
try_3 = Classifier(
    model_name,
    dataset,
    TrainingArguments(
        output_dir, 
        evaluation_strategy="epoch",
        learning_rate=2e-5,   
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
#         gradient_accumulation_steps=2,
#         eval_accumulation_steps=2,
        num_train_epochs=1,
        warmup_steps=1000, 
        weight_decay=0.01,                 
#         gradient_checkpointing=True,
        fp16=True,
        group_by_length=True
    ))

try_3.fine_tuning()

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/klue/bert-base/resolve/main/config.json from cache at /aiffel/.cache/huggingface/transformers/fbd0b2ef898c4653902683fea8cc0dd99bf43f0e082645b913cda3b92429d1bb.99b3298ed554f2ad731c27cdb11a6215f39b90bc845ff5ce709bb4e74ba45621
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token

Epoch,Training Loss,Validation Loss


  nn.utils.clip_grad_norm_(


In [None]:
try_3.evalutate()