In [1]:
from google.colab import drive

# Mount point for Google Drive inside the Colab VM; later paths are built from it.
PATH_DRIVE = '/content/drive'
drive.mount(PATH_DRIVE)

Mounted at /content/drive


In [2]:
import os
import pandas as pd

from datetime import datetime
from pathlib import Path

# Root folder of the user's Drive; the 'data' and 'models' folders are resolved under it.
PATH_BASE = Path(PATH_DRIVE)/'MyDrive'
# List the top-level Drive entries as a quick check that the mount is readable.
print(os.listdir(PATH_BASE))

['문서', '사진', '설문', 'drawio', 'Colab Notebooks', 'data', 'models']


In [3]:
! pip3 install -U datasets transformers evaluate sentencepiece

import torch
import datasets

from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, EarlyStoppingCallback
from evaluate import load

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.9.0-py3-none-any.whl (462 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m462.8/462.8 KB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers
  Downloading transformers-4.26.0-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m104.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 KB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentencepiece
  Downloading sentencepiece-0.1.97-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m82.8 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess
  Downloading multi

In [4]:
NAME_MODEL = 'cross-encoder/nli-deberta-v3-base'

# The run folder name embeds the launch time in whole epoch seconds.
now = datetime.now()
timestamp = now.timestamp()
NAME_DIR = f'mednli_deberta_{int(timestamp)}'
PATH_MODELS = PATH_BASE/'models'/NAME_DIR

# data_dir points the dataset loader at the 'data' folder on Drive.
dataset = datasets.load_dataset('bigbio/mednli', 'mednli_source', data_dir=PATH_BASE/'data')

# Tokenizer and classifier are both loaded from the same pretrained checkpoint.
tokenizer = AutoTokenizer.from_pretrained(NAME_MODEL)
model = AutoModelForSequenceClassification.from_pretrained(NAME_MODEL)

Downloading builder script:   0%|          | 0.00/7.79k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/1.77k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/19.3k [00:00<?, ?B/s]



Downloading and preparing dataset mednli/mednli_source to /root/.cache/huggingface/datasets/bigbio___mednli/mednli_source-61f28563cb5c4805/1.0.0/f18e31046c4bf68bbdc505fbf3dab924a4ea60082a7f41346e3a84b54f453535...


Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Dataset mednli downloaded and prepared to /root/.cache/huggingface/datasets/bigbio___mednli/mednli_source-61f28563cb5c4805/1.0.0/f18e31046c4bf68bbdc505fbf3dab924a4ea60082a7f41346e3a84b54f453535. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/738M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/417 [00:00<?, ?B/s]

Downloading (…)"spm.model";:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/18.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/156 [00:00<?, ?B/s]



In [5]:
def label_encode(data):
    """Convert one example's ``gold_label`` string into an integer class id.

    Args:
        data: Mapping with a ``'gold_label'`` key whose value is
            ``'contradiction'``, ``'entailment'`` or ``'neutral'``.

    Returns:
        A new dict ``{'label': <int>}`` using 0 for contradiction, 1 for
        entailment and 2 for neutral.

    Raises:
        KeyError: If ``'gold_label'`` is absent or holds any other value.
    """
    # The position in this tuple is the class id.
    class_names = ('contradiction', 'entailment', 'neutral')
    lookup = {name: idx for idx, name in enumerate(class_names)}
    return {'label': lookup[data['gold_label']]}


def tokenize(data):
    """Encode sentence pairs with the module-level ``tokenizer``.

    Args:
        data: Mapping with ``'sentence1'`` and ``'sentence2'`` entries, which
            are passed to the tokenizer as the first and second text inputs.

    Returns:
        The tokenizer's output for the pairs, requested as PyTorch tensors.
    """
    # Every pair is truncated or padded to the model's maximum position count,
    # so all encoded sequences come out at the same fixed length.
    options = dict(
        max_length=model.config.max_position_embeddings,
        padding='max_length',
        truncation=True,
        return_tensors='pt',
    )
    return tokenizer(data['sentence1'], data['sentence2'], **options)


# Metric modules already loaded, keyed by metric name.
_METRIC_CACHE = {}


def _get_metric(name):
    """Return the ``evaluate`` metric called ``name``, loading it only on first use."""
    if name not in _METRIC_CACHE:
        _METRIC_CACHE[name] = load(name)
    return _METRIC_CACHE[name]


def compute_metrics(y):
    """Compute accuracy and macro-averaged precision, recall and F1.

    Args:
        y: A ``(logits, labels)`` pair. ``logits`` must support
            ``argmax(axis=1)``; the arg-max along axis 1 is used as the
            predicted class for each row.

    Returns:
        A dict merging the outputs of the ``accuracy``, ``precision``,
        ``recall`` and ``f1`` metrics; the last three are computed with
        ``average='macro'``.
    """
    logits, labels = y
    preds = logits.argmax(axis=1)

    # This function runs on every evaluation pass, so the metric modules are
    # fetched through the cache instead of being re-loaded each time.
    result = {}
    result.update(
        _get_metric('accuracy').compute(predictions=preds, references=labels)
    )
    for metric in ('precision', 'recall', 'f1'):
        result.update(
            _get_metric(metric).compute(
                predictions=preds,
                references=labels,
                average='macro'
            )
        )

    return result


In [6]:
# Column names to keep; every original column not listed here is dropped
# once the label and token columns have been added.
keep = {'input_ids', 'token_type_ids', 'attention_mask', 'label'}
cols = [name for name in dataset.column_names['test'] if name not in keep]

dataset = (
    dataset
    .map(label_encode)             # per example: adds the integer 'label'
    .map(tokenize, batched=True)   # per batch: adds the tokenizer outputs
    .remove_columns(cols)
)

  0%|          | 0/11232 [00:00<?, ?ex/s]

  0%|          | 0/1422 [00:00<?, ?ex/s]

  0%|          | 0/1395 [00:00<?, ?ex/s]

  0%|          | 0/12 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

In [7]:
numEpoch = 3
numBatch = 8
# Approximate number of optimizer steps for the whole run (single device, no accumulation).
numStep = dataset.num_rows['train'] * numEpoch / numBatch

args = TrainingArguments(
    run_name=NAME_DIR,
    num_train_epochs=numEpoch,
    per_device_train_batch_size=numBatch,
    per_device_eval_batch_size=numBatch*4,
    learning_rate=2e-5,
    weight_decay=1e-3,
    # Warm up over the first 1/15 of the run. `warmup_ratio` used to be set
    # here as well, but a positive `warmup_steps` overrides it, so it has
    # been removed; the effective schedule is unchanged.
    warmup_steps=int(numStep/15),
    save_total_limit=3,
    save_steps=100,
    logging_steps=50,
    save_strategy='steps',
    evaluation_strategy='steps',
    output_dir=PATH_MODELS/'weights',
    logging_dir=PATH_MODELS/'logs',
    seed=0,
    # The checkpoint with the best validation F1 is restored when training ends.
    metric_for_best_model='f1',
    load_best_model_at_end=True
)
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
    compute_metrics=compute_metrics,
    # Stop once the tracked metric fails to improve for 3 consecutive evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

In [8]:
# Create the run folder, including any missing parent, and tolerate it already existing.
PATH_MODELS.mkdir(parents=True, exist_ok=True)

# Store the training arguments next to the results of this run.
(PATH_MODELS/'args.json').write_text(args.to_json_string(), encoding='utf-8')

trainer.train()
trainer.save_model(PATH_MODELS/'weights')
# The Trainer was created without a tokenizer, so save_model() writes only the
# model files; save the tokenizer too so the folder can be loaded on its own.
tokenizer.save_pretrained(PATH_MODELS/'weights')

***** Running training *****
  Num examples = 11232
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 4212
  Number of trainable parameters = 184424451


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
100,1.05,0.724653,0.784229,0.785132,0.784229,0.78376
200,0.6248,0.503933,0.802151,0.80808,0.802151,0.804136
300,0.5905,0.514262,0.81147,0.811596,0.81147,0.81149
400,0.5573,0.480257,0.81362,0.81264,0.81362,0.812297
500,0.5646,0.442855,0.830108,0.840885,0.830108,0.830765
600,0.5243,0.463941,0.830824,0.832544,0.830824,0.83135
700,0.4877,0.457727,0.833692,0.833971,0.833692,0.833262
800,0.5128,0.417193,0.837993,0.842077,0.837993,0.839111
900,0.4944,0.393531,0.845161,0.84647,0.845161,0.845353
1000,0.4063,0.475387,0.847312,0.849171,0.847312,0.848043


***** Running Evaluation *****
  Num examples = 1395
  Batch size = 32


Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.55k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.36k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.77k [00:00<?, ?B/s]

***** Running Evaluation *****
  Num examples = 1395
  Batch size = 32
Saving model checkpoint to /content/drive/MyDrive/models/mednli_deberta_1675172952/weights/checkpoint-200
Configuration saved in /content/drive/MyDrive/models/mednli_deberta_1675172952/weights/checkpoint-200/config.json
Model weights saved in /content/drive/MyDrive/models/mednli_deberta_1675172952/weights/checkpoint-200/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1395
  Batch size = 32
***** Running Evaluation *****
  Num examples = 1395
  Batch size = 32
Saving model checkpoint to /content/drive/MyDrive/models/mednli_deberta_1675172952/weights/checkpoint-400
Configuration saved in /content/drive/MyDrive/models/mednli_deberta_1675172952/weights/checkpoint-400/config.json
Model weights saved in /content/drive/MyDrive/models/mednli_deberta_1675172952/weights/checkpoint-400/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1395
  Batch size = 32
***** Running Evaluation *****
  Num

In [9]:
# Evaluate on the test split and keep the metrics as a one-row table.
result = trainer.evaluate(dataset['test'])
df = pd.DataFrame([result])

# Save the metrics inside the run folder, then display them.
df.to_csv(PATH_MODELS/'result.csv', index=False, encoding='utf-8')
df

***** Running Evaluation *****
  Num examples = 1422
  Batch size = 32


Unnamed: 0,eval_loss,eval_accuracy,eval_precision,eval_recall,eval_f1,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch
0,0.440221,0.836146,0.83588,0.836146,0.835976,68.3004,20.82,0.659,1.28
