In [1]:
from google.colab import drive

PATH_DRIVE = '/content/drive'
drive.mount(PATH_DRIVE)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
import pandas as pd

from datetime import datetime
from pathlib import Path

PATH_BASE = Path(PATH_DRIVE)/'MyDrive'
print(os.listdir(PATH_BASE))

['Colab Notebooks', 'data', 'Projects', 'models']


In [3]:
! pip3 install -U datasets transformers evaluate sentencepiece

# import torch
import datasets

from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, EarlyStoppingCallback
from evaluate import load

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [4]:
now = datetime.now()
timestamp = datetime.timestamp(now)

NAME_MODEL = 'roberta-base'
NAME_DIR = f"mednli_{NAME_MODEL.split('/')[-1]}_{int(timestamp)}"

PATH_MODELS = PATH_BASE/'models'/NAME_DIR

dataset = datasets.load_dataset(
    'bigbio/mednli', 
    'mednli_source',
    data_dir=PATH_BASE/'data'
)
model = AutoModelForSequenceClassification.from_pretrained(NAME_MODEL, num_labels=3)
tokenizer = AutoTokenizer.from_pretrained(NAME_MODEL)



  0%|          | 0/3 [00:00<?, ?it/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifi

In [5]:
def label_encode(data):
    labels = {'contradiction': 0, 'entailment': 1, 'neutral': 2}
    key = data['gold_label']
    data = {'label': labels[key]}

    return data


def tokenize(data):
    data = tokenizer(
        data['sentence1'],
        data['sentence2'],
        max_length=model.config.max_position_embeddings,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )

    return data


def compute_metrics(y):
    logits, labels = y
    preds = logits.argmax(axis=1)
    metrics = 'precision', 'recall', 'f1'
    result = {}

    result.update(
        load('accuracy').compute(
            predictions=preds,
            references=labels
        )
    )

    for metric in metrics:
        result.update(
            load(metric).compute(
                predictions=preds,
                references=labels, 
                average='macro'
            )
        )

    return result


In [6]:
cols = [
    'input_ids', 
    'token_type_ids',
    'attention_mask', 
    'label'
]
cols = [col for col in dataset.column_names['test'] if col not in cols]

dataset = dataset.map(label_encode)
dataset = dataset.map(tokenize, batched=True)
dataset = dataset.remove_columns(cols)
dataset = dataset.shuffle(seed=23)



  0%|          | 0/12 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

In [7]:
numEpoch = 4
numBatch = 16
numStep = dataset.num_rows['train'] * numEpoch / numBatch

args = TrainingArguments(
    run_name=NAME_DIR,
    num_train_epochs=numEpoch,
    per_device_train_batch_size=numBatch,
    per_device_eval_batch_size=numBatch*4,
    gradient_accumulation_steps=2,
    # eval_accumulation_steps=2,
    learning_rate=2e-5,
    weight_decay=1e-1,
    warmup_ratio=5e-2,
    warmup_steps=int(numStep/20),
    save_total_limit=3,
    save_steps=200,
    logging_steps=50,
    save_strategy='steps',
    evaluation_strategy='steps',
    output_dir=PATH_MODELS/'weights',
    logging_dir=PATH_MODELS/'logs',
    seed=23,
    metric_for_best_model='f1',
    load_best_model_at_end=True
)
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
    compute_metrics=compute_metrics,
    # callbacks = [EarlyStoppingCallback(early_stopping_patience=5)]
)

In [8]:
if not os.path.exists(PATH_MODELS):
    os.mkdir(PATH_MODELS)

with open(PATH_MODELS/'args.json', 'w', encoding='utf-8') as f:
    text = args.to_json_string()
    f.write(text)

trainer.train()
trainer.save_model(PATH_MODELS/'weights')

***** Running training *****
  Num examples = 11232
  Num Epochs = 4
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 2
  Total optimization steps = 1404
  Number of trainable parameters = 124647939


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
50,1.1033,1.098194,0.333333,0.111111,0.333333,0.166667
100,1.1033,1.093572,0.333333,0.111111,0.333333,0.166667
150,0.9541,0.780788,0.658065,0.660094,0.658065,0.658258
200,0.7787,0.665653,0.724014,0.725159,0.724014,0.724434
250,0.6769,0.625388,0.726165,0.750888,0.726165,0.724533
300,0.6519,0.593243,0.74552,0.750626,0.74552,0.744872
350,0.628,0.585533,0.758423,0.759705,0.758423,0.75819
400,0.5653,0.58469,0.753405,0.76067,0.753405,0.7491
450,0.5166,0.541337,0.78638,0.786525,0.78638,0.785468
500,0.5592,0.511967,0.791398,0.796128,0.791398,0.792852


***** Running Evaluation *****
  Num examples = 1395
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))
***** Running Evaluation *****
  Num examples = 1395
  Batch size = 64
  _warn_prf(average, modifier, msg_start, len(result))
***** Running Evaluation *****
  Num examples = 1395
  Batch size = 64
***** Running Evaluation *****
  Num examples = 1395
  Batch size = 64
Saving model checkpoint to /content/drive/MyDrive/models/mednli_roberta-base_1675647475/weights/checkpoint-200
Configuration saved in /content/drive/MyDrive/models/mednli_roberta-base_1675647475/weights/checkpoint-200/config.json
Model weights saved in /content/drive/MyDrive/models/mednli_roberta-base_1675647475/weights/checkpoint-200/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1395
  Batch size = 64
***** Running Evaluation *****
  Num examples = 1395
  Batch size = 64
***** Running Evaluation *****
  Num examples = 1395
  Batch size = 64
***** Running Evaluation *****
  Num e

In [9]:
result = trainer.evaluate(dataset['test'])
df = pd.DataFrame.from_dict([result])

df.to_csv(PATH_MODELS/'result.csv', index=False, encoding='utf-8')
df

***** Running Evaluation *****
  Num examples = 1422
  Batch size = 64


Unnamed: 0,eval_loss,eval_accuracy,eval_precision,eval_recall,eval_f1,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch
0,0.576671,0.793952,0.794957,0.793952,0.794118,52.3543,27.161,0.439,4.0
