# BERT-large

In [None]:
!pip install transformers datasets evaluate

In [None]:
from datasets import load_dataset

# Load the preprocessed CSV files, one per split; the dictionary keys become
# the split names used to index the result in later cells.
dataset = load_dataset(
    "csv",
    data_files={
        "train": "df_train.csv",
        "val": "df_valid.csv",
        "test": "df_test.csv",
    },
)

Downloading and preparing dataset csv/default to /root/.cache/huggingface/datasets/csv/default-2cec45cfe0e5b614/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating val split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/default-2cec45cfe0e5b614/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# Load the tokenizer belonging to the pretrained "bert-large-cased" checkpoint,
# the same checkpoint name the classification model is loaded from below.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-large-cased")

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

In [None]:
# Tokenization function, applied to the dataset in batches via `map`.
def preprocess_function(examples):
    """Tokenize the ``text`` column of a batch of examples.

    Every sequence is truncated and padded to exactly 128 tokens, so all rows
    end up with the same length regardless of how they are batched later.

    Args:
        examples: Mapping with a ``"text"`` entry holding the raw strings of
            the batch.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the batch.
    """
    # No ``return_tensors`` here: the result is written back into the dataset
    # by ``map``, which stores plain columns, so building torch tensors for
    # each batch would only be converted straight back again.
    return tokenizer(
        examples["text"],
        padding="max_length",
        max_length=128,
        truncation=True,
    )

In [None]:
# Label vocabulary: class id -> class name, plus the inverse lookup, which is
# derived from the forward mapping so the two can never drift apart.

id2label = dict(enumerate(["phrase", "passage", "multi"]))
label2id = {name: idx for idx, name in id2label.items()}

In [None]:
# Load pretrained BERT-large with a 3-label sequence-classification head and
# attach the label mappings so the model config carries readable class names.

import torch
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# Use the GPU when one is present; otherwise fall back to CPU instead of
# failing on a hard-coded "cuda" device.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = AutoModelForSequenceClassification.from_pretrained(
    "bert-large-cased", num_labels=3, id2label=id2label, label2id=label2id
).to(device)

Downloading pytorch_model.bin:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

In [None]:
# Run preprocess_function over every split (train, val and test).
# batched=True passes the function batches of examples, so `examples['text']`
# inside it is a list of strings rather than a single string.
tokenized_data = dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/2560 [00:00<?, ? examples/s]

Map:   0%|          | 0/640 [00:00<?, ? examples/s]

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

In [None]:
# Load the accuracy and F1 metric objects; they are reused both for the
# per-epoch validation (compute_metrics) and for the final test scores.

import evaluate
import numpy as np

accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")

def compute_metrics(eval_pred):
    """Score model outputs with macro-averaged F1 and accuracy.

    Args:
        eval_pred: A ``(logits, labels)`` pair. The predicted class of each
            row is the arg-max over the last axis of ``logits``.

    Returns:
        dict with the keys ``"f1"`` (macro average) and ``"accuracy"``.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=-1)

    macro_f1 = f1_metric.compute(
        predictions=predicted_ids, references=references, average="macro"
    )["f1"]
    acc = accuracy_metric.compute(
        predictions=predicted_ids, references=references
    )["accuracy"]

    return dict(f1=macro_f1, accuracy=acc)

In [None]:
# Collator handed to the training Trainer; it pads the examples of a batch
# using the tokenizer. Inputs are already padded to max_length=128 by
# preprocess_function, so there is no additional padding left for it to add.
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

### BERT-large training



In [None]:
import torch 
# Release GPU memory that PyTorch's caching allocator is holding but not
# using, to leave as much room as possible for the training run below.
torch.cuda.empty_cache()

In [None]:
# Fine-tuning setup for BERT-large: evaluate and save a checkpoint once per
# epoch, and reload the best checkpoint when training finishes.

training_args = TrainingArguments(
    output_dir="output1",
    # optimisation schedule
    num_train_epochs=5,
    learning_rate=2e-5,
    weight_decay=0.01,
    # batch sizes
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # evaluation / checkpointing cadence
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # push_to_hub=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    train_dataset=tokenized_data["train"],
    eval_dataset=tokenized_data["val"],
    compute_metrics=compute_metrics,
)

# Kept as the last expression of the cell so the returned TrainOutput is shown.
trainer.train()



Epoch,Training Loss,Validation Loss,F1,Accuracy
1,No log,0.83683,0.578418,0.617188
2,No log,0.797831,0.624725,0.664062
3,No log,0.930667,0.634441,0.659375
4,0.734900,1.160752,0.624729,0.65
5,0.734900,1.273363,0.635593,0.657813


TrainOutput(global_step=800, training_loss=0.5399257183074951, metrics={'train_runtime': 1101.3371, 'train_samples_per_second': 11.622, 'train_steps_per_second': 0.726, 'total_flos': 2982190438809600.0, 'train_loss': 0.5399257183074951, 'epoch': 5.0})

### BERT-large inference


In [None]:
# Reload the fine-tuned weights from disk.
# NOTE: checkpoint-800 is the final training step (global_step=800 in the
# training output above), which is not necessarily the checkpoint selected by
# load_best_model_at_end during training.
model_path = "output1/checkpoint-800"

# Use the GPU when one is present; otherwise fall back to CPU instead of
# failing on a hard-coded "cuda" device.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForSequenceClassification.from_pretrained(
    model_path, num_labels=3, id2label=id2label, label2id=label2id
).to(device)

# A Trainer with default arguments, used here only to run prediction.
test_trainer = Trainer(model)
# predict() returns the raw model outputs, the reference labels and metrics.
raw_pred, labels, metrics = test_trainer.predict(tokenized_data["test"])
# Convert raw scores to class ids: highest-scoring label for each example.
y_pred = np.argmax(raw_pred, axis=1)

In [None]:
# Macro-averaged F1 on the test split (same averaging as in compute_metrics).
# The bare name on the last line makes the notebook display the value.

test_f1 = f1_metric.compute(predictions=y_pred, references=labels, average="macro")["f1"]
test_f1

0.6821537241704321

In [None]:
# Accuracy on the test split, computed from the same predictions as the F1.
# The bare name on the last line makes the notebook display the value.

test_accuracy = accuracy_metric.compute(predictions=y_pred, references=labels)["accuracy"]
test_accuracy

0.68125