In [None]:
pip install transformers

In [None]:
pip install datasets

In [6]:
import numpy as np
import pandas as pd

from transformers import AutoTokenizer, DataCollatorWithPadding, TFAutoModelForSequenceClassification, set_seed
from datasets import load_dataset, load_metric

from tensorflow.keras.optimizers.schedules import PolynomialDecay
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy

In [7]:
# Name of the pretrained checkpoint; reused for both the tokenizer and the model.
checkpoint = 'bert-base-uncased'
# Tokenizer loaded from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# The "glue" dataset with its "mrpc" configuration; the "train" and
# "validation" splits are indexed further down in the notebook.
raw_datasets = load_dataset("glue", "mrpc")

def tokenize_function(example):
    """Tokenize the "sentence1"/"sentence2" fields of `example` as a pair.

    Both fields are handed to the module-level `tokenizer` together, with
    truncation enabled, and the tokenizer's result is returned unchanged.
    """
    first_sentences = example["sentence1"]
    second_sentences = example["sentence2"]
    return tokenizer(first_sentences, second_sentences, truncation=True)

# Run `tokenize_function` over the raw datasets; batched=True makes `map`
# pass batches of examples to the function instead of single examples.
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)

# Collator built from the tokenizer and configured to return TensorFlow
# tensors; it is passed as `collate_fn` when the tf.data datasets are built.
# NOTE(review): presumably pads each batch to its longest sequence — confirm
# against the DataCollatorWithPadding documentation.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")

Reusing dataset glue (/root/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
# Experiment: fine-tune a fresh sequence classifier from `checkpoint` on the
# MRPC data once per batch size (8, 16, 32, 64) and record the validation
# accuracy and F1 of every run. The three result lists are consumed by the
# cell that builds the summary DataFrame.
batch_sizes = []
accuracy_score = []
f1_score = []

# Loop-invariant settings, set up once instead of on every iteration.
SEED = 42
num_epochs = 3
# NOTE(review): `load_metric` is deprecated in newer `datasets` releases in
# favour of the separate `evaluate` package — confirm against the installed
# version before upgrading.
metric = load_metric("glue", "mrpc")
validation_labels = raw_datasets["validation"]["label"]

for exponent in range(3, 7):

    # Plain Python int (2**3 .. 2**6) rather than a NumPy scalar.
    batch_s = 2 ** exponent

    # Re-seed before each run so every batch size starts from the same RNG
    # state; otherwise differences between runs mix batch-size effects with
    # random initialisation/shuffling noise.
    set_seed(SEED)

    tf_train_dataset = tokenized_datasets["train"].to_tf_dataset(
        columns = ["attention_mask", "input_ids", "token_type_ids"],
        label_cols = ["labels"],
        shuffle = True,
        collate_fn = data_collator,
        batch_size = batch_s)

    # Validation data is not shuffled so predictions stay aligned with
    # `validation_labels` when the metric is computed below.
    tf_validation_dataset = tokenized_datasets["validation"].to_tf_dataset(
        columns = ["attention_mask", "input_ids", "token_type_ids"],
        label_cols = ["labels"],
        shuffle = False,
        collate_fn = data_collator,
        batch_size = batch_s)


    # Model: a fresh two-label classifier for every batch size, so runs do not
    # inherit weights from each other.
    model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels = 2)


    # Creating new optimizer: the learning rate decays from 5e-5 to 0 over the
    # total number of training steps (batches per epoch * epochs).
    num_train_steps = len(tf_train_dataset) * num_epochs

    lr_scheduler = PolynomialDecay(initial_learning_rate = 5e-5, end_learning_rate = 0.0, decay_steps = num_train_steps)

    opt = Adam(learning_rate = lr_scheduler)


    # Compile and Fit (the model outputs logits, hence from_logits=True).
    model.compile(optimizer = opt, loss = SparseCategoricalCrossentropy(from_logits=True), metrics = ["accuracy"])

    model.fit(tf_train_dataset, validation_data = tf_validation_dataset, epochs = num_epochs)


    # Predictions: take the highest-scoring class per validation example.
    preds = model.predict(tf_validation_dataset)["logits"]

    class_preds = np.argmax(preds, axis=1)

    m = metric.compute(predictions = class_preds, references = validation_labels)

    batch_sizes.append(batch_s)
    accuracy_score.append(m['accuracy'])
    f1_score.append(m['f1'])

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3
Epoch 2/3
Epoch 3/3


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3
Epoch 2/3
Epoch 3/3


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3
Epoch 2/3
Epoch 3/3


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3
Epoch 2/3
Epoch 3/3


In [12]:
# Summary table: one row per batch size with its validation accuracy and F1.
# Built in a single constructor call instead of mutating an empty frame
# column by column; column order matches the dict's insertion order.
batch_size_impact = pd.DataFrame({
    'Batch size': batch_sizes,
    'Accuracy': accuracy_score,
    'F1': f1_score,
})

In [13]:
# Bare last expression: displays the batch-size results table as cell output.
batch_size_impact

Unnamed: 0,Batch size,Accuracy,F1
0,8,0.828431,0.880952
1,16,0.855392,0.897391
2,32,0.867647,0.906897
3,64,0.852941,0.895105
