In [None]:
pip install transformers

In [None]:
pip install datasets

In [3]:
import numpy as np
import pandas as pd

from transformers import AutoTokenizer, DataCollatorWithPadding, TFAutoModelForSequenceClassification
from datasets import load_dataset, load_metric

from tensorflow.keras.optimizers.schedules import PolynomialDecay
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy, CategoricalCrossentropy, SparseCategoricalCrossentropy

In [6]:
# Name of the pretrained checkpoint; the tokenizer below is loaded from it.
checkpoint = "bert-base-uncased"

# GLUE / MRPC dataset.
raw_datasets = load_dataset("glue", "mrpc")

# Tokenizer loaded from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

def tokenize_function(example):
    """Tokenize `sentence1` and `sentence2` of `example` together, with truncation enabled.

    Uses the module-level `tokenizer` and returns whatever it returns.
    """
    first_sentences = example["sentence1"]
    second_sentences = example["sentence2"]
    return tokenizer(first_sentences, second_sentences, truncation=True)

# Padding collator built on the tokenizer, configured to return TensorFlow tensors.
data_collator = DataCollatorWithPadding(
    tokenizer=tokenizer,
    return_tensors="tf",
)

# Apply the tokenization function over the raw datasets in batched mode.
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)

Reusing dataset glue (/root/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

In [10]:
# Grid search over batch size and learning rate.
# For every (batch size, learning rate) pair a fresh model is loaded from
# `checkpoint`, trained for two epochs, and evaluated on the validation split.
# Results are collected in the four parallel lists below (one entry per run).
batch_sizes = []
learning_rate = []
accuracy_score = []
f1_score = []

# Loop-invariant: neither the metric object nor the validation labels depend
# on the hyperparameters, so load them once instead of on every iteration.
metric = load_metric("glue", "mrpc")
validation_labels = raw_datasets["validation"]["label"]

for bs in np.arange(3, 7):

    # Batch sizes are powers of two: 8, 16, 32, 64 (stored as a plain Python int).
    batch_s = int(pow(2, bs))

    # The input pipelines depend only on the batch size, so build them once
    # per batch size rather than once per learning rate.
    tf_train_dataset = tokenized_datasets["train"].to_tf_dataset(
        columns = ["attention_mask", "input_ids", "token_type_ids"],
        label_cols = ["labels"],
        shuffle = True,
        collate_fn = data_collator,
        batch_size = batch_s)

    # Validation data is not shuffled so predictions line up with
    # `validation_labels` by position.
    tf_validation_dataset = tokenized_datasets["validation"].to_tf_dataset(
        columns = ["attention_mask", "input_ids", "token_type_ids"],
        label_cols = ["labels"],
        shuffle = False,
        collate_fn = data_collator,
        batch_size = batch_s)

    for lr in [1e-3, 1e-4, 1e-5, 1e-6]:

        # Model: reloaded from the checkpoint for every run so runs do not
        # share trained weights.
        model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels = 2)

        # Compile and Fit
        model.compile(optimizer = Adam(learning_rate = lr), loss = SparseCategoricalCrossentropy(from_logits=True), metrics = ['accuracy'])

        model.fit(tf_train_dataset, validation_data = tf_validation_dataset, epochs = 2)

        # Predictions: take the arg-max over the logits as the predicted class.
        preds = model.predict(tf_validation_dataset)["logits"]

        class_preds = np.argmax(preds, axis=1)

        m = metric.compute(predictions = class_preds, references = validation_labels)

        # Record the hyperparameters and scores of this run.
        batch_sizes.append(batch_s)
        learning_rate.append(lr)
        accuracy_score.append(m['accuracy'])
        f1_score.append(m['f1'])

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/2
Epoch 2/2


In [11]:
# Collect the per-run hyperparameters and scores into one table
# (one row per training run, columns in this order).
parameters_impact = pd.DataFrame(
    {
        'Batch size': batch_sizes,
        'Learning Rate': learning_rate,
        'Accuracy': accuracy_score,
        'F1': f1_score,
    }
)

In [12]:
# Display the results table (one row per batch size / learning rate run).
parameters_impact

Unnamed: 0,Batch size,Learning Rate,Accuracy,F1
0,8,0.001,0.683824,0.812227
1,8,0.0001,0.683824,0.812227
2,8,1e-05,0.833333,0.888525
3,8,1e-06,0.718137,0.828614
4,16,0.001,0.683824,0.812227
5,16,0.0001,0.683824,0.812227
6,16,1e-05,0.848039,0.89527
7,16,1e-06,0.703431,0.821797
8,32,0.001,0.683824,0.812227
9,32,0.0001,0.857843,0.9


In [17]:
# Rank the runs by accuracy (best first) and show only the first three columns.
(
    parameters_impact
    .sort_values(by='Accuracy', ascending=False)
    .iloc[:, :3]
)

Unnamed: 0,Batch size,Learning Rate,Accuracy
9,32,0.0001,0.857843
13,64,0.0001,0.855392
6,16,1e-05,0.848039
2,8,1e-05,0.833333
10,32,1e-05,0.818627
14,64,1e-05,0.757353
3,8,1e-06,0.718137
7,16,1e-06,0.703431
0,8,0.001,0.683824
1,8,0.0001,0.683824


**The best accuracy (about 0.858) is obtained with batch_size = 32 and learning_rate = 0.0001.**