In [1]:
# IN this notebook we are going to load and finetune it

In [4]:
import tensorflow as tf
from transformers import TFAutoModelForSequenceClassification
from datasets import load_dataset

In [5]:
raw_dataset = load_dataset("glue","mrpc")
raw_dataset

DatasetDict({
    train: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 3668
    })
    validation: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 408
    })
    test: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 1725
    })
})

In [2]:
checkpoint = "bert-base-cased" #picking a model
#loading the model and weights
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint,num_labels=2)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
#compile and add an optimizer
model.compile(optimizer='adam',loss=loss)

model.safetensors:  17%|#6        | 73.4M/436M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
train_dataset=raw_dataset['train']
vali_dataset=raw_dataset['validation']

In [8]:
#training the inputs
model.fit(train_dataset,
          validation_data=vali_dataset,
          epochs=3
)

ValueError: Failed to find data adapter that can handle input: <class 'datasets.arrow_dataset.Dataset'>, <class 'NoneType'>

# full code for finetuning 

In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer

# Load the dataset
raw_dataset = load_dataset('glue', 'mrpc')

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')  # or your preferred model

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(
        examples['sentence1'], 
        examples['sentence2'], 
        padding='max_length', 
        truncation=True,
        return_tensors='tf'
    )

# Apply tokenization to train and validation datasets
tokenized_datasets = raw_dataset.map(tokenize_function, batched=True)

# Remove the original sentence columns and keep the tokenized inputs and labels
tokenized_datasets = tokenized_datasets.remove_columns(['sentence1', 'sentence2', 'idx'])
tokenized_datasets.set_format('tensorflow')


Map:   0%|          | 0/408 [00:00<?, ? examples/s]

In [2]:
# Convert to tensorflow dataset
# Convert the tokenized dataset to TensorFlow Dataset
train_dataset_tf = tokenized_datasets['train'].to_tf_dataset(
    columns=['input_ids', 'attention_mask'],  # Model input features
    label_cols=['label'],                     # Target column
    shuffle=True,
    batch_size=8
)

vali_dataset_tf = tokenized_datasets['validation'].to_tf_dataset(
    columns=['input_ids', 'attention_mask'], 
    label_cols=['label'], 
    shuffle=False,
    batch_size=8
)


Old behaviour: columns=['a'], labels=['labels'] -> (tf.Tensor, tf.Tensor)  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor)  
New behaviour: columns=['a'],labels=['labels'] -> ({'a': tf.Tensor}, {'labels': tf.Tensor})  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor) 


In [None]:
# Train the model
# Assuming 'model' is your pre-trained model
model.fit(train_dataset_tf, validation_data=vali_dataset_tf, epochs=3)


Epoch 1/3
  1/459 [..............................] - ETA: 13:10:59 - loss: 0.6836

# Learining rate scheduing

In [None]:
# this will helps us to train or fine-tune in better way 

In [3]:
#larning rate schedulingal
from tensorflow.keras.optimizers.schedules import PolynomialDecay

In [None]:
num_epochs = 3
num_train_steps = len(train_dataset_tf) * num_epochs
lr_schedular = PolynimialDecay(
    initial_learning_rate = 5e-5,end_learning_rate=0.0,
    decay_steps = num_train_steps
)

In [11]:

# now for use this in efficient way for training we need to pass it with Adam
import tensorflow as tf
from tensorflow.keras.optimizers import Adam

In [None]:
opt = Adam(learning_rate=lr_schedualr) #optimizer
model.complie(loss=loss,optimizer=opt) #compiling with new learning rate schedualr

In [None]:
model.fit(train_dataset_tf,epochs=3)

# Tensorlow Predictions and Metrics

In [None]:
preds = model.predict(tokenized_datasets['validation'])['logits'] # passing tokenized text in this method for prediction
probabilities = tf.nn.softmax(preds)
class_preds = np.argmax(probabilities,axis=1)

In [14]:
#compute the GLUE Metrics
from datasets import load_metric

In [15]:
metric = load_dataset("glue","mrpc")

In [None]:
metric.compute(predictions=class_preds,references=validation_labels)

In [None]:
#native keras metrics way
model.compile(loss=loss,opt=opt,metrics=['accuracy']