# Step-1: Install required Libraries

In [None]:
!pip install transformers datasets

Collecting datasets
  Downloading datasets-3.0.0-py3-none-any.whl.metadata (19 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.0.0-py3-none-any.whl (474 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m474.3/474.3 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (39.9 MB)
[2K 

In [None]:
!pip install datasets



# Step-2: Import required packages

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from datasets import Dataset

from transformers import TFBertForSequenceClassification,BertTokenizer, create_optimizer
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

# Step-3: Read the dataset

In [None]:
# Location of the sampled IMDB reviews CSV.
# NOTE(review): this is a Google Drive path; Drive must already be mounted — no mount cell is visible here.
csv_path = '/content/drive/MyDrive/IMDB_Dataset_sample.csv'
data = pd.read_csv(csv_path)
data.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


# Step-4: Encoding the target column

In [None]:
# Encode the target as integers: positive -> 1, negative -> 0.
# `.get(label, label)` leaves values that are already encoded untouched, so re-running
# this cell is a no-op instead of turning every label into NaN.
label_to_id = {'positive': 1, 'negative': 0}
data['sentiment'] = data['sentiment'].map(lambda label: label_to_id.get(label, label))

# Step-5: Convert the dataset type

In [None]:
# Wrap the pandas DataFrame in a Hugging Face `Dataset` so it can be tokenized with `.map`.
dataset = Dataset.from_pandas(data)
dataset  # last expression: shows the dataset summary (features, num_rows)

Dataset({
    features: ['review', 'sentiment'],
    num_rows: 1114
})

- A BERT model cannot take a Pandas DataFrame as input, so we need to convert it into a type the model pipeline can handle.
- The `datasets` library was created by Hugging Face for this kind of dataset conversion.
- The **Dataset.from_pandas** method converts a Pandas DataFrame into a Hugging Face Dataset.

# Step-6: Load the tokenizer

In [None]:
# Checkpoint name shared by the tokenizer here and the classification model loaded later.
model_name = "bert-base-uncased"
# Load the tokenizer that matches this checkpoint.
tokenizer = BertTokenizer.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]



# Step-7: Tokenize the text

In [None]:
def tokenize_function(example):
    """Tokenize the ``review`` text of a batch of examples.

    Args:
        example: Mapping with a ``"review"`` entry holding the text(s) to encode.

    Returns:
        The tokenizer output for those reviews. Sequences are truncated where
        needed and padded to the maximum length, so every row has the same size.
    """
    reviews = example["review"]
    encoded = tokenizer(reviews, truncation=True, padding="max_length")
    return encoded

In [None]:
# Tokenize every review; batched=True hands tokenize_function a batch of rows per call.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/1114 [00:00<?, ? examples/s]

In [None]:
tokenized_datasets

Dataset({
    features: ['review', 'sentiment', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 1114
})

# Step-8: Split the data

In [None]:
# 80/20 train/test split. A fixed seed makes the shuffle reproducible, so the same rows
# land in train and test on every run and the reported metrics stay comparable.
split_datasets = tokenized_datasets.train_test_split(test_size=0.2, seed=42)

In [None]:
split_datasets

DatasetDict({
    train: Dataset({
        features: ['review', 'sentiment', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 891
    })
    test: Dataset({
        features: ['review', 'sentiment', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 223
    })
})

In [None]:
split_datasets['train']

Dataset({
    features: ['review', 'sentiment', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 891
})

In [None]:
split_datasets['train'].features

{'review': Value(dtype='string', id=None),
 'sentiment': Value(dtype='int64', id=None),
 'input_ids': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None),
 'token_type_ids': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None),
 'attention_mask': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None)}

# Step-9: Convert the data into tensor

In [None]:
# Build the batched tf.data training pipeline: model inputs plus the integer sentiment label.
train_split = split_datasets["train"]
train_dataset = train_split.to_tf_dataset(
    columns=["input_ids", "attention_mask"],
    label_cols=["sentiment"],
    batch_size=16,
    shuffle=True,
)

Old behaviour: columns=['a'], labels=['labels'] -> (tf.Tensor, tf.Tensor)  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor)  
New behaviour: columns=['a'],labels=['labels'] -> ({'a': tf.Tensor}, {'labels': tf.Tensor})  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor) 


In [None]:
train_dataset    # We have to iterate through it to see its content

<_PrefetchDataset element_spec=({'input_ids': TensorSpec(shape=(None, 512), dtype=tf.int64, name=None), 'attention_mask': TensorSpec(shape=(None, 512), dtype=tf.int64, name=None)}, TensorSpec(shape=(None,), dtype=tf.int64, name=None))>

In [None]:
len(train_dataset)

56

In [None]:
print(train_dataset)

<_PrefetchDataset element_spec=({'input_ids': TensorSpec(shape=(None, 512), dtype=tf.int64, name=None), 'attention_mask': TensorSpec(shape=(None, 512), dtype=tf.int64, name=None)}, TensorSpec(shape=(None,), dtype=tf.int64, name=None))>


In [None]:
train_dataset.take(1)  # lazily selects only the first batch (returns a new dataset; iterate it to see the data)

<_TakeDataset element_spec=({'input_ids': TensorSpec(shape=(None, 512), dtype=tf.int64, name=None), 'attention_mask': TensorSpec(shape=(None, 512), dtype=tf.int64, name=None)}, TensorSpec(shape=(None,), dtype=tf.int64, name=None))>

In [None]:
# Print every batch in the training dataset and count them along the way.
count = 0
for count, batch in enumerate(train_dataset, start=1):
    print(batch)
print(f"No of Batches = {count}")

({'input_ids': <tf.Tensor: shape=(16, 512), dtype=int64, numpy=
array([[  101,  2031,  2017, ...,     0,     0,     0],
       [  101,  1996,  2801, ...,     0,     0,     0],
       [  101,  1045,  2018, ...,     0,     0,     0],
       ...,
       [  101,  1045,  1005, ...,     0,     0,     0],
       [  101,  1996, 11552, ...,  2115,  2173,   102],
       [  101,  2066,  3087, ...,  2063,  1012,   102]])>, 'attention_mask': <tf.Tensor: shape=(16, 512), dtype=int64, numpy=
array([[1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1]])>}, <tf.Tensor: shape=(16,), dtype=int64, numpy=array([0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0])>)
({'input_ids': <tf.Tensor: shape=(16, 512), dtype=int64, numpy=
array([[ 101, 1045, 2572, ...,    0,    0,    0],
       [ 101, 2517, 2011, ...,    0,    0,    0],
       [ 101, 2023, 2003, ..., 1013, 1028,  

In [None]:
# Inspect the structure of a single training batch.
for batch in train_dataset.take(1):
    print(batch)
    print(len(batch))   # 2: element 0 is the model input, element 1 is the label tensor
                        # the input is a dict holding input_ids and attention_mask

({'input_ids': <tf.Tensor: shape=(16, 512), dtype=int64, numpy=
array([[  101,  2004,  1996, ...,     0,     0,     0],
       [  101,  2009,  2003, ...,     0,     0,     0],
       [  101,  1045,  2018, ...,     0,     0,     0],
       ...,
       [  101,  2023,  2001, ...,     0,     0,     0],
       [  101, 15540,  1997, ...,     0,     0,     0],
       [  101,  2588, 10523, ...,     0,     0,     0]])>, 'attention_mask': <tf.Tensor: shape=(16, 512), dtype=int64, numpy=
array([[1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0]])>}, <tf.Tensor: shape=(16,), dtype=int64, numpy=array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0])>)
2


In [None]:
# Build the batched tf.data evaluation pipeline; shuffle=False keeps the row order fixed
# so predictions and true labels read from it line up.
test_split = split_datasets["test"]
eval_dataset = test_split.to_tf_dataset(
    columns=["input_ids", "attention_mask"],
    label_cols=["sentiment"],
    batch_size=16,
    shuffle=False,
)

- The shuffle argument determines whether the dataset is shuffled before batching.
- If we shuffle the evaluation dataset, the order of the predictions will no longer match the order of the true labels extracted from it, which leads to incorrect classification metrics.

In [None]:
# cardinality() reports how many batches each tf.data pipeline yields; .numpy() unwraps the scalar tensor.
num_train_batches = train_dataset.cardinality().numpy()
num_eval_batches = eval_dataset.cardinality().numpy()
print(f"Number of training batches: {num_train_batches}")
print(f"Number of evaluation batches: {num_eval_batches}")

Number of training batches: 56
Number of evaluation batches: 14


# Step-10: Load the BERT model

In [None]:
# Load the pretrained BERT checkpoint with a 2-class sequence-classification head.
model = TFBertForSequenceClassification.from_pretrained(model_name, num_labels=2)
model

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


<transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification at 0x7be319841450>

In [None]:
model.summary()

Model: "tf_bert_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bert (TFBertMainLayer)      multiple                  109482240 
                                                                 
 dropout_74 (Dropout)        multiple                  0 (unused)
                                                                 
 classifier (Dense)          multiple                  1538      
                                                                 
Total params: 109483778 (417.65 MB)
Trainable params: 109483778 (417.65 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


1. bert (TFBertMainLayer):
  - Output Shape: multiple (It indicates that this layer produces multiple outputs, typically a sequence of hidden states or pooled output).
  - The 109.48 million parameters represent the weights of the BERT layers and their self-attention mechanisms.
  - Key Tasks:
    - Processing the input tokenized text (input IDs, attention masks).
    - Generating contextualized word embeddings based on the input text, which the classifier can use to make predictions.

2. dropout_74 (Dropout Layer; the numeric suffix is auto-generated and can differ between sessions):
  - Output Shape: multiple (Usually the same shape as the input shape)
  - Parameters: 0 (Dropout layers don't have trainable parameters)
    -  Dropout is a regularization technique used to prevent overfitting. During training, it randomly drops some of the units (by setting them to zero) in the input to the next layer, helping the model generalize better.

3. classifier (Dense Layer):
  - Output Shape: multiple (Output depends on the number of classes for classification, usually 2 for binary classification)
  - Parameters: 1,538
    - Weights: Connecting the BERT output (usually 768-dimensional for BERT-base) to the number of output classes.
    - Biases: One bias term per output class.
    - 768*2+2=1538 params
- Trainable Parameters: 109,483,778 (These are the parameters that will be updated during training).
- Non-trainable Parameters: 0 (There are no frozen layers, meaning all layers are trainable).


# Step-11: Creating an Optimizer

In [None]:
num_epochs = 2

# Total optimizer steps = batches per epoch x epochs; the learning-rate schedule spans this many steps.
steps_per_epoch = len(train_dataset)
num_train_steps = num_epochs * steps_per_epoch

# No warmup: the learning rate starts at init_lr and decays from there.
optimizer, schedule = create_optimizer(
    init_lr=2e-5,
    num_train_steps=num_train_steps,
    num_warmup_steps=0,
)

-  This code creates an optimizer (used for updating model parameters) and a learning rate scheduler (used to control the learning rate during training).
1. init_lr: This is the initial learning rate for the optimizer, set to 2e-5 (which equals 0.00002)
2. Warmup Steps: In some training schedules, the learning rate starts small and gradually increases over the first few steps.
  - After the warmup period, the learning rate typically decays over time.
  - **num_warmup_steps=0** means that no warmup is being used; the learning rate will start at the initial value (2e-5) and decay directly.

# Step-12: Creating a loss function

In [None]:
# from_logits=True: the loss is given the model's raw logits rather than softmax probabilities.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Accuracy computed against integer (non one-hot) labels.
metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]

1. tf.keras.losses.SparseCategoricalCrossentropy: This is a commonly used loss function for multi-class classification tasks where the labels are integers (instead of one-hot encoded vectors).
  - Sparse: This term refers to the fact that the labels are integers
  - Categorical Crossentropy: This is the standard cross-entropy loss used for multi-class classification. It measures the difference between the predicted class probabilities (logits or softmax output) and the true labels.
  - This loss function penalizes the model for making predictions that are far from the true label, and it is minimized during training to improve model accuracy.
  - **from_logits=True**: This means that the model's output is raw logits, not probabilities. A logit is the raw output of the model before any activation function such as softmax is applied (softmax converts logits into probabilities).
2. tf.keras.metrics.SparseCategoricalAccuracy(): This metric is used to evaluate the accuracy of the model's predictions, particularly for multi-class classification tasks with integer labels.
  - This metric calculates the percentage of correct predictions made by the model. It compares the index of the highest predicted probability to the true label

# Step-13: Compile the model

In [None]:
# Attach the optimizer, loss function and metrics to the model before training.
model.compile(optimizer=optimizer, loss=loss_fn, metrics=metrics)

# Step-14: train the model

In [None]:
# Fine-tune for num_epochs; eval_dataset is passed as validation data for monitoring only.
history=model.fit(train_dataset,
                  validation_data=eval_dataset,
                  epochs=num_epochs,
                  verbose=True)

Epoch 1/2


- **validation_data** is a dataset that the model evaluates at the end of each epoch during training, but it is not used for updating the model's weights.
- It is used to monitor the model's performance on data not used for training, which helps detect overfitting and assess how well the model generalizes to unseen data.

# Save the model

In [None]:
# Persist the fine-tuned model to Google Drive.
# NOTE(review): Transformers models are usually saved with `model.save_pretrained(...)`; confirm this Keras save round-trips.
model.save('/content/drive/MyDrive/checkpoints/my_bert_model')

# Load the model

In [None]:
# Reload the saved model from Google Drive.
# NOTE(review): `loaded_model` is not used by the later cells shown here — they keep calling `model`.
loaded_model = tf.keras.models.load_model('/content/drive/MyDrive/checkpoints/my_bert_model')

# Step-15: Model Evaluation

In [None]:
# Score the model on the evaluation batches.
eval_results = model.evaluate(eval_dataset)
# Uncomment to print the raw results (loss first, then the compiled metrics):
#print(f"Evaluation results: {eval_results}")

1. model.evaluate(eval_dataset):
  - This method is used to evaluate the model on a dataset that it has not seen during training, typically the validation or test dataset.
  - It returns the loss and any additional metrics (like accuracy) that were specified when compiling the model.
  - During evaluation, the model runs in inference mode, meaning it processes the data and computes the loss and metrics, but no weight updates are made, and no training occurs.
  - If the dataset is batched, it processes each batch and averages the loss and metric values across all the batches.

2. eval_results:
  - This variable stores the evaluation results. Depending on the model's configuration, it usually includes:
    - Loss: The first value returned is the loss.(SparseCategoricalCrossentropy)
    - Metrics: The subsequent values are the metrics specified when compiling the model.(SparseCategoricalAccuracy)

# Step-16: Model Prediction

In [None]:
# Run inference over the evaluation batches and keep the highest-scoring class per sample.
predictions = model.predict(eval_dataset)
logits = predictions['logits']
best_class = tf.argmax(logits, axis=-1)   # index of the largest logit along the class axis
pred_labels = best_class.numpy()

1. The output of model.predict() contains the predictions made by the model for each sample in the dataset.
2. predictions['logits']:
  - This line extracts the logits from the predictions.
3. tf.argmax(logits, axis=-1):
  - tf.argmax(): This function returns the index of the maximum value along a specified axis.
  - In this case, axis=-1 means it is selecting the index of the largest value along the last axis (which is the class dimension in the logits). This corresponds to the class with the highest predicted score.
4. .numpy(): This converts the TensorFlow tensor output into a NumPy array for easier manipulation and inspection.
5. The 'logits' are passed as an argument to the function tf.argmax(), which looks for the index (class) with the maximum value along the specified axis.

# Step-17: Classification metrics

In [None]:
# Collect the label tensor of every evaluation batch (one tensor per batch); the features are ignored.
true_labels = [labels for _, labels in eval_dataset]

In [None]:
# Only a cell's last expression is echoed, so print the length explicitly.
print(len(pred_labels))
pred_labels

array([0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0,
       0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1,
       0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
       1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1,
       1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1,
       0, 0, 0])

In [None]:
true_labels

[<tf.Tensor: shape=(16,), dtype=int64, numpy=array([0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0])>,
 <tf.Tensor: shape=(16,), dtype=int64, numpy=array([1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1])>,
 <tf.Tensor: shape=(16,), dtype=int64, numpy=array([0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1])>,
 <tf.Tensor: shape=(16,), dtype=int64, numpy=array([0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0])>,
 <tf.Tensor: shape=(16,), dtype=int64, numpy=array([0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1])>,
 <tf.Tensor: shape=(16,), dtype=int64, numpy=array([1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0])>,
 <tf.Tensor: shape=(16,), dtype=int64, numpy=array([0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0])>,
 <tf.Tensor: shape=(16,), dtype=int64, numpy=array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1])>,
 <tf.Tensor: shape=(16,), dtype=int64, numpy=array([1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1])>,
 <tf.Tensor: shape=(16,), dtype=int64, numpy=array([0, 1, 0, 1, 1, 1, 0, 

In [None]:
len(true_labels)

14

In [None]:
true_labels[0].numpy().tolist()

[0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0]

In [None]:
len(true_labels[0].numpy().tolist())

16

In [None]:
true_labels[0].numpy().tolist()[0]

0

In [None]:
# Flatten the per-batch label tensors into one plain Python list.
# Iterating over `true_labels` itself (rather than a hard-coded batch count) keeps this
# correct when the test-set size or batch size changes; each batch is converted only once.
true_label_list = [label for batch in true_labels for label in batch.numpy().tolist()]

print(true_label_list)

[0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0]


In [None]:
len(true_label_list)

223

In [None]:
# Classification metrics on the evaluation set (binary averaging scores the positive class, label 1).
accuracy = accuracy_score(true_label_list, pred_labels)
prf_scores = precision_recall_fscore_support(true_label_list, pred_labels, average='binary')
precision, recall, f1 = prf_scores[:3]   # the fourth entry (support) is not needed

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")


Accuracy: 0.9506726457399103
Precision: 0.9322033898305084
Recall: 0.9734513274336283
F1 Score: 0.9523809523809523


# Testing on new data

In [None]:
# Tokenize a single new review with the tokenizer already loaded in Step-6.
# BertTokenizer and numpy are imported in Step-2, so nothing is re-imported or re-loaded here.
test_sample = "I love this product! It's amazing."              # give a sample

# Pad/truncate to 128 tokens and return TensorFlow tensors that can be passed straight to the model.
inputs = tokenizer(test_sample, return_tensors='tf', padding='max_length', truncation=True, max_length=128)

print("Tokenized Inputs:", inputs)

Tokenized Inputs: {'input_ids': <tf.Tensor: shape=(1, 128), dtype=int32, numpy=
array([[ 101, 1045, 2293, 2023, 4031,  999, 2009, 1005, 1055, 6429, 1012,
         102,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0]], dtype=int32)>, 'token_type_ids': <tf.Tensor: shape=(1,



In [None]:
predictions = model(inputs)          # forward pass on the single tokenized sample

logits = predictions.logits          # raw class scores, one row per input sample
predicted_id = int(np.argmax(logits.numpy(), axis=-1)[0])   # index of the highest logit for that sample

# Decode with the same mapping used to encode the target column (positive -> 1, negative -> 0).
id_to_label = {1: "positive", 0: "negative"}
predicted_class = id_to_label[predicted_id]

print("Predicted Class:", predicted_class)                   # get the output