In [1]:
import tensorflow as tf
from transformers import TFAutoModel

In [2]:
model = TFAutoModel.from_pretrained('bert-base-cased')

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [3]:
model.summary()

Model: "tf_bert_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bert (TFBertMainLayer)       multiple                  108310272 
Total params: 108,310,272
Trainable params: 108,310,272
Non-trainable params: 0
_________________________________________________________________


In [4]:
#2 input layers 1 for ids and the other for mask

input_ids = tf.keras.layers.Input(shape = (512,), name = 'input_ids', dtype='int32')
mask = tf.keras.layers.Input(shape = (512,), name = 'attention_masks', dtype = 'int32')

#transformer
embeddings = model.bert(input_ids, attention_mask = mask)[1]

#Classifier head
x = tf.keras.layers.Dense(1024,activation = 'relu')(embeddings)
y = tf.keras.layers.Dense(5,activation = 'softmax', name = 'outputs')(embeddings)



In [5]:
model = tf.keras.Model(inputs = [input_ids, mask], outputs = y)

In [6]:
model.layers[2].trainable=False

In [7]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 512)]        0                                            
__________________________________________________________________________________________________
attention_masks (InputLayer)    [(None, 512)]        0                                            
__________________________________________________________________________________________________
bert (TFBertMainLayer)          TFBaseModelOutputWit 108310272   input_ids[0][0]                  
                                                                 attention_masks[0][0]            
__________________________________________________________________________________________________
outputs (Dense)                 (None, 5)            3845        bert[0][1]                   

In [8]:
optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0001, decay = 0.00001)
loss = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')

model.compile(optimizer=optimizer, loss=loss, metrics=[acc])

In [9]:
element_spec = ({'input_ids': tf.TensorSpec(shape=(1, 512), dtype=tf.int32, name=None),
  'attention_masks': tf.TensorSpec(shape=(1, 512), dtype=tf.int32, name=None)},
 tf.TensorSpec(shape=(1, 5), dtype=tf.float64, name=None))

In [10]:
train_ds = tf.data.experimental.load('train', element_spec)
val_ds = tf.data.experimental.load('val', element_spec)

In [11]:
train_ds

<_LoadDataset shapes: ({input_ids: (1, 512), attention_masks: (1, 512)}, (1, 5)), types: ({input_ids: tf.int32, attention_masks: tf.int32}, tf.float64)>

In [None]:
history = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs=3
)

Epoch 1/3
   868/140454 [..............................] - ETA: 17:23:23 - loss: 1.2527 - accuracy: 0.5392

In [12]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]