In [45]:
import numpy as np
import tensorflow as tf
from transformers import TFAutoModel #for bert model

In [46]:
#Loading data
# Feature arrays that are later sliced into the dataset as the model's "input_ids",
# one array per split (train / validation / test).
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, which can
# execute code from the file — only load files you trust; presumably not needed for
# plain numeric arrays, confirm before tightening.
X_train_ids, X_valid_ids, X_test_ids = (
    np.load(path, allow_pickle=True)
    for path in (
        './data/preprocessed/features/X_train_ids.npy',
        './data/preprocessed/features/X_valid_ids.npy',
        './data/preprocessed/features/X_test_ids.npy',
    )
)

In [47]:
# Mask arrays that accompany the id arrays, one per split (train / validation / test);
# the training mask is later fed to the model as "attention_mask".
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects — only load
# files you trust.
X_train_mask, X_valid_mask, X_test_mask = (
    np.load(path, allow_pickle=True)
    for path in (
        './data/preprocessed/features/X_train_mask.npy',
        './data/preprocessed/features/X_valid_mask.npy',
        './data/preprocessed/features/X_test_mask.npy',
    )
)

In [48]:
# Label arrays, one per split (train / validation / test).
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects — only load
# files you trust.
y_train, y_valid, y_test = (
    np.load(path, allow_pickle=True)
    for path in (
        './data/preprocessed/labels/y_train.npy',
        './data/preprocessed/labels/y_valid.npy',
        './data/preprocessed/labels/y_test.npy',
    )
)


In [49]:
#input pipelining

# Slice the three training arrays along their first axis so that each dataset
# element is one (ids, mask, label) triple.
train_dataset = tf.data.Dataset.from_tensor_slices((X_train_ids, X_train_mask, y_train)) # Create a dataset from the data
# Last expression of the cell: displays the repr of a one-element view of the
# dataset (its element spec), not the element's values.
train_dataset.take(1)


<TakeDataset element_spec=(TensorSpec(shape=(512,), dtype=tf.int64, name=None), TensorSpec(shape=(512,), dtype=tf.int64, name=None), TensorSpec(shape=(5,), dtype=tf.float64, name=None))>

In [50]:
#mapping features to labels
#(features, label): Keras reads the first tuple element as the model input and the second as the target
def map_features(input_ids, mask, label):
    """Pack one example into the ``(inputs, target)`` pair consumed by Keras.

    Args:
        input_ids: The id sequence of one example.
        mask: The mask that accompanies ``input_ids``.
        label: The target for the example.

    Returns:
        A 2-tuple ``(features, label)``. ``features`` is a dict keyed by
        ``"input_ids"`` and ``"attention_mask"`` (the names given to the
        model's two input layers); ``label`` is passed through unchanged.
    """
    features = {"input_ids": input_ids, "attention_mask": mask}
    # The label must not live inside the feature dict: a dataset that yields a
    # single dict is treated as inputs only, which leaves the loss without a target.
    return features, label

In [51]:
# Restructure every (ids, mask, label) element with map_features.
# NOTE: this rebinds train_dataset, so the cell is not idempotent — re-running it
# without first re-creating the dataset applies map_features to already-mapped elements.
train_dataset = train_dataset.map(map_features)

In [52]:
batch_size = 16
# Shuffle with a 10,000-element buffer, then group into batches of batch_size;
# drop_remainder=True discards a final short batch so every batch has the same size.
train_dataset = train_dataset.shuffle(10000).batch(batch_size, drop_remainder=True)
# Persist the pipeline under the 'train' directory. Dataset.save() does not return
# a dataset, so its result must not be assigned back to train_dataset — doing so
# would discard the dataset that the rest of the notebook needs.
train_dataset.save('train')

In [54]:
# Load the pretrained 'bert-base-cased' checkpoint (downloaded on first use).
# NOTE: the name `model` is rebound to the full Keras classifier in a later cell, so
# this cell has to be re-run before re-running the cell that calls `model.bert(...)`.
model = TFAutoModel.from_pretrained('bert-base-cased')

Downloading (…)"tf_model.h5";: 100%|██████████| 527M/527M [00:39<00:00, 13.4MB/s] 
Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [57]:
#input layers
# Two symbolic int32 inputs of length 512. Their names are the keys under which
# map_features places the ids and the mask.
input_ids = tf.keras.layers.Input(
    name="input_ids",
    shape=(512,),
    dtype=tf.int32,
)
masks = tf.keras.layers.Input(
    name="attention_mask",
    shape=(512,),
    dtype=tf.int32,
)

In [59]:
#embed the inputs using bert

# Run the BERT main layer on the symbolic inputs and keep element [1] of its output
# (presumably the pooled per-sequence vector — confirm against the output class).
embeddings = model.bert(input_ids, attention_mask=masks)[1]

#classifier head
# A 1024-unit ReLU layer followed by a 5-unit softmax layer named 'outputs'.
x = tf.keras.layers.Dense(1024, activation='relu')(embeddings)
y = tf.keras.layers.Dense(5, activation='softmax', name='outputs')(x)

In [60]:
# Assemble the end-to-end classifier from the two inputs to the softmax output.
# NOTE: this rebinds `model`, which previously held the pretrained BERT model; after
# this cell `model.bert` is no longer available.
model = tf.keras.Model(inputs=[input_ids, masks], outputs=y)

In [61]:
# Print the layer-by-layer overview (output shapes, parameter counts, connectivity).
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, 512)]        0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, 512)]        0           []                               
                                                                                                  
 bert (TFBertMainLayer)         TFBaseModelOutputWi  108310272   ['input_ids[0][0]',              
                                thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, 512,                                           

In [62]:
# Freeze the BERT layer so that its weights are not updated during training.
# The layer is looked up by name ('bert', as listed by model.summary()) instead of
# by position (model.layers[2]), so the right layer is frozen even if the layer
# order changes.
model.get_layer('bert').trainable = False

In [68]:
# Training configuration: categorical accuracy reported as 'accuracy', categorical
# cross-entropy loss, and Adam with learning rate 5e-5 and weight decay 1e-6.
acc = tf.keras.metrics.CategoricalAccuracy(name='accuracy')
loss = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(weight_decay=1e-6, learning_rate=5e-5)

model.compile(
    loss=loss,
    metrics=[acc],
    optimizer=optimizer,
)

In [None]:
element_spec = 