In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('train.tsv', sep='\t')
df.head()

Unnamed: 0,PhraseId,SentenceId,Phrase,Sentiment
0,1,1,A series of escapades demonstrating the adage ...,1
1,2,1,A series of escapades demonstrating the adage ...,2
2,3,1,A series,2
3,4,1,A,2
4,5,1,series,2


In [3]:
df = df[['Phrase', 'Sentiment']].head(1000)
df.head()

Unnamed: 0,Phrase,Sentiment
0,A series of escapades demonstrating the adage ...,1
1,A series of escapades demonstrating the adage ...,2
2,A series,2
3,A,2
4,series,2


In [4]:
from transformers import BertTokenizer

In [5]:
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

In [6]:
def tokenize(sentence):
    tokens = tokenizer.encode_plus(sentence, max_length=512,
                                   truncation=True, padding='max_length',
                                   add_special_tokens=True, return_token_type_ids=False,
                                   return_tensors='tf')
    return tokens['input_ids'], tokens['attention_mask']

In [7]:
import numpy as np

In [8]:
Xids = np.zeros((len(df), 512))
Xmask = np.zeros((len(df), 512))

In [9]:
Xids.shape

(1000, 512)

In [10]:
Xids

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [11]:
Xmask

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [12]:
for i, sequence in enumerate(df['Phrase']):
    tokens = tokenize(sequence)
    Xids[i, :], Xmask[i, :] = tokens[0], tokens[1]

Metal device set to: Apple M1 Pro

systemMemory: 32.00 GB
maxCacheSize: 10.67 GB



2023-02-06 15:36:12.375449: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-02-06 15:36:12.375475: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [13]:
arr = df['Sentiment'].values

In [14]:
arr

array([1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
       3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 3, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 3, 2,
       4, 3, 2, 3, 3, 3, 2, 2, 4, 2, 3, 4, 2, 2, 2, 1, 2, 2, 2, 3, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 0, 2, 0, 2, 1, 1, 1, 2, 2,
       1, 2, 2, 2, 2, 2, 3, 4, 4, 3, 3, 3, 3, 4, 2, 2, 2, 2, 2, 2, 2, 1,
       2, 3, 2, 1, 2, 1, 1, 2, 1, 1, 2, 2, 2, 1, 2, 2, 1, 2, 3, 3, 3, 1,
       2, 2, 1, 0, 2, 0, 1, 2, 1, 1, 2, 2, 4, 3, 2, 2, 3, 2, 4, 2, 3, 2,
       4, 3, 3, 3, 4, 2, 4, 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2,
       1, 2, 1, 0, 2, 1, 2, 2, 2, 1, 0, 1, 0, 1, 1, 3, 2, 3, 2, 3, 2, 2,
       3, 3, 2, 2, 3, 2, 3, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 0, 2, 1,
       0, 0, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 2, 2, 4, 3, 2, 2, 2, 1,
       2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 2, 3, 2,

In [15]:
labels = np.zeros((arr.size, arr.max()+1))

In [16]:
labels[np.arange(arr.size), arr] = 1

In [17]:
labels

array([[0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0.],
       ...,
       [0., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0.]])

In [18]:
with open('movie-xids.npy', 'wb') as f:
    np.save(f, Xids)
with open('movie-xmask.npy', 'wb') as f:
    np.save(f, Xmask)
with open('movie-labels.npy', 'wb') as f:
    np.save(f, labels)

In [19]:
del df, Xids, Xmask, labels

In [20]:
import tensorflow as tf

In [21]:
with open('movie-xids.npy', 'rb') as f:
    Xids = np.load(f, allow_pickle=True)
with open('movie-xmask.npy', 'rb') as f:
    Xmask = np.load(f, allow_pickle=True)
with open('movie-labels.npy', 'rb') as f:
    labels = np.load(f, allow_pickle=True)

In [22]:
#gpu_devices = tf.config.experimental.list_physical_devices('GPU')
#for device in gpu_devices: tf.config.experimental.set_memory_growth(device, True)  # required to avoid GPU LSTM Internal Error
tf.config.experimental.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [23]:
tf.__version__

'2.11.0'

In [24]:
data = tf.data.Dataset.from_tensor_slices((Xids, Xmask, labels))  # [750000:850000]

In [25]:
SHUFFLE = 100000
BATCH_SIZE = 16

In [26]:
def map_func(input_ids, masks, labels):
    return {'input_ids': input_ids, 'attention_mask': masks}, labels

In [27]:
data = data.map(map_func)

In [28]:
data = data.shuffle(SHUFFLE).batch(BATCH_SIZE) #, drop_remainder=True)

In [29]:
SIZE = Xids.shape[0]/BATCH_SIZE
SIZE

62.5

In [30]:
SPLIT = 0.9

train = data.take(int(SIZE*SPLIT))
val = data.skip(int(SIZE*SPLIT))

del data

---

# Model Setup

In [31]:
from transformers import TFAutoModel

In [32]:
bert = TFAutoModel.from_pretrained('bert-base-cased')  #, output_hidden_states=False

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [33]:
bert.summary()

Model: "tf_bert_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bert (TFBertMainLayer)      multiple                  108310272 
                                                                 
Total params: 108,310,272
Trainable params: 108,310,272
Non-trainable params: 0
_________________________________________________________________


In [34]:
input_ids = tf.keras.layers.Input(shape=(512,), name='input_ids', dtype='int32')
mask = tf.keras.layers.Input(shape=(512,), name='attention_mask', dtype='int32')

embeddings = bert.bert(input_ids, attention_mask=mask)[0]  # we access the transformer model within our bert object using the bert attribute (eg bert.bert instead of bert)

x = tf.keras.layers.Dropout(0.1)(embeddings)
x = tf.keras.layers.GlobalMaxPool1D()(x)
y = tf.keras.layers.Dense(5, activation='softmax', name='outputs')(x)

model = tf.keras.Model(inputs=[input_ids, mask], outputs=y)

model.layers[2].trainable = False

In [35]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, 512)]        0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, 512)]        0           []                               
                                                                                                  
 bert (TFBertMainLayer)         TFBaseModelOutputWi  108310272   ['input_ids[0][0]',              
                                thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, 512,                                           

In [36]:
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=0.01)
loss = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')

model.compile(optimizer=optimizer, loss=loss, metrics=[acc])

In [37]:
# 800K
history = model.fit(
    train,
    validation_data=val,
    epochs=2)

Epoch 1/2


2023-02-06 15:36:23.841742: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-02-06 15:36:23.847799: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-02-06 15:37:19.557523: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/2


### Save

In [38]:
model.get_config()

{'name': 'model',
 'layers': [{'class_name': 'InputLayer',
   'config': {'batch_input_shape': (None, 512),
    'dtype': 'int32',
    'sparse': False,
    'ragged': False,
    'name': 'input_ids'},
   'name': 'input_ids',
   'inbound_nodes': []},
  {'class_name': 'InputLayer',
   'config': {'batch_input_shape': (None, 512),
    'dtype': 'int32',
    'sparse': False,
    'ragged': False,
    'name': 'attention_mask'},
   'name': 'attention_mask',
   'inbound_nodes': []},
  {'class_name': 'Custom>TFBertMainLayer',
   'config': {'name': 'bert',
    'trainable': False,
    'dtype': 'float32',
    'config': {'return_dict': True,
     'output_hidden_states': False,
     'output_attentions': False,
     'torchscript': False,
     'torch_dtype': None,
     'use_bfloat16': False,
     'tf_legacy_loss': False,
     'pruned_heads': {},
     'tie_word_embeddings': True,
     'is_encoder_decoder': False,
     'is_decoder': False,
     'cross_attention_hidden_size': None,
     'add_cross_attention': 

In [39]:
model.save('sentiment_model')



INFO:tensorflow:Assets written to: sentiment_model/assets


INFO:tensorflow:Assets written to: sentiment_model/assets


In [40]:
del model

---

### Load

In [41]:
model = tf.keras.models.load_model('sentiment_model')
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, 512)]        0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, 512)]        0           []                               
                                                                                                  
 bert (TFBertMainLayer)         TFBaseModelOutputWi  108310272   ['input_ids[0][0]',              
                                thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, 512,                                           

### Test

In [42]:
loss, acc = model.evaluate(val)

2023-02-06 15:39:07.544156: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




In [43]:
model.predict(val.take(1))

2023-02-06 15:39:18.750185: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




array([[0.00098069, 0.02076018, 0.8793351 , 0.05680634, 0.04211769],
       [0.01247122, 0.22705382, 0.45784566, 0.16848308, 0.13414626],
       [0.00536518, 0.10245841, 0.49711308, 0.11541619, 0.27964717],
       [0.01064614, 0.60000044, 0.23348697, 0.08410457, 0.07176197],
       [0.00648862, 0.05065601, 0.17500797, 0.06153512, 0.70631236],
       [0.02665318, 0.13846502, 0.58556855, 0.11262773, 0.1366855 ],
       [0.01028407, 0.06969323, 0.7406626 , 0.06528687, 0.1140732 ],
       [0.01987173, 0.21409963, 0.5324512 , 0.14752373, 0.08605366],
       [0.01216913, 0.10916478, 0.676737  , 0.17119808, 0.03073106],
       [0.01213326, 0.07015457, 0.71528125, 0.0866842 , 0.11574672],
       [0.00427896, 0.05245286, 0.7721695 , 0.07136258, 0.09973622],
       [0.00569212, 0.044359  , 0.6804062 , 0.10001447, 0.16952825],
       [0.01512514, 0.32133347, 0.3653545 , 0.21935803, 0.07882892],
       [0.00525202, 0.02040598, 0.69365203, 0.07186995, 0.20882002],
       [0.00908552, 0.06923031, 0.

In [44]:
val.take(1)

<TakeDataset element_spec=({'input_ids': TensorSpec(shape=(None, 512), dtype=tf.float64, name=None), 'attention_mask': TensorSpec(shape=(None, 512), dtype=tf.float64, name=None)}, TensorSpec(shape=(None, 5), dtype=tf.float64, name=None))>