In [1]:
import tensorflow as tf

In [3]:
from datasets import load_dataset

dataset = load_dataset('financial_phrasebank', 'sentences_allagree')

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
test_dataset = dataset["train"]

In [5]:
test_dataset[0]

{'sentence': 'According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing .',
 'label': 1}

In [6]:
train_dataset = load_dataset('financial_phrasebank', 'sentences_50agree')

In [7]:
train_dataset = train_dataset["train"]

In [8]:
train_dataset[10]

{'sentence': "TeliaSonera TLSN said the offer is in line with its strategy to increase its ownership in core business holdings and would strengthen Eesti Telekom 's offering to its customers .",
 'label': 2}

In [2]:
label_desc = {
    0:'negative',
    1:'neutral',
    2:'positive'
}

In [3]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from transformers import TFBertModel

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from transformers import BertTokenizer

In [5]:
from tensorflow.keras.losses import SparseCategoricalCrossentropy

In [6]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [11]:
bert_model = TFBertModel.from_pretrained('bert-base-uncased')

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

In [12]:
bert_model.trainable = False

## Model Arch for fine tuning

In [13]:
input_ids = Input(shape = (400, ), dtype = tf.int32, name = 'input_ids')
attention_mask = Input(shape = (400,), dtype = tf.int32, name = 'attention_mask')

### Embedding layer
It will take out input and make the embeddings for us to feed it to BERT.

In [16]:
embeddings = bert_model(input_ids, attention_mask = attention_mask)[0]

In [17]:
x = tf.keras.layers.GlobalAveragePooling1D()(embeddings)
x = Dense(64, activation='relu')(x)
output = Dense(3, activation='softmax')(x)

In [18]:
model = Model(inputs = [input_ids, attention_mask], outputs = output)

In [19]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, 400)]        0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, 400)]        0           []                               
                                                                                                  
 tf_bert_model (TFBertModel)    TFBaseModelOutputWi  109482240   ['input_ids[0][0]',              
                                thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, 400,                                           

In [20]:
model.compile(optimizer='adam', loss= SparseCategoricalCrossentropy(), metrics=['accuracy'])

## Here we are going to split out test_dataset into validation and test dataset.

In [24]:
## Converting train_dataset into tensorflow dataset
train_encoded_sentences = tokenizer(train_dataset["sentence"], truncation = True, max_length = 400, padding = "max_length", return_tensors='tf')
training_dataset = tf.data.Dataset.from_tensor_slices((
    dict(
        input_ids = train_encoded_sentences['input_ids'],
        attention_mask = train_encoded_sentences['attention_mask']
    ),
    train_dataset["label"]
))

In [25]:
## Converting test_dataset into tensorflow dataset
test_encoded_sentences = tokenizer(test_dataset["sentence"], truncation = True, max_length = 400, padding = "max_length", return_tensors='tf')
test_dataset = tf.data.Dataset.from_tensor_slices((
    dict(
        input_ids = test_encoded_sentences['input_ids'],
        attention_mask = test_encoded_sentences['attention_mask']
    ),
    test_dataset["label"]
))

In [26]:
length_of_test_dataset = len(test_dataset)

valid_size = int(0.45 * length_of_test_dataset)

test_dataset = test_dataset.shuffle(buffer_size = length_of_test_dataset)
validation_dataset = test_dataset.take(valid_size)

test_dataset = test_dataset.skip(valid_size)


In [27]:
print(len(training_dataset),len(validation_dataset),len(test_dataset))


4846 1018 1246


In [28]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5, 
    verbose=1, 
    restore_best_weights=True 
)

model_checkpoint = ModelCheckpoint(
    filepath='best_model.h5', 
    monitor='val_loss', 
    save_best_only=True,  
    verbose=1
)


In [51]:
history = model.fit(
    training_dataset.batch(32),  
    epochs=5,  
    validation_data=validation_dataset.batch(32),
    callbacks=[early_stopping, model_checkpoint]
)

Epoch 1/5
Epoch 1: val_loss did not improve from 0.00000
Epoch 2/5
Epoch 2: val_loss did not improve from 0.00000
Epoch 3/5
Epoch 3: val_loss did not improve from 0.00000
Epoch 4/5
Epoch 4: val_loss did not improve from 0.00000
Epoch 5/5
Epoch 5: val_loss did not improve from 0.00000


In [22]:
encoding = tokenizer.encode_plus("According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing .", max_length=400, truncation=True, padding='max_length', return_tensors='tf')
# predictions = model.predict({'input_ids': encoding['input_ids'], 'attention_mask': encoding['attention_mask']})

In [62]:
max(predictions[0])

0.71160805

In [63]:
model.save('fine_tuned_model')



INFO:tensorflow:Assets written to: fine_tuned_model\assets


INFO:tensorflow:Assets written to: fine_tuned_model\assets


In [21]:
loaded_model = model.load_weights('fine_tuned_model')

In [24]:
model2 = tf.saved_model.load('fine_tuned_model')

In [26]:
prediction_func = model2.signatures['serving_default']

In [28]:
res = prediction_func(attention_mask = encoding["attention_mask"], input_ids = encoding["input_ids"])

In [31]:
res

{'dense_9': <tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[0.71160805, 0.2508122 , 0.03757977]], dtype=float32)>}

In [40]:
res_arr = res["dense_9"].numpy()

In [43]:
import numpy as np

In [44]:
np.argmax(res_arr)

0

In [53]:
encoding = tokenizer.encode_plus("Viking Line 's cargo revenue increased by 5.4 % to EUR 21.46 mn , and cargo volume increased by 2.4 % to 70,116 cargo units .", max_length=400, truncation=True, padding='max_length', return_tensors='tf')
    
predictions = prediction_func(input_ids =  encoding['input_ids'], attention_mask = encoding['attention_mask'])["dense_9"].numpy()
    
prediction = np.argmax(predictions)

label = label_desc[prediction]

print(label, predictions)

negative [[0.7620019  0.13888818 0.09910992]]
