In [1]:
# Imports 
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from keras import backend as K
import matplotlib.pyplot as plt 

In [2]:
# Load the annotated sentences and carve out an 80/20 train/test split.
data = pd.read_csv(
    "../../../data/mturk_experiment_2.csv",
    encoding='unicode_escape',
)
samples = data["Sentence"]   # model input: raw sentence text
labels = data["Formality"]   # regression target

# random_state is pinned so the same rows land in each partition on every run.
split_parts = train_test_split(samples, labels, test_size=0.2, random_state=5)

# train_test_split returns pandas objects; convert each part to a plain
# NumPy array before handing it to Keras.
train_samples, test_samples, train_labels, test_labels = (
    np.array(part) for part in split_parts
)

In [3]:
# Model setup

# Attention layer
class peel_the_layer(tf.keras.layers.Layer):
    """Single-unit attention pooling over the time axis of a sequence.

    The input ``x`` is expected to be rank 3, ``(batch, timesteps, features)``
    (e.g. the output of an RNN with ``return_sequences=True``). One score is
    computed per timestep, ``e = tanh(x . w + b)``, the scores are normalised
    with a softmax along the timestep axis, and the input is summed along
    that axis weighted by the normalised scores.

    All maths is done with TensorFlow ops so the layer only depends on the
    same package that provides its ``tf.keras`` base class.
    """

    def __init__(self, **kwargs):
        # No layer-specific options; everything is forwarded to the base Layer.
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the attention weights once the input shape is known.

        ``input_shape[-1]`` is the feature dimension of the previous layer and
        ``input_shape[-2]`` is the number of timesteps.
        """
        # This is a 1-unit layer: one attention score per timestep.
        units = 1
        # Explicit names are useful for avoiding
        # "RuntimeError: Unable to create link."
        self.w = self.add_weight(
            name="att_weights",
            shape=(input_shape[-1], units),
            initializer="normal",
        )
        self.b = self.add_weight(
            name="att_bias",
            shape=(input_shape[-2], units),
            initializer="zeros",
        )
        super().build(input_shape)

    def call(self, x):
        """Return ``(attention_weights, context)`` for the input sequence.

        ``attention_weights`` has one value per timestep (softmax over axis 1)
        and ``context`` is the attention-weighted sum of ``x`` over axis 1.
        """
        # Contract the feature axis of x with w: (..., features) . (features, 1)
        scores = tf.tanh(tf.tensordot(x, self.w, axes=1) + self.b)
        # Normalise across timesteps so the weights of each sample sum to 1.
        a = tf.nn.softmax(scores, axis=1)
        weighted = x * a

        # 'a' is the set of attention weights; the second value is the
        # attention-adjusted output state (context vector).
        return a, tf.reduce_sum(weighted, axis=1)


# NOTE(review): this name is never read in this cell and does not match the
# encoder handle loaded below -- confirm whether it should be updated or dropped.
bert_model_name = 'small_bert/bert_en_uncased_L-4_H-512_A-8' 

tfhub_handle_encoder = 'https://tfhub.dev/tensorflow/talkheads_ggelu_bert_en_large/2'
tfhub_handle_preprocess =   'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'
# NOTE(review): these two standalone layers are not wired into `model` below,
# which builds its own 'preprocessing' and 'BERT_encoder' layers from the same
# handles. They are kept in case another cell uses them.
bert_preprocess_model = hub.KerasLayer(tfhub_handle_preprocess)
bert_model = hub.KerasLayer(tfhub_handle_encoder)

# Raw strings in -> preprocessing -> trainable BERT encoder.
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
encoder_inputs = preprocessing_layer(text_input)
encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
outputs = encoder(encoder_inputs)

# The pooled output is reshaped into a length-1024 sequence of single values
# so that the LSTM and the attention layer have a time axis to work on.
# NOTE(review): the LSTM therefore steps over the entries of the pooled vector,
# not over tokens; if token-level attention was intended,
# outputs['sequence_output'] would be the tensor to use -- confirm.
net = outputs['pooled_output']
reshaped = tf.reshape(net,[-1, 1024, 1])
lstm = tf.keras.layers.LSTM(512,return_sequences=True)(reshaped)
# `a` holds the attention weights (unused below); `context` is the pooled vector.
a, context = peel_the_layer()(lstm)

# Single linear unit: the model is a regressor.
dense = tf.keras.layers.Dense(1)(context)
model = tf.keras.Model(text_input, dense)
model.summary()

loss = 'mse'
# Use metric classes (not loss classes) for the values reported during
# fit/evaluate. The reported names stay 'mean_squared_error',
# 'mean_absolute_error' and 'mean_absolute_percentage_error'.
metrics = [
    tf.keras.metrics.MeanSquaredError(),
    tf.keras.metrics.MeanAbsoluteError(),
    tf.keras.metrics.MeanAbsolutePercentageError(),
]
optimiser = tf.optimizers.Adam(learning_rate= 5e-5)

model.compile(optimizer=optimiser, loss=loss, metrics=metrics)



Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
text (InputLayer)               [(None,)]            0                                            
__________________________________________________________________________________________________
preprocessing (KerasLayer)      {'input_type_ids': ( 0           text[0][0]                       
__________________________________________________________________________________________________
BERT_encoder (KerasLayer)       {'sequence_output':  435915777   preprocessing[0][0]              
                                                                 preprocessing[0][1]              
                                                                 preprocessing[0][2]              
______________________________________________________________________________________________

In [4]:
# Training configuration.
epochs = 6
batch_size = 32

# CHANGE EARLY STOPPING
# No validation data is passed to fit(), so the callback watches the
# training loss.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    min_delta=0.01,
    patience=3,
)
history = model.fit(
    x=train_samples,
    y=train_labels,
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[callback],
    verbose=2,
)

# Held-out evaluation: `scores` holds the loss followed by the compiled metrics.
scores = model.evaluate(x=test_samples, y=test_labels)
model_label = (
    "BERT Large - Epochs - " + str(epochs)
    + " Batch Size - " + str(batch_size)
)

# One column per trained configuration, one row per score.
results = pd.DataFrame()
results[model_label] = scores

# Training-set MSE per epoch, saved next to the notebook under the run label.
mse = history.history['mean_squared_error']

ax = plt.gca()
ax.plot(mse)
ax.set_title("Model: " + model_label)
ax.set_ylabel('MSE')
ax.set_xlabel('Epoch')
ax.set_ylim(bottom=0)
plt.savefig(model_label + ".jpeg", dpi=100, bbox_inches='tight')

results

Epoch 1/6
176/176 - 16117s - loss: 3.5281 - mean_squared_error: 3.5281 - mean_absolute_error: 1.3188 - mean_absolute_percentage_error: 35.0680
Epoch 2/6
176/176 - 15753s - loss: 0.4930 - mean_squared_error: 0.4930 - mean_absolute_error: 0.5625 - mean_absolute_percentage_error: 15.8698
Epoch 3/6
176/176 - 15886s - loss: 0.3028 - mean_squared_error: 0.3028 - mean_absolute_error: 0.4321 - mean_absolute_percentage_error: 11.9034
Epoch 4/6
176/176 - 16083s - loss: 0.1792 - mean_squared_error: 0.1792 - mean_absolute_error: 0.3247 - mean_absolute_percentage_error: 8.8901
Epoch 5/6
176/176 - 16225s - loss: 0.1317 - mean_squared_error: 0.1317 - mean_absolute_error: 0.2780 - mean_absolute_percentage_error: 7.5024
Epoch 6/6


In [None]:
# IF THIS MODEL IS ANY GOOD, REMEMBER TO SAVE IT!

In [None]:
# Persist the trained model to a single HDF5 file.
# Alternative kept for reference: model.save('bertissimo')
model.save(filepath='bertissimo_6_epochs.h5')

In [None]:
# Predict the first 100 held-out sentences and pair them with their labels.
# The model is fit directly on the raw label values, so its outputs are
# already in the labels' units and are compared without any rescaling.
preview_size = 100
predictions = pd.DataFrame(model.predict(test_samples[:preview_size]))
truths = pd.DataFrame(test_labels[:preview_size])

In [None]:
# Side-by-side table of rounded predictions and true labels, plus the
# absolute error per row; the cell displays the mean absolute error.
pred_column = predictions.round(1).iloc[:, 0]
truth_column = truths.iloc[:, 0]

diff = pd.DataFrame({"Preds": pred_column, "Truths": truth_column})
diff["Diff"] = (diff["Preds"] - diff["Truths"]).abs()

diff["Diff"].mean()