In [2]:
# Imports 
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from keras import backend as K
import matplotlib.pyplot as plt 
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import LearningRateScheduler

In [4]:
# Import data
# NOTE(review): 'unicode_escape' is an unusual CSV encoding - confirm it is intended.
data = pd.read_csv("../../../data/side_information.csv", encoding='unicode_escape')
# NOTE(review): absolute, machine-specific path; nothing in the visible cells reads it.
checkpoint_path = "C:/Users/jack-/Documents/University/Project/src/deep_learning/bert_tests/checkpoints"

# The first entry is the raw sentence (text branch input); the other 14 entries
# are the side-information columns (side branch input).
feature_names = [
    'Sentence',
    'Length in Words', 'Length in Characters', 'F-score', 'I-score',
    'Lexical Density', 'FK Reading Ease', 'FOG Scale', 'SMOG Index', 'ARI',
    'CL Index', 'LW Formula', 'DC Score', 'Readability Consensus',
    'Spache Formula',
]

# 80/20 split with a fixed seed so the partition is reproducible.
samples, labels = data[feature_names], data["Formality"]
train_samples, test_samples, train_labels, test_labels = train_test_split(
    samples,
    labels,
    test_size=0.2,
    random_state=5,
)

# Per-branch views of each partition: sentences only, and every column except the sentence.
bert_train_samples, bert_test_samples = (
    np.array(part["Sentence"]) for part in (train_samples, test_samples)
)
side_train_samples, side_test_samples = (
    np.array(part[feature_names[1:]]) for part in (train_samples, test_samples)
)

# Finally turn the pandas objects themselves into plain NumPy arrays.
train_samples, test_samples, train_labels, test_labels = (
    np.array(part)
    for part in (train_samples, test_samples, train_labels, test_labels)
)

In [11]:
# Model setup

# Attention layer
class peel_the_layer(tf.keras.layers.Layer):
    """Single-unit attention pooling over axis 1 of a sequence tensor.

    Intended for a 3-D input ``x`` laid out as (batch, timesteps, features).
    Every timestep gets a scalar score ``e = tanh(x . w + b)``; the scores are
    normalised with a softmax across the timestep axis and used to weight the
    timesteps before they are summed.

    The bias holds one value per timestep (``input_shape[-2]``), so the number
    of timesteps must be known when the layer is built and a built layer only
    accepts inputs with that same number of timesteps.
    """

    def __init__(self, **kwargs):
        """Create the layer.

        Args:
            **kwargs: Standard ``tf.keras.layers.Layer`` keyword arguments
                (``name``, ``dtype``, ``trainable``, ...). They are forwarded
                to the base class so the layer can be named and can be
                rebuilt through ``from_config`` (model cloning / loading),
                which passes these keys back to the constructor.
        """
        super(peel_the_layer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the attention weight and bias for the given input shape.

        Args:
            input_shape: Shape of the incoming tensor. The last entry is the
                feature size, the second-to-last the number of timesteps.
        """
        # This is a 1-unit layer: one score per timestep.
        units = 1
        # Explicit weight names avoid "RuntimeError: Unable to create link"
        # when the model weights are saved.
        self.w = self.add_weight(name="att_weights", shape=(input_shape[-1], units), initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(input_shape[-2], units), initializer="zeros")
        super(peel_the_layer, self).build(input_shape)

    def call(self, x):
        """Apply attention to ``x``.

        Args:
            x: Input tensor; each position along axis 1 is attended to.

        Returns:
            A pair ``(a, context)``: ``a`` holds the attention weights
            (softmax over axis 1) and ``context`` is ``x`` weighted by ``a``
            and summed over axis 1.
        """
        # tf ops are used directly so the layer relies only on the TensorFlow
        # build that provides tf.keras, not on a separately installed
        # ``keras`` backend module. tensordot with axes=1 contracts the last
        # axis of x with the first axis of w, like a batched dot product.
        e = tf.tanh(tf.tensordot(x, self.w, axes=1) + self.b)
        a = tf.nn.softmax(e, axis=1)
        output = x * a

        # 'a' is the set of attention weights; the sum is the
        # attention-adjusted output state (context).
        return a, tf.reduce_sum(output, axis=1)

# TF Hub locations of the BERT preprocessing model and the matching encoder.
tfhub_handle_preprocess = 'https://tfhub.dev/tensorflow/bert_en_cased_preprocess/3'
tfhub_handle_encoder = 'https://tfhub.dev/tensorflow/bert_en_cased_L-12_H-768_A-12/4'
# NOTE(review): this label names a small uncased BERT, while the handles above
# point at the cased L-12/H-768 model - confirm which one is intended.
bert_model_name = 'small_bert/bert_en_uncased_L-4_H-512_A-8'

# Stand-alone layer instances. NOTE(review): the model-building cell below
# creates its own KerasLayer objects from the same handles, so these two are
# not used by the visible cells.
bert_preprocess_model = hub.KerasLayer(tfhub_handle_preprocess)
bert_model = hub.KerasLayer(tfhub_handle_encoder)

In [12]:
# Bert model
# Text branch: raw sentence strings -> BERT preprocessing -> BERT encoder -> LSTM.

text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
encoder_inputs = preprocessing_layer(text_input)
# trainable=True: the encoder weights are fine-tuned together with the rest of the model.
encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
outputs = encoder(encoder_inputs)
net = outputs['pooled_output']
# Present the pooled embedding to the LSTM as a sequence: one timestep per
# embedding component, one feature per timestep. The width is read from the
# encoder output instead of being hard-coded, so switching to an encoder with
# a different hidden size cannot silently mis-batch or break the reshape.
reshaped = tf.reshape(net, [-1, net.shape[-1], 1])
lstm = tf.keras.layers.LSTM(512, return_sequences=True)(reshaped)

# No attention or regression head here: both are applied once, after this
# branch has been concatenated with the side-information branch.
final_bert_model = tf.keras.Model(text_input, lstm)

final_bert_model.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
text (InputLayer)               [(None,)]            0                                            
__________________________________________________________________________________________________
preprocessing (KerasLayer)      {'input_word_ids': ( 0           text[0][0]                       
__________________________________________________________________________________________________
BERT_encoder (KerasLayer)       {'encoder_outputs':  108310273   preprocessing[0][0]              
                                                                 preprocessing[0][1]              
                                                                 preprocessing[0][2]              
____________________________________________________________________________________________

In [13]:
# Side information model
# Side branch: the 14 side-information values of a sample -> two stacked LSTMs.

# shape must be a tuple; "(14)" is just the integer 14, which only works where
# Keras chooses to coerce a bare int into a shape.
side_input = tf.keras.layers.Input(shape=(14,))
# Wrap each feature vector as a one-timestep sequence so it can feed an LSTM.
# The feature width is taken from the input so it is stated in one place only.
reshaped = tf.reshape(side_input, [-1, 1, side_input.shape[-1]])
lstm_1 = tf.keras.layers.LSTM(512, return_sequences=True)(reshaped)
lstm_2 = tf.keras.layers.LSTM(512, return_sequences=True)(lstm_1)
side_model = tf.keras.Model(side_input, lstm_2)
side_model.summary()

Model: "model_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 14)]              0         
_________________________________________________________________
tf.reshape_3 (TFOpLambda)    (None, 1, 14)             0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 1, 512)            1079296   
_________________________________________________________________
lstm_5 (LSTM)                (None, 1, 512)            2099200   
Total params: 3,178,496
Trainable params: 3,178,496
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Combine models and predict
# Join the two branch outputs along axis 1, pool them with the attention
# layer and regress a single value from the pooled context.
combined = tf.keras.layers.concatenate(
    [final_bert_model.output, side_model.output],
    axis=1,
)
a, context = peel_the_layer()(combined)
dense = tf.keras.layers.Dense(1)(context)

model = tf.keras.Model(
    inputs=[final_bert_model.input, side_model.input],
    outputs=dense,
)
# Optimise MSE; MAE and MAPE are reported alongside it.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='mean_squared_error',
    metrics=[
        tf.keras.losses.MeanAbsoluteError(),
        tf.keras.losses.MeanAbsolutePercentageError(),
    ],
)

model.summary()

Model: "model_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
text (InputLayer)               [(None,)]            0                                            
__________________________________________________________________________________________________
preprocessing (KerasLayer)      {'input_word_ids': ( 0           text[0][0]                       
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 14)]         0                                            
__________________________________________________________________________________________________
BERT_encoder (KerasLayer)       {'encoder_outputs':  108310273   preprocessing[0][0]              
                                                                 preprocessing[0][1]        

In [None]:
# Early stopping on the training loss: an epoch counts as an improvement only
# if the loss drops by at least 0.01, and one epoch without improvement is tolerated.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    min_delta=0.01,
    patience=1,
)

In [None]:
# First experiment: up to 8 epochs, batch size 32, one log line per epoch.
history = model.fit(
    x=[bert_train_samples, side_train_samples],
    y=train_labels,
    batch_size=32,
    epochs=8,
    verbose=2,
    callbacks=[callback],
)

In [None]:
# Score the 8-epoch model on the held-out split. The values come back in
# model.evaluate's order: the loss first, then the compiled metrics.
scores = model.evaluate(x=[bert_test_samples, side_test_samples], y=test_labels)
# One column per experiment; later runs add their own column to this frame.
results = pd.DataFrame({"E8 B32": scores})
results

In [None]:
# Training-loss curve (MSE per epoch) of the most recent fit, saved as a JPEG.
mse = history.history['loss']

plt.plot(mse)
loss_axes = plt.gca()
loss_axes.set_title("Model: Italian Sentences + Side Information")
loss_axes.set_ylabel('MSE')
loss_axes.set_xlabel('Epoch')
loss_axes.set_ylim(bottom=0)  # anchor the y-axis at zero
plt.savefig(
    "Italian Sentences and Side Information E8 B32" + ".jpeg",
    dpi=100,
    bbox_inches='tight',
)

In [None]:
def build_fresh_model():
    """Build and compile the combined text + side-information regressor anew.

    Every call creates new layer objects: the BERT layers are loaded again
    from the TF Hub handles and the LSTM, attention and dense layers get
    newly initialised weights. The returned model therefore shares no
    weights with any model built earlier in the notebook. The architecture
    and compile settings mirror the cells above.

    Returns:
        A compiled ``tf.keras.Model`` whose inputs are
        ``[sentence strings, side-information features]``.
    """
    # Text branch: sentence -> BERT pooled output -> LSTM.
    text_in = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
    tokens = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')(text_in)
    encoded = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')(tokens)
    text_seq = tf.reshape(encoded['pooled_output'], [-1, 768, 1])
    text_seq = tf.keras.layers.LSTM(512, return_sequences=True)(text_seq)

    # Side branch: 14 side-information values -> two stacked LSTMs.
    side_in = tf.keras.layers.Input(shape=(14,))
    side_seq = tf.reshape(side_in, [-1, 1, 14])
    side_seq = tf.keras.layers.LSTM(512, return_sequences=True)(side_seq)
    side_seq = tf.keras.layers.LSTM(512, return_sequences=True)(side_seq)

    # Merge along axis 1, attention-pool, regress one value.
    merged = tf.keras.layers.concatenate([text_seq, side_seq], axis=1)
    _, pooled_context = peel_the_layer()(merged)
    prediction = tf.keras.layers.Dense(1)(pooled_context)

    fresh_model = tf.keras.Model(inputs=[text_in, side_in], outputs=prediction)
    fresh_model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss='mean_squared_error',
        metrics=[
            tf.keras.losses.MeanAbsoluteError(),
            tf.keras.losses.MeanAbsolutePercentageError(),
        ],
    )
    return fresh_model


# Second experiment: up to 20 epochs starting from fresh weights.
# Wrapping the existing `dense` tensor in a new tf.keras.Model would reuse the
# very same layer objects, so training would simply continue from the weights
# left by the 8-epoch run. Rebuilding the network keeps the two runs independent.
# NOTE: the previously trained model stays in memory alongside the new one.
model = build_fresh_model()
history = model.fit(
    x=[bert_train_samples, side_train_samples],
    y=train_labels,
    batch_size=32,
    epochs=20,
    verbose=2,
    callbacks=[callback],
)

In [None]:
# Score the 20-epoch run on the held-out split and store it as a further
# column of the results table.
scores = model.evaluate(
    x=[bert_test_samples, side_test_samples],
    y=test_labels,
)
results["E20 B32"] = scores
results

In [None]:
# Training-loss curve (MSE per epoch) of the most recent fit, saved as a JPEG.
# NOTE(review): unlike the earlier plot, this file name carries no epoch/batch
# suffix - confirm that is intended.
mse = history.history['loss']

plt.plot(mse)
loss_axes = plt.gca()
loss_axes.set_title("Model: Italian Sentences + Side Information")
loss_axes.set_ylabel('MSE')
loss_axes.set_xlabel('Epoch')
loss_axes.set_ylim(bottom=0)  # anchor the y-axis at zero
plt.savefig(
    "Italian Sentences and Side Information" + ".jpeg",
    dpi=100,
    bbox_inches='tight',
)

In [None]:
# Persist the evaluation table (one column per experiment) next to the notebook.
results.to_csv(path_or_buf="Side_information_8_and_20_epochs.csv")